aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorTony Lindgren <tony@atomide.com>2015-11-25 13:56:40 -0500
committerTony Lindgren <tony@atomide.com>2015-11-25 13:56:40 -0500
commit970259bff472579204108c6f27036ec4d1206ae1 (patch)
tree869bdcda9aac4c67712d93e6141056fdf3f41bd8 /net
parent9b1b61cd8e31d9beba871333d7a798b3adb89288 (diff)
parent29f5b34ca1a191c2cf4f6c8c12f4dec56e8d3bc1 (diff)
Merge branch '81xx' into omap-for-v4.4/fixes
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c4
-rw-r--r--net/bluetooth/hci_core.c17
-rw-r--r--net/bluetooth/l2cap_core.c20
-rw-r--r--net/bridge/br_stp.c15
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/ceph/auth_x.c36
-rw-r--r--net/ceph/ceph_common.c18
-rw-r--r--net/ceph/crypto.h4
-rw-r--r--net/ceph/messenger.c88
-rw-r--r--net/ceph/osd_client.c34
-rw-r--r--net/core/dev.c25
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/rtnetlink.c274
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/ipv4/fib_semantics.c13
-rw-r--r--net/ipv4/igmp.c12
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/ip_sockglue.c45
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/raw.c8
-rw-r--r--net/ipv4/sysctl_net_ipv4.c4
-rw-r--r--net/ipv4/tcp.c21
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_ipv4.c16
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv6/addrconf.c1
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/route.c22
-rw-r--r--net/ipv6/tcp_ipv6.c39
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h17
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c14
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c64
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c18
-rw-r--r--net/netfilter/ipset/ip_set_core.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h26
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c16
-rw-r--r--net/netfilter/nf_nat_redirect.c2
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nft_counter.c49
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_meta.c36
-rw-r--r--net/netfilter/xt_TEE.c6
-rw-r--r--net/netfilter/xt_owner.c6
-rw-r--r--net/packet/af_packet.c172
-rw-r--r--net/sched/cls_flow.c15
-rw-r--r--net/sched/em_meta.c138
-rw-r--r--net/sctp/auth.c4
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c13
-rw-r--r--net/sunrpc/cache.c53
-rw-r--r--net/sunrpc/svcsock.c40
-rw-r--r--net/unix/af_unix.c24
-rw-r--r--net/vmw_vsock/vmci_transport.c2
57 files changed, 896 insertions, 596 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
30 skb->pkt_type = PACKET_HOST; 30 skb->pkt_type = PACKET_HOST;
31 } 31 }
32 32
33 if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { 33 if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
34 !netif_is_macvlan_port(vlan_dev) &&
35 !netif_is_bridge_port(vlan_dev)) {
34 unsigned int offset = skb->data - skb_mac_header(skb); 36 unsigned int offset = skb->data - skb_mac_header(skb);
35 37
36 /* 38 /*
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 83a6aacfab31..62edbf1b114e 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -508,12 +508,6 @@ static void le_setup(struct hci_request *req)
508 /* Read LE Supported States */ 508 /* Read LE Supported States */
509 hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); 509 hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
510 510
511 /* Read LE White List Size */
512 hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
513
514 /* Clear LE White List */
515 hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
516
517 /* LE-only controllers have LE implicitly enabled */ 511 /* LE-only controllers have LE implicitly enabled */
518 if (!lmp_bredr_capable(hdev)) 512 if (!lmp_bredr_capable(hdev))
519 hci_dev_set_flag(hdev, HCI_LE_ENABLED); 513 hci_dev_set_flag(hdev, HCI_LE_ENABLED);
@@ -832,6 +826,17 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
832 hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); 826 hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
833 } 827 }
834 828
829 if (hdev->commands[26] & 0x40) {
830 /* Read LE White List Size */
831 hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE,
832 0, NULL);
833 }
834
835 if (hdev->commands[26] & 0x80) {
836 /* Clear LE White List */
837 hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
838 }
839
835 if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { 840 if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
836 /* Read LE Maximum Data Length */ 841 /* Read LE Maximum Data Length */
837 hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); 842 hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7c65ee200c29..66e8b6ee19a5 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -239,7 +239,7 @@ static u16 l2cap_alloc_cid(struct l2cap_conn *conn)
239 else 239 else
240 dyn_end = L2CAP_CID_DYN_END; 240 dyn_end = L2CAP_CID_DYN_END;
241 241
242 for (cid = L2CAP_CID_DYN_START; cid < dyn_end; cid++) { 242 for (cid = L2CAP_CID_DYN_START; cid <= dyn_end; cid++) {
243 if (!__l2cap_get_chan_by_scid(conn, cid)) 243 if (!__l2cap_get_chan_by_scid(conn, cid))
244 return cid; 244 return cid;
245 } 245 }
@@ -5250,7 +5250,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
5250 credits = __le16_to_cpu(rsp->credits); 5250 credits = __le16_to_cpu(rsp->credits);
5251 result = __le16_to_cpu(rsp->result); 5251 result = __le16_to_cpu(rsp->result);
5252 5252
5253 if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23)) 5253 if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 ||
5254 dcid < L2CAP_CID_DYN_START ||
5255 dcid > L2CAP_CID_LE_DYN_END))
5254 return -EPROTO; 5256 return -EPROTO;
5255 5257
5256 BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", 5258 BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x",
@@ -5270,6 +5272,11 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
5270 5272
5271 switch (result) { 5273 switch (result) {
5272 case L2CAP_CR_SUCCESS: 5274 case L2CAP_CR_SUCCESS:
5275 if (__l2cap_get_chan_by_dcid(conn, dcid)) {
5276 err = -EBADSLT;
5277 break;
5278 }
5279
5273 chan->ident = 0; 5280 chan->ident = 0;
5274 chan->dcid = dcid; 5281 chan->dcid = dcid;
5275 chan->omtu = mtu; 5282 chan->omtu = mtu;
@@ -5437,9 +5444,16 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
5437 goto response_unlock; 5444 goto response_unlock;
5438 } 5445 }
5439 5446
5447 /* Check for valid dynamic CID range */
5448 if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) {
5449 result = L2CAP_CR_INVALID_SCID;
5450 chan = NULL;
5451 goto response_unlock;
5452 }
5453
5440 /* Check if we already have channel with that dcid */ 5454 /* Check if we already have channel with that dcid */
5441 if (__l2cap_get_chan_by_dcid(conn, scid)) { 5455 if (__l2cap_get_chan_by_dcid(conn, scid)) {
5442 result = L2CAP_CR_NO_MEM; 5456 result = L2CAP_CR_SCID_IN_USE;
5443 chan = NULL; 5457 chan = NULL;
5444 goto response_unlock; 5458 goto response_unlock;
5445 } 5459 }
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 80c34d70218c..5f3f64553179 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -48,7 +48,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
48 48
49 p->state = state; 49 p->state = state;
50 err = switchdev_port_attr_set(p->dev, &attr); 50 err = switchdev_port_attr_set(p->dev, &attr);
51 if (err) 51 if (err && err != -EOPNOTSUPP)
52 br_warn(p->br, "error setting offload STP state on port %u(%s)\n", 52 br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
53 (unsigned int) p->port_no, p->dev->name); 53 (unsigned int) p->port_no, p->dev->name);
54} 54}
@@ -600,12 +600,17 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
600int br_set_forward_delay(struct net_bridge *br, unsigned long val) 600int br_set_forward_delay(struct net_bridge *br, unsigned long val)
601{ 601{
602 unsigned long t = clock_t_to_jiffies(val); 602 unsigned long t = clock_t_to_jiffies(val);
603 603 int err = -ERANGE;
604 if (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)
605 return -ERANGE;
606 604
607 spin_lock_bh(&br->lock); 605 spin_lock_bh(&br->lock);
606 if (br->stp_enabled != BR_NO_STP &&
607 (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
608 goto unlock;
609
608 __br_set_forward_delay(br, t); 610 __br_set_forward_delay(br, t);
611 err = 0;
612
613unlock:
609 spin_unlock_bh(&br->lock); 614 spin_unlock_bh(&br->lock);
610 return 0; 615 return err;
611} 616}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index fa53d7a89f48..5396ff08af32 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -50,7 +50,7 @@ void br_init_port(struct net_bridge_port *p)
50 p->config_pending = 0; 50 p->config_pending = 0;
51 51
52 err = switchdev_port_attr_set(p->dev, &attr); 52 err = switchdev_port_attr_set(p->dev, &attr);
53 if (err) 53 if (err && err != -EOPNOTSUPP)
54 netdev_err(p->dev, "failed to set HW ageing time\n"); 54 netdev_err(p->dev, "failed to set HW ageing time\n");
55} 55}
56 56
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index ba6eb17226da..10d87753ed87 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/ceph/decode.h> 9#include <linux/ceph/decode.h>
10#include <linux/ceph/auth.h> 10#include <linux/ceph/auth.h>
11#include <linux/ceph/libceph.h>
11#include <linux/ceph/messenger.h> 12#include <linux/ceph/messenger.h>
12 13
13#include "crypto.h" 14#include "crypto.h"
@@ -279,6 +280,15 @@ bad:
279 return -EINVAL; 280 return -EINVAL;
280} 281}
281 282
283static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
284{
285 ceph_crypto_key_destroy(&au->session_key);
286 if (au->buf) {
287 ceph_buffer_put(au->buf);
288 au->buf = NULL;
289 }
290}
291
282static int ceph_x_build_authorizer(struct ceph_auth_client *ac, 292static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
283 struct ceph_x_ticket_handler *th, 293 struct ceph_x_ticket_handler *th,
284 struct ceph_x_authorizer *au) 294 struct ceph_x_authorizer *au)
@@ -297,7 +307,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
297 ceph_crypto_key_destroy(&au->session_key); 307 ceph_crypto_key_destroy(&au->session_key);
298 ret = ceph_crypto_key_clone(&au->session_key, &th->session_key); 308 ret = ceph_crypto_key_clone(&au->session_key, &th->session_key);
299 if (ret) 309 if (ret)
300 return ret; 310 goto out_au;
301 311
302 maxlen = sizeof(*msg_a) + sizeof(msg_b) + 312 maxlen = sizeof(*msg_a) + sizeof(msg_b) +
303 ceph_x_encrypt_buflen(ticket_blob_len); 313 ceph_x_encrypt_buflen(ticket_blob_len);
@@ -309,8 +319,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
309 if (!au->buf) { 319 if (!au->buf) {
310 au->buf = ceph_buffer_new(maxlen, GFP_NOFS); 320 au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
311 if (!au->buf) { 321 if (!au->buf) {
312 ceph_crypto_key_destroy(&au->session_key); 322 ret = -ENOMEM;
313 return -ENOMEM; 323 goto out_au;
314 } 324 }
315 } 325 }
316 au->service = th->service; 326 au->service = th->service;
@@ -340,7 +350,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
340 ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b), 350 ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
341 p, end - p); 351 p, end - p);
342 if (ret < 0) 352 if (ret < 0)
343 goto out_buf; 353 goto out_au;
344 p += ret; 354 p += ret;
345 au->buf->vec.iov_len = p - au->buf->vec.iov_base; 355 au->buf->vec.iov_len = p - au->buf->vec.iov_base;
346 dout(" built authorizer nonce %llx len %d\n", au->nonce, 356 dout(" built authorizer nonce %llx len %d\n", au->nonce,
@@ -348,9 +358,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
348 BUG_ON(au->buf->vec.iov_len > maxlen); 358 BUG_ON(au->buf->vec.iov_len > maxlen);
349 return 0; 359 return 0;
350 360
351out_buf: 361out_au:
352 ceph_buffer_put(au->buf); 362 ceph_x_authorizer_cleanup(au);
353 au->buf = NULL;
354 return ret; 363 return ret;
355} 364}
356 365
@@ -624,8 +633,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
624{ 633{
625 struct ceph_x_authorizer *au = (void *)a; 634 struct ceph_x_authorizer *au = (void *)a;
626 635
627 ceph_crypto_key_destroy(&au->session_key); 636 ceph_x_authorizer_cleanup(au);
628 ceph_buffer_put(au->buf);
629 kfree(au); 637 kfree(au);
630} 638}
631 639
@@ -653,8 +661,7 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
653 remove_ticket_handler(ac, th); 661 remove_ticket_handler(ac, th);
654 } 662 }
655 663
656 if (xi->auth_authorizer.buf) 664 ceph_x_authorizer_cleanup(&xi->auth_authorizer);
657 ceph_buffer_put(xi->auth_authorizer.buf);
658 665
659 kfree(ac->private); 666 kfree(ac->private);
660 ac->private = NULL; 667 ac->private = NULL;
@@ -691,8 +698,10 @@ static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
691 struct ceph_msg *msg) 698 struct ceph_msg *msg)
692{ 699{
693 int ret; 700 int ret;
694 if (!auth->authorizer) 701
702 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
695 return 0; 703 return 0;
704
696 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, 705 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
697 msg, &msg->footer.sig); 706 msg, &msg->footer.sig);
698 if (ret < 0) 707 if (ret < 0)
@@ -707,8 +716,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
707 __le64 sig_check; 716 __le64 sig_check;
708 int ret; 717 int ret;
709 718
710 if (!auth->authorizer) 719 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
711 return 0; 720 return 0;
721
712 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, 722 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
713 msg, &sig_check); 723 msg, &sig_check);
714 if (ret < 0) 724 if (ret < 0)
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 78f098a20796..bcbec33c6a14 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -245,6 +245,8 @@ enum {
245 Opt_nocrc, 245 Opt_nocrc,
246 Opt_cephx_require_signatures, 246 Opt_cephx_require_signatures,
247 Opt_nocephx_require_signatures, 247 Opt_nocephx_require_signatures,
248 Opt_cephx_sign_messages,
249 Opt_nocephx_sign_messages,
248 Opt_tcp_nodelay, 250 Opt_tcp_nodelay,
249 Opt_notcp_nodelay, 251 Opt_notcp_nodelay,
250}; 252};
@@ -267,6 +269,8 @@ static match_table_t opt_tokens = {
267 {Opt_nocrc, "nocrc"}, 269 {Opt_nocrc, "nocrc"},
268 {Opt_cephx_require_signatures, "cephx_require_signatures"}, 270 {Opt_cephx_require_signatures, "cephx_require_signatures"},
269 {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, 271 {Opt_nocephx_require_signatures, "nocephx_require_signatures"},
272 {Opt_cephx_sign_messages, "cephx_sign_messages"},
273 {Opt_nocephx_sign_messages, "nocephx_sign_messages"},
270 {Opt_tcp_nodelay, "tcp_nodelay"}, 274 {Opt_tcp_nodelay, "tcp_nodelay"},
271 {Opt_notcp_nodelay, "notcp_nodelay"}, 275 {Opt_notcp_nodelay, "notcp_nodelay"},
272 {-1, NULL} 276 {-1, NULL}
@@ -491,6 +495,12 @@ ceph_parse_options(char *options, const char *dev_name,
491 case Opt_nocephx_require_signatures: 495 case Opt_nocephx_require_signatures:
492 opt->flags |= CEPH_OPT_NOMSGAUTH; 496 opt->flags |= CEPH_OPT_NOMSGAUTH;
493 break; 497 break;
498 case Opt_cephx_sign_messages:
499 opt->flags &= ~CEPH_OPT_NOMSGSIGN;
500 break;
501 case Opt_nocephx_sign_messages:
502 opt->flags |= CEPH_OPT_NOMSGSIGN;
503 break;
494 504
495 case Opt_tcp_nodelay: 505 case Opt_tcp_nodelay:
496 opt->flags |= CEPH_OPT_TCP_NODELAY; 506 opt->flags |= CEPH_OPT_TCP_NODELAY;
@@ -534,6 +544,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
534 seq_puts(m, "nocrc,"); 544 seq_puts(m, "nocrc,");
535 if (opt->flags & CEPH_OPT_NOMSGAUTH) 545 if (opt->flags & CEPH_OPT_NOMSGAUTH)
536 seq_puts(m, "nocephx_require_signatures,"); 546 seq_puts(m, "nocephx_require_signatures,");
547 if (opt->flags & CEPH_OPT_NOMSGSIGN)
548 seq_puts(m, "nocephx_sign_messages,");
537 if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) 549 if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
538 seq_puts(m, "notcp_nodelay,"); 550 seq_puts(m, "notcp_nodelay,");
539 551
@@ -596,11 +608,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
596 if (ceph_test_opt(client, MYIP)) 608 if (ceph_test_opt(client, MYIP))
597 myaddr = &client->options->my_addr; 609 myaddr = &client->options->my_addr;
598 610
599 ceph_messenger_init(&client->msgr, myaddr, 611 ceph_messenger_init(&client->msgr, myaddr);
600 client->supported_features,
601 client->required_features,
602 ceph_test_opt(client, NOCRC),
603 ceph_test_opt(client, TCP_NODELAY));
604 612
605 /* subsystems */ 613 /* subsystems */
606 err = ceph_monc_init(&client->monc, client); 614 err = ceph_monc_init(&client->monc, client);
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index d1498224c49d..2e9cab09f37b 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -16,8 +16,10 @@ struct ceph_crypto_key {
16 16
17static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) 17static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
18{ 18{
19 if (key) 19 if (key) {
20 kfree(key->key); 20 kfree(key->key);
21 key->key = NULL;
22 }
21} 23}
22 24
23int ceph_crypto_key_clone(struct ceph_crypto_key *dst, 25int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b9b0e3b5da49..9981039ef4ff 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -509,7 +509,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
509 return ret; 509 return ret;
510 } 510 }
511 511
512 if (con->msgr->tcp_nodelay) { 512 if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
513 int optval = 1; 513 int optval = 1;
514 514
515 ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, 515 ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
@@ -637,9 +637,6 @@ static int con_close_socket(struct ceph_connection *con)
637static void ceph_msg_remove(struct ceph_msg *msg) 637static void ceph_msg_remove(struct ceph_msg *msg)
638{ 638{
639 list_del_init(&msg->list_head); 639 list_del_init(&msg->list_head);
640 BUG_ON(msg->con == NULL);
641 msg->con->ops->put(msg->con);
642 msg->con = NULL;
643 640
644 ceph_msg_put(msg); 641 ceph_msg_put(msg);
645} 642}
@@ -662,15 +659,14 @@ static void reset_connection(struct ceph_connection *con)
662 659
663 if (con->in_msg) { 660 if (con->in_msg) {
664 BUG_ON(con->in_msg->con != con); 661 BUG_ON(con->in_msg->con != con);
665 con->in_msg->con = NULL;
666 ceph_msg_put(con->in_msg); 662 ceph_msg_put(con->in_msg);
667 con->in_msg = NULL; 663 con->in_msg = NULL;
668 con->ops->put(con);
669 } 664 }
670 665
671 con->connect_seq = 0; 666 con->connect_seq = 0;
672 con->out_seq = 0; 667 con->out_seq = 0;
673 if (con->out_msg) { 668 if (con->out_msg) {
669 BUG_ON(con->out_msg->con != con);
674 ceph_msg_put(con->out_msg); 670 ceph_msg_put(con->out_msg);
675 con->out_msg = NULL; 671 con->out_msg = NULL;
676 } 672 }
@@ -1205,7 +1201,7 @@ static void prepare_write_message_footer(struct ceph_connection *con)
1205 con->out_kvec[v].iov_base = &m->footer; 1201 con->out_kvec[v].iov_base = &m->footer;
1206 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { 1202 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
1207 if (con->ops->sign_message) 1203 if (con->ops->sign_message)
1208 con->ops->sign_message(con, m); 1204 con->ops->sign_message(m);
1209 else 1205 else
1210 m->footer.sig = 0; 1206 m->footer.sig = 0;
1211 con->out_kvec[v].iov_len = sizeof(m->footer); 1207 con->out_kvec[v].iov_len = sizeof(m->footer);
@@ -1432,7 +1428,8 @@ static int prepare_write_connect(struct ceph_connection *con)
1432 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, 1428 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
1433 con->connect_seq, global_seq, proto); 1429 con->connect_seq, global_seq, proto);
1434 1430
1435 con->out_connect.features = cpu_to_le64(con->msgr->supported_features); 1431 con->out_connect.features =
1432 cpu_to_le64(from_msgr(con->msgr)->supported_features);
1436 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); 1433 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
1437 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); 1434 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
1438 con->out_connect.global_seq = cpu_to_le32(global_seq); 1435 con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1527,7 +1524,7 @@ static int write_partial_message_data(struct ceph_connection *con)
1527{ 1524{
1528 struct ceph_msg *msg = con->out_msg; 1525 struct ceph_msg *msg = con->out_msg;
1529 struct ceph_msg_data_cursor *cursor = &msg->cursor; 1526 struct ceph_msg_data_cursor *cursor = &msg->cursor;
1530 bool do_datacrc = !con->msgr->nocrc; 1527 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
1531 u32 crc; 1528 u32 crc;
1532 1529
1533 dout("%s %p msg %p\n", __func__, con, msg); 1530 dout("%s %p msg %p\n", __func__, con, msg);
@@ -1552,8 +1549,8 @@ static int write_partial_message_data(struct ceph_connection *con)
1552 bool need_crc; 1549 bool need_crc;
1553 int ret; 1550 int ret;
1554 1551
1555 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, 1552 page = ceph_msg_data_next(cursor, &page_offset, &length,
1556 &last_piece); 1553 &last_piece);
1557 ret = ceph_tcp_sendpage(con->sock, page, page_offset, 1554 ret = ceph_tcp_sendpage(con->sock, page, page_offset,
1558 length, !last_piece); 1555 length, !last_piece);
1559 if (ret <= 0) { 1556 if (ret <= 0) {
@@ -1564,7 +1561,7 @@ static int write_partial_message_data(struct ceph_connection *con)
1564 } 1561 }
1565 if (do_datacrc && cursor->need_crc) 1562 if (do_datacrc && cursor->need_crc)
1566 crc = ceph_crc32c_page(crc, page, page_offset, length); 1563 crc = ceph_crc32c_page(crc, page, page_offset, length);
1567 need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret); 1564 need_crc = ceph_msg_data_advance(cursor, (size_t)ret);
1568 } 1565 }
1569 1566
1570 dout("%s %p msg %p done\n", __func__, con, msg); 1567 dout("%s %p msg %p done\n", __func__, con, msg);
@@ -2005,8 +2002,8 @@ static int process_banner(struct ceph_connection *con)
2005 2002
2006static int process_connect(struct ceph_connection *con) 2003static int process_connect(struct ceph_connection *con)
2007{ 2004{
2008 u64 sup_feat = con->msgr->supported_features; 2005 u64 sup_feat = from_msgr(con->msgr)->supported_features;
2009 u64 req_feat = con->msgr->required_features; 2006 u64 req_feat = from_msgr(con->msgr)->required_features;
2010 u64 server_feat = ceph_sanitize_features( 2007 u64 server_feat = ceph_sanitize_features(
2011 le64_to_cpu(con->in_reply.features)); 2008 le64_to_cpu(con->in_reply.features));
2012 int ret; 2009 int ret;
@@ -2232,7 +2229,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
2232{ 2229{
2233 struct ceph_msg *msg = con->in_msg; 2230 struct ceph_msg *msg = con->in_msg;
2234 struct ceph_msg_data_cursor *cursor = &msg->cursor; 2231 struct ceph_msg_data_cursor *cursor = &msg->cursor;
2235 const bool do_datacrc = !con->msgr->nocrc; 2232 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
2236 struct page *page; 2233 struct page *page;
2237 size_t page_offset; 2234 size_t page_offset;
2238 size_t length; 2235 size_t length;
@@ -2246,8 +2243,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
2246 if (do_datacrc) 2243 if (do_datacrc)
2247 crc = con->in_data_crc; 2244 crc = con->in_data_crc;
2248 while (cursor->resid) { 2245 while (cursor->resid) {
2249 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, 2246 page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
2250 NULL);
2251 ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); 2247 ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
2252 if (ret <= 0) { 2248 if (ret <= 0) {
2253 if (do_datacrc) 2249 if (do_datacrc)
@@ -2258,7 +2254,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
2258 2254
2259 if (do_datacrc) 2255 if (do_datacrc)
2260 crc = ceph_crc32c_page(crc, page, page_offset, ret); 2256 crc = ceph_crc32c_page(crc, page, page_offset, ret);
2261 (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret); 2257 (void) ceph_msg_data_advance(cursor, (size_t)ret);
2262 } 2258 }
2263 if (do_datacrc) 2259 if (do_datacrc)
2264 con->in_data_crc = crc; 2260 con->in_data_crc = crc;
@@ -2278,7 +2274,7 @@ static int read_partial_message(struct ceph_connection *con)
2278 int end; 2274 int end;
2279 int ret; 2275 int ret;
2280 unsigned int front_len, middle_len, data_len; 2276 unsigned int front_len, middle_len, data_len;
2281 bool do_datacrc = !con->msgr->nocrc; 2277 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
2282 bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); 2278 bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
2283 u64 seq; 2279 u64 seq;
2284 u32 crc; 2280 u32 crc;
@@ -2423,7 +2419,7 @@ static int read_partial_message(struct ceph_connection *con)
2423 } 2419 }
2424 2420
2425 if (need_sign && con->ops->check_message_signature && 2421 if (need_sign && con->ops->check_message_signature &&
2426 con->ops->check_message_signature(con, m)) { 2422 con->ops->check_message_signature(m)) {
2427 pr_err("read_partial_message %p signature check failed\n", m); 2423 pr_err("read_partial_message %p signature check failed\n", m);
2428 return -EBADMSG; 2424 return -EBADMSG;
2429 } 2425 }
@@ -2438,13 +2434,10 @@ static int read_partial_message(struct ceph_connection *con)
2438 */ 2434 */
2439static void process_message(struct ceph_connection *con) 2435static void process_message(struct ceph_connection *con)
2440{ 2436{
2441 struct ceph_msg *msg; 2437 struct ceph_msg *msg = con->in_msg;
2442 2438
2443 BUG_ON(con->in_msg->con != con); 2439 BUG_ON(con->in_msg->con != con);
2444 con->in_msg->con = NULL;
2445 msg = con->in_msg;
2446 con->in_msg = NULL; 2440 con->in_msg = NULL;
2447 con->ops->put(con);
2448 2441
2449 /* if first message, set peer_name */ 2442 /* if first message, set peer_name */
2450 if (con->peer_name.type == 0) 2443 if (con->peer_name.type == 0)
@@ -2677,7 +2670,7 @@ more:
2677 if (ret <= 0) { 2670 if (ret <= 0) {
2678 switch (ret) { 2671 switch (ret) {
2679 case -EBADMSG: 2672 case -EBADMSG:
2680 con->error_msg = "bad crc"; 2673 con->error_msg = "bad crc/signature";
2681 /* fall through */ 2674 /* fall through */
2682 case -EBADE: 2675 case -EBADE:
2683 ret = -EIO; 2676 ret = -EIO;
@@ -2918,10 +2911,8 @@ static void con_fault(struct ceph_connection *con)
2918 2911
2919 if (con->in_msg) { 2912 if (con->in_msg) {
2920 BUG_ON(con->in_msg->con != con); 2913 BUG_ON(con->in_msg->con != con);
2921 con->in_msg->con = NULL;
2922 ceph_msg_put(con->in_msg); 2914 ceph_msg_put(con->in_msg);
2923 con->in_msg = NULL; 2915 con->in_msg = NULL;
2924 con->ops->put(con);
2925 } 2916 }
2926 2917
2927 /* Requeue anything that hasn't been acked */ 2918 /* Requeue anything that hasn't been acked */
@@ -2952,15 +2943,8 @@ static void con_fault(struct ceph_connection *con)
2952 * initialize a new messenger instance 2943 * initialize a new messenger instance
2953 */ 2944 */
2954void ceph_messenger_init(struct ceph_messenger *msgr, 2945void ceph_messenger_init(struct ceph_messenger *msgr,
2955 struct ceph_entity_addr *myaddr, 2946 struct ceph_entity_addr *myaddr)
2956 u64 supported_features,
2957 u64 required_features,
2958 bool nocrc,
2959 bool tcp_nodelay)
2960{ 2947{
2961 msgr->supported_features = supported_features;
2962 msgr->required_features = required_features;
2963
2964 spin_lock_init(&msgr->global_seq_lock); 2948 spin_lock_init(&msgr->global_seq_lock);
2965 2949
2966 if (myaddr) 2950 if (myaddr)
@@ -2970,8 +2954,6 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
2970 msgr->inst.addr.type = 0; 2954 msgr->inst.addr.type = 0;
2971 get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); 2955 get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
2972 encode_my_addr(msgr); 2956 encode_my_addr(msgr);
2973 msgr->nocrc = nocrc;
2974 msgr->tcp_nodelay = tcp_nodelay;
2975 2957
2976 atomic_set(&msgr->stopping, 0); 2958 atomic_set(&msgr->stopping, 0);
2977 write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); 2959 write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
@@ -2986,6 +2968,15 @@ void ceph_messenger_fini(struct ceph_messenger *msgr)
2986} 2968}
2987EXPORT_SYMBOL(ceph_messenger_fini); 2969EXPORT_SYMBOL(ceph_messenger_fini);
2988 2970
2971static void msg_con_set(struct ceph_msg *msg, struct ceph_connection *con)
2972{
2973 if (msg->con)
2974 msg->con->ops->put(msg->con);
2975
2976 msg->con = con ? con->ops->get(con) : NULL;
2977 BUG_ON(msg->con != con);
2978}
2979
2989static void clear_standby(struct ceph_connection *con) 2980static void clear_standby(struct ceph_connection *con)
2990{ 2981{
2991 /* come back from STANDBY? */ 2982 /* come back from STANDBY? */
@@ -3017,9 +3008,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
3017 return; 3008 return;
3018 } 3009 }
3019 3010
3020 BUG_ON(msg->con != NULL); 3011 msg_con_set(msg, con);
3021 msg->con = con->ops->get(con);
3022 BUG_ON(msg->con == NULL);
3023 3012
3024 BUG_ON(!list_empty(&msg->list_head)); 3013 BUG_ON(!list_empty(&msg->list_head));
3025 list_add_tail(&msg->list_head, &con->out_queue); 3014 list_add_tail(&msg->list_head, &con->out_queue);
@@ -3047,16 +3036,15 @@ void ceph_msg_revoke(struct ceph_msg *msg)
3047{ 3036{
3048 struct ceph_connection *con = msg->con; 3037 struct ceph_connection *con = msg->con;
3049 3038
3050 if (!con) 3039 if (!con) {
3040 dout("%s msg %p null con\n", __func__, msg);
3051 return; /* Message not in our possession */ 3041 return; /* Message not in our possession */
3042 }
3052 3043
3053 mutex_lock(&con->mutex); 3044 mutex_lock(&con->mutex);
3054 if (!list_empty(&msg->list_head)) { 3045 if (!list_empty(&msg->list_head)) {
3055 dout("%s %p msg %p - was on queue\n", __func__, con, msg); 3046 dout("%s %p msg %p - was on queue\n", __func__, con, msg);
3056 list_del_init(&msg->list_head); 3047 list_del_init(&msg->list_head);
3057 BUG_ON(msg->con == NULL);
3058 msg->con->ops->put(msg->con);
3059 msg->con = NULL;
3060 msg->hdr.seq = 0; 3048 msg->hdr.seq = 0;
3061 3049
3062 ceph_msg_put(msg); 3050 ceph_msg_put(msg);
@@ -3080,16 +3068,13 @@ void ceph_msg_revoke(struct ceph_msg *msg)
3080 */ 3068 */
3081void ceph_msg_revoke_incoming(struct ceph_msg *msg) 3069void ceph_msg_revoke_incoming(struct ceph_msg *msg)
3082{ 3070{
3083 struct ceph_connection *con; 3071 struct ceph_connection *con = msg->con;
3084 3072
3085 BUG_ON(msg == NULL); 3073 if (!con) {
3086 if (!msg->con) {
3087 dout("%s msg %p null con\n", __func__, msg); 3074 dout("%s msg %p null con\n", __func__, msg);
3088
3089 return; /* Message not in our possession */ 3075 return; /* Message not in our possession */
3090 } 3076 }
3091 3077
3092 con = msg->con;
3093 mutex_lock(&con->mutex); 3078 mutex_lock(&con->mutex);
3094 if (con->in_msg == msg) { 3079 if (con->in_msg == msg) {
3095 unsigned int front_len = le32_to_cpu(con->in_hdr.front_len); 3080 unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
@@ -3335,9 +3320,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
3335 } 3320 }
3336 if (msg) { 3321 if (msg) {
3337 BUG_ON(*skip); 3322 BUG_ON(*skip);
3323 msg_con_set(msg, con);
3338 con->in_msg = msg; 3324 con->in_msg = msg;
3339 con->in_msg->con = con->ops->get(con);
3340 BUG_ON(con->in_msg->con == NULL);
3341 } else { 3325 } else {
3342 /* 3326 /*
3343 * Null message pointer means either we should skip 3327 * Null message pointer means either we should skip
@@ -3384,6 +3368,8 @@ static void ceph_msg_release(struct kref *kref)
3384 dout("%s %p\n", __func__, m); 3368 dout("%s %p\n", __func__, m);
3385 WARN_ON(!list_empty(&m->list_head)); 3369 WARN_ON(!list_empty(&m->list_head));
3386 3370
3371 msg_con_set(m, NULL);
3372
3387 /* drop middle, data, if any */ 3373 /* drop middle, data, if any */
3388 if (m->middle) { 3374 if (m->middle) {
3389 ceph_buffer_put(m->middle); 3375 ceph_buffer_put(m->middle);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f79ccac6699f..f8f235930d88 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -120,11 +120,13 @@ static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
120} 120}
121#endif /* CONFIG_BLOCK */ 121#endif /* CONFIG_BLOCK */
122 122
123#define osd_req_op_data(oreq, whch, typ, fld) \ 123#define osd_req_op_data(oreq, whch, typ, fld) \
124 ({ \ 124({ \
125 BUG_ON(whch >= (oreq)->r_num_ops); \ 125 struct ceph_osd_request *__oreq = (oreq); \
126 &(oreq)->r_ops[whch].typ.fld; \ 126 unsigned int __whch = (whch); \
127 }) 127 BUG_ON(__whch >= __oreq->r_num_ops); \
128 &__oreq->r_ops[__whch].typ.fld; \
129})
128 130
129static struct ceph_osd_data * 131static struct ceph_osd_data *
130osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) 132osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
@@ -1750,8 +1752,7 @@ static void complete_request(struct ceph_osd_request *req)
1750 * handle osd op reply. either call the callback if it is specified, 1752 * handle osd op reply. either call the callback if it is specified,
1751 * or do the completion to wake up the waiting thread. 1753 * or do the completion to wake up the waiting thread.
1752 */ 1754 */
1753static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, 1755static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1754 struct ceph_connection *con)
1755{ 1756{
1756 void *p, *end; 1757 void *p, *end;
1757 struct ceph_osd_request *req; 1758 struct ceph_osd_request *req;
@@ -2807,7 +2808,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
2807 ceph_osdc_handle_map(osdc, msg); 2808 ceph_osdc_handle_map(osdc, msg);
2808 break; 2809 break;
2809 case CEPH_MSG_OSD_OPREPLY: 2810 case CEPH_MSG_OSD_OPREPLY:
2810 handle_reply(osdc, msg, con); 2811 handle_reply(osdc, msg);
2811 break; 2812 break;
2812 case CEPH_MSG_WATCH_NOTIFY: 2813 case CEPH_MSG_WATCH_NOTIFY:
2813 handle_watch_notify(osdc, msg); 2814 handle_watch_notify(osdc, msg);
@@ -2849,9 +2850,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2849 goto out; 2850 goto out;
2850 } 2851 }
2851 2852
2852 if (req->r_reply->con)
2853 dout("%s revoking msg %p from old con %p\n", __func__,
2854 req->r_reply, req->r_reply->con);
2855 ceph_msg_revoke_incoming(req->r_reply); 2853 ceph_msg_revoke_incoming(req->r_reply);
2856 2854
2857 if (front_len > req->r_reply->front_alloc_len) { 2855 if (front_len > req->r_reply->front_alloc_len) {
@@ -2978,17 +2976,19 @@ static int invalidate_authorizer(struct ceph_connection *con)
2978 return ceph_monc_validate_auth(&osdc->client->monc); 2976 return ceph_monc_validate_auth(&osdc->client->monc);
2979} 2977}
2980 2978
2981static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) 2979static int osd_sign_message(struct ceph_msg *msg)
2982{ 2980{
2983 struct ceph_osd *o = con->private; 2981 struct ceph_osd *o = msg->con->private;
2984 struct ceph_auth_handshake *auth = &o->o_auth; 2982 struct ceph_auth_handshake *auth = &o->o_auth;
2983
2985 return ceph_auth_sign_message(auth, msg); 2984 return ceph_auth_sign_message(auth, msg);
2986} 2985}
2987 2986
2988static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) 2987static int osd_check_message_signature(struct ceph_msg *msg)
2989{ 2988{
2990 struct ceph_osd *o = con->private; 2989 struct ceph_osd *o = msg->con->private;
2991 struct ceph_auth_handshake *auth = &o->o_auth; 2990 struct ceph_auth_handshake *auth = &o->o_auth;
2991
2992 return ceph_auth_check_message_signature(auth, msg); 2992 return ceph_auth_check_message_signature(auth, msg);
2993} 2993}
2994 2994
@@ -3000,7 +3000,7 @@ static const struct ceph_connection_operations osd_con_ops = {
3000 .verify_authorizer_reply = verify_authorizer_reply, 3000 .verify_authorizer_reply = verify_authorizer_reply,
3001 .invalidate_authorizer = invalidate_authorizer, 3001 .invalidate_authorizer = invalidate_authorizer,
3002 .alloc_msg = alloc_msg, 3002 .alloc_msg = alloc_msg,
3003 .sign_message = sign_message, 3003 .sign_message = osd_sign_message,
3004 .check_message_signature = check_message_signature, 3004 .check_message_signature = osd_check_message_signature,
3005 .fault = osd_reset, 3005 .fault = osd_reset,
3006}; 3006};
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ce3f74cd6b9..ae00b894e675 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2403,17 +2403,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
2403{ 2403{
2404 static const netdev_features_t null_features = 0; 2404 static const netdev_features_t null_features = 0;
2405 struct net_device *dev = skb->dev; 2405 struct net_device *dev = skb->dev;
2406 const char *driver = ""; 2406 const char *name = "";
2407 2407
2408 if (!net_ratelimit()) 2408 if (!net_ratelimit())
2409 return; 2409 return;
2410 2410
2411 if (dev && dev->dev.parent) 2411 if (dev) {
2412 driver = dev_driver_string(dev->dev.parent); 2412 if (dev->dev.parent)
2413 2413 name = dev_driver_string(dev->dev.parent);
2414 else
2415 name = netdev_name(dev);
2416 }
2414 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " 2417 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2415 "gso_type=%d ip_summed=%d\n", 2418 "gso_type=%d ip_summed=%d\n",
2416 driver, dev ? &dev->features : &null_features, 2419 name, dev ? &dev->features : &null_features,
2417 skb->sk ? &skb->sk->sk_route_caps : &null_features, 2420 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2418 skb->len, skb->data_len, skb_shinfo(skb)->gso_size, 2421 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2419 skb_shinfo(skb)->gso_type, skb->ip_summed); 2422 skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -6402,7 +6405,7 @@ int __netdev_update_features(struct net_device *dev)
6402 struct net_device *upper, *lower; 6405 struct net_device *upper, *lower;
6403 netdev_features_t features; 6406 netdev_features_t features;
6404 struct list_head *iter; 6407 struct list_head *iter;
6405 int err = 0; 6408 int err = -1;
6406 6409
6407 ASSERT_RTNL(); 6410 ASSERT_RTNL();
6408 6411
@@ -6419,21 +6422,27 @@ int __netdev_update_features(struct net_device *dev)
6419 features = netdev_sync_upper_features(dev, upper, features); 6422 features = netdev_sync_upper_features(dev, upper, features);
6420 6423
6421 if (dev->features == features) 6424 if (dev->features == features)
6422 return 0; 6425 goto sync_lower;
6423 6426
6424 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", 6427 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
6425 &dev->features, &features); 6428 &dev->features, &features);
6426 6429
6427 if (dev->netdev_ops->ndo_set_features) 6430 if (dev->netdev_ops->ndo_set_features)
6428 err = dev->netdev_ops->ndo_set_features(dev, features); 6431 err = dev->netdev_ops->ndo_set_features(dev, features);
6432 else
6433 err = 0;
6429 6434
6430 if (unlikely(err < 0)) { 6435 if (unlikely(err < 0)) {
6431 netdev_err(dev, 6436 netdev_err(dev,
6432 "set_features() failed (%d); wanted %pNF, left %pNF\n", 6437 "set_features() failed (%d); wanted %pNF, left %pNF\n",
6433 err, &features, &dev->features); 6438 err, &features, &dev->features);
6439 /* return non-0 since some features might have changed and
6440 * it's better to fire a spurious notification than miss it
6441 */
6434 return -1; 6442 return -1;
6435 } 6443 }
6436 6444
6445sync_lower:
6437 /* some features must be disabled on lower devices when disabled 6446 /* some features must be disabled on lower devices when disabled
6438 * on an upper device (think: bonding master or bridge) 6447 * on an upper device (think: bonding master or bridge)
6439 */ 6448 */
@@ -6443,7 +6452,7 @@ int __netdev_update_features(struct net_device *dev)
6443 if (!err) 6452 if (!err)
6444 dev->features = features; 6453 dev->features = features;
6445 6454
6446 return 1; 6455 return err < 0 ? 0 : 1;
6447} 6456}
6448 6457
6449/** 6458/**
diff --git a/net/core/dst.c b/net/core/dst.c
index 2a1818065e12..e6dc77252fe9 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -306,7 +306,7 @@ void dst_release(struct dst_entry *dst)
306 if (unlikely(newrefcnt < 0)) 306 if (unlikely(newrefcnt < 0))
307 net_warn_ratelimited("%s: dst:%p refcnt:%d\n", 307 net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
308 __func__, dst, newrefcnt); 308 __func__, dst, newrefcnt);
309 if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) 309 if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
310 call_rcu(&dst->rcu_head, dst_destroy_rcu); 310 call_rcu(&dst->rcu_head, dst_destroy_rcu);
311 } 311 }
312} 312}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1aa8437ed6c4..e6af42da28d9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh)
857 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); 857 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
858 /* keep skb alive even if arp_queue overflows */ 858 /* keep skb alive even if arp_queue overflows */
859 if (skb) 859 if (skb)
860 skb = skb_copy(skb, GFP_ATOMIC); 860 skb = skb_clone(skb, GFP_ATOMIC);
861 write_unlock(&neigh->lock); 861 write_unlock(&neigh->lock);
862 neigh->ops->solicit(neigh, skb); 862 neigh->ops->solicit(neigh, skb);
863 atomic_inc(&neigh->probes); 863 atomic_inc(&neigh->probes);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 504bd17b7456..34ba7a08876d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1045,15 +1045,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
1045 return 0; 1045 return 0;
1046} 1046}
1047 1047
1048static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
1049 struct net_device *dev)
1050{
1051 const struct rtnl_link_stats64 *stats;
1052 struct rtnl_link_stats64 temp;
1053 struct nlattr *attr;
1054
1055 stats = dev_get_stats(dev, &temp);
1056
1057 attr = nla_reserve(skb, IFLA_STATS,
1058 sizeof(struct rtnl_link_stats));
1059 if (!attr)
1060 return -EMSGSIZE;
1061
1062 copy_rtnl_link_stats(nla_data(attr), stats);
1063
1064 attr = nla_reserve(skb, IFLA_STATS64,
1065 sizeof(struct rtnl_link_stats64));
1066 if (!attr)
1067 return -EMSGSIZE;
1068
1069 copy_rtnl_link_stats64(nla_data(attr), stats);
1070
1071 return 0;
1072}
1073
1074static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
1075 struct net_device *dev,
1076 int vfs_num,
1077 struct nlattr *vfinfo)
1078{
1079 struct ifla_vf_rss_query_en vf_rss_query_en;
1080 struct ifla_vf_link_state vf_linkstate;
1081 struct ifla_vf_spoofchk vf_spoofchk;
1082 struct ifla_vf_tx_rate vf_tx_rate;
1083 struct ifla_vf_stats vf_stats;
1084 struct ifla_vf_trust vf_trust;
1085 struct ifla_vf_vlan vf_vlan;
1086 struct ifla_vf_rate vf_rate;
1087 struct nlattr *vf, *vfstats;
1088 struct ifla_vf_mac vf_mac;
1089 struct ifla_vf_info ivi;
1090
1091 /* Not all SR-IOV capable drivers support the
1092 * spoofcheck and "RSS query enable" query. Preset to
1093 * -1 so the user space tool can detect that the driver
1094 * didn't report anything.
1095 */
1096 ivi.spoofchk = -1;
1097 ivi.rss_query_en = -1;
1098 ivi.trusted = -1;
1099 memset(ivi.mac, 0, sizeof(ivi.mac));
1100 /* The default value for VF link state is "auto"
1101 * IFLA_VF_LINK_STATE_AUTO which equals zero
1102 */
1103 ivi.linkstate = 0;
1104 if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
1105 return 0;
1106
1107 vf_mac.vf =
1108 vf_vlan.vf =
1109 vf_rate.vf =
1110 vf_tx_rate.vf =
1111 vf_spoofchk.vf =
1112 vf_linkstate.vf =
1113 vf_rss_query_en.vf =
1114 vf_trust.vf = ivi.vf;
1115
1116 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
1117 vf_vlan.vlan = ivi.vlan;
1118 vf_vlan.qos = ivi.qos;
1119 vf_tx_rate.rate = ivi.max_tx_rate;
1120 vf_rate.min_tx_rate = ivi.min_tx_rate;
1121 vf_rate.max_tx_rate = ivi.max_tx_rate;
1122 vf_spoofchk.setting = ivi.spoofchk;
1123 vf_linkstate.link_state = ivi.linkstate;
1124 vf_rss_query_en.setting = ivi.rss_query_en;
1125 vf_trust.setting = ivi.trusted;
1126 vf = nla_nest_start(skb, IFLA_VF_INFO);
1127 if (!vf) {
1128 nla_nest_cancel(skb, vfinfo);
1129 return -EMSGSIZE;
1130 }
1131 if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
1132 nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
1133 nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
1134 &vf_rate) ||
1135 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
1136 &vf_tx_rate) ||
1137 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
1138 &vf_spoofchk) ||
1139 nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
1140 &vf_linkstate) ||
1141 nla_put(skb, IFLA_VF_RSS_QUERY_EN,
1142 sizeof(vf_rss_query_en),
1143 &vf_rss_query_en) ||
1144 nla_put(skb, IFLA_VF_TRUST,
1145 sizeof(vf_trust), &vf_trust))
1146 return -EMSGSIZE;
1147 memset(&vf_stats, 0, sizeof(vf_stats));
1148 if (dev->netdev_ops->ndo_get_vf_stats)
1149 dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
1150 &vf_stats);
1151 vfstats = nla_nest_start(skb, IFLA_VF_STATS);
1152 if (!vfstats) {
1153 nla_nest_cancel(skb, vf);
1154 nla_nest_cancel(skb, vfinfo);
1155 return -EMSGSIZE;
1156 }
1157 if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
1158 vf_stats.rx_packets) ||
1159 nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
1160 vf_stats.tx_packets) ||
1161 nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
1162 vf_stats.rx_bytes) ||
1163 nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
1164 vf_stats.tx_bytes) ||
1165 nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
1166 vf_stats.broadcast) ||
1167 nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
1168 vf_stats.multicast))
1169 return -EMSGSIZE;
1170 nla_nest_end(skb, vfstats);
1171 nla_nest_end(skb, vf);
1172 return 0;
1173}
1174
1175static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
1176{
1177 struct rtnl_link_ifmap map = {
1178 .mem_start = dev->mem_start,
1179 .mem_end = dev->mem_end,
1180 .base_addr = dev->base_addr,
1181 .irq = dev->irq,
1182 .dma = dev->dma,
1183 .port = dev->if_port,
1184 };
1185 if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
1186 return -EMSGSIZE;
1187
1188 return 0;
1189}
1190
1048static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 1191static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1049 int type, u32 pid, u32 seq, u32 change, 1192 int type, u32 pid, u32 seq, u32 change,
1050 unsigned int flags, u32 ext_filter_mask) 1193 unsigned int flags, u32 ext_filter_mask)
1051{ 1194{
1052 struct ifinfomsg *ifm; 1195 struct ifinfomsg *ifm;
1053 struct nlmsghdr *nlh; 1196 struct nlmsghdr *nlh;
1054 struct rtnl_link_stats64 temp; 1197 struct nlattr *af_spec;
1055 const struct rtnl_link_stats64 *stats;
1056 struct nlattr *attr, *af_spec;
1057 struct rtnl_af_ops *af_ops; 1198 struct rtnl_af_ops *af_ops;
1058 struct net_device *upper_dev = netdev_master_upper_dev_get(dev); 1199 struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
1059 1200
@@ -1096,18 +1237,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1096 nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) 1237 nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
1097 goto nla_put_failure; 1238 goto nla_put_failure;
1098 1239
1099 if (1) { 1240 if (rtnl_fill_link_ifmap(skb, dev))
1100 struct rtnl_link_ifmap map = { 1241 goto nla_put_failure;
1101 .mem_start = dev->mem_start,
1102 .mem_end = dev->mem_end,
1103 .base_addr = dev->base_addr,
1104 .irq = dev->irq,
1105 .dma = dev->dma,
1106 .port = dev->if_port,
1107 };
1108 if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
1109 goto nla_put_failure;
1110 }
1111 1242
1112 if (dev->addr_len) { 1243 if (dev->addr_len) {
1113 if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) || 1244 if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1124,128 +1255,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1124 if (rtnl_phys_switch_id_fill(skb, dev)) 1255 if (rtnl_phys_switch_id_fill(skb, dev))
1125 goto nla_put_failure; 1256 goto nla_put_failure;
1126 1257
1127 attr = nla_reserve(skb, IFLA_STATS, 1258 if (rtnl_fill_stats(skb, dev))
1128 sizeof(struct rtnl_link_stats));
1129 if (attr == NULL)
1130 goto nla_put_failure;
1131
1132 stats = dev_get_stats(dev, &temp);
1133 copy_rtnl_link_stats(nla_data(attr), stats);
1134
1135 attr = nla_reserve(skb, IFLA_STATS64,
1136 sizeof(struct rtnl_link_stats64));
1137 if (attr == NULL)
1138 goto nla_put_failure; 1259 goto nla_put_failure;
1139 copy_rtnl_link_stats64(nla_data(attr), stats);
1140 1260
1141 if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && 1261 if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
1142 nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent))) 1262 nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
1143 goto nla_put_failure; 1263 goto nla_put_failure;
1144 1264
1145 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent 1265 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
1146 && (ext_filter_mask & RTEXT_FILTER_VF)) { 1266 ext_filter_mask & RTEXT_FILTER_VF) {
1147 int i; 1267 int i;
1148 1268 struct nlattr *vfinfo;
1149 struct nlattr *vfinfo, *vf, *vfstats;
1150 int num_vfs = dev_num_vf(dev->dev.parent); 1269 int num_vfs = dev_num_vf(dev->dev.parent);
1151 1270
1152 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); 1271 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
1153 if (!vfinfo) 1272 if (!vfinfo)
1154 goto nla_put_failure; 1273 goto nla_put_failure;
1155 for (i = 0; i < num_vfs; i++) { 1274 for (i = 0; i < num_vfs; i++) {
1156 struct ifla_vf_info ivi; 1275 if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
1157 struct ifla_vf_mac vf_mac;
1158 struct ifla_vf_vlan vf_vlan;
1159 struct ifla_vf_rate vf_rate;
1160 struct ifla_vf_tx_rate vf_tx_rate;
1161 struct ifla_vf_spoofchk vf_spoofchk;
1162 struct ifla_vf_link_state vf_linkstate;
1163 struct ifla_vf_rss_query_en vf_rss_query_en;
1164 struct ifla_vf_stats vf_stats;
1165 struct ifla_vf_trust vf_trust;
1166
1167 /*
1168 * Not all SR-IOV capable drivers support the
1169 * spoofcheck and "RSS query enable" query. Preset to
1170 * -1 so the user space tool can detect that the driver
1171 * didn't report anything.
1172 */
1173 ivi.spoofchk = -1;
1174 ivi.rss_query_en = -1;
1175 ivi.trusted = -1;
1176 memset(ivi.mac, 0, sizeof(ivi.mac));
1177 /* The default value for VF link state is "auto"
1178 * IFLA_VF_LINK_STATE_AUTO which equals zero
1179 */
1180 ivi.linkstate = 0;
1181 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
1182 break;
1183 vf_mac.vf =
1184 vf_vlan.vf =
1185 vf_rate.vf =
1186 vf_tx_rate.vf =
1187 vf_spoofchk.vf =
1188 vf_linkstate.vf =
1189 vf_rss_query_en.vf =
1190 vf_trust.vf = ivi.vf;
1191
1192 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
1193 vf_vlan.vlan = ivi.vlan;
1194 vf_vlan.qos = ivi.qos;
1195 vf_tx_rate.rate = ivi.max_tx_rate;
1196 vf_rate.min_tx_rate = ivi.min_tx_rate;
1197 vf_rate.max_tx_rate = ivi.max_tx_rate;
1198 vf_spoofchk.setting = ivi.spoofchk;
1199 vf_linkstate.link_state = ivi.linkstate;
1200 vf_rss_query_en.setting = ivi.rss_query_en;
1201 vf_trust.setting = ivi.trusted;
1202 vf = nla_nest_start(skb, IFLA_VF_INFO);
1203 if (!vf) {
1204 nla_nest_cancel(skb, vfinfo);
1205 goto nla_put_failure;
1206 }
1207 if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
1208 nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
1209 nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
1210 &vf_rate) ||
1211 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
1212 &vf_tx_rate) ||
1213 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
1214 &vf_spoofchk) ||
1215 nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
1216 &vf_linkstate) ||
1217 nla_put(skb, IFLA_VF_RSS_QUERY_EN,
1218 sizeof(vf_rss_query_en),
1219 &vf_rss_query_en) ||
1220 nla_put(skb, IFLA_VF_TRUST,
1221 sizeof(vf_trust), &vf_trust))
1222 goto nla_put_failure; 1276 goto nla_put_failure;
1223 memset(&vf_stats, 0, sizeof(vf_stats));
1224 if (dev->netdev_ops->ndo_get_vf_stats)
1225 dev->netdev_ops->ndo_get_vf_stats(dev, i,
1226 &vf_stats);
1227 vfstats = nla_nest_start(skb, IFLA_VF_STATS);
1228 if (!vfstats) {
1229 nla_nest_cancel(skb, vf);
1230 nla_nest_cancel(skb, vfinfo);
1231 goto nla_put_failure;
1232 }
1233 if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
1234 vf_stats.rx_packets) ||
1235 nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
1236 vf_stats.tx_packets) ||
1237 nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
1238 vf_stats.rx_bytes) ||
1239 nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
1240 vf_stats.tx_bytes) ||
1241 nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
1242 vf_stats.broadcast) ||
1243 nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
1244 vf_stats.multicast))
1245 goto nla_put_failure;
1246 nla_nest_end(skb, vfstats);
1247 nla_nest_end(skb, vf);
1248 } 1277 }
1278
1249 nla_nest_end(skb, vfinfo); 1279 nla_nest_end(skb, vfinfo);
1250 } 1280 }
1251 1281
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index aa41e6dd6429..152b9c70e252 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
4268 return NULL; 4268 return NULL;
4269 } 4269 }
4270 4270
4271 memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); 4271 memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
4272 2 * ETH_ALEN);
4272 skb->mac_header += VLAN_HLEN; 4273 skb->mac_header += VLAN_HLEN;
4273 return skb; 4274 return skb;
4274} 4275}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3e87447e65c7..d97268e8ff10 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -923,14 +923,21 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
923 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 923 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
924 fib_prefsrc != cfg->fc_dst) { 924 fib_prefsrc != cfg->fc_dst) {
925 u32 tb_id = cfg->fc_table; 925 u32 tb_id = cfg->fc_table;
926 int rc;
926 927
927 if (tb_id == RT_TABLE_MAIN) 928 if (tb_id == RT_TABLE_MAIN)
928 tb_id = RT_TABLE_LOCAL; 929 tb_id = RT_TABLE_LOCAL;
929 930
930 if (inet_addr_type_table(cfg->fc_nlinfo.nl_net, 931 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
931 fib_prefsrc, tb_id) != RTN_LOCAL) { 932 fib_prefsrc, tb_id);
932 return false; 933
934 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
935 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
936 fib_prefsrc, RT_TABLE_LOCAL);
933 } 937 }
938
939 if (rc != RTN_LOCAL)
940 return false;
934 } 941 }
935 return true; 942 return true;
936} 943}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 64aaf3522a59..6baf36e11808 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2392,11 +2392,11 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2392 struct ip_sf_socklist *psl; 2392 struct ip_sf_socklist *psl;
2393 struct net *net = sock_net(sk); 2393 struct net *net = sock_net(sk);
2394 2394
2395 ASSERT_RTNL();
2396
2395 if (!ipv4_is_multicast(addr)) 2397 if (!ipv4_is_multicast(addr))
2396 return -EINVAL; 2398 return -EINVAL;
2397 2399
2398 rtnl_lock();
2399
2400 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2400 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
2401 imr.imr_address.s_addr = msf->imsf_interface; 2401 imr.imr_address.s_addr = msf->imsf_interface;
2402 imr.imr_ifindex = 0; 2402 imr.imr_ifindex = 0;
@@ -2417,7 +2417,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2417 goto done; 2417 goto done;
2418 msf->imsf_fmode = pmc->sfmode; 2418 msf->imsf_fmode = pmc->sfmode;
2419 psl = rtnl_dereference(pmc->sflist); 2419 psl = rtnl_dereference(pmc->sflist);
2420 rtnl_unlock();
2421 if (!psl) { 2420 if (!psl) {
2422 len = 0; 2421 len = 0;
2423 count = 0; 2422 count = 0;
@@ -2436,7 +2435,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2436 return -EFAULT; 2435 return -EFAULT;
2437 return 0; 2436 return 0;
2438done: 2437done:
2439 rtnl_unlock();
2440 return err; 2438 return err;
2441} 2439}
2442 2440
@@ -2450,6 +2448,8 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2450 struct inet_sock *inet = inet_sk(sk); 2448 struct inet_sock *inet = inet_sk(sk);
2451 struct ip_sf_socklist *psl; 2449 struct ip_sf_socklist *psl;
2452 2450
2451 ASSERT_RTNL();
2452
2453 psin = (struct sockaddr_in *)&gsf->gf_group; 2453 psin = (struct sockaddr_in *)&gsf->gf_group;
2454 if (psin->sin_family != AF_INET) 2454 if (psin->sin_family != AF_INET)
2455 return -EINVAL; 2455 return -EINVAL;
@@ -2457,8 +2457,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2457 if (!ipv4_is_multicast(addr)) 2457 if (!ipv4_is_multicast(addr))
2458 return -EINVAL; 2458 return -EINVAL;
2459 2459
2460 rtnl_lock();
2461
2462 err = -EADDRNOTAVAIL; 2460 err = -EADDRNOTAVAIL;
2463 2461
2464 for_each_pmc_rtnl(inet, pmc) { 2462 for_each_pmc_rtnl(inet, pmc) {
@@ -2470,7 +2468,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2470 goto done; 2468 goto done;
2471 gsf->gf_fmode = pmc->sfmode; 2469 gsf->gf_fmode = pmc->sfmode;
2472 psl = rtnl_dereference(pmc->sflist); 2470 psl = rtnl_dereference(pmc->sflist);
2473 rtnl_unlock();
2474 count = psl ? psl->sl_count : 0; 2471 count = psl ? psl->sl_count : 0;
2475 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; 2472 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
2476 gsf->gf_numsrc = count; 2473 gsf->gf_numsrc = count;
@@ -2490,7 +2487,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2490 } 2487 }
2491 return 0; 2488 return 0;
2492done: 2489done:
2493 rtnl_unlock();
2494 return err; 2490 return err;
2495} 2491}
2496 2492
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1feb15f23de8..46b9c887bede 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -563,7 +563,7 @@ static void reqsk_timer_handler(unsigned long data)
563 int max_retries, thresh; 563 int max_retries, thresh;
564 u8 defer_accept; 564 u8 defer_accept;
565 565
566 if (sk_listener->sk_state != TCP_LISTEN) 566 if (sk_state_load(sk_listener) != TCP_LISTEN)
567 goto drop; 567 goto drop;
568 568
569 max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 569 max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
@@ -749,7 +749,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
749 * It is OK, because this socket enters to hash table only 749 * It is OK, because this socket enters to hash table only
750 * after validation is complete. 750 * after validation is complete.
751 */ 751 */
752 sk->sk_state = TCP_LISTEN; 752 sk_state_store(sk, TCP_LISTEN);
753 if (!sk->sk_prot->get_port(sk, inet->inet_num)) { 753 if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
754 inet->inet_sport = htons(inet->inet_num); 754 inet->inet_sport = htons(inet->inet_num);
755 755
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c3c359ad66e3..5f73a7c03e27 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1251,11 +1251,22 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
1251 * the _received_ ones. The set sets the _sent_ ones. 1251 * the _received_ ones. The set sets the _sent_ ones.
1252 */ 1252 */
1253 1253
1254static bool getsockopt_needs_rtnl(int optname)
1255{
1256 switch (optname) {
1257 case IP_MSFILTER:
1258 case MCAST_MSFILTER:
1259 return true;
1260 }
1261 return false;
1262}
1263
1254static int do_ip_getsockopt(struct sock *sk, int level, int optname, 1264static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1255 char __user *optval, int __user *optlen, unsigned int flags) 1265 char __user *optval, int __user *optlen, unsigned int flags)
1256{ 1266{
1257 struct inet_sock *inet = inet_sk(sk); 1267 struct inet_sock *inet = inet_sk(sk);
1258 int val; 1268 bool needs_rtnl = getsockopt_needs_rtnl(optname);
1269 int val, err = 0;
1259 int len; 1270 int len;
1260 1271
1261 if (level != SOL_IP) 1272 if (level != SOL_IP)
@@ -1269,6 +1280,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1269 if (len < 0) 1280 if (len < 0)
1270 return -EINVAL; 1281 return -EINVAL;
1271 1282
1283 if (needs_rtnl)
1284 rtnl_lock();
1272 lock_sock(sk); 1285 lock_sock(sk);
1273 1286
1274 switch (optname) { 1287 switch (optname) {
@@ -1386,39 +1399,35 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1386 case IP_MSFILTER: 1399 case IP_MSFILTER:
1387 { 1400 {
1388 struct ip_msfilter msf; 1401 struct ip_msfilter msf;
1389 int err;
1390 1402
1391 if (len < IP_MSFILTER_SIZE(0)) { 1403 if (len < IP_MSFILTER_SIZE(0)) {
1392 release_sock(sk); 1404 err = -EINVAL;
1393 return -EINVAL; 1405 goto out;
1394 } 1406 }
1395 if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { 1407 if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
1396 release_sock(sk); 1408 err = -EFAULT;
1397 return -EFAULT; 1409 goto out;
1398 } 1410 }
1399 err = ip_mc_msfget(sk, &msf, 1411 err = ip_mc_msfget(sk, &msf,
1400 (struct ip_msfilter __user *)optval, optlen); 1412 (struct ip_msfilter __user *)optval, optlen);
1401 release_sock(sk); 1413 goto out;
1402 return err;
1403 } 1414 }
1404 case MCAST_MSFILTER: 1415 case MCAST_MSFILTER:
1405 { 1416 {
1406 struct group_filter gsf; 1417 struct group_filter gsf;
1407 int err;
1408 1418
1409 if (len < GROUP_FILTER_SIZE(0)) { 1419 if (len < GROUP_FILTER_SIZE(0)) {
1410 release_sock(sk); 1420 err = -EINVAL;
1411 return -EINVAL; 1421 goto out;
1412 } 1422 }
1413 if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { 1423 if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
1414 release_sock(sk); 1424 err = -EFAULT;
1415 return -EFAULT; 1425 goto out;
1416 } 1426 }
1417 err = ip_mc_gsfget(sk, &gsf, 1427 err = ip_mc_gsfget(sk, &gsf,
1418 (struct group_filter __user *)optval, 1428 (struct group_filter __user *)optval,
1419 optlen); 1429 optlen);
1420 release_sock(sk); 1430 goto out;
1421 return err;
1422 } 1431 }
1423 case IP_MULTICAST_ALL: 1432 case IP_MULTICAST_ALL:
1424 val = inet->mc_all; 1433 val = inet->mc_all;
@@ -1485,6 +1494,12 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1485 return -EFAULT; 1494 return -EFAULT;
1486 } 1495 }
1487 return 0; 1496 return 0;
1497
1498out:
1499 release_sock(sk);
1500 if (needs_rtnl)
1501 rtnl_unlock();
1502 return err;
1488} 1503}
1489 1504
1490int ip_getsockopt(struct sock *sk, int level, 1505int ip_getsockopt(struct sock *sk, int level,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 0e5591c2ee9f..6fb869f646bf 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -67,10 +67,9 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
67 const struct nf_hook_state *state) 67 const struct nf_hook_state *state)
68{ 68{
69 struct sock *sk = skb->sk; 69 struct sock *sk = skb->sk;
70 struct inet_sock *inet = inet_sk(skb->sk);
71 70
72 if (sk && (sk->sk_family == PF_INET) && 71 if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
73 inet->nodefrag) 72 inet_sk(sk)->nodefrag)
74 return NF_ACCEPT; 73 return NF_ACCEPT;
75 74
76#if IS_ENABLED(CONFIG_NF_CONNTRACK) 75#if IS_ENABLED(CONFIG_NF_CONNTRACK)
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 657d2307f031..b3ca21b2ba9b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -45,7 +45,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
45 struct net *net = nf_ct_net(ct); 45 struct net *net = nf_ct_net(ct);
46 const struct nf_conn *master = ct->master; 46 const struct nf_conn *master = ct->master;
47 struct nf_conntrack_expect *other_exp; 47 struct nf_conntrack_expect *other_exp;
48 struct nf_conntrack_tuple t; 48 struct nf_conntrack_tuple t = {};
49 const struct nf_ct_pptp_master *ct_pptp_info; 49 const struct nf_ct_pptp_master *ct_pptp_info;
50 const struct nf_nat_pptp *nat_pptp_info; 50 const struct nf_nat_pptp *nat_pptp_info;
51 struct nf_nat_range range; 51 struct nf_nat_range range;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8c0d0bdc2a7c..63e5be0abd86 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,10 +406,12 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
406 ip_select_ident(net, skb, NULL); 406 ip_select_ident(net, skb, NULL);
407 407
408 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 408 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
409 skb->transport_header += iphlen;
410 if (iph->protocol == IPPROTO_ICMP &&
411 length >= iphlen + sizeof(struct icmphdr))
412 icmp_out_count(net, ((struct icmphdr *)
413 skb_transport_header(skb))->type);
409 } 414 }
410 if (iph->protocol == IPPROTO_ICMP)
411 icmp_out_count(net, ((struct icmphdr *)
412 skb_transport_header(skb))->type);
413 415
414 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, 416 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
415 net, sk, skb, NULL, rt->dst.dev, 417 net, sk, skb, NULL, rt->dst.dev,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 25300c5e283b..a0bd7a55193e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -48,14 +48,14 @@ static void set_local_port_range(struct net *net, int range[2])
48{ 48{
49 bool same_parity = !((range[0] ^ range[1]) & 1); 49 bool same_parity = !((range[0] ^ range[1]) & 1);
50 50
51 write_seqlock(&net->ipv4.ip_local_ports.lock); 51 write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
52 if (same_parity && !net->ipv4.ip_local_ports.warned) { 52 if (same_parity && !net->ipv4.ip_local_ports.warned) {
53 net->ipv4.ip_local_ports.warned = true; 53 net->ipv4.ip_local_ports.warned = true;
54 pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n"); 54 pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
55 } 55 }
56 net->ipv4.ip_local_ports.range[0] = range[0]; 56 net->ipv4.ip_local_ports.range[0] = range[0];
57 net->ipv4.ip_local_ports.range[1] = range[1]; 57 net->ipv4.ip_local_ports.range[1] = range[1];
58 write_sequnlock(&net->ipv4.ip_local_ports.lock); 58 write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
59} 59}
60 60
61/* Validate changes from /proc interface. */ 61/* Validate changes from /proc interface. */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cfa7c0c1e80..c1728771cf89 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,11 +451,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
451 unsigned int mask; 451 unsigned int mask;
452 struct sock *sk = sock->sk; 452 struct sock *sk = sock->sk;
453 const struct tcp_sock *tp = tcp_sk(sk); 453 const struct tcp_sock *tp = tcp_sk(sk);
454 int state;
454 455
455 sock_rps_record_flow(sk); 456 sock_rps_record_flow(sk);
456 457
457 sock_poll_wait(file, sk_sleep(sk), wait); 458 sock_poll_wait(file, sk_sleep(sk), wait);
458 if (sk->sk_state == TCP_LISTEN) 459
460 state = sk_state_load(sk);
461 if (state == TCP_LISTEN)
459 return inet_csk_listen_poll(sk); 462 return inet_csk_listen_poll(sk);
460 463
461 /* Socket is not locked. We are protected from async events 464 /* Socket is not locked. We are protected from async events
@@ -492,14 +495,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
492 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent 495 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
493 * blocking on fresh not-connected or disconnected socket. --ANK 496 * blocking on fresh not-connected or disconnected socket. --ANK
494 */ 497 */
495 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) 498 if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
496 mask |= POLLHUP; 499 mask |= POLLHUP;
497 if (sk->sk_shutdown & RCV_SHUTDOWN) 500 if (sk->sk_shutdown & RCV_SHUTDOWN)
498 mask |= POLLIN | POLLRDNORM | POLLRDHUP; 501 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
499 502
500 /* Connected or passive Fast Open socket? */ 503 /* Connected or passive Fast Open socket? */
501 if (sk->sk_state != TCP_SYN_SENT && 504 if (state != TCP_SYN_SENT &&
502 (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) { 505 (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
503 int target = sock_rcvlowat(sk, 0, INT_MAX); 506 int target = sock_rcvlowat(sk, 0, INT_MAX);
504 507
505 if (tp->urg_seq == tp->copied_seq && 508 if (tp->urg_seq == tp->copied_seq &&
@@ -507,9 +510,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
507 tp->urg_data) 510 tp->urg_data)
508 target++; 511 target++;
509 512
510 /* Potential race condition. If read of tp below will
511 * escape above sk->sk_state, we can be illegally awaken
512 * in SYN_* states. */
513 if (tp->rcv_nxt - tp->copied_seq >= target) 513 if (tp->rcv_nxt - tp->copied_seq >= target)
514 mask |= POLLIN | POLLRDNORM; 514 mask |= POLLIN | POLLRDNORM;
515 515
@@ -1934,7 +1934,7 @@ void tcp_set_state(struct sock *sk, int state)
1934 /* Change state AFTER socket is unhashed to avoid closed 1934 /* Change state AFTER socket is unhashed to avoid closed
1935 * socket sitting in hash tables. 1935 * socket sitting in hash tables.
1936 */ 1936 */
1937 sk->sk_state = state; 1937 sk_state_store(sk, state);
1938 1938
1939#ifdef STATE_TRACE 1939#ifdef STATE_TRACE
1940 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); 1940 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@ -2644,7 +2644,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2644 if (sk->sk_type != SOCK_STREAM) 2644 if (sk->sk_type != SOCK_STREAM)
2645 return; 2645 return;
2646 2646
2647 info->tcpi_state = sk->sk_state; 2647 info->tcpi_state = sk_state_load(sk);
2648
2648 info->tcpi_ca_state = icsk->icsk_ca_state; 2649 info->tcpi_ca_state = icsk->icsk_ca_state;
2649 info->tcpi_retransmits = icsk->icsk_retransmits; 2650 info->tcpi_retransmits = icsk->icsk_retransmits;
2650 info->tcpi_probes = icsk->icsk_probes_out; 2651 info->tcpi_probes = icsk->icsk_probes_out;
@@ -2672,7 +2673,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2672 info->tcpi_snd_mss = tp->mss_cache; 2673 info->tcpi_snd_mss = tp->mss_cache;
2673 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; 2674 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
2674 2675
2675 if (sk->sk_state == TCP_LISTEN) { 2676 if (info->tcpi_state == TCP_LISTEN) {
2676 info->tcpi_unacked = sk->sk_ack_backlog; 2677 info->tcpi_unacked = sk->sk_ack_backlog;
2677 info->tcpi_sacked = sk->sk_max_ack_backlog; 2678 info->tcpi_sacked = sk->sk_max_ack_backlog;
2678 } else { 2679 } else {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f34946177..b31604086edd 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,7 +21,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
21{ 21{
22 struct tcp_info *info = _info; 22 struct tcp_info *info = _info;
23 23
24 if (sk->sk_state == TCP_LISTEN) { 24 if (sk_state_load(sk) == TCP_LISTEN) {
25 r->idiag_rqueue = sk->sk_ack_backlog; 25 r->idiag_rqueue = sk->sk_ack_backlog;
26 r->idiag_wqueue = sk->sk_max_ack_backlog; 26 r->idiag_wqueue = sk->sk_max_ack_backlog;
27 } else if (sk->sk_type == SOCK_STREAM) { 27 } else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1c2648bbac4b..ba09016d1bfd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1326,6 +1326,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1326 if (__inet_inherit_port(sk, newsk) < 0) 1326 if (__inet_inherit_port(sk, newsk) < 0)
1327 goto put_and_exit; 1327 goto put_and_exit;
1328 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); 1328 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1329 if (*own_req)
1330 tcp_move_syn(newtp, req);
1329 1331
1330 return newsk; 1332 return newsk;
1331 1333
@@ -2156,6 +2158,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2156 __u16 destp = ntohs(inet->inet_dport); 2158 __u16 destp = ntohs(inet->inet_dport);
2157 __u16 srcp = ntohs(inet->inet_sport); 2159 __u16 srcp = ntohs(inet->inet_sport);
2158 int rx_queue; 2160 int rx_queue;
2161 int state;
2159 2162
2160 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 2163 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2161 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 2164 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2173,17 +2176,18 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2173 timer_expires = jiffies; 2176 timer_expires = jiffies;
2174 } 2177 }
2175 2178
2176 if (sk->sk_state == TCP_LISTEN) 2179 state = sk_state_load(sk);
2180 if (state == TCP_LISTEN)
2177 rx_queue = sk->sk_ack_backlog; 2181 rx_queue = sk->sk_ack_backlog;
2178 else 2182 else
2179 /* 2183 /* Because we don't lock the socket,
2180 * because we dont lock socket, we might find a transient negative value 2184 * we might find a transient negative value.
2181 */ 2185 */
2182 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 2186 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2183 2187
2184 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2188 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2185 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 2189 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2186 i, src, srcp, dest, destp, sk->sk_state, 2190 i, src, srcp, dest, destp, state,
2187 tp->write_seq - tp->snd_una, 2191 tp->write_seq - tp->snd_una,
2188 rx_queue, 2192 rx_queue,
2189 timer_active, 2193 timer_active,
@@ -2197,8 +2201,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2197 jiffies_to_clock_t(icsk->icsk_ack.ato), 2201 jiffies_to_clock_t(icsk->icsk_ack.ato),
2198 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 2202 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2199 tp->snd_cwnd, 2203 tp->snd_cwnd,
2200 sk->sk_state == TCP_LISTEN ? 2204 state == TCP_LISTEN ?
2201 (fastopenq ? fastopenq->max_qlen : 0) : 2205 fastopenq->max_qlen :
2202 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 2206 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2203} 2207}
2204 2208
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3575dd1e5b67..ac6b1961ffeb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -551,9 +551,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
551 newtp->rack.mstamp.v64 = 0; 551 newtp->rack.mstamp.v64 = 0;
552 newtp->rack.advanced = 0; 552 newtp->rack.advanced = 0;
553 553
554 newtp->saved_syn = req->saved_syn;
555 req->saved_syn = NULL;
556
557 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); 554 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
558 } 555 }
559 return newsk; 556 return newsk;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d72fa90d6feb..d84742f003a9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -418,6 +418,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
418 if (err) { 418 if (err) {
419 ipv6_mc_destroy_dev(ndev); 419 ipv6_mc_destroy_dev(ndev);
420 del_timer(&ndev->regen_timer); 420 del_timer(&ndev->regen_timer);
421 snmp6_unregister_dev(ndev);
421 goto err_release; 422 goto err_release;
422 } 423 }
423 /* protected by rtnl_lock */ 424 /* protected by rtnl_lock */
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a39e29..5ee56d0a8699 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1651,7 +1651,6 @@ out:
1651 if (!err) { 1651 if (!err) {
1652 ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); 1652 ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
1653 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1653 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1654 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
1655 } else { 1654 } else {
1656 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 1655 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
1657 } 1656 }
@@ -2015,7 +2014,6 @@ out:
2015 if (!err) { 2014 if (!err) {
2016 ICMP6MSGOUT_INC_STATS(net, idev, type); 2015 ICMP6MSGOUT_INC_STATS(net, idev, type);
2017 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 2016 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
2018 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
2019 } else 2017 } else
2020 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 2018 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
2021 2019
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c8bc9b4ac328..6f01fe122abd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -404,6 +404,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
404 } 404 }
405} 405}
406 406
407static bool __rt6_check_expired(const struct rt6_info *rt)
408{
409 if (rt->rt6i_flags & RTF_EXPIRES)
410 return time_after(jiffies, rt->dst.expires);
411 else
412 return false;
413}
414
407static bool rt6_check_expired(const struct rt6_info *rt) 415static bool rt6_check_expired(const struct rt6_info *rt)
408{ 416{
409 if (rt->rt6i_flags & RTF_EXPIRES) { 417 if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -1252,7 +1260,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1252 1260
1253static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) 1261static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1254{ 1262{
1255 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 1263 if (!__rt6_check_expired(rt) &&
1264 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1256 rt6_check((struct rt6_info *)(rt->dst.from), cookie)) 1265 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1257 return &rt->dst; 1266 return &rt->dst;
1258 else 1267 else
@@ -1272,7 +1281,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1272 1281
1273 rt6_dst_from_metrics_check(rt); 1282 rt6_dst_from_metrics_check(rt);
1274 1283
1275 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) 1284 if (rt->rt6i_flags & RTF_PCPU ||
1285 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1276 return rt6_dst_from_check(rt, cookie); 1286 return rt6_dst_from_check(rt, cookie);
1277 else 1287 else
1278 return rt6_check(rt, cookie); 1288 return rt6_check(rt, cookie);
@@ -1322,6 +1332,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1322 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); 1332 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1323} 1333}
1324 1334
1335static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1336{
1337 return !(rt->rt6i_flags & RTF_CACHE) &&
1338 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1339}
1340
1325static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 1341static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1326 const struct ipv6hdr *iph, u32 mtu) 1342 const struct ipv6hdr *iph, u32 mtu)
1327{ 1343{
@@ -1335,7 +1351,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1335 if (mtu >= dst_mtu(dst)) 1351 if (mtu >= dst_mtu(dst))
1336 return; 1352 return;
1337 1353
1338 if (rt6->rt6i_flags & RTF_CACHE) { 1354 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1339 rt6_do_update_pmtu(rt6, mtu); 1355 rt6_do_update_pmtu(rt6, mtu);
1340 } else { 1356 } else {
1341 const struct in6_addr *daddr, *saddr; 1357 const struct in6_addr *daddr, *saddr;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ea2f4d5440b5..c5429a636f1a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1140,14 +1140,18 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
1140 goto out; 1140 goto out;
1141 } 1141 }
1142 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); 1142 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1143 /* Clone pktoptions received with SYN, if we own the req */ 1143 if (*own_req) {
1144 if (*own_req && ireq->pktopts) { 1144 tcp_move_syn(newtp, req);
1145 newnp->pktoptions = skb_clone(ireq->pktopts, 1145
1146 sk_gfp_atomic(sk, GFP_ATOMIC)); 1146 /* Clone pktoptions received with SYN, if we own the req */
1147 consume_skb(ireq->pktopts); 1147 if (ireq->pktopts) {
1148 ireq->pktopts = NULL; 1148 newnp->pktoptions = skb_clone(ireq->pktopts,
1149 if (newnp->pktoptions) 1149 sk_gfp_atomic(sk, GFP_ATOMIC));
1150 skb_set_owner_r(newnp->pktoptions, newsk); 1150 consume_skb(ireq->pktopts);
1151 ireq->pktopts = NULL;
1152 if (newnp->pktoptions)
1153 skb_set_owner_r(newnp->pktoptions, newsk);
1154 }
1151 } 1155 }
1152 1156
1153 return newsk; 1157 return newsk;
@@ -1686,6 +1690,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1686 const struct tcp_sock *tp = tcp_sk(sp); 1690 const struct tcp_sock *tp = tcp_sk(sp);
1687 const struct inet_connection_sock *icsk = inet_csk(sp); 1691 const struct inet_connection_sock *icsk = inet_csk(sp);
1688 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 1692 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1693 int rx_queue;
1694 int state;
1689 1695
1690 dest = &sp->sk_v6_daddr; 1696 dest = &sp->sk_v6_daddr;
1691 src = &sp->sk_v6_rcv_saddr; 1697 src = &sp->sk_v6_rcv_saddr;
@@ -1706,6 +1712,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1706 timer_expires = jiffies; 1712 timer_expires = jiffies;
1707 } 1713 }
1708 1714
1715 state = sk_state_load(sp);
1716 if (state == TCP_LISTEN)
1717 rx_queue = sp->sk_ack_backlog;
1718 else
1719 /* Because we don't lock the socket,
1720 * we might find a transient negative value.
1721 */
1722 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1723
1709 seq_printf(seq, 1724 seq_printf(seq,
1710 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1725 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1711 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 1726 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
@@ -1714,9 +1729,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1714 src->s6_addr32[2], src->s6_addr32[3], srcp, 1729 src->s6_addr32[2], src->s6_addr32[3], srcp,
1715 dest->s6_addr32[0], dest->s6_addr32[1], 1730 dest->s6_addr32[0], dest->s6_addr32[1],
1716 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1731 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1717 sp->sk_state, 1732 state,
1718 tp->write_seq-tp->snd_una, 1733 tp->write_seq - tp->snd_una,
1719 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), 1734 rx_queue,
1720 timer_active, 1735 timer_active,
1721 jiffies_delta_to_clock_t(timer_expires - jiffies), 1736 jiffies_delta_to_clock_t(timer_expires - jiffies),
1722 icsk->icsk_retransmits, 1737 icsk->icsk_retransmits,
@@ -1728,7 +1743,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1728 jiffies_to_clock_t(icsk->icsk_ack.ato), 1743 jiffies_to_clock_t(icsk->icsk_ack.ato),
1729 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 1744 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1730 tp->snd_cwnd, 1745 tp->snd_cwnd,
1731 sp->sk_state == TCP_LISTEN ? 1746 state == TCP_LISTEN ?
1732 fastopenq->max_qlen : 1747 fastopenq->max_qlen :
1733 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 1748 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1734 ); 1749 );
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e22349ea7256..4692782b5280 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_TEE
869 depends on IPV6 || IPV6=n 869 depends on IPV6 || IPV6=n
870 depends on !NF_CONNTRACK || NF_CONNTRACK 870 depends on !NF_CONNTRACK || NF_CONNTRACK
871 select NF_DUP_IPV4 871 select NF_DUP_IPV4
872 select NF_DUP_IPV6 if IP6_NF_IPTABLES 872 select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
873 ---help--- 873 ---help---
874 This option adds a "TEE" target with which a packet can be cloned and 874 This option adds a "TEE" target with which a packet can be cloned and
875 this clone be rerouted to another nexthop. 875 this clone be rerouted to another nexthop.
@@ -882,7 +882,7 @@ config NETFILTER_XT_TARGET_TPROXY
882 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 882 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
883 depends on IP_NF_MANGLE 883 depends on IP_NF_MANGLE
884 select NF_DEFRAG_IPV4 884 select NF_DEFRAG_IPV4
885 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 885 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
886 help 886 help
887 This option adds a `TPROXY' target, which is somewhat similar to 887 This option adds a `TPROXY' target, which is somewhat similar to
888 REDIRECT. It can only be used in the mangle table and is useful 888 REDIRECT. It can only be used in the mangle table and is useful
@@ -1375,7 +1375,7 @@ config NETFILTER_XT_MATCH_SOCKET
1375 depends on IPV6 || IPV6=n 1375 depends on IPV6 || IPV6=n
1376 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 1376 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
1377 select NF_DEFRAG_IPV4 1377 select NF_DEFRAG_IPV4
1378 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 1378 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
1379 help 1379 help
1380 This option adds a `socket' match, which can be used to match 1380 This option adds a `socket' match, which can be used to match
1381 packets for which a TCP or UDP socket lookup finds a valid socket. 1381 packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..b0bc475f641e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
33#define mtype_gc IPSET_TOKEN(MTYPE, _gc) 33#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
34#define mtype MTYPE 34#define mtype MTYPE
35 35
36#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) 36#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
37 37
38static void 38static void
39mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 39mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
67 del_timer_sync(&map->gc); 67 del_timer_sync(&map->gc);
68 68
69 ip_set_free(map->members); 69 ip_set_free(map->members);
70 if (set->dsize) { 70 if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
71 if (set->extensions & IPSET_EXT_DESTROY) 71 mtype_ext_cleanup(set);
72 mtype_ext_cleanup(set); 72 ip_set_free(map);
73 ip_set_free(map->extensions);
74 }
75 kfree(map);
76 73
77 set->data = NULL; 74 set->data = NULL;
78} 75}
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
92{ 89{
93 const struct mtype *map = set->data; 90 const struct mtype *map = set->data;
94 struct nlattr *nested; 91 struct nlattr *nested;
92 size_t memsize = sizeof(*map) + map->memsize;
95 93
96 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 94 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
97 if (!nested) 95 if (!nested)
98 goto nla_put_failure; 96 goto nla_put_failure;
99 if (mtype_do_head(skb, map) || 97 if (mtype_do_head(skb, map) ||
100 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 98 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
101 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, 99 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
102 htonl(sizeof(*map) +
103 map->memsize +
104 set->dsize * map->elements)))
105 goto nla_put_failure; 100 goto nla_put_failure;
106 if (unlikely(ip_set_put_flags(skb, set))) 101 if (unlikely(ip_set_put_flags(skb, set)))
107 goto nla_put_failure; 102 goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
41/* Type structure */ 41/* Type structure */
42struct bitmap_ip { 42struct bitmap_ip {
43 void *members; /* the set members */ 43 void *members; /* the set members */
44 void *extensions; /* data extensions */
45 u32 first_ip; /* host byte order, included in range */ 44 u32 first_ip; /* host byte order, included in range */
46 u32 last_ip; /* host byte order, included in range */ 45 u32 last_ip; /* host byte order, included in range */
47 u32 elements; /* number of max elements in the set */ 46 u32 elements; /* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
49 size_t memsize; /* members size */ 48 size_t memsize; /* members size */
50 u8 netmask; /* subnet netmask */ 49 u8 netmask; /* subnet netmask */
51 struct timer_list gc; /* garbage collection */ 50 struct timer_list gc; /* garbage collection */
51 unsigned char extensions[0] /* data extensions */
52 __aligned(__alignof__(u64));
52}; 53};
53 54
54/* ADT structure for generic function args */ 55/* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
224 map->members = ip_set_alloc(map->memsize); 225 map->members = ip_set_alloc(map->memsize);
225 if (!map->members) 226 if (!map->members)
226 return false; 227 return false;
227 if (set->dsize) {
228 map->extensions = ip_set_alloc(set->dsize * elements);
229 if (!map->extensions) {
230 kfree(map->members);
231 return false;
232 }
233 }
234 map->first_ip = first_ip; 228 map->first_ip = first_ip;
235 map->last_ip = last_ip; 229 map->last_ip = last_ip;
236 map->elements = elements; 230 map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
316 pr_debug("hosts %u, elements %llu\n", 310 pr_debug("hosts %u, elements %llu\n",
317 hosts, (unsigned long long)elements); 311 hosts, (unsigned long long)elements);
318 312
319 map = kzalloc(sizeof(*map), GFP_KERNEL); 313 set->dsize = ip_set_elem_len(set, tb, 0, 0);
314 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
320 if (!map) 315 if (!map)
321 return -ENOMEM; 316 return -ENOMEM;
322 317
323 map->memsize = bitmap_bytes(0, elements - 1); 318 map->memsize = bitmap_bytes(0, elements - 1);
324 set->variant = &bitmap_ip; 319 set->variant = &bitmap_ip;
325 set->dsize = ip_set_elem_len(set, tb, 0);
326 if (!init_map_ip(set, map, first_ip, last_ip, 320 if (!init_map_ip(set, map, first_ip, last_ip,
327 elements, hosts, netmask)) { 321 elements, hosts, netmask)) {
328 kfree(map); 322 kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..29dde208381d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
47/* Type structure */ 47/* Type structure */
48struct bitmap_ipmac { 48struct bitmap_ipmac {
49 void *members; /* the set members */ 49 void *members; /* the set members */
50 void *extensions; /* MAC + data extensions */
51 u32 first_ip; /* host byte order, included in range */ 50 u32 first_ip; /* host byte order, included in range */
52 u32 last_ip; /* host byte order, included in range */ 51 u32 last_ip; /* host byte order, included in range */
53 u32 elements; /* number of max elements in the set */ 52 u32 elements; /* number of max elements in the set */
54 size_t memsize; /* members size */ 53 size_t memsize; /* members size */
55 struct timer_list gc; /* garbage collector */ 54 struct timer_list gc; /* garbage collector */
55 unsigned char extensions[0] /* MAC + data extensions */
56 __aligned(__alignof__(u64));
56}; 57};
57 58
58/* ADT structure for generic function args */ 59/* ADT structure for generic function args */
59struct bitmap_ipmac_adt_elem { 60struct bitmap_ipmac_adt_elem {
61 unsigned char ether[ETH_ALEN] __aligned(2);
60 u16 id; 62 u16 id;
61 unsigned char *ether; 63 u16 add_mac;
62}; 64};
63 65
64struct bitmap_ipmac_elem { 66struct bitmap_ipmac_elem {
65 unsigned char ether[ETH_ALEN]; 67 unsigned char ether[ETH_ALEN];
66 unsigned char filled; 68 unsigned char filled;
67} __attribute__ ((aligned)); 69} __aligned(__alignof__(u64));
68 70
69static inline u32 71static inline u32
70ip_to_id(const struct bitmap_ipmac *m, u32 ip) 72ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
72 return ip - m->first_ip; 74 return ip - m->first_ip;
73} 75}
74 76
75static inline struct bitmap_ipmac_elem * 77#define get_elem(extensions, id, dsize) \
76get_elem(void *extensions, u16 id, size_t dsize) 78 (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
77{ 79
78 return (struct bitmap_ipmac_elem *)(extensions + id * dsize); 80#define get_const_elem(extensions, id, dsize) \
79} 81 (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
80 82
81/* Common functions */ 83/* Common functions */
82 84
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
88 90
89 if (!test_bit(e->id, map->members)) 91 if (!test_bit(e->id, map->members))
90 return 0; 92 return 0;
91 elem = get_elem(map->extensions, e->id, dsize); 93 elem = get_const_elem(map->extensions, e->id, dsize);
92 if (elem->filled == MAC_FILLED) 94 if (e->add_mac && elem->filled == MAC_FILLED)
93 return !e->ether || 95 return ether_addr_equal(e->ether, elem->ether);
94 ether_addr_equal(e->ether, elem->ether);
95 /* Trigger kernel to fill out the ethernet address */ 96 /* Trigger kernel to fill out the ethernet address */
96 return -EAGAIN; 97 return -EAGAIN;
97} 98}
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
103 104
104 if (!test_bit(id, map->members)) 105 if (!test_bit(id, map->members))
105 return 0; 106 return 0;
106 elem = get_elem(map->extensions, id, dsize); 107 elem = get_const_elem(map->extensions, id, dsize);
107 /* Timer not started for the incomplete elements */ 108 /* Timer not started for the incomplete elements */
108 return elem->filled == MAC_FILLED; 109 return elem->filled == MAC_FILLED;
109} 110}
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
133 * and we can reuse it later when MAC is filled out, 134 * and we can reuse it later when MAC is filled out,
134 * possibly by the kernel 135 * possibly by the kernel
135 */ 136 */
136 if (e->ether) 137 if (e->add_mac)
137 ip_set_timeout_set(timeout, t); 138 ip_set_timeout_set(timeout, t);
138 else 139 else
139 *timeout = t; 140 *timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
150 elem = get_elem(map->extensions, e->id, dsize); 151 elem = get_elem(map->extensions, e->id, dsize);
151 if (test_bit(e->id, map->members)) { 152 if (test_bit(e->id, map->members)) {
152 if (elem->filled == MAC_FILLED) { 153 if (elem->filled == MAC_FILLED) {
153 if (e->ether && 154 if (e->add_mac &&
154 (flags & IPSET_FLAG_EXIST) && 155 (flags & IPSET_FLAG_EXIST) &&
155 !ether_addr_equal(e->ether, elem->ether)) { 156 !ether_addr_equal(e->ether, elem->ether)) {
156 /* memcpy isn't atomic */ 157 /* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
159 ether_addr_copy(elem->ether, e->ether); 160 ether_addr_copy(elem->ether, e->ether);
160 } 161 }
161 return IPSET_ADD_FAILED; 162 return IPSET_ADD_FAILED;
162 } else if (!e->ether) 163 } else if (!e->add_mac)
163 /* Already added without ethernet address */ 164 /* Already added without ethernet address */
164 return IPSET_ADD_FAILED; 165 return IPSET_ADD_FAILED;
165 /* Fill the MAC address and trigger the timer activation */ 166 /* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
168 ether_addr_copy(elem->ether, e->ether); 169 ether_addr_copy(elem->ether, e->ether);
169 elem->filled = MAC_FILLED; 170 elem->filled = MAC_FILLED;
170 return IPSET_ADD_START_STORED_TIMEOUT; 171 return IPSET_ADD_START_STORED_TIMEOUT;
171 } else if (e->ether) { 172 } else if (e->add_mac) {
172 /* We can store MAC too */ 173 /* We can store MAC too */
173 ether_addr_copy(elem->ether, e->ether); 174 ether_addr_copy(elem->ether, e->ether);
174 elem->filled = MAC_FILLED; 175 elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
191 u32 id, size_t dsize) 192 u32 id, size_t dsize)
192{ 193{
193 const struct bitmap_ipmac_elem *elem = 194 const struct bitmap_ipmac_elem *elem =
194 get_elem(map->extensions, id, dsize); 195 get_const_elem(map->extensions, id, dsize);
195 196
196 return nla_put_ipaddr4(skb, IPSET_ATTR_IP, 197 return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
197 htonl(map->first_ip + id)) || 198 htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
213{ 214{
214 struct bitmap_ipmac *map = set->data; 215 struct bitmap_ipmac *map = set->data;
215 ipset_adtfn adtfn = set->variant->adt[adt]; 216 ipset_adtfn adtfn = set->variant->adt[adt];
216 struct bitmap_ipmac_adt_elem e = { .id = 0 }; 217 struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
217 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 218 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
218 u32 ip; 219 u32 ip;
219 220
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
231 return -EINVAL; 232 return -EINVAL;
232 233
233 e.id = ip_to_id(map, ip); 234 e.id = ip_to_id(map, ip);
234 e.ether = eth_hdr(skb)->h_source; 235 memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
235 236
236 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); 237 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
237} 238}
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
265 return -IPSET_ERR_BITMAP_RANGE; 266 return -IPSET_ERR_BITMAP_RANGE;
266 267
267 e.id = ip_to_id(map, ip); 268 e.id = ip_to_id(map, ip);
268 if (tb[IPSET_ATTR_ETHER]) 269 if (tb[IPSET_ATTR_ETHER]) {
269 e.ether = nla_data(tb[IPSET_ATTR_ETHER]); 270 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
270 else 271 e.add_mac = 1;
271 e.ether = NULL; 272 }
272
273 ret = adtfn(set, &e, &ext, &ext, flags); 273 ret = adtfn(set, &e, &ext, &ext, flags);
274 274
275 return ip_set_eexist(ret, flags) ? 0 : ret; 275 return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
300 map->members = ip_set_alloc(map->memsize); 300 map->members = ip_set_alloc(map->memsize);
301 if (!map->members) 301 if (!map->members)
302 return false; 302 return false;
303 if (set->dsize) {
304 map->extensions = ip_set_alloc(set->dsize * elements);
305 if (!map->extensions) {
306 kfree(map->members);
307 return false;
308 }
309 }
310 map->first_ip = first_ip; 303 map->first_ip = first_ip;
311 map->last_ip = last_ip; 304 map->last_ip = last_ip;
312 map->elements = elements; 305 map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
361 if (elements > IPSET_BITMAP_MAX_RANGE + 1) 354 if (elements > IPSET_BITMAP_MAX_RANGE + 1)
362 return -IPSET_ERR_BITMAP_RANGE_SIZE; 355 return -IPSET_ERR_BITMAP_RANGE_SIZE;
363 356
364 map = kzalloc(sizeof(*map), GFP_KERNEL); 357 set->dsize = ip_set_elem_len(set, tb,
358 sizeof(struct bitmap_ipmac_elem),
359 __alignof__(struct bitmap_ipmac_elem));
360 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
365 if (!map) 361 if (!map)
366 return -ENOMEM; 362 return -ENOMEM;
367 363
368 map->memsize = bitmap_bytes(0, elements - 1); 364 map->memsize = bitmap_bytes(0, elements - 1);
369 set->variant = &bitmap_ipmac; 365 set->variant = &bitmap_ipmac;
370 set->dsize = ip_set_elem_len(set, tb,
371 sizeof(struct bitmap_ipmac_elem));
372 if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { 366 if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
373 kfree(map); 367 kfree(map);
374 return -ENOMEM; 368 return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
35/* Type structure */ 35/* Type structure */
36struct bitmap_port { 36struct bitmap_port {
37 void *members; /* the set members */ 37 void *members; /* the set members */
38 void *extensions; /* data extensions */
39 u16 first_port; /* host byte order, included in range */ 38 u16 first_port; /* host byte order, included in range */
40 u16 last_port; /* host byte order, included in range */ 39 u16 last_port; /* host byte order, included in range */
41 u32 elements; /* number of max elements in the set */ 40 u32 elements; /* number of max elements in the set */
42 size_t memsize; /* members size */ 41 size_t memsize; /* members size */
43 struct timer_list gc; /* garbage collection */ 42 struct timer_list gc; /* garbage collection */
43 unsigned char extensions[0] /* data extensions */
44 __aligned(__alignof__(u64));
44}; 45};
45 46
46/* ADT structure for generic function args */ 47/* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
209 map->members = ip_set_alloc(map->memsize); 210 map->members = ip_set_alloc(map->memsize);
210 if (!map->members) 211 if (!map->members)
211 return false; 212 return false;
212 if (set->dsize) {
213 map->extensions = ip_set_alloc(set->dsize * map->elements);
214 if (!map->extensions) {
215 kfree(map->members);
216 return false;
217 }
218 }
219 map->first_port = first_port; 213 map->first_port = first_port;
220 map->last_port = last_port; 214 map->last_port = last_port;
221 set->timeout = IPSET_NO_TIMEOUT; 215 set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
232{ 226{
233 struct bitmap_port *map; 227 struct bitmap_port *map;
234 u16 first_port, last_port; 228 u16 first_port, last_port;
229 u32 elements;
235 230
236 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 231 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
237 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || 232 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
248 last_port = tmp; 243 last_port = tmp;
249 } 244 }
250 245
251 map = kzalloc(sizeof(*map), GFP_KERNEL); 246 elements = last_port - first_port + 1;
247 set->dsize = ip_set_elem_len(set, tb, 0, 0);
248 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
252 if (!map) 249 if (!map)
253 return -ENOMEM; 250 return -ENOMEM;
254 251
255 map->elements = last_port - first_port + 1; 252 map->elements = elements;
256 map->memsize = bitmap_bytes(0, map->elements); 253 map->memsize = bitmap_bytes(0, map->elements);
257 set->variant = &bitmap_port; 254 set->variant = &bitmap_port;
258 set->dsize = ip_set_elem_len(set, tb, 0);
259 if (!init_map_port(set, map, first_port, last_port)) { 255 if (!init_map_port(set, map, first_port, last_port)) {
260 kfree(map); 256 kfree(map);
261 return -ENOMEM; 257 return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c2634e1..54f3d7cb23e6 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
364} 364}
365 365
366size_t 366size_t
367ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) 367ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
368 size_t align)
368{ 369{
369 enum ip_set_ext_id id; 370 enum ip_set_ext_id id;
370 size_t offset = len;
371 u32 cadt_flags = 0; 371 u32 cadt_flags = 0;
372 372
373 if (tb[IPSET_ATTR_CADT_FLAGS]) 373 if (tb[IPSET_ATTR_CADT_FLAGS])
374 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 374 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
375 if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) 375 if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
376 set->flags |= IPSET_CREATE_FLAG_FORCEADD; 376 set->flags |= IPSET_CREATE_FLAG_FORCEADD;
377 if (!align)
378 align = 1;
377 for (id = 0; id < IPSET_EXT_ID_MAX; id++) { 379 for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
378 if (!add_extension(id, cadt_flags, tb)) 380 if (!add_extension(id, cadt_flags, tb))
379 continue; 381 continue;
380 offset = ALIGN(offset, ip_set_extensions[id].align); 382 len = ALIGN(len, ip_set_extensions[id].align);
381 set->offset[id] = offset; 383 set->offset[id] = len;
382 set->extensions |= ip_set_extensions[id].type; 384 set->extensions |= ip_set_extensions[id].type;
383 offset += ip_set_extensions[id].len; 385 len += ip_set_extensions[id].len;
384 } 386 }
385 return offset; 387 return ALIGN(len, align);
386} 388}
387EXPORT_SYMBOL_GPL(ip_set_elem_len); 389EXPORT_SYMBOL_GPL(ip_set_elem_len);
388 390
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..e5336ab36d67 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
72 DECLARE_BITMAP(used, AHASH_MAX_TUNED); 72 DECLARE_BITMAP(used, AHASH_MAX_TUNED);
73 u8 size; /* size of the array */ 73 u8 size; /* size of the array */
74 u8 pos; /* position of the first free entry */ 74 u8 pos; /* position of the first free entry */
75 unsigned char value[0]; /* the array of the values */ 75 unsigned char value[0] /* the array of the values */
76} __attribute__ ((aligned)); 76 __aligned(__alignof__(u64));
77};
77 78
78/* The hash table: the table size stored here in order to make resizing easy */ 79/* The hash table: the table size stored here in order to make resizing easy */
79struct htable { 80struct htable {
@@ -475,7 +476,7 @@ static void
475mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) 476mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
476{ 477{
477 struct htable *t; 478 struct htable *t;
478 struct hbucket *n; 479 struct hbucket *n, *tmp;
479 struct mtype_elem *data; 480 struct mtype_elem *data;
480 u32 i, j, d; 481 u32 i, j, d;
481#ifdef IP_SET_HASH_WITH_NETS 482#ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
510 } 511 }
511 } 512 }
512 if (d >= AHASH_INIT_SIZE) { 513 if (d >= AHASH_INIT_SIZE) {
513 struct hbucket *tmp = kzalloc(sizeof(*tmp) + 514 if (d >= n->size) {
514 (n->size - AHASH_INIT_SIZE) * dsize, 515 rcu_assign_pointer(hbucket(t, i), NULL);
515 GFP_ATOMIC); 516 kfree_rcu(n, rcu);
517 continue;
518 }
519 tmp = kzalloc(sizeof(*tmp) +
520 (n->size - AHASH_INIT_SIZE) * dsize,
521 GFP_ATOMIC);
516 if (!tmp) 522 if (!tmp)
517 /* Still try to delete expired elements */ 523 /* Still try to delete expired elements */
518 continue; 524 continue;
@@ -522,7 +528,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
522 continue; 528 continue;
523 data = ahash_data(n, j, dsize); 529 data = ahash_data(n, j, dsize);
524 memcpy(tmp->value + d * dsize, data, dsize); 530 memcpy(tmp->value + d * dsize, data, dsize);
525 set_bit(j, tmp->used); 531 set_bit(d, tmp->used);
526 d++; 532 d++;
527 } 533 }
528 tmp->pos = d; 534 tmp->pos = d;
@@ -1323,12 +1329,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1323#endif 1329#endif
1324 set->variant = &IPSET_TOKEN(HTYPE, 4_variant); 1330 set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
1325 set->dsize = ip_set_elem_len(set, tb, 1331 set->dsize = ip_set_elem_len(set, tb,
1326 sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); 1332 sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
1333 __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
1327#ifndef IP_SET_PROTO_UNDEF 1334#ifndef IP_SET_PROTO_UNDEF
1328 } else { 1335 } else {
1329 set->variant = &IPSET_TOKEN(HTYPE, 6_variant); 1336 set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
1330 set->dsize = ip_set_elem_len(set, tb, 1337 set->dsize = ip_set_elem_len(set, tb,
1331 sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); 1338 sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
1339 __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
1332 } 1340 }
1333#endif 1341#endif
1334 if (tb[IPSET_ATTR_TIMEOUT]) { 1342 if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8c90..bbede95c9f68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
31 struct rcu_head rcu; 31 struct rcu_head rcu;
32 struct list_head list; 32 struct list_head list;
33 ip_set_id_t id; 33 ip_set_id_t id;
34}; 34} __aligned(__alignof__(u64));
35 35
36struct set_adt_elem { 36struct set_adt_elem {
37 ip_set_id_t id; 37 ip_set_id_t id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
618 size = IP_SET_LIST_MIN_SIZE; 618 size = IP_SET_LIST_MIN_SIZE;
619 619
620 set->variant = &set_variant; 620 set->variant = &set_variant;
621 set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); 621 set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
622 __alignof__(struct set_elem));
622 if (!init_list_set(net, set, size)) 623 if (!init_list_set(net, set, size))
623 return -ENOMEM; 624 return -ENOMEM;
624 if (tb[IPSET_ATTR_TIMEOUT]) { 625 if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1e24fff53e4b..f57b4dcdb233 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1176,6 +1176,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
1176 struct ip_vs_protocol *pp; 1176 struct ip_vs_protocol *pp;
1177 struct ip_vs_proto_data *pd; 1177 struct ip_vs_proto_data *pd;
1178 struct ip_vs_conn *cp; 1178 struct ip_vs_conn *cp;
1179 struct sock *sk;
1179 1180
1180 EnterFunction(11); 1181 EnterFunction(11);
1181 1182
@@ -1183,13 +1184,12 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
1183 if (skb->ipvs_property) 1184 if (skb->ipvs_property)
1184 return NF_ACCEPT; 1185 return NF_ACCEPT;
1185 1186
1187 sk = skb_to_full_sk(skb);
1186 /* Bad... Do not break raw sockets */ 1188 /* Bad... Do not break raw sockets */
1187 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && 1189 if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
1188 af == AF_INET)) { 1190 af == AF_INET)) {
1189 struct sock *sk = skb->sk;
1190 struct inet_sock *inet = inet_sk(skb->sk);
1191 1191
1192 if (inet && sk->sk_family == PF_INET && inet->nodefrag) 1192 if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
1193 return NF_ACCEPT; 1193 return NF_ACCEPT;
1194 } 1194 }
1195 1195
@@ -1681,6 +1681,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1681 struct ip_vs_conn *cp; 1681 struct ip_vs_conn *cp;
1682 int ret, pkts; 1682 int ret, pkts;
1683 int conn_reuse_mode; 1683 int conn_reuse_mode;
1684 struct sock *sk;
1684 1685
1685 /* Already marked as IPVS request or reply? */ 1686 /* Already marked as IPVS request or reply? */
1686 if (skb->ipvs_property) 1687 if (skb->ipvs_property)
@@ -1708,12 +1709,11 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1708 ip_vs_fill_iph_skb(af, skb, false, &iph); 1709 ip_vs_fill_iph_skb(af, skb, false, &iph);
1709 1710
1710 /* Bad... Do not break raw sockets */ 1711 /* Bad... Do not break raw sockets */
1711 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && 1712 sk = skb_to_full_sk(skb);
1713 if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
1712 af == AF_INET)) { 1714 af == AF_INET)) {
1713 struct sock *sk = skb->sk;
1714 struct inet_sock *inet = inet_sk(skb->sk);
1715 1715
1716 if (inet && sk->sk_family == PF_INET && inet->nodefrag) 1716 if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
1717 return NF_ACCEPT; 1717 return NF_ACCEPT;
1718 } 1718 }
1719 1719
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 97b75f9bfbcd..d43869879fcf 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -55,7 +55,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
55 55
56 rcu_read_lock(); 56 rcu_read_lock();
57 indev = __in_dev_get_rcu(skb->dev); 57 indev = __in_dev_get_rcu(skb->dev);
58 if (indev != NULL) { 58 if (indev && indev->ifa_list) {
59 ifa = indev->ifa_list; 59 ifa = indev->ifa_list;
60 newdst = ifa->ifa_local; 60 newdst = ifa->ifa_local;
61 } 61 }
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index f1d9e887f5b1..46453ab318db 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -492,7 +492,7 @@ static int nfnetlink_bind(struct net *net, int group)
492 type = nfnl_group2type[group]; 492 type = nfnl_group2type[group];
493 493
494 rcu_read_lock(); 494 rcu_read_lock();
495 ss = nfnetlink_get_subsys(type); 495 ss = nfnetlink_get_subsys(type << 8);
496 rcu_read_unlock(); 496 rcu_read_unlock();
497 if (!ss) 497 if (!ss)
498 request_module("nfnetlink-subsys-%d", type); 498 request_module("nfnetlink-subsys-%d", type);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 06eb48fceb42..740cce4685ac 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -825,7 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
825 struct net *net = sock_net(ctnl); 825 struct net *net = sock_net(ctnl);
826 struct nfnl_log_net *log = nfnl_log_pernet(net); 826 struct nfnl_log_net *log = nfnl_log_pernet(net);
827 int ret = 0; 827 int ret = 0;
828 u16 flags; 828 u16 flags = 0;
829 829
830 if (nfula[NFULA_CFG_CMD]) { 830 if (nfula[NFULA_CFG_CMD]) {
831 u_int8_t pf = nfmsg->nfgen_family; 831 u_int8_t pf = nfmsg->nfgen_family;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1ffa..c7808fc19719 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
47 local_bh_enable(); 47 local_bh_enable();
48} 48}
49 49
50static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) 50static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
51 struct nft_counter *total)
51{ 52{
52 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); 53 const struct nft_counter_percpu *cpu_stats;
53 struct nft_counter_percpu *cpu_stats;
54 struct nft_counter total;
55 u64 bytes, packets; 54 u64 bytes, packets;
56 unsigned int seq; 55 unsigned int seq;
57 int cpu; 56 int cpu;
58 57
59 memset(&total, 0, sizeof(total)); 58 memset(total, 0, sizeof(*total));
60 for_each_possible_cpu(cpu) { 59 for_each_possible_cpu(cpu) {
61 cpu_stats = per_cpu_ptr(priv->counter, cpu); 60 cpu_stats = per_cpu_ptr(counter, cpu);
62 do { 61 do {
63 seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); 62 seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
64 bytes = cpu_stats->counter.bytes; 63 bytes = cpu_stats->counter.bytes;
65 packets = cpu_stats->counter.packets; 64 packets = cpu_stats->counter.packets;
66 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); 65 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
67 66
68 total.packets += packets; 67 total->packets += packets;
69 total.bytes += bytes; 68 total->bytes += bytes;
70 } 69 }
70}
71
72static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
73{
74 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
75 struct nft_counter total;
76
77 nft_counter_fetch(priv->counter, &total);
71 78
72 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) || 79 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
73 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets))) 80 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
118 free_percpu(priv->counter); 125 free_percpu(priv->counter);
119} 126}
120 127
128static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
129{
130 struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
131 struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
132 struct nft_counter_percpu __percpu *cpu_stats;
133 struct nft_counter_percpu *this_cpu;
134 struct nft_counter total;
135
136 nft_counter_fetch(priv->counter, &total);
137
138 cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
139 GFP_ATOMIC);
140 if (cpu_stats == NULL)
141 return ENOMEM;
142
143 preempt_disable();
144 this_cpu = this_cpu_ptr(cpu_stats);
145 this_cpu->counter.packets = total.packets;
146 this_cpu->counter.bytes = total.bytes;
147 preempt_enable();
148
149 priv_clone->counter = cpu_stats;
150 return 0;
151}
152
121static struct nft_expr_type nft_counter_type; 153static struct nft_expr_type nft_counter_type;
122static const struct nft_expr_ops nft_counter_ops = { 154static const struct nft_expr_ops nft_counter_ops = {
123 .type = &nft_counter_type, 155 .type = &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
126 .init = nft_counter_init, 158 .init = nft_counter_init,
127 .destroy = nft_counter_destroy, 159 .destroy = nft_counter_destroy,
128 .dump = nft_counter_dump, 160 .dump = nft_counter_dump,
161 .clone = nft_counter_clone,
129}; 162};
130 163
131static struct nft_expr_type nft_counter_type __read_mostly = { 164static struct nft_expr_type nft_counter_type __read_mostly = {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60a59..9dec3bd1b63c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
50 } 50 }
51 51
52 ext = nft_set_elem_ext(set, elem); 52 ext = nft_set_elem_ext(set, elem);
53 if (priv->expr != NULL) 53 if (priv->expr != NULL &&
54 nft_expr_clone(nft_set_ext_expr(ext), priv->expr); 54 nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
55 return NULL;
55 56
56 return elem; 57 return elem;
57} 58}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e4ad2c24bc41..9dfaf4d55ee0 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -31,6 +31,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
31 const struct nft_meta *priv = nft_expr_priv(expr); 31 const struct nft_meta *priv = nft_expr_priv(expr);
32 const struct sk_buff *skb = pkt->skb; 32 const struct sk_buff *skb = pkt->skb;
33 const struct net_device *in = pkt->in, *out = pkt->out; 33 const struct net_device *in = pkt->in, *out = pkt->out;
34 struct sock *sk;
34 u32 *dest = &regs->data[priv->dreg]; 35 u32 *dest = &regs->data[priv->dreg];
35 36
36 switch (priv->key) { 37 switch (priv->key) {
@@ -86,33 +87,35 @@ void nft_meta_get_eval(const struct nft_expr *expr,
86 *(u16 *)dest = out->type; 87 *(u16 *)dest = out->type;
87 break; 88 break;
88 case NFT_META_SKUID: 89 case NFT_META_SKUID:
89 if (skb->sk == NULL || !sk_fullsock(skb->sk)) 90 sk = skb_to_full_sk(skb);
91 if (!sk || !sk_fullsock(sk))
90 goto err; 92 goto err;
91 93
92 read_lock_bh(&skb->sk->sk_callback_lock); 94 read_lock_bh(&sk->sk_callback_lock);
93 if (skb->sk->sk_socket == NULL || 95 if (sk->sk_socket == NULL ||
94 skb->sk->sk_socket->file == NULL) { 96 sk->sk_socket->file == NULL) {
95 read_unlock_bh(&skb->sk->sk_callback_lock); 97 read_unlock_bh(&sk->sk_callback_lock);
96 goto err; 98 goto err;
97 } 99 }
98 100
99 *dest = from_kuid_munged(&init_user_ns, 101 *dest = from_kuid_munged(&init_user_ns,
100 skb->sk->sk_socket->file->f_cred->fsuid); 102 sk->sk_socket->file->f_cred->fsuid);
101 read_unlock_bh(&skb->sk->sk_callback_lock); 103 read_unlock_bh(&sk->sk_callback_lock);
102 break; 104 break;
103 case NFT_META_SKGID: 105 case NFT_META_SKGID:
104 if (skb->sk == NULL || !sk_fullsock(skb->sk)) 106 sk = skb_to_full_sk(skb);
107 if (!sk || !sk_fullsock(sk))
105 goto err; 108 goto err;
106 109
107 read_lock_bh(&skb->sk->sk_callback_lock); 110 read_lock_bh(&sk->sk_callback_lock);
108 if (skb->sk->sk_socket == NULL || 111 if (sk->sk_socket == NULL ||
109 skb->sk->sk_socket->file == NULL) { 112 sk->sk_socket->file == NULL) {
110 read_unlock_bh(&skb->sk->sk_callback_lock); 113 read_unlock_bh(&sk->sk_callback_lock);
111 goto err; 114 goto err;
112 } 115 }
113 *dest = from_kgid_munged(&init_user_ns, 116 *dest = from_kgid_munged(&init_user_ns,
114 skb->sk->sk_socket->file->f_cred->fsgid); 117 sk->sk_socket->file->f_cred->fsgid);
115 read_unlock_bh(&skb->sk->sk_callback_lock); 118 read_unlock_bh(&sk->sk_callback_lock);
116 break; 119 break;
117#ifdef CONFIG_IP_ROUTE_CLASSID 120#ifdef CONFIG_IP_ROUTE_CLASSID
118 case NFT_META_RTCLASSID: { 121 case NFT_META_RTCLASSID: {
@@ -168,9 +171,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
168 break; 171 break;
169#ifdef CONFIG_CGROUP_NET_CLASSID 172#ifdef CONFIG_CGROUP_NET_CLASSID
170 case NFT_META_CGROUP: 173 case NFT_META_CGROUP:
171 if (skb->sk == NULL || !sk_fullsock(skb->sk)) 174 sk = skb_to_full_sk(skb);
175 if (!sk || !sk_fullsock(sk))
172 goto err; 176 goto err;
173 *dest = skb->sk->sk_classid; 177 *dest = sk->sk_classid;
174 break; 178 break;
175#endif 179#endif
176 default: 180 default:
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 899b06115fc5..3eff7b67cdf2 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -31,8 +31,9 @@ static unsigned int
31tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) 31tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
32{ 32{
33 const struct xt_tee_tginfo *info = par->targinfo; 33 const struct xt_tee_tginfo *info = par->targinfo;
34 int oif = info->priv ? info->priv->oif : 0;
34 35
35 nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, info->priv->oif); 36 nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif);
36 37
37 return XT_CONTINUE; 38 return XT_CONTINUE;
38} 39}
@@ -42,8 +43,9 @@ static unsigned int
42tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) 43tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
43{ 44{
44 const struct xt_tee_tginfo *info = par->targinfo; 45 const struct xt_tee_tginfo *info = par->targinfo;
46 int oif = info->priv ? info->priv->oif : 0;
45 47
46 nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, info->priv->oif); 48 nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif);
47 49
48 return XT_CONTINUE; 50 return XT_CONTINUE;
49} 51}
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index ca2e577ed8ac..1302b475abcb 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -14,6 +14,7 @@
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/file.h> 15#include <linux/file.h>
16#include <net/sock.h> 16#include <net/sock.h>
17#include <net/inet_sock.h>
17#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
18#include <linux/netfilter/xt_owner.h> 19#include <linux/netfilter/xt_owner.h>
19 20
@@ -33,8 +34,9 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
33{ 34{
34 const struct xt_owner_match_info *info = par->matchinfo; 35 const struct xt_owner_match_info *info = par->matchinfo;
35 const struct file *filp; 36 const struct file *filp;
37 struct sock *sk = skb_to_full_sk(skb);
36 38
37 if (skb->sk == NULL || skb->sk->sk_socket == NULL) 39 if (sk == NULL || sk->sk_socket == NULL)
38 return (info->match ^ info->invert) == 0; 40 return (info->match ^ info->invert) == 0;
39 else if (info->match & info->invert & XT_OWNER_SOCKET) 41 else if (info->match & info->invert & XT_OWNER_SOCKET)
40 /* 42 /*
@@ -43,7 +45,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
43 */ 45 */
44 return false; 46 return false;
45 47
46 filp = skb->sk->sk_socket->file; 48 filp = sk->sk_socket->file;
47 if (filp == NULL) 49 if (filp == NULL)
48 return ((info->match ^ info->invert) & 50 return ((info->match ^ info->invert) &
49 (XT_OWNER_UID | XT_OWNER_GID)) == 0; 51 (XT_OWNER_UID | XT_OWNER_GID)) == 0;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 691660b9b7ef..1cf928fb573e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
1741 kfree_rcu(po->rollover, rcu); 1741 kfree_rcu(po->rollover, rcu);
1742} 1742}
1743 1743
1744static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
1745 struct sk_buff *skb)
1746{
1747 /* Earlier code assumed this would be a VLAN pkt, double-check
1748 * this now that we have the actual packet in hand. We can only
1749 * do this check on Ethernet devices.
1750 */
1751 if (unlikely(dev->type != ARPHRD_ETHER))
1752 return false;
1753
1754 skb_reset_mac_header(skb);
1755 return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
1756}
1757
1744static const struct proto_ops packet_ops; 1758static const struct proto_ops packet_ops;
1745 1759
1746static const struct proto_ops packet_ops_spkt; 1760static const struct proto_ops packet_ops_spkt;
@@ -1902,18 +1916,10 @@ retry:
1902 goto retry; 1916 goto retry;
1903 } 1917 }
1904 1918
1905 if (len > (dev->mtu + dev->hard_header_len + extra_len)) { 1919 if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1906 /* Earlier code assumed this would be a VLAN pkt, 1920 !packet_extra_vlan_len_allowed(dev, skb)) {
1907 * double-check this now that we have the actual 1921 err = -EMSGSIZE;
1908 * packet in hand. 1922 goto out_unlock;
1909 */
1910 struct ethhdr *ehdr;
1911 skb_reset_mac_header(skb);
1912 ehdr = eth_hdr(skb);
1913 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
1914 err = -EMSGSIZE;
1915 goto out_unlock;
1916 }
1917 } 1923 }
1918 1924
1919 skb->protocol = proto; 1925 skb->protocol = proto;
@@ -2332,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
2332 return false; 2338 return false;
2333} 2339}
2334 2340
2341static void tpacket_set_protocol(const struct net_device *dev,
2342 struct sk_buff *skb)
2343{
2344 if (dev->type == ARPHRD_ETHER) {
2345 skb_reset_mac_header(skb);
2346 skb->protocol = eth_hdr(skb)->h_proto;
2347 }
2348}
2349
2335static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 2350static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2336 void *frame, struct net_device *dev, int size_max, 2351 void *frame, struct net_device *dev, int size_max,
2337 __be16 proto, unsigned char *addr, int hlen) 2352 __be16 proto, unsigned char *addr, int hlen)
@@ -2368,8 +2383,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2368 skb_reserve(skb, hlen); 2383 skb_reserve(skb, hlen);
2369 skb_reset_network_header(skb); 2384 skb_reset_network_header(skb);
2370 2385
2371 if (!packet_use_direct_xmit(po))
2372 skb_probe_transport_header(skb, 0);
2373 if (unlikely(po->tp_tx_has_off)) { 2386 if (unlikely(po->tp_tx_has_off)) {
2374 int off_min, off_max, off; 2387 int off_min, off_max, off;
2375 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); 2388 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2415,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2415 dev->hard_header_len); 2428 dev->hard_header_len);
2416 if (unlikely(err)) 2429 if (unlikely(err))
2417 return err; 2430 return err;
2431 if (!skb->protocol)
2432 tpacket_set_protocol(dev, skb);
2418 2433
2419 data += dev->hard_header_len; 2434 data += dev->hard_header_len;
2420 to_write -= dev->hard_header_len; 2435 to_write -= dev->hard_header_len;
@@ -2449,6 +2464,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2449 len = ((to_write > len_max) ? len_max : to_write); 2464 len = ((to_write > len_max) ? len_max : to_write);
2450 } 2465 }
2451 2466
2467 skb_probe_transport_header(skb, 0);
2468
2452 return tp_len; 2469 return tp_len;
2453} 2470}
2454 2471
@@ -2493,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2493 if (unlikely(!(dev->flags & IFF_UP))) 2510 if (unlikely(!(dev->flags & IFF_UP)))
2494 goto out_put; 2511 goto out_put;
2495 2512
2496 reserve = dev->hard_header_len + VLAN_HLEN; 2513 if (po->sk.sk_socket->type == SOCK_RAW)
2514 reserve = dev->hard_header_len;
2497 size_max = po->tx_ring.frame_size 2515 size_max = po->tx_ring.frame_size
2498 - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); 2516 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2499 2517
2500 if (size_max > dev->mtu + reserve) 2518 if (size_max > dev->mtu + reserve + VLAN_HLEN)
2501 size_max = dev->mtu + reserve; 2519 size_max = dev->mtu + reserve + VLAN_HLEN;
2502 2520
2503 do { 2521 do {
2504 ph = packet_current_frame(po, &po->tx_ring, 2522 ph = packet_current_frame(po, &po->tx_ring,
@@ -2525,18 +2543,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2525 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, 2543 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
2526 addr, hlen); 2544 addr, hlen);
2527 if (likely(tp_len >= 0) && 2545 if (likely(tp_len >= 0) &&
2528 tp_len > dev->mtu + dev->hard_header_len) { 2546 tp_len > dev->mtu + reserve &&
2529 struct ethhdr *ehdr; 2547 !packet_extra_vlan_len_allowed(dev, skb))
2530 /* Earlier code assumed this would be a VLAN pkt, 2548 tp_len = -EMSGSIZE;
2531 * double-check this now that we have the actual
2532 * packet in hand.
2533 */
2534 2549
2535 skb_reset_mac_header(skb);
2536 ehdr = eth_hdr(skb);
2537 if (ehdr->h_proto != htons(ETH_P_8021Q))
2538 tp_len = -EMSGSIZE;
2539 }
2540 if (unlikely(tp_len < 0)) { 2550 if (unlikely(tp_len < 0)) {
2541 if (po->tp_loss) { 2551 if (po->tp_loss) {
2542 __packet_set_status(po, ph, 2552 __packet_set_status(po, ph,
@@ -2765,18 +2775,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2765 2775
2766 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 2776 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2767 2777
2768 if (!gso_type && (len > dev->mtu + reserve + extra_len)) { 2778 if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
2769 /* Earlier code assumed this would be a VLAN pkt, 2779 !packet_extra_vlan_len_allowed(dev, skb)) {
2770 * double-check this now that we have the actual 2780 err = -EMSGSIZE;
2771 * packet in hand. 2781 goto out_free;
2772 */
2773 struct ethhdr *ehdr;
2774 skb_reset_mac_header(skb);
2775 ehdr = eth_hdr(skb);
2776 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
2777 err = -EMSGSIZE;
2778 goto out_free;
2779 }
2780 } 2782 }
2781 2783
2782 skb->protocol = proto; 2784 skb->protocol = proto;
@@ -2807,8 +2809,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2807 len += vnet_hdr_len; 2809 len += vnet_hdr_len;
2808 } 2810 }
2809 2811
2810 if (!packet_use_direct_xmit(po)) 2812 skb_probe_transport_header(skb, reserve);
2811 skb_probe_transport_header(skb, reserve); 2813
2812 if (unlikely(extra_len == 4)) 2814 if (unlikely(extra_len == 4))
2813 skb->no_fcs = 1; 2815 skb->no_fcs = 1;
2814 2816
@@ -2911,22 +2913,40 @@ static int packet_release(struct socket *sock)
2911 * Attach a packet hook. 2913 * Attach a packet hook.
2912 */ 2914 */
2913 2915
2914static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) 2916static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
2917 __be16 proto)
2915{ 2918{
2916 struct packet_sock *po = pkt_sk(sk); 2919 struct packet_sock *po = pkt_sk(sk);
2917 struct net_device *dev_curr; 2920 struct net_device *dev_curr;
2918 __be16 proto_curr; 2921 __be16 proto_curr;
2919 bool need_rehook; 2922 bool need_rehook;
2923 struct net_device *dev = NULL;
2924 int ret = 0;
2925 bool unlisted = false;
2920 2926
2921 if (po->fanout) { 2927 if (po->fanout)
2922 if (dev)
2923 dev_put(dev);
2924
2925 return -EINVAL; 2928 return -EINVAL;
2926 }
2927 2929
2928 lock_sock(sk); 2930 lock_sock(sk);
2929 spin_lock(&po->bind_lock); 2931 spin_lock(&po->bind_lock);
2932 rcu_read_lock();
2933
2934 if (name) {
2935 dev = dev_get_by_name_rcu(sock_net(sk), name);
2936 if (!dev) {
2937 ret = -ENODEV;
2938 goto out_unlock;
2939 }
2940 } else if (ifindex) {
2941 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
2942 if (!dev) {
2943 ret = -ENODEV;
2944 goto out_unlock;
2945 }
2946 }
2947
2948 if (dev)
2949 dev_hold(dev);
2930 2950
2931 proto_curr = po->prot_hook.type; 2951 proto_curr = po->prot_hook.type;
2932 dev_curr = po->prot_hook.dev; 2952 dev_curr = po->prot_hook.dev;
@@ -2934,14 +2954,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
2934 need_rehook = proto_curr != proto || dev_curr != dev; 2954 need_rehook = proto_curr != proto || dev_curr != dev;
2935 2955
2936 if (need_rehook) { 2956 if (need_rehook) {
2937 unregister_prot_hook(sk, true); 2957 if (po->running) {
2958 rcu_read_unlock();
2959 __unregister_prot_hook(sk, true);
2960 rcu_read_lock();
2961 dev_curr = po->prot_hook.dev;
2962 if (dev)
2963 unlisted = !dev_get_by_index_rcu(sock_net(sk),
2964 dev->ifindex);
2965 }
2938 2966
2939 po->num = proto; 2967 po->num = proto;
2940 po->prot_hook.type = proto; 2968 po->prot_hook.type = proto;
2941 po->prot_hook.dev = dev;
2942 2969
2943 po->ifindex = dev ? dev->ifindex : 0; 2970 if (unlikely(unlisted)) {
2944 packet_cached_dev_assign(po, dev); 2971 dev_put(dev);
2972 po->prot_hook.dev = NULL;
2973 po->ifindex = -1;
2974 packet_cached_dev_reset(po);
2975 } else {
2976 po->prot_hook.dev = dev;
2977 po->ifindex = dev ? dev->ifindex : 0;
2978 packet_cached_dev_assign(po, dev);
2979 }
2945 } 2980 }
2946 if (dev_curr) 2981 if (dev_curr)
2947 dev_put(dev_curr); 2982 dev_put(dev_curr);
@@ -2949,7 +2984,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
2949 if (proto == 0 || !need_rehook) 2984 if (proto == 0 || !need_rehook)
2950 goto out_unlock; 2985 goto out_unlock;
2951 2986
2952 if (!dev || (dev->flags & IFF_UP)) { 2987 if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
2953 register_prot_hook(sk); 2988 register_prot_hook(sk);
2954 } else { 2989 } else {
2955 sk->sk_err = ENETDOWN; 2990 sk->sk_err = ENETDOWN;
@@ -2958,9 +2993,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
2958 } 2993 }
2959 2994
2960out_unlock: 2995out_unlock:
2996 rcu_read_unlock();
2961 spin_unlock(&po->bind_lock); 2997 spin_unlock(&po->bind_lock);
2962 release_sock(sk); 2998 release_sock(sk);
2963 return 0; 2999 return ret;
2964} 3000}
2965 3001
2966/* 3002/*
@@ -2972,8 +3008,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
2972{ 3008{
2973 struct sock *sk = sock->sk; 3009 struct sock *sk = sock->sk;
2974 char name[15]; 3010 char name[15];
2975 struct net_device *dev;
2976 int err = -ENODEV;
2977 3011
2978 /* 3012 /*
2979 * Check legality 3013 * Check legality
@@ -2983,19 +3017,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
2983 return -EINVAL; 3017 return -EINVAL;
2984 strlcpy(name, uaddr->sa_data, sizeof(name)); 3018 strlcpy(name, uaddr->sa_data, sizeof(name));
2985 3019
2986 dev = dev_get_by_name(sock_net(sk), name); 3020 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
2987 if (dev)
2988 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
2989 return err;
2990} 3021}
2991 3022
2992static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 3023static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
2993{ 3024{
2994 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; 3025 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
2995 struct sock *sk = sock->sk; 3026 struct sock *sk = sock->sk;
2996 struct net_device *dev = NULL;
2997 int err;
2998
2999 3027
3000 /* 3028 /*
3001 * Check legality 3029 * Check legality
@@ -3006,16 +3034,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
3006 if (sll->sll_family != AF_PACKET) 3034 if (sll->sll_family != AF_PACKET)
3007 return -EINVAL; 3035 return -EINVAL;
3008 3036
3009 if (sll->sll_ifindex) { 3037 return packet_do_bind(sk, NULL, sll->sll_ifindex,
3010 err = -ENODEV; 3038 sll->sll_protocol ? : pkt_sk(sk)->num);
3011 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
3012 if (dev == NULL)
3013 goto out;
3014 }
3015 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
3016
3017out:
3018 return err;
3019} 3039}
3020 3040
3021static struct proto packet_proto = { 3041static struct proto packet_proto = {
@@ -4089,7 +4109,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4089 err = -EINVAL; 4109 err = -EINVAL;
4090 if (unlikely((int)req->tp_block_size <= 0)) 4110 if (unlikely((int)req->tp_block_size <= 0))
4091 goto out; 4111 goto out;
4092 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) 4112 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4093 goto out; 4113 goto out;
4094 if (po->tp_version >= TPACKET_V3 && 4114 if (po->tp_version >= TPACKET_V3 &&
4095 (int)(req->tp_block_size - 4115 (int)(req->tp_block_size -
@@ -4101,8 +4121,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4101 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) 4121 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4102 goto out; 4122 goto out;
4103 4123
4104 rb->frames_per_block = req->tp_block_size/req->tp_frame_size; 4124 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4105 if (unlikely(rb->frames_per_block <= 0)) 4125 if (unlikely(rb->frames_per_block == 0))
4106 goto out; 4126 goto out;
4107 if (unlikely((rb->frames_per_block * req->tp_block_nr) != 4127 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4108 req->tp_frame_nr)) 4128 req->tp_frame_nr))
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 536838b657bf..fbfec6a18839 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -22,6 +22,7 @@
22#include <linux/if_vlan.h> 22#include <linux/if_vlan.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <net/inet_sock.h>
25 26
26#include <net/pkt_cls.h> 27#include <net/pkt_cls.h>
27#include <net/ip.h> 28#include <net/ip.h>
@@ -197,8 +198,11 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb)
197 198
198static u32 flow_get_skuid(const struct sk_buff *skb) 199static u32 flow_get_skuid(const struct sk_buff *skb)
199{ 200{
200 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { 201 struct sock *sk = skb_to_full_sk(skb);
201 kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid; 202
203 if (sk && sk->sk_socket && sk->sk_socket->file) {
204 kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;
205
202 return from_kuid(&init_user_ns, skuid); 206 return from_kuid(&init_user_ns, skuid);
203 } 207 }
204 return 0; 208 return 0;
@@ -206,8 +210,11 @@ static u32 flow_get_skuid(const struct sk_buff *skb)
206 210
207static u32 flow_get_skgid(const struct sk_buff *skb) 211static u32 flow_get_skgid(const struct sk_buff *skb)
208{ 212{
209 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { 213 struct sock *sk = skb_to_full_sk(skb);
210 kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid; 214
215 if (sk && sk->sk_socket && sk->sk_socket->file) {
216 kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;
217
211 return from_kgid(&init_user_ns, skgid); 218 return from_kgid(&init_user_ns, skgid);
212 } 219 }
213 return 0; 220 return 0;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index b5294ce20cd4..f2aabc0089da 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -343,119 +343,145 @@ META_COLLECTOR(int_sk_refcnt)
343 343
344META_COLLECTOR(int_sk_rcvbuf) 344META_COLLECTOR(int_sk_rcvbuf)
345{ 345{
346 if (skip_nonlocal(skb)) { 346 const struct sock *sk = skb_to_full_sk(skb);
347
348 if (!sk) {
347 *err = -1; 349 *err = -1;
348 return; 350 return;
349 } 351 }
350 dst->value = skb->sk->sk_rcvbuf; 352 dst->value = sk->sk_rcvbuf;
351} 353}
352 354
353META_COLLECTOR(int_sk_shutdown) 355META_COLLECTOR(int_sk_shutdown)
354{ 356{
355 if (skip_nonlocal(skb)) { 357 const struct sock *sk = skb_to_full_sk(skb);
358
359 if (!sk) {
356 *err = -1; 360 *err = -1;
357 return; 361 return;
358 } 362 }
359 dst->value = skb->sk->sk_shutdown; 363 dst->value = sk->sk_shutdown;
360} 364}
361 365
362META_COLLECTOR(int_sk_proto) 366META_COLLECTOR(int_sk_proto)
363{ 367{
364 if (skip_nonlocal(skb)) { 368 const struct sock *sk = skb_to_full_sk(skb);
369
370 if (!sk) {
365 *err = -1; 371 *err = -1;
366 return; 372 return;
367 } 373 }
368 dst->value = skb->sk->sk_protocol; 374 dst->value = sk->sk_protocol;
369} 375}
370 376
371META_COLLECTOR(int_sk_type) 377META_COLLECTOR(int_sk_type)
372{ 378{
373 if (skip_nonlocal(skb)) { 379 const struct sock *sk = skb_to_full_sk(skb);
380
381 if (!sk) {
374 *err = -1; 382 *err = -1;
375 return; 383 return;
376 } 384 }
377 dst->value = skb->sk->sk_type; 385 dst->value = sk->sk_type;
378} 386}
379 387
380META_COLLECTOR(int_sk_rmem_alloc) 388META_COLLECTOR(int_sk_rmem_alloc)
381{ 389{
382 if (skip_nonlocal(skb)) { 390 const struct sock *sk = skb_to_full_sk(skb);
391
392 if (!sk) {
383 *err = -1; 393 *err = -1;
384 return; 394 return;
385 } 395 }
386 dst->value = sk_rmem_alloc_get(skb->sk); 396 dst->value = sk_rmem_alloc_get(sk);
387} 397}
388 398
389META_COLLECTOR(int_sk_wmem_alloc) 399META_COLLECTOR(int_sk_wmem_alloc)
390{ 400{
391 if (skip_nonlocal(skb)) { 401 const struct sock *sk = skb_to_full_sk(skb);
402
403 if (!sk) {
392 *err = -1; 404 *err = -1;
393 return; 405 return;
394 } 406 }
395 dst->value = sk_wmem_alloc_get(skb->sk); 407 dst->value = sk_wmem_alloc_get(sk);
396} 408}
397 409
398META_COLLECTOR(int_sk_omem_alloc) 410META_COLLECTOR(int_sk_omem_alloc)
399{ 411{
400 if (skip_nonlocal(skb)) { 412 const struct sock *sk = skb_to_full_sk(skb);
413
414 if (!sk) {
401 *err = -1; 415 *err = -1;
402 return; 416 return;
403 } 417 }
404 dst->value = atomic_read(&skb->sk->sk_omem_alloc); 418 dst->value = atomic_read(&sk->sk_omem_alloc);
405} 419}
406 420
407META_COLLECTOR(int_sk_rcv_qlen) 421META_COLLECTOR(int_sk_rcv_qlen)
408{ 422{
409 if (skip_nonlocal(skb)) { 423 const struct sock *sk = skb_to_full_sk(skb);
424
425 if (!sk) {
410 *err = -1; 426 *err = -1;
411 return; 427 return;
412 } 428 }
413 dst->value = skb->sk->sk_receive_queue.qlen; 429 dst->value = sk->sk_receive_queue.qlen;
414} 430}
415 431
416META_COLLECTOR(int_sk_snd_qlen) 432META_COLLECTOR(int_sk_snd_qlen)
417{ 433{
418 if (skip_nonlocal(skb)) { 434 const struct sock *sk = skb_to_full_sk(skb);
435
436 if (!sk) {
419 *err = -1; 437 *err = -1;
420 return; 438 return;
421 } 439 }
422 dst->value = skb->sk->sk_write_queue.qlen; 440 dst->value = sk->sk_write_queue.qlen;
423} 441}
424 442
425META_COLLECTOR(int_sk_wmem_queued) 443META_COLLECTOR(int_sk_wmem_queued)
426{ 444{
427 if (skip_nonlocal(skb)) { 445 const struct sock *sk = skb_to_full_sk(skb);
446
447 if (!sk) {
428 *err = -1; 448 *err = -1;
429 return; 449 return;
430 } 450 }
431 dst->value = skb->sk->sk_wmem_queued; 451 dst->value = sk->sk_wmem_queued;
432} 452}
433 453
434META_COLLECTOR(int_sk_fwd_alloc) 454META_COLLECTOR(int_sk_fwd_alloc)
435{ 455{
436 if (skip_nonlocal(skb)) { 456 const struct sock *sk = skb_to_full_sk(skb);
457
458 if (!sk) {
437 *err = -1; 459 *err = -1;
438 return; 460 return;
439 } 461 }
440 dst->value = skb->sk->sk_forward_alloc; 462 dst->value = sk->sk_forward_alloc;
441} 463}
442 464
443META_COLLECTOR(int_sk_sndbuf) 465META_COLLECTOR(int_sk_sndbuf)
444{ 466{
445 if (skip_nonlocal(skb)) { 467 const struct sock *sk = skb_to_full_sk(skb);
468
469 if (!sk) {
446 *err = -1; 470 *err = -1;
447 return; 471 return;
448 } 472 }
449 dst->value = skb->sk->sk_sndbuf; 473 dst->value = sk->sk_sndbuf;
450} 474}
451 475
452META_COLLECTOR(int_sk_alloc) 476META_COLLECTOR(int_sk_alloc)
453{ 477{
454 if (skip_nonlocal(skb)) { 478 const struct sock *sk = skb_to_full_sk(skb);
479
480 if (!sk) {
455 *err = -1; 481 *err = -1;
456 return; 482 return;
457 } 483 }
458 dst->value = (__force int) skb->sk->sk_allocation; 484 dst->value = (__force int) sk->sk_allocation;
459} 485}
460 486
461META_COLLECTOR(int_sk_hash) 487META_COLLECTOR(int_sk_hash)
@@ -469,92 +495,112 @@ META_COLLECTOR(int_sk_hash)
469 495
470META_COLLECTOR(int_sk_lingertime) 496META_COLLECTOR(int_sk_lingertime)
471{ 497{
472 if (skip_nonlocal(skb)) { 498 const struct sock *sk = skb_to_full_sk(skb);
499
500 if (!sk) {
473 *err = -1; 501 *err = -1;
474 return; 502 return;
475 } 503 }
476 dst->value = skb->sk->sk_lingertime / HZ; 504 dst->value = sk->sk_lingertime / HZ;
477} 505}
478 506
479META_COLLECTOR(int_sk_err_qlen) 507META_COLLECTOR(int_sk_err_qlen)
480{ 508{
481 if (skip_nonlocal(skb)) { 509 const struct sock *sk = skb_to_full_sk(skb);
510
511 if (!sk) {
482 *err = -1; 512 *err = -1;
483 return; 513 return;
484 } 514 }
485 dst->value = skb->sk->sk_error_queue.qlen; 515 dst->value = sk->sk_error_queue.qlen;
486} 516}
487 517
488META_COLLECTOR(int_sk_ack_bl) 518META_COLLECTOR(int_sk_ack_bl)
489{ 519{
490 if (skip_nonlocal(skb)) { 520 const struct sock *sk = skb_to_full_sk(skb);
521
522 if (!sk) {
491 *err = -1; 523 *err = -1;
492 return; 524 return;
493 } 525 }
494 dst->value = skb->sk->sk_ack_backlog; 526 dst->value = sk->sk_ack_backlog;
495} 527}
496 528
497META_COLLECTOR(int_sk_max_ack_bl) 529META_COLLECTOR(int_sk_max_ack_bl)
498{ 530{
499 if (skip_nonlocal(skb)) { 531 const struct sock *sk = skb_to_full_sk(skb);
532
533 if (!sk) {
500 *err = -1; 534 *err = -1;
501 return; 535 return;
502 } 536 }
503 dst->value = skb->sk->sk_max_ack_backlog; 537 dst->value = sk->sk_max_ack_backlog;
504} 538}
505 539
506META_COLLECTOR(int_sk_prio) 540META_COLLECTOR(int_sk_prio)
507{ 541{
508 if (skip_nonlocal(skb)) { 542 const struct sock *sk = skb_to_full_sk(skb);
543
544 if (!sk) {
509 *err = -1; 545 *err = -1;
510 return; 546 return;
511 } 547 }
512 dst->value = skb->sk->sk_priority; 548 dst->value = sk->sk_priority;
513} 549}
514 550
515META_COLLECTOR(int_sk_rcvlowat) 551META_COLLECTOR(int_sk_rcvlowat)
516{ 552{
517 if (skip_nonlocal(skb)) { 553 const struct sock *sk = skb_to_full_sk(skb);
554
555 if (!sk) {
518 *err = -1; 556 *err = -1;
519 return; 557 return;
520 } 558 }
521 dst->value = skb->sk->sk_rcvlowat; 559 dst->value = sk->sk_rcvlowat;
522} 560}
523 561
524META_COLLECTOR(int_sk_rcvtimeo) 562META_COLLECTOR(int_sk_rcvtimeo)
525{ 563{
526 if (skip_nonlocal(skb)) { 564 const struct sock *sk = skb_to_full_sk(skb);
565
566 if (!sk) {
527 *err = -1; 567 *err = -1;
528 return; 568 return;
529 } 569 }
530 dst->value = skb->sk->sk_rcvtimeo / HZ; 570 dst->value = sk->sk_rcvtimeo / HZ;
531} 571}
532 572
533META_COLLECTOR(int_sk_sndtimeo) 573META_COLLECTOR(int_sk_sndtimeo)
534{ 574{
535 if (skip_nonlocal(skb)) { 575 const struct sock *sk = skb_to_full_sk(skb);
576
577 if (!sk) {
536 *err = -1; 578 *err = -1;
537 return; 579 return;
538 } 580 }
539 dst->value = skb->sk->sk_sndtimeo / HZ; 581 dst->value = sk->sk_sndtimeo / HZ;
540} 582}
541 583
542META_COLLECTOR(int_sk_sendmsg_off) 584META_COLLECTOR(int_sk_sendmsg_off)
543{ 585{
544 if (skip_nonlocal(skb)) { 586 const struct sock *sk = skb_to_full_sk(skb);
587
588 if (!sk) {
545 *err = -1; 589 *err = -1;
546 return; 590 return;
547 } 591 }
548 dst->value = skb->sk->sk_frag.offset; 592 dst->value = sk->sk_frag.offset;
549} 593}
550 594
551META_COLLECTOR(int_sk_write_pend) 595META_COLLECTOR(int_sk_write_pend)
552{ 596{
553 if (skip_nonlocal(skb)) { 597 const struct sock *sk = skb_to_full_sk(skb);
598
599 if (!sk) {
554 *err = -1; 600 *err = -1;
555 return; 601 return;
556 } 602 }
557 dst->value = skb->sk->sk_write_pending; 603 dst->value = sk->sk_write_pending;
558} 604}
559 605
560/************************************************************************** 606/**************************************************************************
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e1..1543e39f47c3 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
809 if (!has_sha1) 809 if (!has_sha1)
810 return -EINVAL; 810 return -EINVAL;
811 811
812 memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0], 812 for (i = 0; i < hmacs->shmac_num_idents; i++)
813 hmacs->shmac_num_idents * sizeof(__u16)); 813 ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
814 ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + 814 ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
815 hmacs->shmac_num_idents * sizeof(__u16)); 815 hmacs->shmac_num_idents * sizeof(__u16));
816 return 0; 816 return 0;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index dace13d7638e..799e65b944b9 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1411,17 +1411,16 @@ gss_key_timeout(struct rpc_cred *rc)
1411{ 1411{
1412 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); 1412 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
1413 struct gss_cl_ctx *ctx; 1413 struct gss_cl_ctx *ctx;
1414 unsigned long now = jiffies; 1414 unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ);
1415 unsigned long expire; 1415 int ret = 0;
1416 1416
1417 rcu_read_lock(); 1417 rcu_read_lock();
1418 ctx = rcu_dereference(gss_cred->gc_ctx); 1418 ctx = rcu_dereference(gss_cred->gc_ctx);
1419 if (ctx) 1419 if (!ctx || time_after(timeout, ctx->gc_expiry))
1420 expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ); 1420 ret = -EACCES;
1421 rcu_read_unlock(); 1421 rcu_read_unlock();
1422 if (!ctx || time_after(now, expire)) 1422
1423 return -EACCES; 1423 return ret;
1424 return 0;
1425} 1424}
1426 1425
1427static int 1426static int
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4a2340a54401..5e4f815c2b34 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -41,13 +41,16 @@
41static bool cache_defer_req(struct cache_req *req, struct cache_head *item); 41static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
42static void cache_revisit_request(struct cache_head *item); 42static void cache_revisit_request(struct cache_head *item);
43 43
44static void cache_init(struct cache_head *h) 44static void cache_init(struct cache_head *h, struct cache_detail *detail)
45{ 45{
46 time_t now = seconds_since_boot(); 46 time_t now = seconds_since_boot();
47 INIT_HLIST_NODE(&h->cache_list); 47 INIT_HLIST_NODE(&h->cache_list);
48 h->flags = 0; 48 h->flags = 0;
49 kref_init(&h->ref); 49 kref_init(&h->ref);
50 h->expiry_time = now + CACHE_NEW_EXPIRY; 50 h->expiry_time = now + CACHE_NEW_EXPIRY;
51 if (now <= detail->flush_time)
52 /* ensure it isn't already expired */
53 now = detail->flush_time + 1;
51 h->last_refresh = now; 54 h->last_refresh = now;
52} 55}
53 56
@@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
81 * we might get lose if we need to 84 * we might get lose if we need to
82 * cache_put it soon. 85 * cache_put it soon.
83 */ 86 */
84 cache_init(new); 87 cache_init(new, detail);
85 detail->init(new, key); 88 detail->init(new, key);
86 89
87 write_lock(&detail->hash_lock); 90 write_lock(&detail->hash_lock);
@@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
116 119
117static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); 120static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
118 121
119static void cache_fresh_locked(struct cache_head *head, time_t expiry) 122static void cache_fresh_locked(struct cache_head *head, time_t expiry,
123 struct cache_detail *detail)
120{ 124{
125 time_t now = seconds_since_boot();
126 if (now <= detail->flush_time)
127 /* ensure it isn't immediately treated as expired */
128 now = detail->flush_time + 1;
121 head->expiry_time = expiry; 129 head->expiry_time = expiry;
122 head->last_refresh = seconds_since_boot(); 130 head->last_refresh = now;
123 smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ 131 smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
124 set_bit(CACHE_VALID, &head->flags); 132 set_bit(CACHE_VALID, &head->flags);
125} 133}
@@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
149 set_bit(CACHE_NEGATIVE, &old->flags); 157 set_bit(CACHE_NEGATIVE, &old->flags);
150 else 158 else
151 detail->update(old, new); 159 detail->update(old, new);
152 cache_fresh_locked(old, new->expiry_time); 160 cache_fresh_locked(old, new->expiry_time, detail);
153 write_unlock(&detail->hash_lock); 161 write_unlock(&detail->hash_lock);
154 cache_fresh_unlocked(old, detail); 162 cache_fresh_unlocked(old, detail);
155 return old; 163 return old;
@@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
162 cache_put(old, detail); 170 cache_put(old, detail);
163 return NULL; 171 return NULL;
164 } 172 }
165 cache_init(tmp); 173 cache_init(tmp, detail);
166 detail->init(tmp, old); 174 detail->init(tmp, old);
167 175
168 write_lock(&detail->hash_lock); 176 write_lock(&detail->hash_lock);
@@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
173 hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); 181 hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
174 detail->entries++; 182 detail->entries++;
175 cache_get(tmp); 183 cache_get(tmp);
176 cache_fresh_locked(tmp, new->expiry_time); 184 cache_fresh_locked(tmp, new->expiry_time, detail);
177 cache_fresh_locked(old, 0); 185 cache_fresh_locked(old, 0, detail);
178 write_unlock(&detail->hash_lock); 186 write_unlock(&detail->hash_lock);
179 cache_fresh_unlocked(tmp, detail); 187 cache_fresh_unlocked(tmp, detail);
180 cache_fresh_unlocked(old, detail); 188 cache_fresh_unlocked(old, detail);
@@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
219 rv = cache_is_valid(h); 227 rv = cache_is_valid(h);
220 if (rv == -EAGAIN) { 228 if (rv == -EAGAIN) {
221 set_bit(CACHE_NEGATIVE, &h->flags); 229 set_bit(CACHE_NEGATIVE, &h->flags);
222 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); 230 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
231 detail);
223 rv = -ENOENT; 232 rv = -ENOENT;
224 } 233 }
225 write_unlock(&detail->hash_lock); 234 write_unlock(&detail->hash_lock);
@@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
487 496
488void cache_purge(struct cache_detail *detail) 497void cache_purge(struct cache_detail *detail)
489{ 498{
490 detail->flush_time = LONG_MAX; 499 time_t now = seconds_since_boot();
500 if (detail->flush_time >= now)
501 now = detail->flush_time + 1;
502 /* 'now' is the maximum value any 'last_refresh' can have */
503 detail->flush_time = now;
491 detail->nextcheck = seconds_since_boot(); 504 detail->nextcheck = seconds_since_boot();
492 cache_flush(); 505 cache_flush();
493 detail->flush_time = 1;
494} 506}
495EXPORT_SYMBOL_GPL(cache_purge); 507EXPORT_SYMBOL_GPL(cache_purge);
496 508
@@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1436{ 1448{
1437 char tbuf[20]; 1449 char tbuf[20];
1438 char *bp, *ep; 1450 char *bp, *ep;
1451 time_t then, now;
1439 1452
1440 if (*ppos || count > sizeof(tbuf)-1) 1453 if (*ppos || count > sizeof(tbuf)-1)
1441 return -EINVAL; 1454 return -EINVAL;
@@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1447 return -EINVAL; 1460 return -EINVAL;
1448 1461
1449 bp = tbuf; 1462 bp = tbuf;
1450 cd->flush_time = get_expiry(&bp); 1463 then = get_expiry(&bp);
1451 cd->nextcheck = seconds_since_boot(); 1464 now = seconds_since_boot();
1465 cd->nextcheck = now;
1466 /* Can only set flush_time to 1 second beyond "now", or
1467 * possibly 1 second beyond flushtime. This is because
1468 * flush_time never goes backwards so it mustn't get too far
1469 * ahead of time.
1470 */
1471 if (then >= now) {
1472 /* Want to flush everything, so behave like cache_purge() */
1473 if (cd->flush_time >= now)
1474 now = cd->flush_time + 1;
1475 then = now;
1476 }
1477
1478 cd->flush_time = then;
1452 cache_flush(); 1479 cache_flush();
1453 1480
1454 *ppos += count; 1481 *ppos += count;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0c8120229a03..1413cdcc131c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -181,7 +181,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
181 struct page **ppage = xdr->pages; 181 struct page **ppage = xdr->pages;
182 size_t base = xdr->page_base; 182 size_t base = xdr->page_base;
183 unsigned int pglen = xdr->page_len; 183 unsigned int pglen = xdr->page_len;
184 unsigned int flags = MSG_MORE; 184 unsigned int flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
185 int slen; 185 int slen;
186 int len = 0; 186 int len = 0;
187 187
@@ -399,6 +399,31 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp)
399 return svc_port_is_privileged(svc_addr(rqstp)); 399 return svc_port_is_privileged(svc_addr(rqstp));
400} 400}
401 401
402static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
403{
404 if (!wq)
405 return false;
406 /*
407 * There should normally be a memory * barrier here--see
408 * wq_has_sleeper().
409 *
410 * It appears that isn't currently necessary, though, basically
411 * because callers all appear to have sufficient memory barriers
412 * between the time the relevant change is made and the
413 * time they call these callbacks.
414 *
415 * The nfsd code itself doesn't actually explicitly wait on
416 * these waitqueues, but it may wait on them for example in
417 * sendpage() or sendmsg() calls. (And those may be the only
418 * places, since it it uses nonblocking reads.)
419 *
420 * Maybe we should add the memory barriers anyway, but these are
421 * hot paths so we'd need to be convinced there's no sigificant
422 * penalty.
423 */
424 return waitqueue_active(wq);
425}
426
402/* 427/*
403 * INET callback when data has been received on the socket. 428 * INET callback when data has been received on the socket.
404 */ 429 */
@@ -414,7 +439,7 @@ static void svc_udp_data_ready(struct sock *sk)
414 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 439 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
415 svc_xprt_enqueue(&svsk->sk_xprt); 440 svc_xprt_enqueue(&svsk->sk_xprt);
416 } 441 }
417 if (wq && waitqueue_active(wq)) 442 if (sunrpc_waitqueue_active(wq))
418 wake_up_interruptible(wq); 443 wake_up_interruptible(wq);
419} 444}
420 445
@@ -432,7 +457,7 @@ static void svc_write_space(struct sock *sk)
432 svc_xprt_enqueue(&svsk->sk_xprt); 457 svc_xprt_enqueue(&svsk->sk_xprt);
433 } 458 }
434 459
435 if (wq && waitqueue_active(wq)) { 460 if (sunrpc_waitqueue_active(wq)) {
436 dprintk("RPC svc_write_space: someone sleeping on %p\n", 461 dprintk("RPC svc_write_space: someone sleeping on %p\n",
437 svsk); 462 svsk);
438 wake_up_interruptible(wq); 463 wake_up_interruptible(wq);
@@ -787,7 +812,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
787 } 812 }
788 813
789 wq = sk_sleep(sk); 814 wq = sk_sleep(sk);
790 if (wq && waitqueue_active(wq)) 815 if (sunrpc_waitqueue_active(wq))
791 wake_up_interruptible_all(wq); 816 wake_up_interruptible_all(wq);
792} 817}
793 818
@@ -808,7 +833,7 @@ static void svc_tcp_state_change(struct sock *sk)
808 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 833 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
809 svc_xprt_enqueue(&svsk->sk_xprt); 834 svc_xprt_enqueue(&svsk->sk_xprt);
810 } 835 }
811 if (wq && waitqueue_active(wq)) 836 if (sunrpc_waitqueue_active(wq))
812 wake_up_interruptible_all(wq); 837 wake_up_interruptible_all(wq);
813} 838}
814 839
@@ -823,7 +848,7 @@ static void svc_tcp_data_ready(struct sock *sk)
823 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 848 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
824 svc_xprt_enqueue(&svsk->sk_xprt); 849 svc_xprt_enqueue(&svsk->sk_xprt);
825 } 850 }
826 if (wq && waitqueue_active(wq)) 851 if (sunrpc_waitqueue_active(wq))
827 wake_up_interruptible(wq); 852 wake_up_interruptible(wq);
828} 853}
829 854
@@ -1367,7 +1392,6 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
1367 1392
1368/* 1393/*
1369 * Initialize socket for RPC use and create svc_sock struct 1394 * Initialize socket for RPC use and create svc_sock struct
1370 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
1371 */ 1395 */
1372static struct svc_sock *svc_setup_socket(struct svc_serv *serv, 1396static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1373 struct socket *sock, 1397 struct socket *sock,
@@ -1594,7 +1618,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)
1594 sk->sk_write_space = svsk->sk_owspace; 1618 sk->sk_write_space = svsk->sk_owspace;
1595 1619
1596 wq = sk_sleep(sk); 1620 wq = sk_sleep(sk);
1597 if (wq && waitqueue_active(wq)) 1621 if (sunrpc_waitqueue_active(wq))
1598 wake_up_interruptible(wq); 1622 wake_up_interruptible(wq);
1599} 1623}
1600 1624
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aaa0b58d6aba..955ec152cb71 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -441,6 +441,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
441 if (state == TCP_LISTEN) 441 if (state == TCP_LISTEN)
442 unix_release_sock(skb->sk, 1); 442 unix_release_sock(skb->sk, 1);
443 /* passed fds are erased in the kfree_skb hook */ 443 /* passed fds are erased in the kfree_skb hook */
444 UNIXCB(skb).consumed = skb->len;
444 kfree_skb(skb); 445 kfree_skb(skb);
445 } 446 }
446 447
@@ -1799,6 +1800,7 @@ alloc_skb:
1799 * this - does no harm 1800 * this - does no harm
1800 */ 1801 */
1801 consume_skb(newskb); 1802 consume_skb(newskb);
1803 newskb = NULL;
1802 } 1804 }
1803 1805
1804 if (skb_append_pagefrags(skb, page, offset, size)) { 1806 if (skb_append_pagefrags(skb, page, offset, size)) {
@@ -1811,8 +1813,11 @@ alloc_skb:
1811 skb->truesize += size; 1813 skb->truesize += size;
1812 atomic_add(size, &sk->sk_wmem_alloc); 1814 atomic_add(size, &sk->sk_wmem_alloc);
1813 1815
1814 if (newskb) 1816 if (newskb) {
1817 spin_lock(&other->sk_receive_queue.lock);
1815 __skb_queue_tail(&other->sk_receive_queue, newskb); 1818 __skb_queue_tail(&other->sk_receive_queue, newskb);
1819 spin_unlock(&other->sk_receive_queue.lock);
1820 }
1816 1821
1817 unix_state_unlock(other); 1822 unix_state_unlock(other);
1818 mutex_unlock(&unix_sk(other)->readlock); 1823 mutex_unlock(&unix_sk(other)->readlock);
@@ -2072,6 +2077,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
2072 2077
2073 do { 2078 do {
2074 int chunk; 2079 int chunk;
2080 bool drop_skb;
2075 struct sk_buff *skb, *last; 2081 struct sk_buff *skb, *last;
2076 2082
2077 unix_state_lock(sk); 2083 unix_state_lock(sk);
@@ -2152,7 +2158,11 @@ unlock:
2152 } 2158 }
2153 2159
2154 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2160 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2161 skb_get(skb);
2155 chunk = state->recv_actor(skb, skip, chunk, state); 2162 chunk = state->recv_actor(skb, skip, chunk, state);
2163 drop_skb = !unix_skb_len(skb);
2164 /* skb is only safe to use if !drop_skb */
2165 consume_skb(skb);
2156 if (chunk < 0) { 2166 if (chunk < 0) {
2157 if (copied == 0) 2167 if (copied == 0)
2158 copied = -EFAULT; 2168 copied = -EFAULT;
@@ -2161,6 +2171,18 @@ unlock:
2161 copied += chunk; 2171 copied += chunk;
2162 size -= chunk; 2172 size -= chunk;
2163 2173
2174 if (drop_skb) {
2175 /* the skb was touched by a concurrent reader;
2176 * we should not expect anything from this skb
2177 * anymore and assume it invalid - we can be
2178 * sure it was dropped from the socket queue
2179 *
2180 * let's report a short read
2181 */
2182 err = 0;
2183 break;
2184 }
2185
2164 /* Mark read part of skb as used */ 2186 /* Mark read part of skb as used */
2165 if (!(flags & MSG_PEEK)) { 2187 if (!(flags & MSG_PEEK)) {
2166 UNIXCB(skb).consumed += chunk; 2188 UNIXCB(skb).consumed += chunk;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 400d87294de3..0a369bb440e7 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1234,7 +1234,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
1234 /* Callers of accept() will be waiting on the listening socket, not 1234 /* Callers of accept() will be waiting on the listening socket, not
1235 * the pending socket. 1235 * the pending socket.
1236 */ 1236 */
1237 listener->sk_state_change(listener); 1237 listener->sk_data_ready(listener);
1238 1238
1239 return 0; 1239 return 0;
1240 1240