author		Tejun Heo <tj@kernel.org>	2015-12-07 10:09:03 -0500
committer	Tejun Heo <tj@kernel.org>	2015-12-07 10:09:03 -0500
commit		0b98f0c04245877ae0b625a7f0aa55b8ff98e0c4 (patch)
tree		486ebe0d76217a4f7781e28fbd96facb0b66f9da /net
parent		67cde9c4938945b9510730c64e68d2f1dd7bc0aa (diff)
parent		527e9316f8ec44bd53d90fb9f611fa7ffff52bb9 (diff)
Merge branch 'master' into for-4.4-fixes
The following commit, which went into mainline through the networking tree,

  3b13758f51de ("cgroups: Allow dynamically changing net_classid")

conflicts in net/core/netclassid_cgroup.c with the following pending fix in cgroup/for-4.4-fixes:

  1f7dd3e5a6e4 ("cgroup: fix handling of multi-destination migration from subtree_control enabling")

The former separates out update_classid() from cgrp_attach() and updates it to walk all fds of all tasks in the target css, so that it can be used from both the migration and the config-change paths. The latter drops @css from cgrp_attach().

Resolve the conflict by making cgrp_attach() call update_classid() with the css from the first task. We could revive @tset walking in cgrp_attach(), but given that net_cls is v1-only, where there is always exactly one target css during migration, this is fine.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Nina Schiff <ninasc@fb.com>
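For orientation, the resolved code in net/core/netclassid_cgroup.c ends up shaped as below. This is a condensed excerpt of the netclassid_cgroup.c hunk shown further down, not a standalone file:

	/* Retag the open sockets of every task in @css with its classid.
	 * update_classid_sock() (not shown) is the per-fd callback that
	 * writes the classid into each struct sock.
	 */
	static void update_classid(struct cgroup_subsys_state *css, void *v)
	{
		struct css_task_iter it;
		struct task_struct *p;

		css_task_iter_start(css, &it);
		while ((p = css_task_iter_next(&it))) {
			task_lock(p);
			iterate_fd(p->files, 0, update_classid_sock, v);
			task_unlock(p);
		}
		css_task_iter_end(&it);
	}

	static void cgrp_attach(struct cgroup_taskset *tset)
	{
		struct cgroup_subsys_state *css;

		/* net_cls is v1-only, so a migration has exactly one
		 * destination css; the first task's css is the css.
		 */
		cgroup_taskset_first(tset, &css);
		update_classid(css,
			       (void *)(unsigned long)css_cls_state(css)->classid);
	}

update_classid() walks every task of the css and retags each task's open sockets via iterate_fd(); cgrp_attach() pulls the destination css from the first task in @tset, which suffices for the reason given above.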
Diffstat (limited to 'net')
-rw-r--r--	net/8021q/vlan_core.c	4
-rw-r--r--	net/bluetooth/af_bluetooth.c	6
-rw-r--r--	net/bluetooth/smp.c	7
-rw-r--r--	net/bridge/br_stp.c	2
-rw-r--r--	net/bridge/br_stp_if.c	2
-rw-r--r--	net/caif/caif_socket.c	4
-rw-r--r--	net/core/datagram.c	2
-rw-r--r--	net/core/dev.c	18
-rw-r--r--	net/core/neighbour.c	6
-rw-r--r--	net/core/netclassid_cgroup.c	29
-rw-r--r--	net/core/rtnetlink.c	274
-rw-r--r--	net/core/scm.c	2
-rw-r--r--	net/core/skbuff.c	3
-rw-r--r--	net/core/sock.c	12
-rw-r--r--	net/core/stream.c	6
-rw-r--r--	net/dccp/ipv6.c	37
-rw-r--r--	net/dccp/proto.c	3
-rw-r--r--	net/decnet/af_decnet.c	8
-rw-r--r--	net/dns_resolver/dns_query.c	2
-rw-r--r--	net/hsr/hsr_device.c	2
-rw-r--r--	net/ipv4/igmp.c	5
-rw-r--r--	net/ipv4/inet_connection_sock.c	4
-rw-r--r--	net/ipv4/ipmr.c	23
-rw-r--r--	net/ipv4/netfilter/nf_nat_pptp.c	2
-rw-r--r--	net/ipv4/raw.c	8
-rw-r--r--	net/ipv4/tcp.c	28
-rw-r--r--	net/ipv4/tcp_diag.c	2
-rw-r--r--	net/ipv4/tcp_input.c	23
-rw-r--r--	net/ipv4/tcp_ipv4.c	17
-rw-r--r--	net/ipv4/tcp_timer.c	14
-rw-r--r--	net/ipv4/udp.c	1
-rw-r--r--	net/ipv6/addrconf.c	2
-rw-r--r--	net/ipv6/af_inet6.c	15
-rw-r--r--	net/ipv6/datagram.c	4
-rw-r--r--	net/ipv6/exthdrs.c	3
-rw-r--r--	net/ipv6/icmp.c	14
-rw-r--r--	net/ipv6/inet6_connection_sock.c	21
-rw-r--r--	net/ipv6/ip6_tunnel.c	2
-rw-r--r--	net/ipv6/ip6mr.c	19
-rw-r--r--	net/ipv6/ipv6_sockglue.c	33
-rw-r--r--	net/ipv6/mcast.c	2
-rw-r--r--	net/ipv6/ndisc.c	10
-rw-r--r--	net/ipv6/netfilter/nf_conntrack_reasm.c	5
-rw-r--r--	net/ipv6/raw.c	8
-rw-r--r--	net/ipv6/reassembly.c	10
-rw-r--r--	net/ipv6/route.c	24
-rw-r--r--	net/ipv6/syncookies.c	2
-rw-r--r--	net/ipv6/tcp_ipv6.c	51
-rw-r--r--	net/ipv6/udp.c	8
-rw-r--r--	net/iucv/af_iucv.c	2
-rw-r--r--	net/l2tp/l2tp_ip6.c	8
-rw-r--r--	net/mac80211/agg-tx.c	3
-rw-r--r--	net/mac80211/cfg.c	8
-rw-r--r--	net/mac80211/iface.c	5
-rw-r--r--	net/mac80211/main.c	3
-rw-r--r--	net/mac80211/mesh_pathtbl.c	8
-rw-r--r--	net/mac80211/scan.c	9
-rw-r--r--	net/netfilter/Kconfig	6
-rw-r--r--	net/netfilter/ipset/ip_set_bitmap_gen.h	17
-rw-r--r--	net/netfilter/ipset/ip_set_bitmap_ip.c	14
-rw-r--r--	net/netfilter/ipset/ip_set_bitmap_ipmac.c	64
-rw-r--r--	net/netfilter/ipset/ip_set_bitmap_port.c	18
-rw-r--r--	net/netfilter/ipset/ip_set_core.c	14
-rw-r--r--	net/netfilter/ipset/ip_set_hash_gen.h	26
-rw-r--r--	net/netfilter/ipset/ip_set_list_set.c	5
-rw-r--r--	net/netfilter/ipvs/ip_vs_core.c	16
-rw-r--r--	net/netfilter/nfnetlink_log.c	2
-rw-r--r--	net/netfilter/nft_counter.c	49
-rw-r--r--	net/netfilter/nft_dynset.c	5
-rw-r--r--	net/nfc/llcp_sock.c	2
-rw-r--r--	net/openvswitch/dp_notify.c	2
-rw-r--r--	net/openvswitch/vport-geneve.c	1
-rw-r--r--	net/openvswitch/vport-gre.c	1
-rw-r--r--	net/openvswitch/vport-netdev.c	8
-rw-r--r--	net/openvswitch/vport.c	8
-rw-r--r--	net/openvswitch/vport.h	8
-rw-r--r--	net/packet/af_packet.c	96
-rw-r--r--	net/rds/connection.c	6
-rw-r--r--	net/rds/send.c	4
-rw-r--r--	net/rxrpc/ar-ack.c	4
-rw-r--r--	net/rxrpc/ar-output.c	2
-rw-r--r--	net/sched/sch_api.c	27
-rw-r--r--	net/sched/sch_generic.c	2
-rw-r--r--	net/sched/sch_mq.c	4
-rw-r--r--	net/sched/sch_mqprio.c	4
-rw-r--r--	net/sctp/auth.c	4
-rw-r--r--	net/sctp/ipv6.c	13
-rw-r--r--	net/sctp/socket.c	39
-rw-r--r--	net/socket.c	21
-rw-r--r--	net/sunrpc/backchannel_rqst.c	8
-rw-r--r--	net/sunrpc/svc.c	1
-rw-r--r--	net/sunrpc/xprtsock.c	14
-rw-r--r--	net/tipc/link.c	2
-rw-r--r--	net/tipc/socket.c	10
-rw-r--r--	net/tipc/udp_media.c	7
-rw-r--r--	net/unix/af_unix.c	292
96 files changed, 1044 insertions(+), 617 deletions(-)
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		skb->pkt_type = PACKET_HOST;
 	}
 
-	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
+	    !netif_is_macvlan_port(vlan_dev) &&
+	    !netif_is_bridge_port(vlan_dev)) {
 		unsigned int offset = skb->data - skb_mac_header(skb);
 
 		/*
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index a3bffd1ec2b4..70306cc9d814 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -271,11 +271,11 @@ static long bt_sock_data_wait(struct sock *sk, long timeo)
 		if (signal_pending(current) || !timeo)
 			break;
 
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		release_sock(sk);
 		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	}
 
 	__set_current_state(TASK_RUNNING);
@@ -441,7 +441,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
 	if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	return mask;
 }
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index c91353841e40..ffed8a1d4f27 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3027,8 +3027,13 @@ static void smp_ready_cb(struct l2cap_chan *chan)
 
 	BT_DBG("chan %p", chan);
 
+	/* No need to call l2cap_chan_hold() here since we already own
+	 * the reference taken in smp_new_conn_cb(). This is just the
+	 * first time that we tie it to a specific pointer. The code in
+	 * l2cap_core.c ensures that there's no risk this function wont
+	 * get called if smp_new_conn_cb was previously called.
+	 */
 	conn->smp = chan;
-	l2cap_chan_hold(chan);
 
 	if (hcon->type == ACL_LINK && test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
 		bredr_pairing(chan);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f7e8dee64fc8..5f3f64553179 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -48,7 +48,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
 
 	p->state = state;
 	err = switchdev_port_attr_set(p->dev, &attr);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
 			(unsigned int) p->port_no, p->dev->name);
 }
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index fa53d7a89f48..5396ff08af32 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -50,7 +50,7 @@ void br_init_port(struct net_bridge_port *p)
 	p->config_pending = 0;
 
 	err = switchdev_port_attr_set(p->dev, &attr);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		netdev_err(p->dev, "failed to set HW ageing time\n");
 }
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index cc858919108e..aa209b1066c9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -323,7 +323,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
 		    !timeo)
 			break;
 
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		release_sock(sk);
 		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
@@ -331,7 +331,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
 		if (sock_flag(sk, SOCK_DEAD))
 			break;
 
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	}
 
 	finish_wait(sk_sleep(sk), &wait);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 617088aee21d..d62af69ad844 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -785,7 +785,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 	if (sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	return mask;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index ab9b8d0d115e..ae00b894e675 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2403,17 +2403,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
 {
 	static const netdev_features_t null_features = 0;
 	struct net_device *dev = skb->dev;
-	const char *driver = "";
+	const char *name = "";
 
 	if (!net_ratelimit())
 		return;
 
-	if (dev && dev->dev.parent)
-		driver = dev_driver_string(dev->dev.parent);
-
+	if (dev) {
+		if (dev->dev.parent)
+			name = dev_driver_string(dev->dev.parent);
+		else
+			name = netdev_name(dev);
+	}
 	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
 	     "gso_type=%d ip_summed=%d\n",
-	     driver, dev ? &dev->features : &null_features,
+	     name, dev ? &dev->features : &null_features,
 	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
 	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
 	     skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -6426,11 +6429,16 @@ int __netdev_update_features(struct net_device *dev)
 
 	if (dev->netdev_ops->ndo_set_features)
 		err = dev->netdev_ops->ndo_set_features(dev, features);
+	else
+		err = 0;
 
 	if (unlikely(err < 0)) {
 		netdev_err(dev,
 			"set_features() failed (%d); wanted %pNF, left %pNF\n",
 			err, &features, &dev->features);
+		/* return non-0 since some features might have changed and
+		 * it's better to fire a spurious notification than miss it
+		 */
 		return -1;
 	}
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1aa8437ed6c4..f18ae91b652e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh)
 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
 	/* keep skb alive even if arp_queue overflows */
 	if (skb)
-		skb = skb_copy(skb, GFP_ATOMIC);
+		skb = skb_clone(skb, GFP_ATOMIC);
 	write_unlock(&neigh->lock);
 	neigh->ops->solicit(neigh, skb);
 	atomic_inc(&neigh->probes);
@@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
 	ndm->ndm_pad2 = 0;
 	ndm->ndm_flags = pn->flags | NTF_PROXY;
 	ndm->ndm_type = RTN_UNICAST;
-	ndm->ndm_ifindex = pn->dev->ifindex;
+	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
 	ndm->ndm_state = NUD_NONE;
 
 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
@@ -2333,7 +2333,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 		if (h > s_h)
 			s_idx = 0;
 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
-			if (dev_net(n->dev) != net)
+			if (pneigh_net(n) != net)
 				continue;
 			if (idx < s_idx)
 				goto next;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 81cb3c72efe8..d9ee8d08a3a6 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -56,7 +56,7 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
 	kfree(css_cls_state(css));
 }
 
-static int update_classid(const void *v, struct file *file, unsigned n)
+static int update_classid_sock(const void *v, struct file *file, unsigned n)
 {
 	int err;
 	struct socket *sock = sock_from_file(file, &err);
@@ -67,19 +67,27 @@ static int update_classid(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-static void cgrp_attach(struct cgroup_taskset *tset)
+static void update_classid(struct cgroup_subsys_state *css, void *v)
 {
+	struct css_task_iter it;
 	struct task_struct *p;
-	struct cgroup_subsys_state *css;
-
-	cgroup_taskset_for_each(p, css, tset) {
-		struct cgroup_cls_state *cs = css_cls_state(css);
-		void *v = (void *)(unsigned long)cs->classid;
 
+	css_task_iter_start(css, &it);
+	while ((p = css_task_iter_next(&it))) {
 		task_lock(p);
-		iterate_fd(p->files, 0, update_classid, v);
+		iterate_fd(p->files, 0, update_classid_sock, v);
 		task_unlock(p);
 	}
+	css_task_iter_end(&it);
+}
+
+static void cgrp_attach(struct cgroup_taskset *tset)
+{
+	struct cgroup_subsys_state *css;
+
+	cgroup_taskset_first(tset, &css);
+	update_classid(css,
+		       (void *)(unsigned long)css_cls_state(css)->classid);
 }
 
 static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -90,8 +98,11 @@ static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
 static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
 			 u64 value)
 {
-	css_cls_state(css)->classid = (u32) value;
+	struct cgroup_cls_state *cs = css_cls_state(css);
+
+	cs->classid = (u32)value;
 
+	update_classid(css, (void *)(unsigned long)cs->classid);
 	return 0;
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 504bd17b7456..34ba7a08876d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1045,15 +1045,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
+					      struct net_device *dev)
+{
+	const struct rtnl_link_stats64 *stats;
+	struct rtnl_link_stats64 temp;
+	struct nlattr *attr;
+
+	stats = dev_get_stats(dev, &temp);
+
+	attr = nla_reserve(skb, IFLA_STATS,
+			   sizeof(struct rtnl_link_stats));
+	if (!attr)
+		return -EMSGSIZE;
+
+	copy_rtnl_link_stats(nla_data(attr), stats);
+
+	attr = nla_reserve(skb, IFLA_STATS64,
+			   sizeof(struct rtnl_link_stats64));
+	if (!attr)
+		return -EMSGSIZE;
+
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
+	return 0;
+}
+
+static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+					       struct net_device *dev,
+					       int vfs_num,
+					       struct nlattr *vfinfo)
+{
+	struct ifla_vf_rss_query_en vf_rss_query_en;
+	struct ifla_vf_link_state vf_linkstate;
+	struct ifla_vf_spoofchk vf_spoofchk;
+	struct ifla_vf_tx_rate vf_tx_rate;
+	struct ifla_vf_stats vf_stats;
+	struct ifla_vf_trust vf_trust;
+	struct ifla_vf_vlan vf_vlan;
+	struct ifla_vf_rate vf_rate;
+	struct nlattr *vf, *vfstats;
+	struct ifla_vf_mac vf_mac;
+	struct ifla_vf_info ivi;
+
+	/* Not all SR-IOV capable drivers support the
+	 * spoofcheck and "RSS query enable" query. Preset to
+	 * -1 so the user space tool can detect that the driver
+	 * didn't report anything.
+	 */
+	ivi.spoofchk = -1;
+	ivi.rss_query_en = -1;
+	ivi.trusted = -1;
+	memset(ivi.mac, 0, sizeof(ivi.mac));
+	/* The default value for VF link state is "auto"
+	 * IFLA_VF_LINK_STATE_AUTO which equals zero
+	 */
+	ivi.linkstate = 0;
+	if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
+		return 0;
+
+	vf_mac.vf =
+		vf_vlan.vf =
+		vf_rate.vf =
+		vf_tx_rate.vf =
+		vf_spoofchk.vf =
+		vf_linkstate.vf =
+		vf_rss_query_en.vf =
+		vf_trust.vf = ivi.vf;
+
+	memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+	vf_vlan.vlan = ivi.vlan;
+	vf_vlan.qos = ivi.qos;
+	vf_tx_rate.rate = ivi.max_tx_rate;
+	vf_rate.min_tx_rate = ivi.min_tx_rate;
+	vf_rate.max_tx_rate = ivi.max_tx_rate;
+	vf_spoofchk.setting = ivi.spoofchk;
+	vf_linkstate.link_state = ivi.linkstate;
+	vf_rss_query_en.setting = ivi.rss_query_en;
+	vf_trust.setting = ivi.trusted;
+	vf = nla_nest_start(skb, IFLA_VF_INFO);
+	if (!vf) {
+		nla_nest_cancel(skb, vfinfo);
+		return -EMSGSIZE;
+	}
+	if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+	    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+	    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+		    &vf_rate) ||
+	    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+		    &vf_tx_rate) ||
+	    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+		    &vf_spoofchk) ||
+	    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
+		    &vf_linkstate) ||
+	    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+		    sizeof(vf_rss_query_en),
+		    &vf_rss_query_en) ||
+	    nla_put(skb, IFLA_VF_TRUST,
+		    sizeof(vf_trust), &vf_trust))
+		return -EMSGSIZE;
+	memset(&vf_stats, 0, sizeof(vf_stats));
+	if (dev->netdev_ops->ndo_get_vf_stats)
+		dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+						  &vf_stats);
+	vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+	if (!vfstats) {
+		nla_nest_cancel(skb, vf);
+		nla_nest_cancel(skb, vfinfo);
+		return -EMSGSIZE;
+	}
+	if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+			vf_stats.rx_packets) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+			vf_stats.tx_packets) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+			vf_stats.rx_bytes) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+			vf_stats.tx_bytes) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+			vf_stats.broadcast) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+			vf_stats.multicast))
+		return -EMSGSIZE;
+	nla_nest_end(skb, vfstats);
+	nla_nest_end(skb, vf);
+	return 0;
+}
+
+static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+{
+	struct rtnl_link_ifmap map = {
+		.mem_start = dev->mem_start,
+		.mem_end = dev->mem_end,
+		.base_addr = dev->base_addr,
+		.irq = dev->irq,
+		.dma = dev->dma,
+		.port = dev->if_port,
+	};
+	if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags, u32 ext_filter_mask)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	struct rtnl_link_stats64 temp;
-	const struct rtnl_link_stats64 *stats;
-	struct nlattr *attr, *af_spec;
+	struct nlattr *af_spec;
 	struct rtnl_af_ops *af_ops;
 	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 
@@ -1096,18 +1237,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
 		goto nla_put_failure;
 
-	if (1) {
-		struct rtnl_link_ifmap map = {
-			.mem_start = dev->mem_start,
-			.mem_end = dev->mem_end,
-			.base_addr = dev->base_addr,
-			.irq = dev->irq,
-			.dma = dev->dma,
-			.port = dev->if_port,
-		};
-		if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
-			goto nla_put_failure;
-	}
+	if (rtnl_fill_link_ifmap(skb, dev))
+		goto nla_put_failure;
 
 	if (dev->addr_len) {
 		if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1124,128 +1255,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_phys_switch_id_fill(skb, dev))
 		goto nla_put_failure;
 
-	attr = nla_reserve(skb, IFLA_STATS,
-			   sizeof(struct rtnl_link_stats));
-	if (attr == NULL)
-		goto nla_put_failure;
-
-	stats = dev_get_stats(dev, &temp);
-	copy_rtnl_link_stats(nla_data(attr), stats);
-
-	attr = nla_reserve(skb, IFLA_STATS64,
-			   sizeof(struct rtnl_link_stats64));
-	if (attr == NULL)
+	if (rtnl_fill_stats(skb, dev))
 		goto nla_put_failure;
-	copy_rtnl_link_stats64(nla_data(attr), stats);
 
 	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
 	    nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
 		goto nla_put_failure;
 
-	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
-	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
+	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
+	    ext_filter_mask & RTEXT_FILTER_VF) {
 		int i;
-
-		struct nlattr *vfinfo, *vf, *vfstats;
+		struct nlattr *vfinfo;
 		int num_vfs = dev_num_vf(dev->dev.parent);
 
 		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
 		if (!vfinfo)
 			goto nla_put_failure;
 		for (i = 0; i < num_vfs; i++) {
-			struct ifla_vf_info ivi;
-			struct ifla_vf_mac vf_mac;
-			struct ifla_vf_vlan vf_vlan;
-			struct ifla_vf_rate vf_rate;
-			struct ifla_vf_tx_rate vf_tx_rate;
-			struct ifla_vf_spoofchk vf_spoofchk;
-			struct ifla_vf_link_state vf_linkstate;
-			struct ifla_vf_rss_query_en vf_rss_query_en;
-			struct ifla_vf_stats vf_stats;
-			struct ifla_vf_trust vf_trust;
-
-			/*
-			 * Not all SR-IOV capable drivers support the
-			 * spoofcheck and "RSS query enable" query. Preset to
-			 * -1 so the user space tool can detect that the driver
-			 * didn't report anything.
-			 */
-			ivi.spoofchk = -1;
-			ivi.rss_query_en = -1;
-			ivi.trusted = -1;
-			memset(ivi.mac, 0, sizeof(ivi.mac));
-			/* The default value for VF link state is "auto"
-			 * IFLA_VF_LINK_STATE_AUTO which equals zero
-			 */
-			ivi.linkstate = 0;
-			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
-				break;
-			vf_mac.vf =
-				vf_vlan.vf =
-				vf_rate.vf =
-				vf_tx_rate.vf =
-				vf_spoofchk.vf =
-				vf_linkstate.vf =
-				vf_rss_query_en.vf =
-				vf_trust.vf = ivi.vf;
-
-			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
-			vf_vlan.vlan = ivi.vlan;
-			vf_vlan.qos = ivi.qos;
-			vf_tx_rate.rate = ivi.max_tx_rate;
-			vf_rate.min_tx_rate = ivi.min_tx_rate;
-			vf_rate.max_tx_rate = ivi.max_tx_rate;
-			vf_spoofchk.setting = ivi.spoofchk;
-			vf_linkstate.link_state = ivi.linkstate;
-			vf_rss_query_en.setting = ivi.rss_query_en;
-			vf_trust.setting = ivi.trusted;
-			vf = nla_nest_start(skb, IFLA_VF_INFO);
-			if (!vf) {
-				nla_nest_cancel(skb, vfinfo);
-				goto nla_put_failure;
-			}
-			if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
-			    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
-			    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
-				    &vf_rate) ||
-			    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
-				    &vf_tx_rate) ||
-			    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
-				    &vf_spoofchk) ||
-			    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
-				    &vf_linkstate) ||
-			    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
-				    sizeof(vf_rss_query_en),
-				    &vf_rss_query_en) ||
-			    nla_put(skb, IFLA_VF_TRUST,
-				    sizeof(vf_trust), &vf_trust))
+			if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
 				goto nla_put_failure;
-			memset(&vf_stats, 0, sizeof(vf_stats));
-			if (dev->netdev_ops->ndo_get_vf_stats)
-				dev->netdev_ops->ndo_get_vf_stats(dev, i,
-								  &vf_stats);
-			vfstats = nla_nest_start(skb, IFLA_VF_STATS);
-			if (!vfstats) {
-				nla_nest_cancel(skb, vf);
-				nla_nest_cancel(skb, vfinfo);
-				goto nla_put_failure;
-			}
-			if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
-					vf_stats.rx_packets) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
-					vf_stats.tx_packets) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
-					vf_stats.rx_bytes) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
-					vf_stats.tx_bytes) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
-					vf_stats.broadcast) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
-					vf_stats.multicast))
-				goto nla_put_failure;
-			nla_nest_end(skb, vfstats);
-			nla_nest_end(skb, vf);
 		}
+
 		nla_nest_end(skb, vfinfo);
 	}
 
diff --git a/net/core/scm.c b/net/core/scm.c
index 3b6899b7d810..8a1741b14302 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		err = put_user(cmlen, &cm->cmsg_len);
 		if (!err) {
 			cmlen = CMSG_SPACE(i*sizeof(int));
+			if (msg->msg_controllen < cmlen)
+				cmlen = msg->msg_controllen;
 			msg->msg_control += cmlen;
 			msg->msg_controllen -= cmlen;
 		}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index aa41e6dd6429..152b9c70e252 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
+		2 * ETH_ALEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e4dd54bfb5a..e31dfcee1729 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1530,7 +1530,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		skb_queue_head_init(&newsk->sk_receive_queue);
 		skb_queue_head_init(&newsk->sk_write_queue);
 
-		spin_lock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
 		lockdep_set_class_and_name(&newsk->sk_callback_lock,
 					   af_callback_keys + newsk->sk_family,
@@ -1607,7 +1606,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 {
 	u32 max_segs = 1;
 
-	__sk_dst_set(sk, dst);
+	sk_dst_set(sk, dst);
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
@@ -1815,7 +1814,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 {
 	DEFINE_WAIT(wait);
 
-	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 	for (;;) {
 		if (!timeo)
 			break;
@@ -1861,7 +1860,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
 			break;
 
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		err = -EAGAIN;
 		if (!timeo)
@@ -2048,9 +2047,9 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
-	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 }
@@ -2388,7 +2387,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	} else
 		sk->sk_wq = NULL;
 
-	spin_lock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
 	lockdep_set_class_and_name(&sk->sk_callback_lock,
 			af_callback_keys + sk->sk_family,
diff --git a/net/core/stream.c b/net/core/stream.c
index d70f77a0c889..b96f7a79e544 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk)
 			wake_up_interruptible_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
-			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+			sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
 		rcu_read_unlock();
 	}
 }
@@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 		current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
 
 	while (1) {
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
@@ -139,7 +139,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 		}
 		if (signal_pending(current))
 			goto do_interrupted;
-		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 		if (sk_stream_memory_free(sk) && !vm_wait)
 			break;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index db5fc2440a23..9c6d0508e63a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -202,7 +202,9 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
 
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 	if (IS_ERR(dst)) {
@@ -219,7 +221,10 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
 							 &ireq->ir_v6_loc_addr,
 							 &ireq->ir_v6_rmt_addr);
 		fl6.daddr = ireq->ir_v6_rmt_addr;
-		err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+		rcu_read_lock();
+		err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+			       np->tclass);
+		rcu_read_unlock();
 		err = net_xmit_eval(err);
 	}
 
@@ -387,6 +392,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *newnp;
 	const struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt;
 	struct inet_sock *newinet;
 	struct dccp6_sock *newdp6;
 	struct sock *newsk;
@@ -453,7 +459,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 	 * comment in that function for the gory details. -acme
 	 */
 
-	__ip6_dst_store(newsk, dst, NULL, NULL);
+	ip6_dst_store(newsk, dst, NULL, NULL);
 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
 						      NETIF_F_TSO);
 	newdp6 = (struct dccp6_sock *)newsk;
@@ -488,13 +494,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 	 * Yes, keeping reference count would be much more clever, but we make
 	 * one more one thing there: reattach optmem to newsk.
 	 */
-	if (np->opt != NULL)
-		newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+	opt = rcu_dereference(np->opt);
+	if (opt) {
+		opt = ipv6_dup_options(newsk, opt);
+		RCU_INIT_POINTER(newnp->opt, opt);
+	}
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	if (newnp->opt != NULL)
-		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
-						     newnp->opt->opt_flen);
+	if (opt)
+		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+						    opt->opt_flen;
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
@@ -757,6 +765,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
+	struct ipv6_txoptions *opt;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
@@ -856,7 +865,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl6.fl6_sport = inet->inet_sport;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 	if (IS_ERR(dst)) {
@@ -873,12 +883,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	np->saddr = *saddr;
 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
-	__ip6_dst_store(sk, dst, NULL, NULL);
+	ip6_dst_store(sk, dst, NULL, NULL);
 
 	icsk->icsk_ext_hdr_len = 0;
-	if (np->opt != NULL)
-		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
-					  np->opt->opt_nflen);
+	if (opt)
+		icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
 
 	inet->inet_dport = usin->sin6_port;
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b5cf13a28009..41e65804ddf5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -339,8 +339,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
 			if (sk_stream_is_writeable(sk)) {
 				mask |= POLLOUT | POLLWRNORM;
 			} else {  /* send SIGIO later */
-				set_bit(SOCK_ASYNC_NOSPACE,
-					&sk->sk_socket->flags);
+				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 
 				/* Race breaker. If space is freed after
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 675cf94e04f8..eebf5ac8ce18 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1747,9 +1747,9 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		}
 
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		finish_wait(sk_sleep(sk), &wait);
 	}
 
@@ -2004,10 +2004,10 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 		}
 
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		sk_wait_event(sk, &timeo,
 			      !dn_queue_too_long(scp, queue, flags));
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		finish_wait(sk_sleep(sk), &wait);
 		continue;
 	}
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 4677b6fa6dda..ecc28cff08ab 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -67,7 +67,7 @@
  * Returns the size of the result on success, -ve error code otherwise.
  */
 int dns_query(const char *type, const char *name, size_t namelen,
-	      const char *options, char **_result, time_t *_expiry)
+	      const char *options, char **_result, time64_t *_expiry)
 {
 	struct key *rkey;
 	const struct user_key_payload *upayload;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 35a9788bb3ae..c7d1adca30d8 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -312,7 +312,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type)
 	return;
 
 out:
-	WARN_ON_ONCE("HSR: Could not send supervision frame\n");
+	WARN_ONCE(1, "HSR: Could not send supervision frame\n");
 	kfree_skb(skb);
 }
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6baf36e11808..05e4cba14162 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2126,7 +2126,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	ASSERT_RTNL();
 
 	in_dev = ip_mc_find_dev(net, imr);
-	if (!in_dev) {
+	if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -2147,7 +2147,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 
 		*imlp = iml->next_rcu;
 
-		ip_mc_dec_group(in_dev, group);
+		if (in_dev)
+			ip_mc_dec_group(in_dev, group);
 
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1feb15f23de8..46b9c887bede 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -563,7 +563,7 @@ static void reqsk_timer_handler(unsigned long data)
 	int max_retries, thresh;
 	u8 defer_accept;
 
-	if (sk_listener->sk_state != TCP_LISTEN)
+	if (sk_state_load(sk_listener) != TCP_LISTEN)
 		goto drop;
 
 	max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
@@ -749,7 +749,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 	 * It is OK, because this socket enters to hash table only
 	 * after validation is complete.
 	 */
-	sk->sk_state = TCP_LISTEN;
+	sk_state_store(sk, TCP_LISTEN);
 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
 		inet->inet_sport = htons(inet->inet_num);
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 92dd4b74d513..c3a38353f5dc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			      struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 				 int cmd);
-static void mroute_clean_tables(struct mr_table *mrt);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 static void ipmr_free_table(struct mr_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt);
+	mroute_clean_tables(mrt, true);
 	kfree(mrt);
 }
 
@@ -441,10 +441,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 	return dev;
 
 failure:
-	/* allow the register to be completed before unregistering. */
-	rtnl_unlock();
-	rtnl_lock();
-
 	unregister_netdevice(dev);
 	return NULL;
 }
@@ -540,10 +536,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
 	return dev;
 
 failure:
-	/* allow the register to be completed before unregistering. */
-	rtnl_unlock();
-	rtnl_lock();
-
 	unregister_netdevice(dev);
 	return NULL;
 }
@@ -1208,7 +1200,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr_table *mrt)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
 	int i;
 	LIST_HEAD(list);
@@ -1217,8 +1209,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
 	/* Shut down all active vif entries */
 
 	for (i = 0; i < mrt->maxvif; i++) {
-		if (!(mrt->vif_table[i].flags & VIFF_STATIC))
-			vif_delete(mrt, i, 0, &list);
+		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
+			continue;
+		vif_delete(mrt, i, 0, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -1226,7 +1219,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
 
 	for (i = 0; i < MFC_LINES; i++) {
 		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
-			if (c->mfc_flags & MFC_STATIC)
+			if (!all && (c->mfc_flags & MFC_STATIC))
 				continue;
 			list_del_rcu(&c->list);
 			mroute_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1261,7 +1254,7 @@ static void mrtsock_destruct(struct sock *sk)
 						    NETCONFA_IFINDEX_ALL,
 						    net->ipv4.devconf_all);
 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
-			mroute_clean_tables(mrt);
+			mroute_clean_tables(mrt, false);
 		}
 	}
 	rtnl_unlock();
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 657d2307f031..b3ca21b2ba9b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -45,7 +45,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct net *net = nf_ct_net(ct);
 	const struct nf_conn *master = ct->master;
 	struct nf_conntrack_expect *other_exp;
-	struct nf_conntrack_tuple t;
+	struct nf_conntrack_tuple t = {};
 	const struct nf_ct_pptp_master *ct_pptp_info;
 	const struct nf_nat_pptp *nat_pptp_info;
 	struct nf_nat_range range;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8c0d0bdc2a7c..63e5be0abd86 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,10 +406,12 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 			ip_select_ident(net, skb, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+		skb->transport_header += iphlen;
+		if (iph->protocol == IPPROTO_ICMP &&
+		    length >= iphlen + sizeof(struct icmphdr))
+			icmp_out_count(net, ((struct icmphdr *)
+				skb_transport_header(skb))->type);
 	}
-	if (iph->protocol == IPPROTO_ICMP)
-		icmp_out_count(net, ((struct icmphdr *)
-			skb_transport_header(skb))->type);
 
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
 		      net, sk, skb, NULL, rt->dst.dev,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cfa7c0c1e80..c82cca18c90f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,11 +451,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 	const struct tcp_sock *tp = tcp_sk(sk);
+	int state;
 
 	sock_rps_record_flow(sk);
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
-	if (sk->sk_state == TCP_LISTEN)
+
+	state = sk_state_load(sk);
+	if (state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
 	/* Socket is not locked. We are protected from async events
@@ -492,14 +495,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
 	 * blocking on fresh not-connected or disconnected socket. --ANK
 	 */
-	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
+	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
 		mask |= POLLHUP;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 
 	/* Connected or passive Fast Open socket? */
-	if (sk->sk_state != TCP_SYN_SENT &&
-	    (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+	if (state != TCP_SYN_SENT &&
+	    (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
 		int target = sock_rcvlowat(sk, 0, INT_MAX);
 
 		if (tp->urg_seq == tp->copied_seq &&
@@ -507,9 +510,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		    tp->urg_data)
 			target++;
 
-		/* Potential race condition. If read of tp below will
-		 * escape above sk->sk_state, we can be illegally awaken
-		 * in SYN_* states. */
 		if (tp->rcv_nxt - tp->copied_seq >= target)
 			mask |= POLLIN | POLLRDNORM;
 
@@ -517,8 +517,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		if (sk_stream_is_writeable(sk)) {
 			mask |= POLLOUT | POLLWRNORM;
 		} else {  /* send SIGIO later */
-			set_bit(SOCK_ASYNC_NOSPACE,
-				&sk->sk_socket->flags);
+			sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 
 			/* Race breaker. If space is freed after
@@ -906,7 +905,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 			goto out_err;
 	}
 
-	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	mss_now = tcp_send_mss(sk, &size_goal, flags);
 	copied = 0;
@@ -1134,7 +1133,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	}
 
 	/* This should be in poll */
-	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	mss_now = tcp_send_mss(sk, &size_goal, flags);
 
@@ -1934,7 +1933,7 @@ void tcp_set_state(struct sock *sk, int state)
 	/* Change state AFTER socket is unhashed to avoid closed
 	 * socket sitting in hash tables.
 	 */
-	sk->sk_state = state;
+	sk_state_store(sk, state);
 
 #ifdef STATE_TRACE
 	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@ -2644,7 +2643,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	if (sk->sk_type != SOCK_STREAM)
 		return;
 
-	info->tcpi_state = sk->sk_state;
+	info->tcpi_state = sk_state_load(sk);
+
 	info->tcpi_ca_state = icsk->icsk_ca_state;
 	info->tcpi_retransmits = icsk->icsk_retransmits;
 	info->tcpi_probes = icsk->icsk_probes_out;
@@ -2672,7 +2672,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_snd_mss = tp->mss_cache;
 	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (info->tcpi_state == TCP_LISTEN) {
 		info->tcpi_unacked = sk->sk_ack_backlog;
 		info->tcpi_sacked = sk->sk_max_ack_backlog;
 	} else {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f34946177..b31604086edd 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,7 +21,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
21{ 21{
22 struct tcp_info *info = _info; 22 struct tcp_info *info = _info;
23 23
24 if (sk->sk_state == TCP_LISTEN) { 24 if (sk_state_load(sk) == TCP_LISTEN) {
25 r->idiag_rqueue = sk->sk_ack_backlog; 25 r->idiag_rqueue = sk->sk_ack_backlog;
26 r->idiag_wqueue = sk->sk_max_ack_backlog; 26 r->idiag_wqueue = sk->sk_max_ack_backlog;
27 } else if (sk->sk_type == SOCK_STREAM) { 27 } else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fdd88c3803a6..2d656eef7f8e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4481,19 +4481,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
4481int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) 4481int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4482{ 4482{
4483 struct sk_buff *skb; 4483 struct sk_buff *skb;
4484 int err = -ENOMEM;
4485 int data_len = 0;
4484 bool fragstolen; 4486 bool fragstolen;
4485 4487
4486 if (size == 0) 4488 if (size == 0)
4487 return 0; 4489 return 0;
4488 4490
4489 skb = alloc_skb(size, sk->sk_allocation); 4491 if (size > PAGE_SIZE) {
4492 int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
4493
4494 data_len = npages << PAGE_SHIFT;
4495 size = data_len + (size & ~PAGE_MASK);
4496 }
4497 skb = alloc_skb_with_frags(size - data_len, data_len,
4498 PAGE_ALLOC_COSTLY_ORDER,
4499 &err, sk->sk_allocation);
4490 if (!skb) 4500 if (!skb)
4491 goto err; 4501 goto err;
4492 4502
4503 skb_put(skb, size - data_len);
4504 skb->data_len = data_len;
4505 skb->len = size;
4506
4493 if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) 4507 if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
4494 goto err_free; 4508 goto err_free;
4495 4509
4496 if (memcpy_from_msg(skb_put(skb, size), msg, size)) 4510 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
4511 if (err)
4497 goto err_free; 4512 goto err_free;
4498 4513
4499 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; 4514 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4509,7 +4524,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4509err_free: 4524err_free:
4510 kfree_skb(skb); 4525 kfree_skb(skb);
4511err: 4526err:
4512 return -ENOMEM; 4527 return err;
4528
4513} 4529}
4514 4530
4515static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) 4531static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -5667,6 +5683,7 @@ discard:
5667 } 5683 }
5668 5684
5669 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 5685 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5686 tp->copied_seq = tp->rcv_nxt;
5670 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; 5687 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5671 5688
5672 /* RFC1323: The window in SYN & SYN/ACK segments is 5689 /* RFC1323: The window in SYN & SYN/ACK segments is
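
The tcp_send_rcvq() rework above lets repair-mode writes exceed a single linear buffer: everything past the first page goes into page fragments via alloc_skb_with_frags(), capped at MAX_SKB_FRAGS pages, and the error path now propagates the allocator's or copy's error code instead of a blanket -ENOMEM. The final hunk separately initializes tp->copied_seq from rcv_nxt when the SYN is consumed, so the rcv_nxt - copied_seq unread-byte arithmetic (used by tcp_poll() earlier) starts from zero on such connections. A standalone sketch of the linear/frag split, assuming 4 KiB pages and the usual MAX_SKB_FRAGS of 17:

    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define PAGE_SIZE     (1UL << PAGE_SHIFT)
    #define PAGE_MASK     (~(PAGE_SIZE - 1))
    #define MAX_SKB_FRAGS 17UL

    int main(void)
    {
            size_t size = 70000, data_len = 0;

            if (size > PAGE_SIZE) {
                    size_t npages = size >> PAGE_SHIFT;

                    if (npages > MAX_SKB_FRAGS)
                            npages = MAX_SKB_FRAGS;
                    data_len = npages << PAGE_SHIFT;        /* whole pages in frags */
                    size = data_len + (size & ~PAGE_MASK);  /* frags + sub-page tail */
            }
            /* prints linear=368 frag=69632 for the 70000-byte example */
            printf("linear=%zu frag=%zu\n", size - data_len, data_len);
            return 0;
    }
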
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 950e28c0cdf2..db003438aaf5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -921,7 +921,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
921 } 921 }
922 922
923 md5sig = rcu_dereference_protected(tp->md5sig_info, 923 md5sig = rcu_dereference_protected(tp->md5sig_info,
924 sock_owned_by_user(sk)); 924 sock_owned_by_user(sk) ||
925 lockdep_is_held(&sk->sk_lock.slock));
925 if (!md5sig) { 926 if (!md5sig) {
926 md5sig = kmalloc(sizeof(*md5sig), gfp); 927 md5sig = kmalloc(sizeof(*md5sig), gfp);
927 if (!md5sig) 928 if (!md5sig)
@@ -2158,6 +2159,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2158 __u16 destp = ntohs(inet->inet_dport); 2159 __u16 destp = ntohs(inet->inet_dport);
2159 __u16 srcp = ntohs(inet->inet_sport); 2160 __u16 srcp = ntohs(inet->inet_sport);
2160 int rx_queue; 2161 int rx_queue;
2162 int state;
2161 2163
2162 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 2164 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2163 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 2165 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2175,17 +2177,18 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2175 timer_expires = jiffies; 2177 timer_expires = jiffies;
2176 } 2178 }
2177 2179
2178 if (sk->sk_state == TCP_LISTEN) 2180 state = sk_state_load(sk);
2181 if (state == TCP_LISTEN)
2179 rx_queue = sk->sk_ack_backlog; 2182 rx_queue = sk->sk_ack_backlog;
2180 else 2183 else
2181 /* 2184 /* Because we don't lock the socket,
2182 * because we dont lock socket, we might find a transient negative value 2185 * we might find a transient negative value.
2183 */ 2186 */
2184 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 2187 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2185 2188
2186 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2189 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2187 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 2190 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2188 i, src, srcp, dest, destp, sk->sk_state, 2191 i, src, srcp, dest, destp, state,
2189 tp->write_seq - tp->snd_una, 2192 tp->write_seq - tp->snd_una,
2190 rx_queue, 2193 rx_queue,
2191 timer_active, 2194 timer_active,
@@ -2199,8 +2202,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2199 jiffies_to_clock_t(icsk->icsk_ack.ato), 2202 jiffies_to_clock_t(icsk->icsk_ack.ato),
2200 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 2203 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2201 tp->snd_cwnd, 2204 tp->snd_cwnd,
2202 sk->sk_state == TCP_LISTEN ? 2205 state == TCP_LISTEN ?
2203 (fastopenq ? fastopenq->max_qlen : 0) : 2206 fastopenq->max_qlen :
2204 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 2207 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2205} 2208}
2206 2209
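
Two independent fixes in tcp_ipv4.c: the md5sig_info annotation now accepts either way the socket can legitimately be owned (the user lock via sock_owned_by_user(), or the slock itself via lockdep_is_held()), which reads as covering callers that update the list while holding the spinlock rather than lock_sock(). And get_tcp4_sock() snapshots the state once into a local, then keys the backlog-vs-unread choice, the printed state, and the fastopen column all off that single load. The snapshot-once shape, as a sketch (TCP_LISTEN's value matches the kernel enum; the rest is illustrative):

    #include <stdio.h>
    #include <stdatomic.h>

    #define TCP_LISTEN 10

    static _Atomic int sk_state;

    static void print_sock(int ack_backlog, int unread_bytes)
    {
            /* one acquire load; every field below derives from the same
             * snapshot, so a concurrent state change cannot produce a
             * line mixing listener and non-listener columns */
            int state = atomic_load_explicit(&sk_state, memory_order_acquire);
            int rx_queue = state == TCP_LISTEN
                           ? ack_backlog
                           : (unread_bytes > 0 ? unread_bytes : 0);

            printf("st=%02X rx=%d\n", state, rx_queue);
    }
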
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c9c716a483e4..193ba1fa8a9a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -168,7 +168,7 @@ static int tcp_write_timeout(struct sock *sk)
168 dst_negative_advice(sk); 168 dst_negative_advice(sk);
169 if (tp->syn_fastopen || tp->syn_data) 169 if (tp->syn_fastopen || tp->syn_data)
170 tcp_fastopen_cache_set(sk, 0, NULL, true, 0); 170 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
171 if (tp->syn_data) 171 if (tp->syn_data && icsk->icsk_retransmits == 1)
172 NET_INC_STATS_BH(sock_net(sk), 172 NET_INC_STATS_BH(sock_net(sk),
173 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 173 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
174 } 174 }
@@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk)
176 syn_set = true; 176 syn_set = true;
177 } else { 177 } else {
178 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { 178 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
179 /* Some middle-boxes may black-hole Fast Open _after_
180 * the handshake. Therefore we conservatively disable
181 * Fast Open on this path on recurring timeouts with
182 * few or zero bytes acked after Fast Open.
183 */
184 if (tp->syn_data_acked &&
185 tp->bytes_acked <= tp->rx_opt.mss_clamp) {
186 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
187 if (icsk->icsk_retransmits == sysctl_tcp_retries1)
188 NET_INC_STATS_BH(sock_net(sk),
189 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
190 }
179 /* Black hole detection */ 191 /* Black hole detection */
180 tcp_mtu_probing(icsk, sk); 192 tcp_mtu_probing(icsk, sk);
181 193
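
The tcp_timer.c hunks tighten the Fast Open failure accounting: on SYN timeouts the ACTIVEFAIL counter is bumped only on the first retransmit, and the new established-side branch disables Fast Open for the path when repeated timeouts hit a connection that sent data on the SYN yet never got more than one MSS acknowledged, counting the event exactly once when icsk_retransmits reaches retries1. The heuristic condition, as a pure-function sketch with hypothetical names:

    #include <stdbool.h>
    #include <stdint.h>

    /* After recurring retransmission timeouts: a SYN+data connection
     * with (almost) no forward progress suggests a middlebox is
     * black-holing Fast Open data on this path. */
    static bool tfo_looks_blackholed(bool syn_data_acked,
                                     uint64_t bytes_acked,
                                     uint32_t mss_clamp)
    {
            return syn_data_acked && bytes_acked <= mss_clamp;
    }
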
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24ec14f9825c..0c7b0e61b917 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -100,7 +100,6 @@
100#include <linux/slab.h> 100#include <linux/slab.h>
101#include <net/tcp_states.h> 101#include <net/tcp_states.h>
102#include <linux/skbuff.h> 102#include <linux/skbuff.h>
103#include <linux/netdevice.h>
104#include <linux/proc_fs.h> 103#include <linux/proc_fs.h>
105#include <linux/seq_file.h> 104#include <linux/seq_file.h>
106#include <net/net_namespace.h> 105#include <net/net_namespace.h>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d84742f003a9..61f26851655c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3642,7 +3642,7 @@ static void addrconf_dad_work(struct work_struct *w)
3642 3642
3643 /* send a neighbour solicitation for our addr */ 3643 /* send a neighbour solicitation for our addr */
3644 addrconf_addr_solict_mult(&ifp->addr, &mcaddr); 3644 addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
3645 ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL); 3645 ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any);
3646out: 3646out:
3647 in6_ifa_put(ifp); 3647 in6_ifa_put(ifp);
3648 rtnl_unlock(); 3648 rtnl_unlock();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 44bb66bde0e2..8ec0df75f1c4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk)
428 428
429 /* Free tx options */ 429 /* Free tx options */
430 430
431 opt = xchg(&np->opt, NULL); 431 opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
432 if (opt) 432 if (opt) {
433 sock_kfree_s(sk, opt, opt->tot_len); 433 atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
434 txopt_put(opt);
435 }
434} 436}
435EXPORT_SYMBOL_GPL(inet6_destroy_sock); 437EXPORT_SYMBOL_GPL(inet6_destroy_sock);
436 438
@@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
659 fl6.fl6_sport = inet->inet_sport; 661 fl6.fl6_sport = inet->inet_sport;
660 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 662 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
661 663
662 final_p = fl6_update_dst(&fl6, np->opt, &final); 664 rcu_read_lock();
665 final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
666 &final);
667 rcu_read_unlock();
663 668
664 dst = ip6_dst_lookup_flow(sk, &fl6, final_p); 669 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
665 if (IS_ERR(dst)) { 670 if (IS_ERR(dst)) {
@@ -668,7 +673,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
668 return PTR_ERR(dst); 673 return PTR_ERR(dst);
669 } 674 }
670 675
671 __ip6_dst_store(sk, dst, NULL, NULL); 676 ip6_dst_store(sk, dst, NULL, NULL);
672 } 677 }
673 678
674 return 0; 679 return 0;
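
From here on, np->opt is treated as an RCU-managed, reference-counted pointer: plain readers wrap the access in rcu_read_lock()/rcu_dereference(), and code that must keep the options beyond the read section takes a counted reference with txopt_get() (visible in the raw/udp/l2tp hunks below) and drops it with txopt_put(); teardown subtracts opt->tot_len from the socket's optmem accounting and puts the final reference instead of freeing outright. txopt_get()/txopt_put() are real helpers in this tree; the bodies below are illustrative stand-ins only, with a plain atomic pointer in place of RCU — in the kernel it is the grace period that makes the load-then-increment safe, which this sketch deliberately elides:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct txopts {
            atomic_int refcnt;
            int tot_len;
    };

    static _Atomic(struct txopts *) np_opt;    /* stands in for np->opt */

    static struct txopts *txopt_get_sketch(void)
    {
            /* kernel: rcu_read_lock() guarantees the object outlives
             * the window between the load and the increment */
            struct txopts *opt = atomic_load(&np_opt);

            if (opt)
                    atomic_fetch_add(&opt->refcnt, 1);
            return opt;
    }

    static void txopt_put_sketch(struct txopts *opt)
    {
            /* NULL-safe; frees on the last reference */
            if (opt && atomic_fetch_sub(&opt->refcnt, 1) == 1)
                    free(opt);
    }
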
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index d70b0238f468..517c55b01ba8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -167,8 +167,10 @@ ipv4_connected:
167 167
168 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 168 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
169 169
170 opt = flowlabel ? flowlabel->opt : np->opt; 170 rcu_read_lock();
171 opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
171 final_p = fl6_update_dst(&fl6, opt, &final); 172 final_p = fl6_update_dst(&fl6, opt, &final);
173 rcu_read_unlock();
172 174
173 dst = ip6_dst_lookup_flow(sk, &fl6, final_p); 175 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
174 err = 0; 176 err = 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ce203b0402be..ea7c4d64a00a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
727 *((char **)&opt2->dst1opt) += dif; 727 *((char **)&opt2->dst1opt) += dif;
728 if (opt2->srcrt) 728 if (opt2->srcrt)
729 *((char **)&opt2->srcrt) += dif; 729 *((char **)&opt2->srcrt) += dif;
730 atomic_set(&opt2->refcnt, 1);
730 } 731 }
731 return opt2; 732 return opt2;
732} 733}
@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
790 return ERR_PTR(-ENOBUFS); 791 return ERR_PTR(-ENOBUFS);
791 792
792 memset(opt2, 0, tot_len); 793 memset(opt2, 0, tot_len);
793 794 atomic_set(&opt2->refcnt, 1);
794 opt2->tot_len = tot_len; 795 opt2->tot_len = tot_len;
795 p = (char *)(opt2 + 1); 796 p = (char *)(opt2 + 1);
796 797
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 36c5a98b0472..0a37ddc7af51 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -834,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
834 security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); 834 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
835} 835}
836 836
837/*
838 * Special lock-class for __icmpv6_sk:
839 */
840static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
841
842static int __net_init icmpv6_sk_init(struct net *net) 837static int __net_init icmpv6_sk_init(struct net *net)
843{ 838{
844 struct sock *sk; 839 struct sock *sk;
@@ -860,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net)
860 855
861 net->ipv6.icmp_sk[i] = sk; 856 net->ipv6.icmp_sk[i] = sk;
862 857
863 /*
864 * Split off their lock-class, because sk->sk_dst_lock
865 * gets used from softirqs, which is safe for
866 * __icmpv6_sk (because those never get directly used
867 * via userspace syscalls), but unsafe for normal sockets.
868 */
869 lockdep_set_class(&sk->sk_dst_lock,
870 &icmpv6_socket_sk_dst_lock_key);
871
872 /* Enough space for 2 64K ICMP packets, including 858 /* Enough space for 2 64K ICMP packets, including
873 * sk_buff struct overhead. 859 * sk_buff struct overhead.
874 */ 860 */
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 5d1c7cee2cb2..a7ca2cde2ecb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -78,7 +78,9 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
78 memset(fl6, 0, sizeof(*fl6)); 78 memset(fl6, 0, sizeof(*fl6));
79 fl6->flowi6_proto = proto; 79 fl6->flowi6_proto = proto;
80 fl6->daddr = ireq->ir_v6_rmt_addr; 80 fl6->daddr = ireq->ir_v6_rmt_addr;
81 final_p = fl6_update_dst(fl6, np->opt, &final); 81 rcu_read_lock();
82 final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
83 rcu_read_unlock();
82 fl6->saddr = ireq->ir_v6_loc_addr; 84 fl6->saddr = ireq->ir_v6_loc_addr;
83 fl6->flowi6_oif = ireq->ir_iif; 85 fl6->flowi6_oif = ireq->ir_iif;
84 fl6->flowi6_mark = ireq->ir_mark; 86 fl6->flowi6_mark = ireq->ir_mark;
@@ -109,14 +111,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
109EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); 111EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
110 112
111static inline 113static inline
112void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
113 const struct in6_addr *daddr,
114 const struct in6_addr *saddr)
115{
116 __ip6_dst_store(sk, dst, daddr, saddr);
117}
118
119static inline
120struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) 114struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
121{ 115{
122 return __sk_dst_check(sk, cookie); 116 return __sk_dst_check(sk, cookie);
@@ -142,14 +136,16 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
142 fl6->fl6_dport = inet->inet_dport; 136 fl6->fl6_dport = inet->inet_dport;
143 security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); 137 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
144 138
145 final_p = fl6_update_dst(fl6, np->opt, &final); 139 rcu_read_lock();
140 final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
141 rcu_read_unlock();
146 142
147 dst = __inet6_csk_dst_check(sk, np->dst_cookie); 143 dst = __inet6_csk_dst_check(sk, np->dst_cookie);
148 if (!dst) { 144 if (!dst) {
149 dst = ip6_dst_lookup_flow(sk, fl6, final_p); 145 dst = ip6_dst_lookup_flow(sk, fl6, final_p);
150 146
151 if (!IS_ERR(dst)) 147 if (!IS_ERR(dst))
152 __inet6_csk_dst_store(sk, dst, NULL, NULL); 148 ip6_dst_store(sk, dst, NULL, NULL);
153 } 149 }
154 return dst; 150 return dst;
155} 151}
@@ -175,7 +171,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
175 /* Restore final destination back after routing done */ 171 /* Restore final destination back after routing done */
176 fl6.daddr = sk->sk_v6_daddr; 172 fl6.daddr = sk->sk_v6_daddr;
177 173
178 res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); 174 res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
175 np->tclass);
179 rcu_read_unlock(); 176 rcu_read_unlock();
180 return res; 177 return res;
181} 178}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index eabffbb89795..137fca42aaa6 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t)
177 int i; 177 int i;
178 178
179 for_each_possible_cpu(i) 179 for_each_possible_cpu(i)
180 ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL); 180 ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
181} 181}
182EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); 182EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
183 183
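
The ip6_tunnel.c change is a classic per-cpu iteration bug: raw_cpu_ptr() always resolves to the instance of the CPU the code is running on, so the old loop cleared that one CPU's cached dst once per iteration and left every other CPU's entry stale; per_cpu_ptr(ptr, i) indexes by the loop variable. An array-based analogy (this_cpu() is a stub for "the running CPU"):

    #include <stddef.h>

    #define NR_CPUS 4

    static void *dst_cache[NR_CPUS];        /* one cached entry per CPU */

    static int this_cpu(void) { return 0; }

    static void reset_buggy(void)
    {
            /* raw_cpu_ptr() analogue: ignores i, clears one slot 4 times */
            for (int i = 0; i < NR_CPUS; i++)
                    dst_cache[this_cpu()] = NULL;
    }

    static void reset_fixed(void)
    {
            /* per_cpu_ptr(ptr, i) analogue: clears every CPU's slot */
            for (int i = 0; i < NR_CPUS; i++)
                    dst_cache[i] = NULL;
    }
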
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index ad19136086dd..a10e77103c88 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
118 int cmd); 118 int cmd);
119static int ip6mr_rtm_dumproute(struct sk_buff *skb, 119static int ip6mr_rtm_dumproute(struct sk_buff *skb,
120 struct netlink_callback *cb); 120 struct netlink_callback *cb);
121static void mroute_clean_tables(struct mr6_table *mrt); 121static void mroute_clean_tables(struct mr6_table *mrt, bool all);
122static void ipmr_expire_process(unsigned long arg); 122static void ipmr_expire_process(unsigned long arg);
123 123
124#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 124#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
334static void ip6mr_free_table(struct mr6_table *mrt) 334static void ip6mr_free_table(struct mr6_table *mrt)
335{ 335{
336 del_timer_sync(&mrt->ipmr_expire_timer); 336 del_timer_sync(&mrt->ipmr_expire_timer);
337 mroute_clean_tables(mrt); 337 mroute_clean_tables(mrt, true);
338 kfree(mrt); 338 kfree(mrt);
339} 339}
340 340
@@ -765,10 +765,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
765 return dev; 765 return dev;
766 766
767failure: 767failure:
768 /* allow the register to be completed before unregistering. */
769 rtnl_unlock();
770 rtnl_lock();
771
772 unregister_netdevice(dev); 768 unregister_netdevice(dev);
773 return NULL; 769 return NULL;
774} 770}
@@ -1542,7 +1538,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1542 * Close the multicast socket, and clear the vif tables etc 1538 * Close the multicast socket, and clear the vif tables etc
1543 */ 1539 */
1544 1540
1545static void mroute_clean_tables(struct mr6_table *mrt) 1541static void mroute_clean_tables(struct mr6_table *mrt, bool all)
1546{ 1542{
1547 int i; 1543 int i;
1548 LIST_HEAD(list); 1544 LIST_HEAD(list);
@@ -1552,8 +1548,9 @@ static void mroute_clean_tables(struct mr6_table *mrt)
1552 * Shut down all active vif entries 1548 * Shut down all active vif entries
1553 */ 1549 */
1554 for (i = 0; i < mrt->maxvif; i++) { 1550 for (i = 0; i < mrt->maxvif; i++) {
1555 if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) 1551 if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
1556 mif6_delete(mrt, i, &list); 1552 continue;
1553 mif6_delete(mrt, i, &list);
1557 } 1554 }
1558 unregister_netdevice_many(&list); 1555 unregister_netdevice_many(&list);
1559 1556
@@ -1562,7 +1559,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
1562 */ 1559 */
1563 for (i = 0; i < MFC6_LINES; i++) { 1560 for (i = 0; i < MFC6_LINES; i++) {
1564 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { 1561 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1565 if (c->mfc_flags & MFC_STATIC) 1562 if (!all && (c->mfc_flags & MFC_STATIC))
1566 continue; 1563 continue;
1567 write_lock_bh(&mrt_lock); 1564 write_lock_bh(&mrt_lock);
1568 list_del(&c->list); 1565 list_del(&c->list);
@@ -1625,7 +1622,7 @@ int ip6mr_sk_done(struct sock *sk)
1625 net->ipv6.devconf_all); 1622 net->ipv6.devconf_all);
1626 write_unlock_bh(&mrt_lock); 1623 write_unlock_bh(&mrt_lock);
1627 1624
1628 mroute_clean_tables(mrt); 1625 mroute_clean_tables(mrt, false);
1629 err = 0; 1626 err = 0;
1630 break; 1627 break;
1631 } 1628 }
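
mroute_clean_tables() gains an all flag so its two callers can share one walk: ip6mr_sk_done() (multicast socket close) passes false and preserves VIFF_STATIC/MFC_STATIC entries, while ip6mr_free_table() passes true and flushes everything, so static entries no longer outlive the table. The skip-unless-full-flush idiom, as a sketch (ENTRY_STATIC and the delete stand-in are hypothetical):

    #include <stdbool.h>

    #define ENTRY_STATIC 0x1

    struct mr_entry {
            unsigned int flags;
            bool live;
    };

    static void clean_entries(struct mr_entry *e, int n, bool all)
    {
            for (int i = 0; i < n; i++) {
                    if (!all && (e[i].flags & ENTRY_STATIC))
                            continue;       /* keep user-pinned entries */
                    e[i].live = false;      /* mif6_delete()/list_del() stand-in */
            }
    }
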
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 63e6956917c9..4449ad1f8114 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
111 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 111 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
112 } 112 }
113 } 113 }
114 opt = xchg(&inet6_sk(sk)->opt, opt); 114 opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
115 opt);
115 sk_dst_reset(sk); 116 sk_dst_reset(sk);
116 117
117 return opt; 118 return opt;
@@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
231 sk->sk_socket->ops = &inet_dgram_ops; 232 sk->sk_socket->ops = &inet_dgram_ops;
232 sk->sk_family = PF_INET; 233 sk->sk_family = PF_INET;
233 } 234 }
234 opt = xchg(&np->opt, NULL); 235 opt = xchg((__force struct ipv6_txoptions **)&np->opt,
235 if (opt) 236 NULL);
236 sock_kfree_s(sk, opt, opt->tot_len); 237 if (opt) {
238 atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
239 txopt_put(opt);
240 }
237 pktopt = xchg(&np->pktoptions, NULL); 241 pktopt = xchg(&np->pktoptions, NULL);
238 kfree_skb(pktopt); 242 kfree_skb(pktopt);
239 243
@@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
403 if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) 407 if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
404 break; 408 break;
405 409
406 opt = ipv6_renew_options(sk, np->opt, optname, 410 opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
411 opt = ipv6_renew_options(sk, opt, optname,
407 (struct ipv6_opt_hdr __user *)optval, 412 (struct ipv6_opt_hdr __user *)optval,
408 optlen); 413 optlen);
409 if (IS_ERR(opt)) { 414 if (IS_ERR(opt)) {
@@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
432 retv = 0; 437 retv = 0;
433 opt = ipv6_update_options(sk, opt); 438 opt = ipv6_update_options(sk, opt);
434sticky_done: 439sticky_done:
435 if (opt) 440 if (opt) {
436 sock_kfree_s(sk, opt, opt->tot_len); 441 atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
442 txopt_put(opt);
443 }
437 break; 444 break;
438 } 445 }
439 446
@@ -486,6 +493,7 @@ sticky_done:
486 break; 493 break;
487 494
488 memset(opt, 0, sizeof(*opt)); 495 memset(opt, 0, sizeof(*opt));
496 atomic_set(&opt->refcnt, 1);
489 opt->tot_len = sizeof(*opt) + optlen; 497 opt->tot_len = sizeof(*opt) + optlen;
490 retv = -EFAULT; 498 retv = -EFAULT;
491 if (copy_from_user(opt+1, optval, optlen)) 499 if (copy_from_user(opt+1, optval, optlen))
@@ -502,8 +510,10 @@ update:
502 retv = 0; 510 retv = 0;
503 opt = ipv6_update_options(sk, opt); 511 opt = ipv6_update_options(sk, opt);
504done: 512done:
505 if (opt) 513 if (opt) {
506 sock_kfree_s(sk, opt, opt->tot_len); 514 atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
515 txopt_put(opt);
516 }
507 break; 517 break;
508 } 518 }
509 case IPV6_UNICAST_HOPS: 519 case IPV6_UNICAST_HOPS:
@@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1110 case IPV6_RTHDR: 1120 case IPV6_RTHDR:
1111 case IPV6_DSTOPTS: 1121 case IPV6_DSTOPTS:
1112 { 1122 {
1123 struct ipv6_txoptions *opt;
1113 1124
1114 lock_sock(sk); 1125 lock_sock(sk);
1115 len = ipv6_getsockopt_sticky(sk, np->opt, 1126 opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
1116 optname, optval, len); 1127 len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
1117 release_sock(sk); 1128 release_sock(sk);
1118 /* check if ipv6_getsockopt_sticky() returns err code */ 1129 /* check if ipv6_getsockopt_sticky() returns err code */
1119 if (len < 0) 1130 if (len < 0)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a39e29..5ee56d0a8699 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1651,7 +1651,6 @@ out:
1651 if (!err) { 1651 if (!err) {
1652 ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); 1652 ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
1653 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1653 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1654 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
1655 } else { 1654 } else {
1656 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 1655 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
1657 } 1656 }
@@ -2015,7 +2014,6 @@ out:
2015 if (!err) { 2014 if (!err) {
2016 ICMP6MSGOUT_INC_STATS(net, idev, type); 2015 ICMP6MSGOUT_INC_STATS(net, idev, type);
2017 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 2016 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
2018 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
2019 } else 2017 } else
2020 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 2018 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
2021 2019
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3e0f855e1bea..d6161e1c48c8 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -556,8 +556,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
556} 556}
557 557
558void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, 558void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
559 const struct in6_addr *daddr, const struct in6_addr *saddr, 559 const struct in6_addr *daddr, const struct in6_addr *saddr)
560 struct sk_buff *oskb)
561{ 560{
562 struct sk_buff *skb; 561 struct sk_buff *skb;
563 struct in6_addr addr_buf; 562 struct in6_addr addr_buf;
@@ -593,9 +592,6 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
593 ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, 592 ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
594 dev->dev_addr); 593 dev->dev_addr);
595 594
596 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
597 skb_dst_copy(skb, oskb);
598
599 ndisc_send_skb(skb, daddr, saddr); 595 ndisc_send_skb(skb, daddr, saddr);
600} 596}
601 597
@@ -682,12 +678,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
682 "%s: trying to ucast probe in NUD_INVALID: %pI6\n", 678 "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
683 __func__, target); 679 __func__, target);
684 } 680 }
685 ndisc_send_ns(dev, target, target, saddr, skb); 681 ndisc_send_ns(dev, target, target, saddr);
686 } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { 682 } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
687 neigh_app_ns(neigh); 683 neigh_app_ns(neigh);
688 } else { 684 } else {
689 addrconf_addr_solict_mult(target, &mcaddr); 685 addrconf_addr_solict_mult(target, &mcaddr);
690 ndisc_send_ns(dev, target, &mcaddr, saddr, skb); 686 ndisc_send_ns(dev, target, &mcaddr, saddr);
691 } 687 }
692} 688}
693 689
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d5efeb87350e..bab4441ed4e4 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data)
190/* Creation primitives. */ 190/* Creation primitives. */
191static inline struct frag_queue *fq_find(struct net *net, __be32 id, 191static inline struct frag_queue *fq_find(struct net *net, __be32 id,
192 u32 user, struct in6_addr *src, 192 u32 user, struct in6_addr *src,
193 struct in6_addr *dst, u8 ecn) 193 struct in6_addr *dst, int iif, u8 ecn)
194{ 194{
195 struct inet_frag_queue *q; 195 struct inet_frag_queue *q;
196 struct ip6_create_arg arg; 196 struct ip6_create_arg arg;
@@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
200 arg.user = user; 200 arg.user = user;
201 arg.src = src; 201 arg.src = src;
202 arg.dst = dst; 202 arg.dst = dst;
203 arg.iif = iif;
203 arg.ecn = ecn; 204 arg.ecn = ecn;
204 205
205 local_bh_disable(); 206 local_bh_disable();
@@ -601,7 +602,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
601 fhdr = (struct frag_hdr *)skb_transport_header(clone); 602 fhdr = (struct frag_hdr *)skb_transport_header(clone);
602 603
603 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, 604 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
604 ip6_frag_ecn(hdr)); 605 skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
605 if (fq == NULL) { 606 if (fq == NULL) {
606 pr_debug("Can't find and can't create new queue\n"); 607 pr_debug("Can't find and can't create new queue\n");
607 goto ret_orig; 608 goto ret_orig;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dc65ec198f7c..99140986e887 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -733,6 +733,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
733 733
734static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 734static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
735{ 735{
736 struct ipv6_txoptions *opt_to_free = NULL;
736 struct ipv6_txoptions opt_space; 737 struct ipv6_txoptions opt_space;
737 DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); 738 DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
738 struct in6_addr *daddr, *final_p, final; 739 struct in6_addr *daddr, *final_p, final;
@@ -839,8 +840,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
839 if (!(opt->opt_nflen|opt->opt_flen)) 840 if (!(opt->opt_nflen|opt->opt_flen))
840 opt = NULL; 841 opt = NULL;
841 } 842 }
842 if (!opt) 843 if (!opt) {
843 opt = np->opt; 844 opt = txopt_get(np);
845 opt_to_free = opt;
846 }
844 if (flowlabel) 847 if (flowlabel)
845 opt = fl6_merge_options(&opt_space, flowlabel, opt); 848 opt = fl6_merge_options(&opt_space, flowlabel, opt);
846 opt = ipv6_fixup_options(&opt_space, opt); 849 opt = ipv6_fixup_options(&opt_space, opt);
@@ -906,6 +909,7 @@ done:
906 dst_release(dst); 909 dst_release(dst);
907out: 910out:
908 fl6_sock_release(flowlabel); 911 fl6_sock_release(flowlabel);
912 txopt_put(opt_to_free);
909 return err < 0 ? err : len; 913 return err < 0 ? err : len;
910do_confirm: 914do_confirm:
911 dst_confirm(dst); 915 dst_confirm(dst);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 44e21a03cfc3..45f5ae51de65 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
108 return fq->id == arg->id && 108 return fq->id == arg->id &&
109 fq->user == arg->user && 109 fq->user == arg->user &&
110 ipv6_addr_equal(&fq->saddr, arg->src) && 110 ipv6_addr_equal(&fq->saddr, arg->src) &&
111 ipv6_addr_equal(&fq->daddr, arg->dst); 111 ipv6_addr_equal(&fq->daddr, arg->dst) &&
112 (arg->iif == fq->iif ||
113 !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
114 IPV6_ADDR_LINKLOCAL)));
112} 115}
113EXPORT_SYMBOL(ip6_frag_match); 116EXPORT_SYMBOL(ip6_frag_match);
114 117
@@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data)
180 183
181static struct frag_queue * 184static struct frag_queue *
182fq_find(struct net *net, __be32 id, const struct in6_addr *src, 185fq_find(struct net *net, __be32 id, const struct in6_addr *src,
183 const struct in6_addr *dst, u8 ecn) 186 const struct in6_addr *dst, int iif, u8 ecn)
184{ 187{
185 struct inet_frag_queue *q; 188 struct inet_frag_queue *q;
186 struct ip6_create_arg arg; 189 struct ip6_create_arg arg;
@@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
190 arg.user = IP6_DEFRAG_LOCAL_DELIVER; 193 arg.user = IP6_DEFRAG_LOCAL_DELIVER;
191 arg.src = src; 194 arg.src = src;
192 arg.dst = dst; 195 arg.dst = dst;
196 arg.iif = iif;
193 arg.ecn = ecn; 197 arg.ecn = ecn;
194 198
195 hash = inet6_hash_frag(id, src, dst); 199 hash = inet6_hash_frag(id, src, dst);
@@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
551 } 555 }
552 556
553 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, 557 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
554 ip6_frag_ecn(hdr)); 558 skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
555 if (fq) { 559 if (fq) {
556 int ret; 560 int ret;
557 561
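
Both reassembly paths (nf_conntrack_reasm.c above and reassembly.c here) add the arrival ifindex to the fragment-queue key, but enforce it only when the destination address is not globally unique: link-local and multicast destinations may legitimately repeat across links, so fragments arriving on different interfaces must not be folded into one queue. The scope-aware match, paraphrased with illustrative flag values:

    #include <stdbool.h>

    #define ADDR_MULTICAST 0x1
    #define ADDR_LINKLOCAL 0x2

    static bool frag_iif_match(int q_iif, int arg_iif, int dst_addr_type)
    {
            /* ifindex only participates in the key for addresses whose
             * scope makes them ambiguous across links */
            return q_iif == arg_iif ||
                   !(dst_addr_type & (ADDR_MULTICAST | ADDR_LINKLOCAL));
    }
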
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c8bc9b4ac328..826e6aa44f8d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -404,6 +404,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
404 } 404 }
405} 405}
406 406
407static bool __rt6_check_expired(const struct rt6_info *rt)
408{
409 if (rt->rt6i_flags & RTF_EXPIRES)
410 return time_after(jiffies, rt->dst.expires);
411 else
412 return false;
413}
414
407static bool rt6_check_expired(const struct rt6_info *rt) 415static bool rt6_check_expired(const struct rt6_info *rt)
408{ 416{
409 if (rt->rt6i_flags & RTF_EXPIRES) { 417 if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -515,7 +523,7 @@ static void rt6_probe_deferred(struct work_struct *w)
515 container_of(w, struct __rt6_probe_work, work); 523 container_of(w, struct __rt6_probe_work, work);
516 524
517 addrconf_addr_solict_mult(&work->target, &mcaddr); 525 addrconf_addr_solict_mult(&work->target, &mcaddr);
518 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL); 526 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
519 dev_put(work->dev); 527 dev_put(work->dev);
520 kfree(work); 528 kfree(work);
521} 529}
@@ -1252,7 +1260,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1252 1260
1253static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) 1261static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1254{ 1262{
1255 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 1263 if (!__rt6_check_expired(rt) &&
1264 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1256 rt6_check((struct rt6_info *)(rt->dst.from), cookie)) 1265 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1257 return &rt->dst; 1266 return &rt->dst;
1258 else 1267 else
@@ -1272,7 +1281,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1272 1281
1273 rt6_dst_from_metrics_check(rt); 1282 rt6_dst_from_metrics_check(rt);
1274 1283
1275 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) 1284 if (rt->rt6i_flags & RTF_PCPU ||
1285 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1276 return rt6_dst_from_check(rt, cookie); 1286 return rt6_dst_from_check(rt, cookie);
1277 else 1287 else
1278 return rt6_check(rt, cookie); 1288 return rt6_check(rt, cookie);
@@ -1322,6 +1332,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1322 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); 1332 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1323} 1333}
1324 1334
1335static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1336{
1337 return !(rt->rt6i_flags & RTF_CACHE) &&
1338 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1339}
1340
1325static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 1341static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1326 const struct ipv6hdr *iph, u32 mtu) 1342 const struct ipv6hdr *iph, u32 mtu)
1327{ 1343{
@@ -1335,7 +1351,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1335 if (mtu >= dst_mtu(dst)) 1351 if (mtu >= dst_mtu(dst))
1336 return; 1352 return;
1337 1353
1338 if (rt6->rt6i_flags & RTF_CACHE) { 1354 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1339 rt6_do_update_pmtu(rt6, mtu); 1355 rt6_do_update_pmtu(rt6, mtu);
1340 } else { 1356 } else {
1341 const struct in6_addr *daddr, *saddr; 1357 const struct in6_addr *daddr, *saddr;
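
In route.c, __rt6_check_expired() is the bare RTF_EXPIRES test, without the rt->dst.from fallback that rt6_check_expired() layers on top; rt6_dst_from_check() uses it so a pcpu or uncached clone is declared stale on its own timer before its parent route is consulted, and the PMTU path grows rt6_cache_allowed_for_pmtu() to decide when a clone may be updated in place. The expiry comparison rides on time_after(), whose signed-difference form stays correct across jiffies wraparound; a sketch of that shape (the flag value is illustrative):

    #define RTF_EXPIRES_SKETCH 0x00400000

    /* wrap-safe "a is after b" for a free-running counter, the shape of
     * the kernel's time_after() */
    #define time_after_sketch(a, b) ((long)((b) - (a)) < 0)

    static int route_expired(unsigned long flags, unsigned long expires,
                             unsigned long now)
    {
            return (flags & RTF_EXPIRES_SKETCH) &&
                   time_after_sketch(now, expires);
    }
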
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb8f2fa1c7fb..eaf7ac496d50 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -222,7 +222,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
222 memset(&fl6, 0, sizeof(fl6)); 222 memset(&fl6, 0, sizeof(fl6));
223 fl6.flowi6_proto = IPPROTO_TCP; 223 fl6.flowi6_proto = IPPROTO_TCP;
224 fl6.daddr = ireq->ir_v6_rmt_addr; 224 fl6.daddr = ireq->ir_v6_rmt_addr;
225 final_p = fl6_update_dst(&fl6, np->opt, &final); 225 final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
226 fl6.saddr = ireq->ir_v6_loc_addr; 226 fl6.saddr = ireq->ir_v6_loc_addr;
227 fl6.flowi6_oif = sk->sk_bound_dev_if; 227 fl6.flowi6_oif = sk->sk_bound_dev_if;
228 fl6.flowi6_mark = ireq->ir_mark; 228 fl6.flowi6_mark = ireq->ir_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5baa8e754e41..e7aab561b7b4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
120 struct ipv6_pinfo *np = inet6_sk(sk); 120 struct ipv6_pinfo *np = inet6_sk(sk);
121 struct tcp_sock *tp = tcp_sk(sk); 121 struct tcp_sock *tp = tcp_sk(sk);
122 struct in6_addr *saddr = NULL, *final_p, final; 122 struct in6_addr *saddr = NULL, *final_p, final;
123 struct ipv6_txoptions *opt;
123 struct flowi6 fl6; 124 struct flowi6 fl6;
124 struct dst_entry *dst; 125 struct dst_entry *dst;
125 int addr_type; 126 int addr_type;
@@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
235 fl6.fl6_dport = usin->sin6_port; 236 fl6.fl6_dport = usin->sin6_port;
236 fl6.fl6_sport = inet->inet_sport; 237 fl6.fl6_sport = inet->inet_sport;
237 238
238 final_p = fl6_update_dst(&fl6, np->opt, &final); 239 opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
240 final_p = fl6_update_dst(&fl6, opt, &final);
239 241
240 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 242 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
241 243
@@ -255,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
255 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 257 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
256 258
257 sk->sk_gso_type = SKB_GSO_TCPV6; 259 sk->sk_gso_type = SKB_GSO_TCPV6;
258 __ip6_dst_store(sk, dst, NULL, NULL); 260 ip6_dst_store(sk, dst, NULL, NULL);
259 261
260 if (tcp_death_row.sysctl_tw_recycle && 262 if (tcp_death_row.sysctl_tw_recycle &&
261 !tp->rx_opt.ts_recent_stamp && 263 !tp->rx_opt.ts_recent_stamp &&
@@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
263 tcp_fetch_timewait_stamp(sk, dst); 265 tcp_fetch_timewait_stamp(sk, dst);
264 266
265 icsk->icsk_ext_hdr_len = 0; 267 icsk->icsk_ext_hdr_len = 0;
266 if (np->opt) 268 if (opt)
267 icsk->icsk_ext_hdr_len = (np->opt->opt_flen + 269 icsk->icsk_ext_hdr_len = opt->opt_flen +
268 np->opt->opt_nflen); 270 opt->opt_nflen;
269 271
270 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 272 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
271 273
@@ -461,7 +463,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
461 if (np->repflow && ireq->pktopts) 463 if (np->repflow && ireq->pktopts)
462 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 464 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
463 465
464 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); 466 err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
467 np->tclass);
465 err = net_xmit_eval(err); 468 err = net_xmit_eval(err);
466 } 469 }
467 470
@@ -972,6 +975,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
972 struct inet_request_sock *ireq; 975 struct inet_request_sock *ireq;
973 struct ipv6_pinfo *newnp; 976 struct ipv6_pinfo *newnp;
974 const struct ipv6_pinfo *np = inet6_sk(sk); 977 const struct ipv6_pinfo *np = inet6_sk(sk);
978 struct ipv6_txoptions *opt;
975 struct tcp6_sock *newtcp6sk; 979 struct tcp6_sock *newtcp6sk;
976 struct inet_sock *newinet; 980 struct inet_sock *newinet;
977 struct tcp_sock *newtp; 981 struct tcp_sock *newtp;
@@ -1056,7 +1060,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
1056 */ 1060 */
1057 1061
1058 newsk->sk_gso_type = SKB_GSO_TCPV6; 1062 newsk->sk_gso_type = SKB_GSO_TCPV6;
1059 __ip6_dst_store(newsk, dst, NULL, NULL); 1063 ip6_dst_store(newsk, dst, NULL, NULL);
1060 inet6_sk_rx_dst_set(newsk, skb); 1064 inet6_sk_rx_dst_set(newsk, skb);
1061 1065
1062 newtcp6sk = (struct tcp6_sock *)newsk; 1066 newtcp6sk = (struct tcp6_sock *)newsk;
@@ -1098,13 +1102,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
1098 but we make one more one thing there: reattach optmem 1102 but we make one more one thing there: reattach optmem
1099 to newsk. 1103 to newsk.
1100 */ 1104 */
1101 if (np->opt) 1105 opt = rcu_dereference(np->opt);
1102 newnp->opt = ipv6_dup_options(newsk, np->opt); 1106 if (opt) {
1103 1107 opt = ipv6_dup_options(newsk, opt);
1108 RCU_INIT_POINTER(newnp->opt, opt);
1109 }
1104 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1110 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1105 if (newnp->opt) 1111 if (opt)
1106 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + 1112 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1107 newnp->opt->opt_flen); 1113 opt->opt_flen;
1108 1114
1109 tcp_ca_openreq_child(newsk, dst); 1115 tcp_ca_openreq_child(newsk, dst);
1110 1116
@@ -1690,6 +1696,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1690 const struct tcp_sock *tp = tcp_sk(sp); 1696 const struct tcp_sock *tp = tcp_sk(sp);
1691 const struct inet_connection_sock *icsk = inet_csk(sp); 1697 const struct inet_connection_sock *icsk = inet_csk(sp);
1692 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 1698 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1699 int rx_queue;
1700 int state;
1693 1701
1694 dest = &sp->sk_v6_daddr; 1702 dest = &sp->sk_v6_daddr;
1695 src = &sp->sk_v6_rcv_saddr; 1703 src = &sp->sk_v6_rcv_saddr;
@@ -1710,6 +1718,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1710 timer_expires = jiffies; 1718 timer_expires = jiffies;
1711 } 1719 }
1712 1720
1721 state = sk_state_load(sp);
1722 if (state == TCP_LISTEN)
1723 rx_queue = sp->sk_ack_backlog;
1724 else
1725 /* Because we don't lock the socket,
1726 * we might find a transient negative value.
1727 */
1728 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1729
1713 seq_printf(seq, 1730 seq_printf(seq,
1714 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1731 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1715 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 1732 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
@@ -1718,9 +1735,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1718 src->s6_addr32[2], src->s6_addr32[3], srcp, 1735 src->s6_addr32[2], src->s6_addr32[3], srcp,
1719 dest->s6_addr32[0], dest->s6_addr32[1], 1736 dest->s6_addr32[0], dest->s6_addr32[1],
1720 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1737 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1721 sp->sk_state, 1738 state,
1722 tp->write_seq-tp->snd_una, 1739 tp->write_seq - tp->snd_una,
1723 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), 1740 rx_queue,
1724 timer_active, 1741 timer_active,
1725 jiffies_delta_to_clock_t(timer_expires - jiffies), 1742 jiffies_delta_to_clock_t(timer_expires - jiffies),
1726 icsk->icsk_retransmits, 1743 icsk->icsk_retransmits,
@@ -1732,7 +1749,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1732 jiffies_to_clock_t(icsk->icsk_ack.ato), 1749 jiffies_to_clock_t(icsk->icsk_ack.ato),
1733 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 1750 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1734 tp->snd_cwnd, 1751 tp->snd_cwnd,
1735 sp->sk_state == TCP_LISTEN ? 1752 state == TCP_LISTEN ?
1736 fastopenq->max_qlen : 1753 fastopenq->max_qlen :
1737 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 1754 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1738 ); 1755 );
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 01bcb49619ee..9da3287a3923 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1110,6 +1110,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1110 DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); 1110 DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
1111 struct in6_addr *daddr, *final_p, final; 1111 struct in6_addr *daddr, *final_p, final;
1112 struct ipv6_txoptions *opt = NULL; 1112 struct ipv6_txoptions *opt = NULL;
1113 struct ipv6_txoptions *opt_to_free = NULL;
1113 struct ip6_flowlabel *flowlabel = NULL; 1114 struct ip6_flowlabel *flowlabel = NULL;
1114 struct flowi6 fl6; 1115 struct flowi6 fl6;
1115 struct dst_entry *dst; 1116 struct dst_entry *dst;
@@ -1263,8 +1264,10 @@ do_udp_sendmsg:
1263 opt = NULL; 1264 opt = NULL;
1264 connected = 0; 1265 connected = 0;
1265 } 1266 }
1266 if (!opt) 1267 if (!opt) {
1267 opt = np->opt; 1268 opt = txopt_get(np);
1269 opt_to_free = opt;
1270 }
1268 if (flowlabel) 1271 if (flowlabel)
1269 opt = fl6_merge_options(&opt_space, flowlabel, opt); 1272 opt = fl6_merge_options(&opt_space, flowlabel, opt);
1270 opt = ipv6_fixup_options(&opt_space, opt); 1273 opt = ipv6_fixup_options(&opt_space, opt);
@@ -1373,6 +1376,7 @@ release_dst:
1373out: 1376out:
1374 dst_release(dst); 1377 dst_release(dst);
1375 fl6_sock_release(flowlabel); 1378 fl6_sock_release(flowlabel);
1379 txopt_put(opt_to_free);
1376 if (!err) 1380 if (!err)
1377 return len; 1381 return len;
1378 /* 1382 /*
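
rawv6_sendmsg() above, udpv6_sendmsg() here and l2tp_ip6_sendmsg() below all grow the same opt_to_free pattern for the RCU conversion: when the caller supplied no options, take a counted reference with txopt_get() and remember it in opt_to_free, because fl6_merge_options()/ipv6_fixup_options() may hand back a different pointer; the single txopt_put(opt_to_free) on every exit then drops exactly the reference taken. The flow, as a sketch reusing the stand-ins from the earlier refcount sketch:

    struct txopts;                              /* see the refcount sketch above */
    struct txopts *txopt_get_sketch(void);
    void txopt_put_sketch(struct txopts *opt);

    static void sendmsg_flow(struct txopts *caller_opts)
    {
            struct txopts *opt = caller_opts;   /* may be NULL */
            struct txopts *opt_to_free = NULL;

            if (!opt) {
                    opt = txopt_get_sketch();   /* counted reference */
                    opt_to_free = opt;          /* remember what to drop */
            }

            /* merge/fixup steps may repoint opt; opt_to_free never moves,
             * so the exit path puts exactly the reference taken above */

            txopt_put_sketch(opt_to_free);      /* NULL-safe */
    }
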
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index fcb2752419c6..435608c4306d 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1483,7 +1483,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
1483 if (sock_writeable(sk) && iucv_below_msglim(sk)) 1483 if (sock_writeable(sk) && iucv_below_msglim(sk))
1484 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 1484 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1485 else 1485 else
1486 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 1486 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1487 1487
1488 return mask; 1488 return mask;
1489} 1489}
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index aca38d8aed8e..a2c8747d2936 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
486 DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); 486 DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
487 struct in6_addr *daddr, *final_p, final; 487 struct in6_addr *daddr, *final_p, final;
488 struct ipv6_pinfo *np = inet6_sk(sk); 488 struct ipv6_pinfo *np = inet6_sk(sk);
489 struct ipv6_txoptions *opt_to_free = NULL;
489 struct ipv6_txoptions *opt = NULL; 490 struct ipv6_txoptions *opt = NULL;
490 struct ip6_flowlabel *flowlabel = NULL; 491 struct ip6_flowlabel *flowlabel = NULL;
491 struct dst_entry *dst = NULL; 492 struct dst_entry *dst = NULL;
@@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
575 opt = NULL; 576 opt = NULL;
576 } 577 }
577 578
578 if (opt == NULL) 579 if (!opt) {
579 opt = np->opt; 580 opt = txopt_get(np);
581 opt_to_free = opt;
582 }
580 if (flowlabel) 583 if (flowlabel)
581 opt = fl6_merge_options(&opt_space, flowlabel, opt); 584 opt = fl6_merge_options(&opt_space, flowlabel, opt);
582 opt = ipv6_fixup_options(&opt_space, opt); 585 opt = ipv6_fixup_options(&opt_space, opt);
@@ -631,6 +634,7 @@ done:
631 dst_release(dst); 634 dst_release(dst);
632out: 635out:
633 fl6_sock_release(flowlabel); 636 fl6_sock_release(flowlabel);
637 txopt_put(opt_to_free);
634 638
635 return err < 0 ? err : len; 639 return err < 0 ? err : len;
636 640
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index a758eb84e8f0..ff757181b0a8 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -500,7 +500,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
500 /* send AddBA request */ 500 /* send AddBA request */
501 ieee80211_send_addba_request(sdata, sta->sta.addr, tid, 501 ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
502 tid_tx->dialog_token, start_seq_num, 502 tid_tx->dialog_token, start_seq_num,
503 local->hw.max_tx_aggregation_subframes, 503 IEEE80211_MAX_AMPDU_BUF,
504 tid_tx->timeout); 504 tid_tx->timeout);
505} 505}
506 506
@@ -926,6 +926,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
926 amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; 926 amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
927 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; 927 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
928 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; 928 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
929 buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
929 930
930 mutex_lock(&sta->ampdu_mlme.mtx); 931 mutex_lock(&sta->ampdu_mlme.mtx);
931 932
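
The agg-tx.c pair changes A-MPDU negotiation to advertise the protocol maximum (IEEE80211_MAX_AMPDU_BUF) in the ADDBA request and then clamp the peer's answer to the local hardware limit, instead of advertising the local limit up front. With a local cap of, say, 32 subframes: a peer answering 64 is clamped to 32, while a peer answering 16 is used as-is. The clamp, trivially:

    /* bound the peer's ADDBA response by what we can aggregate locally */
    static unsigned short clamp_ampdu_buf(unsigned short peer_buf_size,
                                          unsigned short local_max)
    {
            return peer_buf_size < local_max ? peer_buf_size : local_max;
    }
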
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c2bd1b6a6922..da471eef07bb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3454,8 +3454,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
3454 goto out_unlock; 3454 goto out_unlock;
3455 } 3455 }
3456 } else { 3456 } else {
3457 /* for cookie below */ 3457 /* Assign a dummy non-zero cookie, it's not sent to
3458 ack_skb = skb; 3458 * userspace in this case but we rely on its value
3459 * internally in the need_offchan case to distinguish
3460 * mgmt-tx from remain-on-channel.
3461 */
3462 *cookie = 0xffffffff;
3459 } 3463 }
3460 3464
3461 if (!need_offchan) { 3465 if (!need_offchan) {
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d0dc1bfaeec2..c9e325d2e120 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -76,7 +76,8 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
76void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, 76void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
77 bool update_bss) 77 bool update_bss)
78{ 78{
79 if (__ieee80211_recalc_txpower(sdata) || update_bss) 79 if (__ieee80211_recalc_txpower(sdata) ||
80 (update_bss && ieee80211_sdata_running(sdata)))
80 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); 81 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER);
81} 82}
82 83
@@ -1861,6 +1862,7 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
1861 unregister_netdevice(sdata->dev); 1862 unregister_netdevice(sdata->dev);
1862 } else { 1863 } else {
1863 cfg80211_unregister_wdev(&sdata->wdev); 1864 cfg80211_unregister_wdev(&sdata->wdev);
1865 ieee80211_teardown_sdata(sdata);
1864 kfree(sdata); 1866 kfree(sdata);
1865 } 1867 }
1866} 1868}
@@ -1870,7 +1872,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata)
1870 if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state))) 1872 if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state)))
1871 return; 1873 return;
1872 ieee80211_do_stop(sdata, true); 1874 ieee80211_do_stop(sdata, true);
1873 ieee80211_teardown_sdata(sdata);
1874} 1875}
1875 1876
1876void ieee80211_remove_interfaces(struct ieee80211_local *local) 1877void ieee80211_remove_interfaces(struct ieee80211_local *local)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 858f6b1cb149..175ffcf7fb06 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -541,8 +541,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
541 NL80211_FEATURE_HT_IBSS | 541 NL80211_FEATURE_HT_IBSS |
542 NL80211_FEATURE_VIF_TXPOWER | 542 NL80211_FEATURE_VIF_TXPOWER |
543 NL80211_FEATURE_MAC_ON_CREATE | 543 NL80211_FEATURE_MAC_ON_CREATE |
544 NL80211_FEATURE_USERSPACE_MPM | 544 NL80211_FEATURE_USERSPACE_MPM;
545 NL80211_FEATURE_FULL_AP_CLIENT_STATE;
546 545
547 if (!ops->hw_scan) 546 if (!ops->hw_scan)
548 wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | 547 wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index b890e225a8f1..b3b44a5dd375 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -779,10 +779,8 @@ void mesh_plink_broken(struct sta_info *sta)
779static void mesh_path_node_reclaim(struct rcu_head *rp) 779static void mesh_path_node_reclaim(struct rcu_head *rp)
780{ 780{
781 struct mpath_node *node = container_of(rp, struct mpath_node, rcu); 781 struct mpath_node *node = container_of(rp, struct mpath_node, rcu);
782 struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
783 782
784 del_timer_sync(&node->mpath->timer); 783 del_timer_sync(&node->mpath->timer);
785 atomic_dec(&sdata->u.mesh.mpaths);
786 kfree(node->mpath); 784 kfree(node->mpath);
787 kfree(node); 785 kfree(node);
788} 786}
@@ -790,8 +788,9 @@ static void mesh_path_node_reclaim(struct rcu_head *rp)
790/* needs to be called with the corresponding hashwlock taken */ 788/* needs to be called with the corresponding hashwlock taken */
791static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) 789static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node)
792{ 790{
793 struct mesh_path *mpath; 791 struct mesh_path *mpath = node->mpath;
794 mpath = node->mpath; 792 struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
793
795 spin_lock(&mpath->state_lock); 794 spin_lock(&mpath->state_lock);
796 mpath->flags |= MESH_PATH_RESOLVING; 795 mpath->flags |= MESH_PATH_RESOLVING;
797 if (mpath->is_gate) 796 if (mpath->is_gate)
@@ -799,6 +798,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node)
799 hlist_del_rcu(&node->list); 798 hlist_del_rcu(&node->list);
800 call_rcu(&node->rcu, mesh_path_node_reclaim); 799 call_rcu(&node->rcu, mesh_path_node_reclaim);
801 spin_unlock(&mpath->state_lock); 800 spin_unlock(&mpath->state_lock);
801 atomic_dec(&sdata->u.mesh.mpaths);
802 atomic_dec(&tbl->entries); 802 atomic_dec(&tbl->entries);
803} 803}
804 804
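
The mesh_pathtbl.c hunks move the sdata->u.mesh.mpaths decrement out of the call_rcu() callback and into __mesh_path_del(), under the hash lock: the reclaim callback runs only after a grace period, and by then it should do nothing but free, rather than reach through node->mpath into bookkeeping whose lifetime it cannot see. A compilable sketch of that split (call_rcu_sketch() invokes the callback immediately purely so the example runs; the kernel defers it, which is the whole point):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct node { int payload; };

    static atomic_int owner_mpaths;     /* sdata->u.mesh.mpaths analogue */

    static void call_rcu_sketch(struct node *n, void (*cb)(struct node *))
    {
            cb(n);                      /* kernel: deferred past a grace period */
    }

    static void reclaim(struct node *n)
    {
            free(n);                    /* free only, no shared bookkeeping */
    }

    static void unlink_node(struct node *n)
    {
            /* bookkeeping happens now, while the owner is known to be live */
            atomic_fetch_sub(&owner_mpaths, 1);
            call_rcu_sketch(n, reclaim);
    }
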
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 4aeca4b0c3cb..a413e52f7691 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -597,8 +597,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
597 /* We need to ensure power level is at max for scanning. */ 597 /* We need to ensure power level is at max for scanning. */
598 ieee80211_hw_config(local, 0); 598 ieee80211_hw_config(local, 0);
599 599
600 if ((req->channels[0]->flags & 600 if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR |
601 IEEE80211_CHAN_NO_IR) || 601 IEEE80211_CHAN_RADAR)) ||
602 !req->n_ssids) { 602 !req->n_ssids) {
603 next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; 603 next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
604 } else { 604 } else {
@@ -645,7 +645,7 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan)
645 * TODO: channel switching also consumes quite some time, 645 * TODO: channel switching also consumes quite some time,
646 * add that delay as well to get a better estimation 646 * add that delay as well to get a better estimation
647 */ 647 */
648 if (chan->flags & IEEE80211_CHAN_NO_IR) 648 if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR))
649 return IEEE80211_PASSIVE_CHANNEL_TIME; 649 return IEEE80211_PASSIVE_CHANNEL_TIME;
650 return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME; 650 return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
651} 651}
@@ -777,7 +777,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
777 * 777 *
778 * In any case, it is not necessary for a passive scan. 778 * In any case, it is not necessary for a passive scan.
779 */ 779 */
780 if (chan->flags & IEEE80211_CHAN_NO_IR || !scan_req->n_ssids) { 780 if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
781 !scan_req->n_ssids) {
781 *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; 782 *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
782 local->next_scan_state = SCAN_DECISION; 783 local->next_scan_state = SCAN_DECISION;
783 return; 784 return;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e22349ea7256..4692782b5280 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_TEE
 	depends on IPV6 || IPV6=n
 	depends on !NF_CONNTRACK || NF_CONNTRACK
 	select NF_DUP_IPV4
-	select NF_DUP_IPV6 if IP6_NF_IPTABLES
+	select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
 	---help---
 	This option adds a "TEE" target with which a packet can be cloned and
 	this clone be rerouted to another nexthop.
@@ -882,7 +882,7 @@ config NETFILTER_XT_TARGET_TPROXY
 	depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
 	depends on IP_NF_MANGLE
 	select NF_DEFRAG_IPV4
-	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
 	help
 	This option adds a `TPROXY' target, which is somewhat similar to
 	REDIRECT.  It can only be used in the mangle table and is useful
@@ -1375,7 +1375,7 @@ config NETFILTER_XT_MATCH_SOCKET
 	depends on IPV6 || IPV6=n
 	depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
 	select NF_DEFRAG_IPV4
-	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
 	help
 	This option adds a `socket' match, which can be used to match
 	packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..b0bc475f641e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
 #define mtype_gc	IPSET_TOKEN(MTYPE, _gc)
 #define mtype		MTYPE

-#define get_ext(set, map, id)	((map)->extensions + (set)->dsize * (id))
+#define get_ext(set, map, id)	((map)->extensions + ((set)->dsize * (id)))

 static void
 mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
 		del_timer_sync(&map->gc);

 	ip_set_free(map->members);
-	if (set->dsize) {
-		if (set->extensions & IPSET_EXT_DESTROY)
-			mtype_ext_cleanup(set);
-		ip_set_free(map->extensions);
-	}
-	kfree(map);
+	if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+		mtype_ext_cleanup(set);
+	ip_set_free(map);

 	set->data = NULL;
 }
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
 	const struct mtype *map = set->data;
 	struct nlattr *nested;
+	size_t memsize = sizeof(*map) + map->memsize;

 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
 	if (mtype_do_head(skb, map) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
-	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-			  htonl(sizeof(*map) +
-				map->memsize +
-				set->dsize * map->elements)))
+	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
 /* Type structure */
 struct bitmap_ip {
 	void *members;		/* the set members */
-	void *extensions;	/* data extensions */
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
 	size_t memsize;		/* members size */
 	u8 netmask;		/* subnet netmask */
 	struct timer_list gc;	/* garbage collection */
+	unsigned char extensions[0]	/* data extensions */
+		__aligned(__alignof__(u64));
 };

 /* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 	pr_debug("hosts %u, elements %llu\n",
 		 hosts, (unsigned long long)elements);

-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	set->dsize = ip_set_elem_len(set, tb, 0, 0);
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;

 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ip;
-	set->dsize = ip_set_elem_len(set, tb, 0);
 	if (!init_map_ip(set, map, first_ip, last_ip,
 			 elements, hosts, netmask)) {
 		kfree(map);
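The bitmap conversions above replace a separately allocated extensions pointer with storage at the tail of the map allocation itself. A minimal userspace sketch of that single-allocation layout follows; names such as ext_map and make_map are illustrative, not from the kernel:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Illustrative stand-in for struct bitmap_ip: the per-element
	 * extension area lives at the tail of the same allocation as
	 * the map header. */
	struct ext_map {
		size_t dsize;               /* extension bytes per element */
		size_t elements;            /* number of elements */
		unsigned char extensions[]; /* C99 flexible array member */
	};

	static struct ext_map *make_map(size_t elements, size_t dsize)
	{
		/* One allocation covers header and extension storage, so
		 * there is no second pointer to allocate, free, or leak. */
		struct ext_map *map = calloc(1, sizeof(*map) + elements * dsize);

		if (!map)
			return NULL;
		map->dsize = dsize;
		map->elements = elements;
		return map;
	}

	static unsigned char *get_ext(struct ext_map *map, size_t id)
	{
		return map->extensions + map->dsize * id; /* mirrors get_ext() */
	}

	int main(void)
	{
		struct ext_map *map = make_map(4, 8);

		if (!map)
			return 1;
		memset(get_ext(map, 2), 0xff, map->dsize);
		printf("ext[2][0] = 0x%02x\n", get_ext(map, 2)[0]);
		free(map);	/* a single free tears everything down */
		return 0;
	}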
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..29dde208381d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
 /* Type structure */
 struct bitmap_ipmac {
 	void *members;		/* the set members */
-	void *extensions;	/* MAC + data extensions */
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collector */
+	unsigned char extensions[0]	/* MAC + data extensions */
+		__aligned(__alignof__(u64));
 };

 /* ADT structure for generic function args */
 struct bitmap_ipmac_adt_elem {
+	unsigned char ether[ETH_ALEN] __aligned(2);
 	u16 id;
-	unsigned char *ether;
+	u16 add_mac;
 };

 struct bitmap_ipmac_elem {
 	unsigned char ether[ETH_ALEN];
 	unsigned char filled;
-} __attribute__ ((aligned));
+} __aligned(__alignof__(u64));

 static inline u32
 ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
 	return ip - m->first_ip;
 }

-static inline struct bitmap_ipmac_elem *
-get_elem(void *extensions, u16 id, size_t dsize)
-{
-	return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
-}
+#define get_elem(extensions, id, dsize)	\
+	(struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize)	\
+	(const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))

 /* Common functions */

@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,

 	if (!test_bit(e->id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, e->id, dsize);
-	if (elem->filled == MAC_FILLED)
-		return !e->ether ||
-		       ether_addr_equal(e->ether, elem->ether);
+	elem = get_const_elem(map->extensions, e->id, dsize);
+	if (e->add_mac && elem->filled == MAC_FILLED)
+		return ether_addr_equal(e->ether, elem->ether);
 	/* Trigger kernel to fill out the ethernet address */
 	return -EAGAIN;
 }
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)

 	if (!test_bit(id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, id, dsize);
+	elem = get_const_elem(map->extensions, id, dsize);
 	/* Timer not started for the incomplete elements */
 	return elem->filled == MAC_FILLED;
 }
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
 	 * and we can reuse it later when MAC is filled out,
 	 * possibly by the kernel
 	 */
-	if (e->ether)
+	if (e->add_mac)
 		ip_set_timeout_set(timeout, t);
 	else
 		*timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 	elem = get_elem(map->extensions, e->id, dsize);
 	if (test_bit(e->id, map->members)) {
 		if (elem->filled == MAC_FILLED) {
-			if (e->ether &&
+			if (e->add_mac &&
 			    (flags & IPSET_FLAG_EXIST) &&
 			    !ether_addr_equal(e->ether, elem->ether)) {
 				/* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 				ether_addr_copy(elem->ether, e->ether);
 			}
 			return IPSET_ADD_FAILED;
-		} else if (!e->ether)
+		} else if (!e->add_mac)
 			/* Already added without ethernet address */
 			return IPSET_ADD_FAILED;
 		/* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
 		return IPSET_ADD_START_STORED_TIMEOUT;
-	} else if (e->ether) {
+	} else if (e->add_mac) {
 		/* We can store MAC too */
 		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
 		     u32 id, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, dsize);
+		get_const_elem(map->extensions, id, dsize);

 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			       htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 {
 	struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct bitmap_ipmac_adt_elem e = { .id = 0 };
+	struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;

@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 		return -EINVAL;

 	e.id = ip_to_id(map, ip);
-	e.ether = eth_hdr(skb)->h_source;
+	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);

 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
 		return -IPSET_ERR_BITMAP_RANGE;

 	e.id = ip_to_id(map, ip);
-	if (tb[IPSET_ATTR_ETHER])
-		e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
-	else
-		e.ether = NULL;
-
+	if (tb[IPSET_ATTR_ETHER]) {
+		memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+		e.add_mac = 1;
+	}
 	ret = adtfn(set, &e, &ext, &ext, flags);

 	return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 	if (elements > IPSET_BITMAP_MAX_RANGE + 1)
 		return -IPSET_ERR_BITMAP_RANGE_SIZE;

-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	set->dsize = ip_set_elem_len(set, tb,
+				     sizeof(struct bitmap_ipmac_elem),
+				     __alignof__(struct bitmap_ipmac_elem));
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;

 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ipmac;
-	set->dsize = ip_set_elem_len(set, tb,
-				     sizeof(struct bitmap_ipmac_elem));
 	if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
 /* Type structure */
 struct bitmap_port {
 	void *members;		/* the set members */
-	void *extensions;	/* data extensions */
 	u16 first_port;		/* host byte order, included in range */
 	u16 last_port;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collection */
+	unsigned char extensions[0]	/* data extensions */
+		__aligned(__alignof__(u64));
 };

 /* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * map->elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_port = first_port;
 	map->last_port = last_port;
 	set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
+	u32 elements;

 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		last_port = tmp;
 	}

-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	elements = last_port - first_port + 1;
+	set->dsize = ip_set_elem_len(set, tb, 0, 0);
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;

-	map->elements = last_port - first_port + 1;
+	map->elements = elements;
 	map->memsize = bitmap_bytes(0, map->elements);
 	set->variant = &bitmap_port;
-	set->dsize = ip_set_elem_len(set, tb, 0);
 	if (!init_map_port(set, map, first_port, last_port)) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c2634e1..54f3d7cb23e6 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
 }

 size_t
-ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+		size_t align)
 {
 	enum ip_set_ext_id id;
-	size_t offset = len;
 	u32 cadt_flags = 0;

 	if (tb[IPSET_ATTR_CADT_FLAGS])
 		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 	if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
 		set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+	if (!align)
+		align = 1;
 	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
 		if (!add_extension(id, cadt_flags, tb))
 			continue;
-		offset = ALIGN(offset, ip_set_extensions[id].align);
-		set->offset[id] = offset;
+		len = ALIGN(len, ip_set_extensions[id].align);
+		set->offset[id] = len;
 		set->extensions |= ip_set_extensions[id].type;
-		offset += ip_set_extensions[id].len;
+		len += ip_set_extensions[id].len;
 	}
-	return offset;
+	return ALIGN(len, align);
 }
 EXPORT_SYMBOL_GPL(ip_set_elem_len);

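The new align argument makes ip_set_elem_len() return a properly padded per-element stride, so element i can start at i * stride with every extension aligned. Roughly the same arithmetic in standalone C; the extension table and names here are invented for illustration:

	#include <stdio.h>
	#include <stddef.h>

	/* Round x up to a power-of-two boundary a, as the kernel's ALIGN() does. */
	#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

	struct ext_desc { size_t len; size_t align; };

	/* Lay optional extensions out after a fixed element of size len,
	 * then round the total to the element's own alignment. */
	static size_t elem_len(size_t len, size_t align,
			       const struct ext_desc *ext, size_t next)
	{
		size_t i;

		if (!align)
			align = 1;
		for (i = 0; i < next; i++) {
			len = ALIGN_UP(len, ext[i].align);	/* extension offset */
			len += ext[i].len;
		}
		return ALIGN_UP(len, align);			/* per-element stride */
	}

	int main(void)
	{
		/* e.g. a 6-byte element plus a u64 counter and a u32 timeout */
		struct ext_desc ext[] = { { 8, 8 }, { 4, 4 } };
		size_t stride = elem_len(6, 8, ext, 2);

		printf("stride = %zu\n", stride);	/* 6 -> 8 -> 16 -> 20 -> 24 */
		return 0;
	}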
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..e5336ab36d67 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
 	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
 	u8 size;		/* size of the array */
 	u8 pos;			/* position of the first free entry */
-	unsigned char value[0];	/* the array of the values */
-} __attribute__ ((aligned));
+	unsigned char value[0]	/* the array of the values */
+		__aligned(__alignof__(u64));
+};

 /* The hash table: the table size stored here in order to make resizing easy */
 struct htable {
@@ -475,7 +476,7 @@ static void
 mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 {
 	struct htable *t;
-	struct hbucket *n;
+	struct hbucket *n, *tmp;
 	struct mtype_elem *data;
 	u32 i, j, d;
 #ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 			}
 		}
 		if (d >= AHASH_INIT_SIZE) {
-			struct hbucket *tmp = kzalloc(sizeof(*tmp) +
-					(n->size - AHASH_INIT_SIZE) * dsize,
-					GFP_ATOMIC);
+			if (d >= n->size) {
+				rcu_assign_pointer(hbucket(t, i), NULL);
+				kfree_rcu(n, rcu);
+				continue;
+			}
+			tmp = kzalloc(sizeof(*tmp) +
+				      (n->size - AHASH_INIT_SIZE) * dsize,
+				      GFP_ATOMIC);
 			if (!tmp)
 				/* Still try to delete expired elements */
 				continue;
@@ -522,7 +528,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 					continue;
 				data = ahash_data(n, j, dsize);
 				memcpy(tmp->value + d * dsize, data, dsize);
-				set_bit(j, tmp->used);
+				set_bit(d, tmp->used);
 				d++;
 			}
 			tmp->pos = d;
@@ -1323,12 +1329,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 #endif
 		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
 		set->dsize = ip_set_elem_len(set, tb,
-				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+				__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
 #ifndef IP_SET_PROTO_UNDEF
 	} else {
 		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
 		set->dsize = ip_set_elem_len(set, tb,
-				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+				__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
 	}
 #endif
 	if (tb[IPSET_ATTR_TIMEOUT]) {
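In the expire path above, survivors are compacted into a smaller bucket, so occupancy bits must track destination slots; hence set_bit(d, ...) rather than set_bit(j, ...). A plain-C sketch of that compaction, with all names and sizes illustrative:

	#include <stdio.h>
	#include <stddef.h>

	/* Compact the surviving entries of src[] into dst[]: positions
	 * are renumbered, so occupancy is recorded at the destination
	 * index d, not the source index j (the bug fixed above). */
	static size_t compact(const int *src, const unsigned char *src_used,
			      size_t n, int *dst, unsigned char *dst_used)
	{
		size_t j, d = 0;

		for (j = 0; j < n; j++) {
			if (!src_used[j])
				continue;	/* expired: drop it */
			dst[d] = src[j];
			dst_used[d] = 1;	/* mark slot d, not slot j */
			d++;
		}
		return d;
	}

	int main(void)
	{
		int src[5] = { 10, 11, 12, 13, 14 }, dst[5] = { 0 };
		unsigned char used[5] = { 1, 0, 1, 0, 1 }, dst_used[5] = { 0 };
		size_t d = compact(src, used, 5, dst, dst_used);

		printf("%zu survivors, first two: %d %d\n", d, dst[0], dst[1]);
		return 0;
	}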
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8c90..bbede95c9f68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
 	struct rcu_head rcu;
 	struct list_head list;
 	ip_set_id_t id;
-};
+} __aligned(__alignof__(u64));

 struct set_adt_elem {
 	ip_set_id_t id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		size = IP_SET_LIST_MIN_SIZE;

 	set->variant = &set_variant;
-	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+				     __alignof__(struct set_elem));
 	if (!init_list_set(net, set, size))
 		return -ENOMEM;
 	if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1e24fff53e4b..f57b4dcdb233 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1176,6 +1176,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 	struct ip_vs_protocol *pp;
 	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
+	struct sock *sk;

 	EnterFunction(11);

@@ -1183,13 +1184,12 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 	if (skb->ipvs_property)
 		return NF_ACCEPT;

+	sk = skb_to_full_sk(skb);
 	/* Bad... Do not break raw sockets */
-	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
 		     af == AF_INET)) {
-		struct sock *sk = skb->sk;
-		struct inet_sock *inet = inet_sk(skb->sk);

-		if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+		if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
 			return NF_ACCEPT;
 	}

@@ -1681,6 +1681,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
 	struct ip_vs_conn *cp;
 	int ret, pkts;
 	int conn_reuse_mode;
+	struct sock *sk;

 	/* Already marked as IPVS request or reply? */
 	if (skb->ipvs_property)
@@ -1708,12 +1709,11 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
 	ip_vs_fill_iph_skb(af, skb, false, &iph);

 	/* Bad... Do not break raw sockets */
-	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+	sk = skb_to_full_sk(skb);
+	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
 		     af == AF_INET)) {
-		struct sock *sk = skb->sk;
-		struct inet_sock *inet = inet_sk(skb->sk);

-		if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+		if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
 			return NF_ACCEPT;
 	}

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 06eb48fceb42..740cce4685ac 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -825,7 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	struct net *net = sock_net(ctnl);
 	struct nfnl_log_net *log = nfnl_log_pernet(net);
 	int ret = 0;
-	u16 flags;
+	u16 flags = 0;

 	if (nfula[NFULA_CFG_CMD]) {
 		u_int8_t pf = nfmsg->nfgen_family;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1ffa..c7808fc19719 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
 	local_bh_enable();
 }

-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+			      struct nft_counter *total)
 {
-	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
-	struct nft_counter_percpu *cpu_stats;
-	struct nft_counter total;
+	const struct nft_counter_percpu *cpu_stats;
 	u64 bytes, packets;
 	unsigned int seq;
 	int cpu;

-	memset(&total, 0, sizeof(total));
+	memset(total, 0, sizeof(*total));
 	for_each_possible_cpu(cpu) {
-		cpu_stats = per_cpu_ptr(priv->counter, cpu);
+		cpu_stats = per_cpu_ptr(counter, cpu);
 		do {
 			seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
 			bytes = cpu_stats->counter.bytes;
 			packets = cpu_stats->counter.packets;
 		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));

-		total.packets += packets;
-		total.bytes += bytes;
+		total->packets += packets;
+		total->bytes += bytes;
 	}
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+	struct nft_counter total;
+
+	nft_counter_fetch(priv->counter, &total);

 	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
 	    nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
 	free_percpu(priv->counter);
 }

+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
+	struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
+	struct nft_counter_percpu __percpu *cpu_stats;
+	struct nft_counter_percpu *this_cpu;
+	struct nft_counter total;
+
+	nft_counter_fetch(priv->counter, &total);
+
+	cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
+					      GFP_ATOMIC);
+	if (cpu_stats == NULL)
+		return ENOMEM;
+
+	preempt_disable();
+	this_cpu = this_cpu_ptr(cpu_stats);
+	this_cpu->counter.packets = total.packets;
+	this_cpu->counter.bytes = total.bytes;
+	preempt_enable();
+
+	priv_clone->counter = cpu_stats;
+	return 0;
+}
+
 static struct nft_expr_type nft_counter_type;
 static const struct nft_expr_ops nft_counter_ops = {
 	.type		= &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
 	.init		= nft_counter_init,
 	.destroy	= nft_counter_destroy,
 	.dump		= nft_counter_dump,
+	.clone		= nft_counter_clone,
 };

 static struct nft_expr_type nft_counter_type __read_mostly = {
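nft_counter_clone() snapshots the source's per-cpu counters and then seeds one slot of the fresh clone with the totals. A simplified userspace model of that fetch-then-seed pattern; plain arrays stand in for per-cpu data, and the u64_stats seqlock retry loop is deliberately omitted:

	#include <stdio.h>
	#include <string.h>

	#define NCPUS 4

	struct counter { unsigned long long packets, bytes; };

	/* Toy model of nft_counter_fetch(): sum every per-cpu slot into
	 * a caller-supplied total. */
	static void fetch(const struct counter percpu[NCPUS], struct counter *total)
	{
		int cpu;

		memset(total, 0, sizeof(*total));
		for (cpu = 0; cpu < NCPUS; cpu++) {
			total->packets += percpu[cpu].packets;
			total->bytes += percpu[cpu].bytes;
		}
	}

	/* Toy model of nft_counter_clone(): snapshot the source, then
	 * seed one slot of the fresh clone so nothing is lost. */
	static void clone_counters(const struct counter src[NCPUS],
				   struct counter dst[NCPUS])
	{
		struct counter total;

		fetch(src, &total);
		memset(dst, 0, NCPUS * sizeof(*dst));
		dst[0] = total;
	}

	int main(void)
	{
		struct counter src[NCPUS] = {
			{ 1, 100 }, { 2, 200 }, { 3, 300 }, { 4, 400 }
		};
		struct counter dst[NCPUS], total;

		clone_counters(src, dst);
		fetch(dst, &total);
		printf("packets=%llu bytes=%llu\n", total.packets, total.bytes);
		return 0;
	}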
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60a59..9dec3bd1b63c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
 	}

 	ext = nft_set_elem_ext(set, elem);
-	if (priv->expr != NULL)
-		nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+	if (priv->expr != NULL &&
+	    nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
+		return NULL;

 	return elem;
 }
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index b7de0da46acd..ecf0a0196f18 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -572,7 +572,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
 	if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

 	pr_debug("mask 0x%x\n", mask);

diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index a7a80a6b77b0..653d073bae45 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work)
 		struct hlist_node *n;

 		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
-			if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+			if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
 				continue;

 			if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index efb736bb6855..e41cd12d9b2d 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -117,7 +117,6 @@ static struct vport_ops ovs_geneve_vport_ops = {
 	.destroy	= ovs_netdev_tunnel_destroy,
 	.get_options	= geneve_get_options,
 	.send		= dev_queue_xmit,
-	.owner		= THIS_MODULE,
 };

 static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index c3257d78d3d2..7f8897f33a67 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -89,7 +89,6 @@ static struct vport_ops ovs_gre_vport_ops = {
 	.create		= gre_create,
 	.send		= dev_queue_xmit,
 	.destroy	= ovs_netdev_tunnel_destroy,
-	.owner		= THIS_MODULE,
 };

 static int __init ovs_gre_tnl_init(void)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index b327368a3848..6b0190b987ec 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
 	if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
 		ovs_netdev_detach_dev(vport);

-	/* Early release so we can unregister the device */
+	/* We can be invoked by both explicit vport deletion and
+	 * underlying netdev deregistration; delete the link only
+	 * if it's not already shutting down.
+	 */
+	if (vport->dev->reg_state == NETREG_REGISTERED)
+		rtnl_delete_link(vport->dev);
 	dev_put(vport->dev);
-	rtnl_delete_link(vport->dev);
 	vport->dev = NULL;
 	rtnl_unlock();

diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 0ac0fd004d7e..31cbc8c5c7db 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -71,7 +71,7 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name)
 	return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
 }

-int ovs_vport_ops_register(struct vport_ops *ops)
+int __ovs_vport_ops_register(struct vport_ops *ops)
 {
 	int err = -EEXIST;
 	struct vport_ops *o;
@@ -87,7 +87,7 @@ errout:
 	ovs_unlock();
 	return err;
 }
-EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
+EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);

 void ovs_vport_ops_unregister(struct vport_ops *ops)
 {
@@ -256,8 +256,8 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
  *
  * @vport: vport to delete.
  *
- * Detaches @vport from its datapath and destroys it. It is possible to fail
- * for reasons such as lack of memory. ovs_mutex must be held.
+ * Detaches @vport from its datapath and destroys it. ovs_mutex must
+ * be held.
  */
 void ovs_vport_del(struct vport *vport)
 {
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index bdfd82a7c064..8ea3a96980ac 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -196,7 +196,13 @@ static inline const char *ovs_vport_name(struct vport *vport)
 	return vport->dev->name;
 }

-int ovs_vport_ops_register(struct vport_ops *ops);
+int __ovs_vport_ops_register(struct vport_ops *ops);
+#define ovs_vport_ops_register(ops)		\
+	({					\
+		(ops)->owner = THIS_MODULE;	\
+		__ovs_vport_ops_register(ops);	\
+	})
+
 void ovs_vport_ops_unregister(struct vport_ops *ops);

 static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
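The wrapper macro above stamps the caller's THIS_MODULE before calling the real registration, so no vport type can forget to set .owner again (which is why the geneve and gre initializers drop their explicit .owner lines). The same trick in a userspace sketch, with __FILE__ standing in for THIS_MODULE and a GNU statement expression as in the macro itself:

	#include <stdio.h>

	struct ops {
		const char *name;
		const char *owner;
	};

	static int __ops_register(struct ops *ops)
	{
		printf("registered %s (owner %s)\n", ops->name, ops->owner);
		return 0;
	}

	/* The macro fills in the caller's identity, then forwards to the
	 * real function; every call site gets .owner right for free. */
	#define ops_register(ops)		\
		({				\
			(ops)->owner = __FILE__;\
			__ops_register(ops);	\
		})

	int main(void)
	{
		struct ops gre = { .name = "gre" };

		return ops_register(&gre);
	}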
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index af399cac5205..992396aa635c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
 	kfree_rcu(po->rollover, rcu);
 }

+static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
+					  struct sk_buff *skb)
+{
+	/* Earlier code assumed this would be a VLAN pkt, double-check
+	 * this now that we have the actual packet in hand. We can only
+	 * do this check on Ethernet devices.
+	 */
+	if (unlikely(dev->type != ARPHRD_ETHER))
+		return false;
+
+	skb_reset_mac_header(skb);
+	return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
+}
+
 static const struct proto_ops packet_ops;

 static const struct proto_ops packet_ops_spkt;
@@ -1902,18 +1916,10 @@ retry:
 		goto retry;
 	}

-	if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
-		/* Earlier code assumed this would be a VLAN pkt,
-		 * double-check this now that we have the actual
-		 * packet in hand.
-		 */
-		struct ethhdr *ehdr;
-		skb_reset_mac_header(skb);
-		ehdr = eth_hdr(skb);
-		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
-			err = -EMSGSIZE;
-			goto out_unlock;
-		}
+	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
+	    !packet_extra_vlan_len_allowed(dev, skb)) {
+		err = -EMSGSIZE;
+		goto out_unlock;
 	}

 	skb->protocol = proto;
@@ -2323,8 +2329,8 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 static bool ll_header_truncated(const struct net_device *dev, int len)
 {
 	/* net device doesn't like empty head */
-	if (unlikely(len <= dev->hard_header_len)) {
-		net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n",
+	if (unlikely(len < dev->hard_header_len)) {
+		net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
 				     current->comm, len, dev->hard_header_len);
 		return true;
 	}
@@ -2332,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
 	return false;
 }

+static void tpacket_set_protocol(const struct net_device *dev,
+				 struct sk_buff *skb)
+{
+	if (dev->type == ARPHRD_ETHER) {
+		skb_reset_mac_header(skb);
+		skb->protocol = eth_hdr(skb)->h_proto;
+	}
+}
+
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		void *frame, struct net_device *dev, int size_max,
 		__be16 proto, unsigned char *addr, int hlen)
@@ -2368,8 +2383,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);

-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, 0);
 	if (unlikely(po->tp_tx_has_off)) {
 		int off_min, off_max, off;
 		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2415,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 				dev->hard_header_len);
 		if (unlikely(err))
 			return err;
+		if (!skb->protocol)
+			tpacket_set_protocol(dev, skb);

 		data += dev->hard_header_len;
 		to_write -= dev->hard_header_len;
@@ -2449,6 +2464,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		len = ((to_write > len_max) ? len_max : to_write);
 	}

+	skb_probe_transport_header(skb, 0);
+
 	return tp_len;
 }

@@ -2493,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;

-	reserve = dev->hard_header_len + VLAN_HLEN;
+	if (po->sk.sk_socket->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

-	if (size_max > dev->mtu + reserve)
-		size_max = dev->mtu + reserve;
+	if (size_max > dev->mtu + reserve + VLAN_HLEN)
+		size_max = dev->mtu + reserve + VLAN_HLEN;

 	do {
 		ph = packet_current_frame(po, &po->tx_ring,
@@ -2525,18 +2543,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
 					  addr, hlen);
 		if (likely(tp_len >= 0) &&
-		    tp_len > dev->mtu + dev->hard_header_len) {
-			struct ethhdr *ehdr;
-			/* Earlier code assumed this would be a VLAN pkt,
-			 * double-check this now that we have the actual
-			 * packet in hand.
-			 */
+		    tp_len > dev->mtu + reserve &&
+		    !packet_extra_vlan_len_allowed(dev, skb))
+			tp_len = -EMSGSIZE;

-			skb_reset_mac_header(skb);
-			ehdr = eth_hdr(skb);
-			if (ehdr->h_proto != htons(ETH_P_8021Q))
-				tp_len = -EMSGSIZE;
-		}
 		if (unlikely(tp_len < 0)) {
 			if (po->tp_loss) {
 				__packet_set_status(po, ph,
@@ -2765,18 +2775,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)

 	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);

-	if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
-		/* Earlier code assumed this would be a VLAN pkt,
-		 * double-check this now that we have the actual
-		 * packet in hand.
-		 */
-		struct ethhdr *ehdr;
-		skb_reset_mac_header(skb);
-		ehdr = eth_hdr(skb);
-		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
-			err = -EMSGSIZE;
-			goto out_free;
-		}
+	if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+	    !packet_extra_vlan_len_allowed(dev, skb)) {
+		err = -EMSGSIZE;
+		goto out_free;
 	}

 	skb->protocol = proto;
@@ -2807,8 +2809,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		len += vnet_hdr_len;
 	}

-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, reserve);
+	skb_probe_transport_header(skb, reserve);
+
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;

@@ -4107,7 +4109,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	err = -EINVAL;
 	if (unlikely((int)req->tp_block_size <= 0))
 		goto out;
-	if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
+	if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
 		goto out;
 	if (po->tp_version >= TPACKET_V3 &&
 	    (int)(req->tp_block_size -
@@ -4119,8 +4121,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 		goto out;

-	rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
-	if (unlikely(rb->frames_per_block <= 0))
+	rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
+	if (unlikely(rb->frames_per_block == 0))
 		goto out;
 	if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
 		     req->tp_frame_nr))
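packet_extra_vlan_len_allowed() centralizes the check that was open-coded in three places above: an over-limit frame is tolerated only when it really is 802.1Q-tagged, since the 4-byte tag does not count against the MTU. A self-contained approximation of that decision; the header struct and helper names here are simplified stand-ins, not the kernel's:

	#include <stdio.h>
	#include <stdint.h>
	#include <stddef.h>
	#include <arpa/inet.h>

	#define ETH_P_8021Q	0x8100
	#define VLAN_HLEN	4

	struct ethhdr_min {
		uint8_t  h_dest[6];
		uint8_t  h_source[6];
		uint16_t h_proto;	/* network byte order */
	};

	/* Allow the extra length only for genuinely VLAN-tagged frames. */
	static int extra_vlan_len_allowed(const struct ethhdr_min *eth)
	{
		return eth->h_proto == htons(ETH_P_8021Q);
	}

	static int check_len(size_t len, size_t mtu, size_t hard_header_len,
			     const struct ethhdr_min *eth)
	{
		if (len > mtu + hard_header_len && !extra_vlan_len_allowed(eth))
			return -1;	/* -EMSGSIZE in the kernel code */
		return 0;
	}

	int main(void)
	{
		struct ethhdr_min tagged = { .h_proto = htons(ETH_P_8021Q) };
		struct ethhdr_min plain = { .h_proto = htons(0x0800) };

		printf("tagged: %d, plain: %d\n",
		       check_len(1514 + VLAN_HLEN, 1500, 14, &tagged),
		       check_len(1514 + VLAN_HLEN, 1500, 14, &plain));
		return 0;
	}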
diff --git a/net/rds/connection.c b/net/rds/connection.c
index d4564036a339..e3b118cae81d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -186,12 +186,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		}
 	}

-	if (trans == NULL) {
-		kmem_cache_free(rds_conn_slab, conn);
-		conn = ERR_PTR(-ENODEV);
-		goto out;
-	}
-
 	conn->c_trans = trans;

 	ret = trans->conn_alloc(conn, gfp);
diff --git a/net/rds/send.c b/net/rds/send.c
index 827155c2ead1..c9cdb358ea88 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1013,11 +1013,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 		release_sock(sk);
 	}

-	/* racing with another thread binding seems ok here */
+	lock_sock(sk);
 	if (daddr == 0 || rs->rs_bound_addr == 0) {
+		release_sock(sk);
 		ret = -ENOTCONN; /* XXX not a great errno */
 		goto out;
 	}
+	release_sock(sk);

 	if (payload_len > rds_sk_sndbuf(rs)) {
 		ret = -EMSGSIZE;
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index e0547f521f20..adc555e0323d 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -723,8 +723,10 @@ process_further:

 	if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
 	     call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
-	    hard > tx)
+	    hard > tx) {
+		call->acks_hard = tx;
 		goto all_acked;
+	}

 	smp_rmb();
 	rxrpc_rotate_tx_window(call, hard - 1);
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index a40d3afe93b7..14c4e12c47b0 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -531,7 +531,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

 	/* this should be in poll */
-	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 		return -EPIPE;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f43c8f33f09e..7ec667dd4ce1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -253,7 +253,8 @@ int qdisc_set_default(const char *name)
 }

 /* We know handle. Find qdisc among all qdisc's attached to device
-   (root qdisc, all its children, children of children etc.)
+ * (root qdisc, all its children, children of children etc.)
+ * Note: caller either uses rtnl or rcu_read_lock()
  */

 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
@@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 	    root->handle == handle)
 		return root;

-	list_for_each_entry(q, &root->list, list) {
+	list_for_each_entry_rcu(q, &root->list, list) {
 		if (q->handle == handle)
 			return q;
 	}
@@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
 		struct Qdisc *root = qdisc_dev(q)->qdisc;

 		WARN_ON_ONCE(root == &noop_qdisc);
-		list_add_tail(&q->list, &root->list);
+		ASSERT_RTNL();
+		list_add_tail_rcu(&q->list, &root->list);
 	}
 }
 EXPORT_SYMBOL(qdisc_list_add);

 void qdisc_list_del(struct Qdisc *q)
 {
-	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
-		list_del(&q->list);
+	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+		ASSERT_RTNL();
+		list_del_rcu(&q->list);
+	}
 }
 EXPORT_SYMBOL(qdisc_list_del);

@@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 	if (n == 0)
 		return;
 	drops = max_t(int, n, 0);
+	rcu_read_lock();
 	while ((parentid = sch->parent)) {
 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
-			return;
+			break;

+		if (sch->flags & TCQ_F_NOPARENT)
+			break;
+		/* TODO: perform the search on a per txq basis */
 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 		if (sch == NULL) {
-			WARN_ON(parentid != TC_H_ROOT);
-			return;
+			WARN_ON_ONCE(parentid != TC_H_ROOT);
+			break;
 		}
 		cops = sch->ops->cl_ops;
 		if (cops->qlen_notify) {
@@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 		sch->q.qlen -= n;
 		__qdisc_qstats_drop(sch, drops);
 	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);

@@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 		}
 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
 		if (!netif_is_multiqueue(dev))
-			sch->flags |= TCQ_F_ONETXQUEUE;
+			sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}

 	sch->handle = handle;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cb5d4ad32946..e82a1ad80aa5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -737,7 +737,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
 		return;
 	}
 	if (!netif_is_multiqueue(dev))
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	dev_queue->qdisc_sleeping = qdisc;
 }

diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3cbaecd283a..3e82f047caaf 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
 		if (qdisc == NULL)
 			goto err;
 		priv->qdiscs[ntx] = qdisc;
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}

 	sch->flags |= TCQ_F_MQROOT;
@@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,

 	*old = dev_graft_qdisc(dev_queue, new);
 	if (new)
-		new->flags |= TCQ_F_ONETXQUEUE;
+		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	if (dev->flags & IFF_UP)
 		dev_activate(dev);
 	return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3811a745452c..ad70ecf57ce7 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 			goto err;
 		}
 		priv->qdiscs[i] = qdisc;
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}

 	/* If the mqprio options indicate that hardware should own
@@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 	*old = dev_graft_qdisc(dev_queue, new);

 	if (new)
-		new->flags |= TCQ_F_ONETXQUEUE;
+		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;

 	if (dev->flags & IFF_UP)
 		dev_activate(dev);
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e1..1543e39f47c3 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
 	if (!has_sha1)
 		return -EINVAL;

-	memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
-	       hmacs->shmac_num_idents * sizeof(__u16));
+	for (i = 0; i < hmacs->shmac_num_idents; i++)
+		ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
 	ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
 				hmacs->shmac_num_idents * sizeof(__u16));
 	return 0;
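The sctp_auth_ep_set_hmacs() fix matters because hmac_ids goes out on the wire: a raw memcpy ships host-order identifiers, so each __u16 must be converted with htons() individually. A minimal demonstration of the difference (plain userspace C):

	#include <stdio.h>
	#include <stdint.h>
	#include <arpa/inet.h>

	int main(void)
	{
		uint16_t host_ids[3] = { 1, 2, 3 };	/* e.g. HMAC identifiers */
		uint16_t wire_ids[3];
		int i;

		/* Convert each identifier; a memcpy would preserve the
		 * host byte order instead. */
		for (i = 0; i < 3; i++)
			wire_ids[i] = htons(host_ids[i]);

		/* On a little-endian box memcpy would have emitted 01 00;
		 * the converted buffer holds 00 01 as the peer expects. */
		printf("first id on the wire: %02x %02x\n",
		       ((uint8_t *)wire_ids)[0], ((uint8_t *)wire_ids)[1]);
		return 0;
	}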
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e917d27328ea..acb45b8c2a9d 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -209,6 +209,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	struct sock *sk = skb->sk;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flowi6 *fl6 = &transport->fl.u.ip6;
+	int res;
 
 	pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
 		 skb->len, &fl6->saddr, &fl6->daddr);
@@ -220,7 +221,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
 	SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
 
-	return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+	rcu_read_lock();
+	res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass);
+	rcu_read_unlock();
+	return res;
 }
 
 /* Returns the dst cache entry for the given source and destination ip
@@ -262,7 +266,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		pr_debug("src=%pI6 - ", &fl6->saddr);
 	}
 
-	final_p = fl6_update_dst(fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
+
 	dst = ip6_dst_lookup_flow(sk, fl6, final_p);
 	if (!asoc || saddr)
 		goto out;
@@ -321,7 +328,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	if (baddr) {
 		fl6->saddr = baddr->v6.sin6_addr;
 		fl6->fl6_sport = baddr->v6.sin6_port;
-		final_p = fl6_update_dst(fl6, np->opt, &final);
+		final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
 		dst = ip6_dst_lookup_flow(sk, fl6, final_p);
 	}
 
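All three call sites stop reading np->opt directly: ipv6 tx options are now RCU-managed, so every reader must hold rcu_read_lock() across the rcu_dereference() and must not cache the pointer past the unlock. The general reader shape, with use_options() standing in for whatever consumes the struct:

struct ipv6_txoptions *opt;

rcu_read_lock();
opt = rcu_dereference(np->opt);
if (opt)
	use_options(opt);	/* hypothetical consumer; must finish before unlock */
rcu_read_unlock();

The writer side (the setsockopt path, not shown in this diff) publishes a replacement pointer and frees the old structure only after readers are guaranteed to be done.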
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 897c01c029ca..03c8256063ec 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -972,7 +972,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
 		return -EFAULT;
 
 	/* Alloc space for the address array in kernel memory. */
-	kaddrs = kmalloc(addrs_size, GFP_KERNEL);
+	kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
 	if (unlikely(!kaddrs))
 		return -ENOMEM;
 
@@ -4928,7 +4928,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	to = optval + offsetof(struct sctp_getaddrs, addrs);
 	space_left = len - offsetof(struct sctp_getaddrs, addrs);
 
-	addrs = kmalloc(space_left, GFP_KERNEL);
+	addrs = kmalloc(space_left, GFP_USER | __GFP_NOWARN);
 	if (!addrs)
 		return -ENOMEM;
 
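Both allocation sizes come straight from userspace, so an unprivileged caller could previously trigger large GFP_KERNEL allocations along with the allocation-failure warning splat. GFP_USER allocates under the constraints appropriate for user-triggered requests, and __GFP_NOWARN turns an oversized request into a quiet -ENOMEM. A sketch of the idiom; buf, uptr and user_len are placeholders, and user_len is assumed to have been bounds-checked:

buf = kmalloc(user_len, GFP_USER | __GFP_NOWARN);
if (!buf)
	return -ENOMEM;
if (copy_from_user(buf, uptr, user_len)) {
	kfree(buf);
	return -EFAULT;
}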
@@ -6458,7 +6458,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	if (sctp_writeable(sk)) {
 		mask |= POLLOUT | POLLWRNORM;
 	} else {
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 		/*
 		 * Since the socket is not locked, the buffer
 		 * might be made available after the writeable check and
@@ -6801,26 +6801,30 @@ no_packet:
 static void __sctp_write_space(struct sctp_association *asoc)
 {
 	struct sock *sk = asoc->base.sk;
-	struct socket *sock = sk->sk_socket;
 
-	if ((sctp_wspace(asoc) > 0) && sock) {
-		if (waitqueue_active(&asoc->wait))
-			wake_up_interruptible(&asoc->wait);
+	if (sctp_wspace(asoc) <= 0)
+		return;
+
+	if (waitqueue_active(&asoc->wait))
+		wake_up_interruptible(&asoc->wait);
 
 	if (sctp_writeable(sk)) {
-		wait_queue_head_t *wq = sk_sleep(sk);
+		struct socket_wq *wq;
 
-		if (wq && waitqueue_active(wq))
-			wake_up_interruptible(wq);
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq) {
+			if (waitqueue_active(&wq->wait))
+				wake_up_interruptible(&wq->wait);
 
 			/* Note that we try to include the Async I/O support
 			 * here by modeling from the current TCP/UDP code.
 			 * We have not tested with it yet.
 			 */
 			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
-				sock_wake_async(sock,
-						SOCK_WAKE_SPACE, POLL_OUT);
+				sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
 		}
+		rcu_read_unlock();
 	}
 }
 
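__sctp_write_space() used to reach the wait queue through sk->sk_socket, which can be torn down while the association still holds the sock. The rewrite goes through the RCU-managed sk->sk_wq instead, and also inverts the initial test so the function returns immediately when there is no write space. The wakeup shape in isolation (sketch):

struct socket_wq *wq;

rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq && waitqueue_active(&wq->wait))
	wake_up_interruptible(&wq->wait);
rcu_read_unlock();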
@@ -7375,6 +7379,13 @@ struct proto sctp_prot = {
 
 #if IS_ENABLED(CONFIG_IPV6)
 
+#include <net/transp_v6.h>
+static void sctp_v6_destroy_sock(struct sock *sk)
+{
+	sctp_destroy_sock(sk);
+	inet6_destroy_sock(sk);
+}
+
 struct proto sctpv6_prot = {
 	.name		= "SCTPv6",
 	.owner		= THIS_MODULE,
@@ -7384,7 +7395,7 @@ struct proto sctpv6_prot = {
 	.accept		= sctp_accept,
 	.ioctl		= sctp_ioctl,
 	.init		= sctp_init_sock,
-	.destroy	= sctp_destroy_sock,
+	.destroy	= sctp_v6_destroy_sock,
 	.shutdown	= sctp_shutdown,
 	.setsockopt	= sctp_setsockopt,
 	.getsockopt	= sctp_getsockopt,
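Plain sctp_destroy_sock() leaked the ipv6-level state of an IPv6 SCTP socket; the new wrapper chains into inet6_destroy_sock(), which releases tx options, the cached dst and flow labels. This is the general pattern for any AF_INET6 protocol's .destroy: protocol-private teardown first, then the shared ipv6 cleanup (the names below are placeholders):

static void myproto_v6_destroy_sock(struct sock *sk)	/* hypothetical */
{
	myproto_destroy_sock(sk);	/* protocol-private teardown */
	inet6_destroy_sock(sk);		/* ipv6 options/dst/flowlabel cleanup */
}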
diff --git a/net/socket.c b/net/socket.c
index dd2c247c99e3..456fadb3d819 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1056,27 +1056,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
 	return 0;
 }
 
-/* This function may be called only under socket lock or callback_lock or rcu_lock */
+/* This function may be called only under rcu_lock */
 
-int sock_wake_async(struct socket *sock, int how, int band)
+int sock_wake_async(struct socket_wq *wq, int how, int band)
 {
-	struct socket_wq *wq;
-
-	if (!sock)
-		return -1;
-	rcu_read_lock();
-	wq = rcu_dereference(sock->wq);
-	if (!wq || !wq->fasync_list) {
-		rcu_read_unlock();
+	if (!wq || !wq->fasync_list)
 		return -1;
-	}
+
 	switch (how) {
 	case SOCK_WAKE_WAITD:
-		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
+		if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
 			break;
 		goto call_kill;
 	case SOCK_WAKE_SPACE:
-		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
+		if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
 			break;
 		/* fall through */
 	case SOCK_WAKE_IO:
@@ -1086,7 +1079,7 @@ call_kill:
 	case SOCK_WAKE_URG:
 		kill_fasync(&wq->fasync_list, SIGURG, band);
 	}
-	rcu_read_unlock();
+
 	return 0;
 }
 EXPORT_SYMBOL(sock_wake_async);
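sock_wake_async() now takes the struct socket_wq directly: the caller resolves it under rcu_read_lock(), and the async flag bits move conceptually from the socket to the wait-queue side (SOCK_ASYNC_* becomes SOCKWQ_ASYNC_*). A caller sketch under the new contract; wake_space() is a hypothetical helper, not part of this patch:

static void wake_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);	/* may be NULL during teardown */
	if (sock_flag(sk, SOCK_FASYNC))
		sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
	rcu_read_unlock();
}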
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 229956bf8457..95f82d8d4888 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -353,12 +353,20 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct svc_serv *bc_serv = xprt->bc_serv;
+	struct xdr_buf *rq_rcv_buf = &req->rq_rcv_buf;
 
 	spin_lock(&xprt->bc_pa_lock);
 	list_del(&req->rq_bc_pa_list);
 	xprt_dec_alloc_count(xprt, 1);
 	spin_unlock(&xprt->bc_pa_lock);
 
+	if (copied <= rq_rcv_buf->head[0].iov_len) {
+		rq_rcv_buf->head[0].iov_len = copied;
+		rq_rcv_buf->page_len = 0;
+	} else {
+		rq_rcv_buf->page_len = copied - rq_rcv_buf->head[0].iov_len;
+	}
+
 	req->rq_private_buf.len = copied;
 	set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc5b7b5032ca..7fccf9675df8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1363,6 +1363,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
 	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
 	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
+	rqstp->rq_arg.len = req->rq_private_buf.len;
 
 	/* reset result send buffer "put" position */
 	resv->iov_len = 0;
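A backchannel reply is received into rq_rcv_buf, so the xdr_buf segments must be trimmed to cover exactly the 'copied' bytes before bc_svc_process() propagates the total into rq_arg.len. A userspace sketch of the arithmetic, assuming a 4096-byte head buffer:

#include <stdio.h>

int main(void)
{
	unsigned int head_len = 4096, page_len, copied = 5000;

	if (copied <= head_len) {	/* reply fits entirely in head[0] */
		head_len = copied;
		page_len = 0;
	} else {			/* the remainder lives in pages */
		page_len = copied - head_len;
	}
	printf("head=%u pages=%u total=%u\n",
	       head_len, page_len, head_len + page_len);
	/* prints: head=4096 pages=904 total=5000 */
	return 0;
}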
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1d1a70498910..2ffaf6a79499 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
 	if (unlikely(!sock))
 		return -ENOTSOCK;
 
-	clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+	clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
 	if (base != 0) {
 		addr = NULL;
 		addrlen = 0;
@@ -442,7 +442,7 @@ static void xs_nospace_callback(struct rpc_task *task)
 	struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
 
 	transport->inet->sk_write_pending--;
-	clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+	clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 }
 
 /**
@@ -467,7 +467,7 @@ static int xs_nospace(struct rpc_task *task)
 
 	/* Don't race with disconnect */
 	if (xprt_connected(xprt)) {
-		if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+		if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) {
 			/*
 			 * Notify TCP that we're limited by the application
 			 * window size
@@ -478,7 +478,7 @@ static int xs_nospace(struct rpc_task *task)
 			xprt_wait_for_buffer_space(task, xs_nospace_callback);
 		}
 	} else {
-		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 		ret = -ENOTCONN;
 	}
 
@@ -626,7 +626,7 @@ process_status:
 	case -EPERM:
 		/* When the server has died, an ICMP port unreachable message
 		 * prompts ECONNREFUSED. */
-		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 	}
 
 	return status;
@@ -715,7 +715,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	case -EADDRINUSE:
 	case -ENOBUFS:
 	case -EPIPE:
-		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 	}
 
 	return status;
@@ -1618,7 +1618,7 @@ static void xs_write_space(struct sock *sk)
 
 	if (unlikely(!(xprt = xprt_from_sock(sk))))
 		return;
-	if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
+	if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0)
 		return;
 
 	xprt_write_space(xprt);
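The xprtsock hunks are the mechanical half of the rename: SOCK_ASYNC_NOSPACE becomes SOCKWQ_ASYNC_NOSPACE while these call sites keep operating on the struct socket they already hold. Paths that only hold a struct sock (sctp_poll above and the af_unix hunks below) go through sk_set_bit()/sk_clear_bit() instead; their presumed shape at this point of the transition is (a sketch, an assumption about the helpers' definition, not shown in this diff):

static inline void sk_set_bit(int nr, struct sock *sk)
{
	set_bit(nr, &sk->sk_socket->flags);
}

static inline void sk_clear_bit(int nr, struct sock *sk)
{
	clear_bit(nr, &sk->sk_socket->flags);
}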
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 9efbdbde2b08..91aea071ab27 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -191,6 +191,7 @@ void tipc_link_add_bc_peer(struct tipc_link *snd_l,
 
 	snd_l->ackers++;
 	rcv_l->acked = snd_l->snd_nxt - 1;
+	snd_l->state = LINK_ESTABLISHED;
 	tipc_link_build_bc_init_msg(uc_l, xmitq);
 }
 
@@ -206,6 +207,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
 	rcv_l->state = LINK_RESET;
 	if (!snd_l->ackers) {
 		tipc_link_reset(snd_l);
+		snd_l->state = LINK_RESET;
 		__skb_queue_purge(xmitq);
 	}
 }
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 552dbaba9cf3..b53246fb0412 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -105,6 +105,7 @@ struct tipc_sock {
 static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 static void tipc_data_ready(struct sock *sk);
 static void tipc_write_space(struct sock *sk);
+static void tipc_sock_destruct(struct sock *sk);
 static int tipc_release(struct socket *sock);
 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
 static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
@@ -381,6 +382,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
 	sk->sk_data_ready = tipc_data_ready;
 	sk->sk_write_space = tipc_write_space;
+	sk->sk_destruct = tipc_sock_destruct;
 	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
 	tsk->sent_unacked = 0;
 	atomic_set(&tsk->dupl_rcvcnt, 0);
@@ -470,9 +472,6 @@ static int tipc_release(struct socket *sock)
 		tipc_node_remove_conn(net, dnode, tsk->portid);
 	}
 
-	/* Discard any remaining (connection-based) messages in receive queue */
-	__skb_queue_purge(&sk->sk_receive_queue);
-
 	/* Reject any messages that accumulated in backlog queue */
 	sock->state = SS_DISCONNECTING;
 	release_sock(sk);
@@ -1515,6 +1514,11 @@ static void tipc_data_ready(struct sock *sk)
 	rcu_read_unlock();
 }
 
+static void tipc_sock_destruct(struct sock *sk)
+{
+	__skb_queue_purge(&sk->sk_receive_queue);
+}
+
 /**
  * filter_connect - Handle all incoming messages for a connection-based socket
  * @tsk: TIPC socket
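Purging the receive queue in tipc_release() left a window in which messages could still be queued afterwards and then leak, since release runs while other references to the sock may still exist. Moving the purge into sk->sk_destruct defers it to sk_free(), which runs exactly once, after the last reference is dropped. The generic wiring (names are placeholders):

static void my_sock_destruct(struct sock *sk)	/* hypothetical */
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

/* at creation time, alongside the other sk callbacks:
 *	sk->sk_destruct = my_sock_destruct;
 */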
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ad2719ad4c1b..70c03271b798 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -158,8 +158,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
 	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
 	struct rtable *rt;
 
-	if (skb_headroom(skb) < UDP_MIN_HEADROOM)
-		pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+	if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+		err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+		if (err)
+			goto tx_error;
+	}
 
 	skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
 	ub = rcu_dereference_rtnl(b->media_ptr);
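pskb_expand_head() can fail under GFP_ATOMIC, and the old code ignored the return value and went on to build headers in headroom that was never reserved. The general pattern for tunnel transmit paths (sketch; HDR_LEN is a placeholder, and the tx_error label is assumed to free the skb and count the drop):

if (skb_headroom(skb) < HDR_LEN) {
	err = pskb_expand_head(skb, HDR_LEN, 0, GFP_ATOMIC);
	if (err)
		goto tx_error;
}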
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aaa0b58d6aba..45aebd966978 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,118 @@ found:
 	return s;
 }
 
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+				      void *key)
+{
+	struct unix_sock *u;
+	wait_queue_head_t *u_sleep;
+
+	u = container_of(q, struct unix_sock, peer_wake);
+
+	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+			    q);
+	u->peer_wake.private = NULL;
+
+	/* relaying can only happen while the wq still exists */
+	u_sleep = sk_sleep(&u->sk);
+	if (u_sleep)
+		wake_up_interruptible_poll(u_sleep, key);
+
+	return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+	int rc;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	rc = 0;
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (!u->peer_wake.private) {
+		u->peer_wake.private = other;
+		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+		rc = 1;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+	return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+					    struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (u->peer_wake.private == other) {
+		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+		u->peer_wake.private = NULL;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+						   struct sock *other)
+{
+	unix_dgram_peer_wake_disconnect(sk, other);
+	wake_up_interruptible_poll(sk_sleep(sk),
+				   POLLOUT |
+				   POLLWRNORM |
+				   POLLWRBAND);
+}
+
+/* preconditions:
+ *	- unix_peer(sk) == other
+ *	- association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+	int connected;
+
+	connected = unix_dgram_peer_wake_connect(sk, other);
+
+	if (unix_recvq_full(other))
+		return 1;
+
+	if (connected)
+		unix_dgram_peer_wake_disconnect(sk, other);
+
+	return 0;
+}
+
 static int unix_writable(const struct sock *sk)
 {
 	return sk->sk_state != TCP_LISTEN &&
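The block comment above describes the mechanism; condensed, the send and poll paths do the following (a sketch, with the outcomes folded into comments):

if (unix_peer(other) != sk && unix_recvq_full(other) &&
    unix_dgram_peer_wake_me(sk, other)) {
	/* Still full, and our peer_wake entry is now queued on
	 * other's peer_wait: report -EAGAIN (sendmsg) or clear
	 * POLLOUT (poll); the reader will relay the wakeup to
	 * sk_sleep(sk) when it dequeues a datagram.
	 */
} else {
	/* Either not throttled, or the queue drained between the
	 * two checks and the entry was unhooked again: proceed.
	 */
}

The relay entry is removed either when it fires once (unix_dgram_peer_wake_relay() dequeues itself) or when the association is dissolved (unix_dgram_peer_wake_disconnect()).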
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
 		skpair->sk_state_change(skpair);
 		sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 	}
+
+	unix_dgram_peer_wake_disconnect(sk, skpair);
 	sock_put(skpair); /* It may now die */
 	unix_peer(sk) = NULL;
 }
@@ -441,6 +555,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
 	if (state == TCP_LISTEN)
 		unix_release_sock(skb->sk, 1);
 	/* passed fds are erased in the kfree_skb hook */
+	UNIXCB(skb).consumed = skb->len;
 	kfree_skb(skb);
 }
 
@@ -665,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
+	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
 	if (sk == NULL)
@@ -1032,6 +1148,8 @@ restart:
 	if (unix_peer(sk)) {
 		struct sock *old_peer = unix_peer(sk);
 		unix_peer(sk) = other;
+		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
 		unix_state_double_unlock(sk, other);
 
 		if (other != old_peer)
@@ -1433,6 +1551,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
 	return err;
 }
 
+static bool unix_passcred_enabled(const struct socket *sock,
+				  const struct sock *other)
+{
+	return test_bit(SOCK_PASSCRED, &sock->flags) ||
+	       !other->sk_socket ||
+	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+}
+
 /*
  * Some apps rely on write() giving SCM_CREDENTIALS
  * We include credentials if source or destination socket
@@ -1443,14 +1569,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
 {
 	if (UNIXCB(skb).pid)
 		return;
-	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
-	    !other->sk_socket ||
-	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+	if (unix_passcred_enabled(sock, other)) {
 		UNIXCB(skb).pid = get_pid(task_tgid(current));
 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
 	}
 }
 
+static int maybe_init_creds(struct scm_cookie *scm,
+			    struct socket *socket,
+			    const struct sock *other)
+{
+	int err;
+	struct msghdr msg = { .msg_controllen = 0 };
+
+	err = scm_send(socket, &msg, scm, false);
+	if (err)
+		return err;
+
+	if (unix_passcred_enabled(socket, other)) {
+		scm->pid = get_pid(task_tgid(current));
+		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
+	}
+	return err;
+}
+
+static bool unix_skb_scm_eq(struct sk_buff *skb,
+			    struct scm_cookie *scm)
+{
+	const struct unix_skb_parms *u = &UNIXCB(skb);
+
+	return u->pid == scm->pid &&
+	       uid_eq(u->uid, scm->creds.uid) &&
+	       gid_eq(u->gid, scm->creds.gid) &&
+	       unix_secdata_eq(scm, skb);
+}
+
 /*
  * Send AF_UNIX data.
  */
@@ -1471,6 +1624,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct scm_cookie scm;
 	int max_level;
 	int data_len = 0;
+	int sk_locked;
 
 	wait_for_unix_gc();
 	err = scm_send(sock, msg, &scm, false);
@@ -1549,12 +1703,14 @@ restart:
 		goto out_free;
 	}
 
+	sk_locked = 0;
 	unix_state_lock(other);
+restart_locked:
 	err = -EPERM;
 	if (!unix_may_send(sk, other))
 		goto out_unlock;
 
-	if (sock_flag(other, SOCK_DEAD)) {
+	if (unlikely(sock_flag(other, SOCK_DEAD))) {
 		/*
 		 * Check with 1003.1g - what should
 		 * datagram error
@@ -1562,10 +1718,14 @@ restart:
 		unix_state_unlock(other);
 		sock_put(other);
 
+		if (!sk_locked)
+			unix_state_lock(sk);
+
 		err = 0;
-		unix_state_lock(sk);
 		if (unix_peer(sk) == other) {
 			unix_peer(sk) = NULL;
+			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
 			unix_state_unlock(sk);
 
 			unix_dgram_disconnected(sk, other);
@@ -1591,21 +1751,38 @@ restart:
 		goto out_unlock;
 	}
 
-	if (unix_peer(other) != sk && unix_recvq_full(other)) {
-		if (!timeo) {
-			err = -EAGAIN;
-			goto out_unlock;
+	if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+		if (timeo) {
+			timeo = unix_wait_for_peer(other, timeo);
+
+			err = sock_intr_errno(timeo);
+			if (signal_pending(current))
+				goto out_free;
+
+			goto restart;
 		}
 
-		timeo = unix_wait_for_peer(other, timeo);
+		if (!sk_locked) {
+			unix_state_unlock(other);
+			unix_state_double_lock(sk, other);
+		}
 
-		err = sock_intr_errno(timeo);
-		if (signal_pending(current))
-			goto out_free;
+		if (unix_peer(sk) != other ||
+		    unix_dgram_peer_wake_me(sk, other)) {
+			err = -EAGAIN;
+			sk_locked = 1;
+			goto out_unlock;
+		}
 
-		goto restart;
+		if (!sk_locked) {
+			sk_locked = 1;
+			goto restart_locked;
+		}
 	}
 
+	if (unlikely(sk_locked))
+		unix_state_unlock(sk);
+
 	if (sock_flag(other, SOCK_RCVTSTAMP))
 		__net_timestamp(skb);
 	maybe_add_creds(skb, sock, other);
@@ -1619,6 +1796,8 @@ restart:
 	return len;
 
 out_unlock:
+	if (sk_locked)
+		unix_state_unlock(sk);
 	unix_state_unlock(other);
 out_free:
 	kfree_skb(skb);
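When the non-blocking send path has to re-check the peer and register for a wakeup, it needs both state locks, and it may already hold other's; unix_state_double_lock() (used above) must therefore impose a global acquisition order so two senders in opposite directions cannot ABBA-deadlock. Its presumed shape, simplified (an assumption; the real helper also has to cope with sk == other):

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (sk1 > sk2)		/* order the two locks by address */
		swap(sk1, sk2);
	unix_state_lock(sk1);
	unix_state_lock_nested(sk2);
}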
@@ -1740,8 +1919,10 @@ out_err:
 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 				    int offset, size_t size, int flags)
 {
-	int err = 0;
-	bool send_sigpipe = true;
+	int err;
+	bool send_sigpipe = false;
+	bool init_scm = true;
+	struct scm_cookie scm;
 	struct sock *other, *sk = socket->sk;
 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
 
@@ -1759,7 +1940,7 @@ alloc_skb:
 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
 					      &err, 0);
 		if (!newskb)
-			return err;
+			goto err;
 	}
 
 	/* we must acquire readlock as we modify already present
@@ -1768,12 +1949,12 @@ alloc_skb:
 	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
 	if (err) {
 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
-		send_sigpipe = false;
 		goto err;
 	}
 
 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
 		err = -EPIPE;
+		send_sigpipe = true;
 		goto err_unlock;
 	}
 
@@ -1782,23 +1963,34 @@ alloc_skb:
 	if (sock_flag(other, SOCK_DEAD) ||
 	    other->sk_shutdown & RCV_SHUTDOWN) {
 		err = -EPIPE;
+		send_sigpipe = true;
 		goto err_state_unlock;
 	}
 
+	if (init_scm) {
+		err = maybe_init_creds(&scm, socket, other);
+		if (err)
+			goto err_state_unlock;
+		init_scm = false;
+	}
+
 	skb = skb_peek_tail(&other->sk_receive_queue);
 	if (tail && tail == skb) {
 		skb = newskb;
-	} else if (!skb) {
-		if (newskb)
+	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
+		if (newskb) {
 			skb = newskb;
-		else
+		} else {
+			tail = skb;
 			goto alloc_skb;
+		}
 	} else if (newskb) {
 		/* this is fast path, we don't necessarily need to
 		 * call to kfree_skb even though with newskb == NULL
 		 * this - does no harm
 		 */
 		consume_skb(newskb);
+		newskb = NULL;
 	}
 
 	if (skb_append_pagefrags(skb, page, offset, size)) {
@@ -1811,14 +2003,20 @@ alloc_skb:
 	skb->truesize += size;
 	atomic_add(size, &sk->sk_wmem_alloc);
 
-	if (newskb)
+	if (newskb) {
+		err = unix_scm_to_skb(&scm, skb, false);
+		if (err)
+			goto err_state_unlock;
+		spin_lock(&other->sk_receive_queue.lock);
 		__skb_queue_tail(&other->sk_receive_queue, newskb);
+		spin_unlock(&other->sk_receive_queue.lock);
+	}
 
 	unix_state_unlock(other);
 	mutex_unlock(&unix_sk(other)->readlock);
 
 	other->sk_data_ready(other);
-
+	scm_destroy(&scm);
 	return size;
 
 err_state_unlock:
@@ -1829,6 +2027,8 @@ err:
 	kfree_skb(newskb);
 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
 		send_sig(SIGPIPE, current, 0);
+	if (!init_scm)
+		scm_destroy(&scm);
 	return err;
 }
 
@@ -1991,7 +2191,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
 		    !timeo)
 			break;
 
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		unix_state_unlock(sk);
 		timeo = freezable_schedule_timeout(timeo);
 		unix_state_lock(sk);
@@ -1999,7 +2199,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
 		if (sock_flag(sk, SOCK_DEAD))
 			break;
 
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	}
 
 	finish_wait(sk_sleep(sk), &wait);
@@ -2072,6 +2272,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 
 	do {
 		int chunk;
+		bool drop_skb;
 		struct sk_buff *skb, *last;
 
 		unix_state_lock(sk);
@@ -2131,10 +2332,7 @@ unlock:
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-			if ((UNIXCB(skb).pid != scm.pid) ||
-			    !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
-			    !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
-			    !unix_secdata_eq(&scm, skb))
+			if (!unix_skb_scm_eq(skb, &scm))
 				break;
 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
 			/* Copy credentials */
@@ -2152,7 +2350,11 @@ unlock:
 		}
 
 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
+		skb_get(skb);
 		chunk = state->recv_actor(skb, skip, chunk, state);
+		drop_skb = !unix_skb_len(skb);
+		/* skb is only safe to use if !drop_skb */
+		consume_skb(skb);
 		if (chunk < 0) {
 			if (copied == 0)
 				copied = -EFAULT;
@@ -2161,6 +2363,18 @@ unlock:
 		copied += chunk;
 		size -= chunk;
 
+		if (drop_skb) {
+			/* the skb was touched by a concurrent reader;
+			 * we should not expect anything from this skb
+			 * anymore and assume it invalid - we can be
+			 * sure it was dropped from the socket queue
+			 *
+			 * let's report a short read
+			 */
+			err = 0;
+			break;
+		}
+
 		/* Mark read part of skb as used */
 		if (!(flags & MSG_PEEK)) {
 			UNIXCB(skb).consumed += chunk;
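recv_actor() can drop the socket state lock (the splice path may sleep), so a second reader can consume and unlink the very skb this reader is working on. The fix pins the skb across the callback and then uses an empty unix_skb_len() as the "someone else took it" signal, turning the race into a short read instead of a use-after-free. The shape in isolation (sketch):

skb_get(skb);				/* keep the skb alive across the callback */
chunk = state->recv_actor(skb, skip, chunk, state);
drop_skb = !unix_skb_len(skb);		/* fully consumed by a concurrent reader? */
consume_skb(skb);			/* drop our reference; skb may be freed here */
if (drop_skb)
	return copied;			/* report a short read, not an error */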
@@ -2454,20 +2668,22 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 		return mask;
 
 	writable = unix_writable(sk);
-	other = unix_peer_get(sk);
-	if (other) {
-		if (unix_peer(other) != sk) {
-			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
-			if (unix_recvq_full(other))
-				writable = 0;
-		}
-		sock_put(other);
+	if (writable) {
+		unix_state_lock(sk);
+
+		other = unix_peer(sk);
+		if (other && unix_peer(other) != sk &&
+		    unix_recvq_full(other) &&
+		    unix_dgram_peer_wake_me(sk, other))
+			writable = 0;
+
+		unix_state_unlock(sk);
 	}
 
 	if (writable)
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	return mask;
 }