author	Bob Peterson <rpeterso@redhat.com>	2018-01-18 16:17:13 -0500
committer	Bob Peterson <rpeterso@redhat.com>	2018-01-18 16:17:13 -0500
commit	786ebd9f68cdf512f389e5f2d0015f1beb0777d8 (patch)
tree	b4bae82884c2232f891aec11e58b38eb4fec5e5d /net
parent	1f23bc7869fffec40b8bd9333a74a18d1de54d98 (diff)
parent	4e56a6411fbce6f859566e17298114c2434391a4 (diff)
Merge branch 'punch-hole' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
Diffstat (limited to 'net')
-rw-r--r--	net/9p/trans_fd.c	1
-rw-r--r--	net/batman-adv/bat_iv_ogm.c	4
-rw-r--r--	net/batman-adv/bat_v.c	2
-rw-r--r--	net/batman-adv/fragmentation.c	2
-rw-r--r--	net/batman-adv/tp_meter.c	4
-rw-r--r--	net/core/dev.c	2
-rw-r--r--	net/core/netprio_cgroup.c	1
-rw-r--r--	net/core/skbuff.c	6
-rw-r--r--	net/dccp/minisocks.c	6
-rw-r--r--	net/dccp/proto.c	5
-rw-r--r--	net/dsa/dsa2.c	25
-rw-r--r--	net/dsa/slave.c	1
-rw-r--r--	net/ipv4/devinet.c	2
-rw-r--r--	net/ipv4/igmp.c	44
-rw-r--r--	net/ipv4/inet_timewait_sock.c	6
-rw-r--r--	net/ipv4/ip_gre.c	2
-rw-r--r--	net/ipv4/ip_tunnel.c	4
-rw-r--r--	net/ipv4/netfilter/arp_tables.c	1
-rw-r--r--	net/ipv4/netfilter/ip_tables.c	1
-rw-r--r--	net/ipv4/netfilter/ipt_CLUSTERIP.c	3
-rw-r--r--	net/ipv4/raw.c	15
-rw-r--r--	net/ipv4/tcp.c	1
-rw-r--r--	net/ipv4/tcp_bbr.c	12
-rw-r--r--	net/ipv4/tcp_input.c	22
-rw-r--r--	net/ipv4/tcp_ipv4.c	61
-rw-r--r--	net/ipv4/tcp_minisocks.c	6
-rw-r--r--	net/ipv4/tcp_rate.c	10
-rw-r--r--	net/ipv4/tcp_recovery.c	28
-rw-r--r--	net/ipv4/tcp_timer.c	2
-rw-r--r--	net/ipv6/ip6_tunnel.c	2
-rw-r--r--	net/ipv6/mcast.c	25
-rw-r--r--	net/ipv6/netfilter/ip6_tables.c	1
-rw-r--r--	net/ipv6/netfilter/ip6t_MASQUERADE.c	8
-rw-r--r--	net/ipv6/sit.c	1
-rw-r--r--	net/ipv6/tcp_ipv6.c	13
-rw-r--r--	net/kcm/kcmsock.c	68
-rw-r--r--	net/mac80211/ht.c	7
-rw-r--r--	net/mac80211/mesh_hwmp.c	15
-rw-r--r--	net/mac80211/mlme.c	2
-rw-r--r--	net/mac80211/tx.c	29
-rw-r--r--	net/netfilter/nf_conntrack_h323_asn1.c	128
-rw-r--r--	net/netfilter/nf_conntrack_netlink.c	13
-rw-r--r--	net/netfilter/nf_conntrack_proto_tcp.c	3
-rw-r--r--	net/netfilter/nf_tables_api.c	7
-rw-r--r--	net/netfilter/nfnetlink_cthelper.c	10
-rw-r--r--	net/netfilter/nfnetlink_log.c	5
-rw-r--r--	net/netfilter/nfnetlink_queue.c	5
-rw-r--r--	net/netfilter/nft_exthdr.c	2
-rw-r--r--	net/netfilter/x_tables.c	9
-rw-r--r--	net/netfilter/xt_bpf.c	6
-rw-r--r--	net/netfilter/xt_osf.c	7
-rw-r--r--	net/netlink/af_netlink.c	3
-rw-r--r--	net/openvswitch/datapath.c	2
-rw-r--r--	net/openvswitch/flow_netlink.c	16
-rw-r--r--	net/packet/af_packet.c	37
-rw-r--r--	net/packet/internal.h	1
-rw-r--r--	net/rds/rdma.c	2
-rw-r--r--	net/rxrpc/af_rxrpc.c	24
-rw-r--r--	net/rxrpc/ar-internal.h	103
-rw-r--r--	net/rxrpc/call_accept.c	2
-rw-r--r--	net/rxrpc/call_event.c	229
-rw-r--r--	net/rxrpc/call_object.c	62
-rw-r--r--	net/rxrpc/conn_client.c	54
-rw-r--r--	net/rxrpc/conn_event.c	124
-rw-r--r--	net/rxrpc/conn_object.c	76
-rw-r--r--	net/rxrpc/input.c	76
-rw-r--r--	net/rxrpc/misc.c	19
-rw-r--r--	net/rxrpc/net_ns.c	33
-rw-r--r--	net/rxrpc/output.c	43
-rw-r--r--	net/rxrpc/recvmsg.c	12
-rw-r--r--	net/rxrpc/sendmsg.c	126
-rw-r--r--	net/rxrpc/sysctl.c	60
-rw-r--r--	net/sched/act_meta_mark.c	1
-rw-r--r--	net/sched/act_meta_skbtcindex.c	1
-rw-r--r--	net/sched/act_sample.c	14
-rw-r--r--	net/sched/cls_api.c	22
-rw-r--r--	net/sched/cls_bpf.c	23
-rw-r--r--	net/sched/cls_u32.c	1
-rw-r--r--	net/sched/sch_api.c	2
-rw-r--r--	net/sched/sch_cbq.c	9
-rw-r--r--	net/sched/sch_choke.c	3
-rw-r--r--	net/sched/sch_generic.c	3
-rw-r--r--	net/sched/sch_gred.c	3
-rw-r--r--	net/sched/sch_ingress.c	15
-rw-r--r--	net/sched/sch_red.c	33
-rw-r--r--	net/sched/sch_sfq.c	4
-rw-r--r--	net/sctp/chunk.c	11
-rw-r--r--	net/sctp/outqueue.c	19
-rw-r--r--	net/sctp/protocol.c	1
-rw-r--r--	net/sctp/socket.c	13
-rw-r--r--	net/sctp/stream.c	79
-rw-r--r--	net/sctp/stream_sched.c	25
-rw-r--r--	net/sctp/stream_sched_prio.c	7
-rw-r--r--	net/sctp/stream_sched_rr.c	7
-rw-r--r--	net/socket.c	110
-rw-r--r--	net/sunrpc/auth_gss/gss_rpc_xdr.c	1
-rw-r--r--	net/sunrpc/auth_gss/svcauth_gss.c	5
-rw-r--r--	net/sunrpc/cache.c	2
-rw-r--r--	net/sunrpc/clnt.c	5
-rw-r--r--	net/sunrpc/svcauth_unix.c	6
-rw-r--r--	net/sunrpc/xprt.c	28
-rw-r--r--	net/sunrpc/xprtrdma/rpc_rdma.c	6
-rw-r--r--	net/sunrpc/xprtrdma/transport.c	2
-rw-r--r--	net/sunrpc/xprtrdma/verbs.c	2
-rw-r--r--	net/sunrpc/xprtrdma/xprt_rdma.h	1
-rw-r--r--	net/sunrpc/xprtsock.c	2
-rw-r--r--	net/tipc/group.c	2
-rw-r--r--	net/tipc/server.c	3
-rw-r--r--	net/tipc/socket.c	2
-rw-r--r--	net/tipc/udp_media.c	4
-rw-r--r--	net/vmw_vsock/hyperv_transport.c	2
-rw-r--r--	net/vmw_vsock/vmci_transport.c	14
-rw-r--r--	net/wireless/Kconfig	7
-rw-r--r--	net/wireless/Makefile	48
114 files changed, 1538 insertions, 759 deletions
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 985046ae4231..80f5c79053a4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -839,7 +839,6 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 	if (IS_ERR(file)) {
 		pr_err("%s (%d): failed to map fd\n",
 		       __func__, task_pid_nr(current));
-		sock_release(csocket);
 		kfree(p);
 		return PTR_ERR(file);
 	}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 1b659ab652fb..bbe8414b6ee7 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	orig_node->last_seen = jiffies;
 
 	/* find packet count of corresponding one hop neighbor */
-	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
 	if_num = if_incoming->if_num;
 	orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
 	neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	} else {
 		neigh_rq_count = 0;
 	}
-	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
 
 	/* pay attention to not get a value bigger than 100 % */
 	if (orig_eq_count > neigh_rq_count)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 341ceab8338d..e0e2bfcd6b3e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
 	}
 
 	orig_gw = batadv_gw_node_get(bat_priv, orig_node);
-	if (!orig_node)
+	if (!orig_gw)
 		goto out;
 
 	if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a98cf1104a30..ebe6e38934e4 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
 	 */
 	if (skb->priority >= 256 && skb->priority <= 263)
 		frag_header.priority = skb->priority - 256;
+	else
+		frag_header.priority = 0;
 
 	ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
 	ether_addr_copy(frag_header.dest, orig_node->orig);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 15cd2139381e..ebc4e2241c77 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
 
 /**
  * batadv_tp_sender_timeout - timer that fires in case of packet loss
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
  *
  * If fired it means that there was packet loss.
  * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
 /**
  * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
  * reached without received ack
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
  */
 static void batadv_tp_receiver_shutdown(struct timer_list *t)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 07ed21d64f92..f47e96b62308 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 	 * when the name is long and there isn't enough space left
 	 * for the digits, or if all bits are used.
 	 */
-	return p ? -ENFILE : -EEXIST;
+	return -ENFILE;
 }
 
 static int dev_alloc_name_ns(struct net *net,
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 1c4810919a0a..b9057478d69c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/skbuff.h>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b0ff396fa9d..a592ca025fc4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4293,7 +4293,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 	struct sock *sk = skb->sk;
 
 	if (!skb_may_tx_timestamp(sk, false))
-		return;
+		goto err;
 
 	/* Take a reference to prevent skb_orphan() from freeing the socket,
 	 * but only if the socket refcount is not zero.
@@ -4302,7 +4302,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 		*skb_hwtstamps(skb) = *hwtstamps;
 		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
 		sock_put(sk);
+		return;
 	}
+
+err:
+	kfree_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index abd07a443219..178bb9833311 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 		if (state == DCCP_TIME_WAIT)
 			timeo = DCCP_TIMEWAIT_LEN;
 
+		/* tw_timer is pinned, so we need to make sure BH are disabled
+		 * in following section, otherwise timer handler could run before
+		 * we complete the initialization.
+		 */
+		local_bh_disable();
 		inet_twsk_schedule(tw, timeo);
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
 		inet_twsk_put(tw);
+		local_bh_enable();
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up.  We've got bigger problems than
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168fcc06a..9d43c1f40274 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+	struct dccp_sock *dp = dccp_sk(sk);
 	int err = 0;
 	const int old_state = sk->sk_state;
 
@@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags)
 		sk->sk_err = ECONNRESET;
 
 	dccp_clear_xmit_timers(sk);
+	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+	dp->dccps_hc_rx_ccid = NULL;
+	dp->dccps_hc_tx_ccid = NULL;
 
 	__skb_queue_purge(&sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_write_queue);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 44e3fb7dec8c..1e287420ff49 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -51,9 +51,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
 	INIT_LIST_HEAD(&dst->list);
 	list_add_tail(&dsa_tree_list, &dst->list);
 
-	/* Initialize the reference counter to the number of switches, not 1 */
 	kref_init(&dst->refcount);
-	refcount_set(&dst->refcount.refcount, 0);
 
 	return dst;
 }
@@ -64,20 +62,23 @@ static void dsa_tree_free(struct dsa_switch_tree *dst)
 	kfree(dst);
 }
 
-static struct dsa_switch_tree *dsa_tree_touch(int index)
+static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst)
 {
-	struct dsa_switch_tree *dst;
-
-	dst = dsa_tree_find(index);
-	if (!dst)
-		dst = dsa_tree_alloc(index);
+	if (dst)
+		kref_get(&dst->refcount);
 
 	return dst;
 }
 
-static void dsa_tree_get(struct dsa_switch_tree *dst)
+static struct dsa_switch_tree *dsa_tree_touch(int index)
 {
-	kref_get(&dst->refcount);
+	struct dsa_switch_tree *dst;
+
+	dst = dsa_tree_find(index);
+	if (dst)
+		return dsa_tree_get(dst);
+	else
+		return dsa_tree_alloc(index);
 }
 
 static void dsa_tree_release(struct kref *ref)
@@ -91,7 +92,8 @@ static void dsa_tree_release(struct kref *ref)
 
 static void dsa_tree_put(struct dsa_switch_tree *dst)
 {
-	kref_put(&dst->refcount, dsa_tree_release);
+	if (dst)
+		kref_put(&dst->refcount, dsa_tree_release);
 }
 
 static bool dsa_port_is_dsa(struct dsa_port *port)
@@ -765,6 +767,7 @@ int dsa_register_switch(struct dsa_switch *ds)
 
 	mutex_lock(&dsa2_mutex);
 	err = dsa_switch_probe(ds);
+	dsa_tree_put(ds->dst);
 	mutex_unlock(&dsa2_mutex);
 
 	return err;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6e7a642493b..a95a55f79137 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -16,7 +16,6 @@
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
 #include <linux/mdio.h>
-#include <linux/list.h>
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a4573bccd6da..7a93359fbc72 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1428,7 +1428,7 @@ skip:
 
 static bool inetdev_valid_mtu(unsigned int mtu)
 {
-	return mtu >= 68;
+	return mtu >= IPV4_MIN_MTU;
 }
 
 static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d1f8f302dbf3..726f6b608274 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/times.h>
 #include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
 
 #include <net/net_namespace.h>
 #include <net/arp.h>
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
 	return scount;
 }
 
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+				 const struct flowi4 *fl4)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+	if (!in_dev)
+		return htonl(INADDR_ANY);
+
+	for_ifa(in_dev) {
+		if (inet_ifa_match(fl4->saddr, ifa))
+			return fl4->saddr;
+	} endfor_ifa(in_dev);
+
+	return htonl(INADDR_ANY);
+}
+
 static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
 {
 	struct sk_buff *skb;
@@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
 	pip->frag_off = htons(IP_DF);
 	pip->ttl = 1;
 	pip->daddr = fl4.daddr;
-	pip->saddr = fl4.saddr;
+	pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len = 0;	/* filled in later */
 	ip_select_ident(net, skb, NULL);
@@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
 }
 
 static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
-	int type, struct igmpv3_grec **ppgr)
+	int type, struct igmpv3_grec **ppgr, unsigned int mtu)
 {
 	struct net_device *dev = pmc->interface->dev;
 	struct igmpv3_report *pih;
 	struct igmpv3_grec *pgr;
 
-	if (!skb)
-		skb = igmpv3_newpack(dev, dev->mtu);
-	if (!skb)
-		return NULL;
+	if (!skb) {
+		skb = igmpv3_newpack(dev, mtu);
+		if (!skb)
+			return NULL;
+	}
 	pgr = skb_put(skb, sizeof(struct igmpv3_grec));
 	pgr->grec_type = type;
 	pgr->grec_auxwords = 0;
@@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	struct igmpv3_grec *pgr = NULL;
 	struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
 	int scount, stotal, first, isquery, truncate;
+	unsigned int mtu;
 
 	if (pmc->multiaddr == IGMP_ALL_HOSTS)
 		return skb;
 	if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
 		return skb;
 
+	mtu = READ_ONCE(dev->mtu);
+	if (mtu < IPV4_MIN_MTU)
+		return skb;
+
 	isquery = type == IGMPV3_MODE_IS_INCLUDE ||
 		  type == IGMPV3_MODE_IS_EXCLUDE;
 	truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
 			if (skb)
 				igmpv3_sendpack(skb);
-			skb = igmpv3_newpack(dev, dev->mtu);
+			skb = igmpv3_newpack(dev, mtu);
 		}
 	}
 	first = 1;
@@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 			pgr->grec_nsrcs = htons(scount);
 			if (skb)
 				igmpv3_sendpack(skb);
-			skb = igmpv3_newpack(dev, dev->mtu);
+			skb = igmpv3_newpack(dev, mtu);
 			first = 1;
 			scount = 0;
 		}
 		if (first) {
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 			first = 0;
 		}
 		if (!skb)
@@ -538,7 +562,7 @@ empty_source:
 				igmpv3_sendpack(skb);
 				skb = NULL; /* add_grhead will get a new one */
 			}
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 		}
 	}
 	if (pgr)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c690cd0d9b3f..b563e0c46bac 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -93,7 +93,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
 }
 
 /*
- * Enter the time wait state.
+ * Enter the time wait state. This is called with locally disabled BH.
  * Essentially we whip up a timewait bucket, copy the relevant info into it
  * from the SK, and mess with hash chains and list linkage.
  */
@@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	 */
 	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
 			hashinfo->bhash_size)];
-	spin_lock_bh(&bhead->lock);
+	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	WARN_ON(!icsk->icsk_bind_hash);
 	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
@@ -137,7 +137,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	if (__sk_nulls_del_node_init_rcu(sk))
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 
-	spin_unlock_bh(lock);
+	spin_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bb6239169b1a..9c1735632c8c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 	len = gre_hdr_len + sizeof(*ershdr);
 
 	if (unlikely(!pskb_may_pull(skb, len)))
-		return -ENOMEM;
+		return PACKET_REJECT;
 
 	iph = ip_hdr(skb);
 	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe6fee728ce4..5ddb1cb52bd4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
 	dev->needed_headroom = t_hlen + hlen;
 	mtu -= (dev->hard_header_len + t_hlen);
 
-	if (mtu < 68)
-		mtu = 68;
+	if (mtu < IPV4_MIN_MTU)
+		mtu = IPV4_MIN_MTU;
 
 	return mtu;
 }
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f88221aebc9d..0c3c944a7b72 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -373,7 +373,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 				if (!xt_find_jump_offset(offsets, newpos,
 							 newinfo->number))
 					return 0;
-				e = entry0 + newpos;
 			} else {
 				/* ... this is a fallthru */
 				newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4cbe5e80f3bf..2e0d339028bb 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -439,7 +439,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 				if (!xt_find_jump_offset(offsets, newpos,
 							 newinfo->number))
 					return 0;
-				e = entry0 + newpos;
 			} else {
 				/* ... this is a fallthru */
 				newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 17b4ca562944..69060e3abe85 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -813,12 +813,13 @@ static int clusterip_net_init(struct net *net)
 
 static void clusterip_net_exit(struct net *net)
 {
-#ifdef CONFIG_PROC_FS
 	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+#ifdef CONFIG_PROC_FS
 	proc_remove(cn->procdir);
 	cn->procdir = NULL;
 #endif
 	nf_unregister_net_hook(net, &cip_arp_ops);
+	WARN_ON_ONCE(!list_empty(&cn->configs));
 }
 
 static struct pernet_operations clusterip_net_ops = {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33b70bfd1122..125c1eab3eaa 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -513,11 +513,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int err;
 	struct ip_options_data opt_copy;
 	struct raw_frag_vec rfv;
+	int hdrincl;
 
 	err = -EMSGSIZE;
 	if (len > 0xFFFF)
 		goto out;
 
+	/* hdrincl should be READ_ONCE(inet->hdrincl)
+	 * but READ_ONCE() doesn't work with bit fields
+	 */
+	hdrincl = inet->hdrincl;
 	/*
 	 *	Check the flags.
 	 */
@@ -593,7 +598,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		/* Linux does not mangle headers on raw sockets,
 		 * so that IP options + IP_HDRINCL is non-sense.
 		 */
-		if (inet->hdrincl)
+		if (hdrincl)
 			goto done;
 		if (ipc.opt->opt.srr) {
 			if (!daddr)
@@ -615,12 +620,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE,
-			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+			   hdrincl ? IPPROTO_RAW : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk) |
-			    (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+			    (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
 			   daddr, saddr, 0, 0, sk->sk_uid);
 
-	if (!inet->hdrincl) {
+	if (!hdrincl) {
 		rfv.msg = msg;
 		rfv.hlen = 0;
 
@@ -645,7 +650,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		goto do_confirm;
 back_from_confirm:
 
-	if (inet->hdrincl)
+	if (hdrincl)
 		err = raw_send_hdrinc(sk, &fl4, msg, len,
 				      &rt, msg->msg_flags, &ipc.sockc);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf97317e6c97..f08eebe60446 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2412,6 +2412,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->snd_cwnd_cnt = 0;
 	tp->window_clamp = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
+	tp->is_sack_reneg = 0;
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
 	/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 69ee877574d0..8322f26e770e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -110,7 +110,8 @@ struct bbr {
 	u32	lt_last_lost;	/* LT intvl start: tp->lost */
 	u32	pacing_gain:10,	/* current gain for setting pacing rate */
 		cwnd_gain:10,	/* current gain for setting cwnd */
-		full_bw_cnt:3,	/* number of rounds without large bw gains */
+		full_bw_reached:1,   /* reached full bw in Startup? */
+		full_bw_cnt:2,	/* number of rounds without large bw gains */
 		cycle_idx:3,	/* current index in pacing_gain cycle array */
 		has_seen_rtt:1, /* have we seen an RTT sample yet? */
 		unused_b:5;
@@ -180,7 +181,7 @@ static bool bbr_full_bw_reached(const struct sock *sk)
 {
 	const struct bbr *bbr = inet_csk_ca(sk);
 
-	return bbr->full_bw_cnt >= bbr_full_bw_cnt;
+	return bbr->full_bw_reached;
 }
 
 /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
@@ -717,6 +718,7 @@ static void bbr_check_full_bw_reached(struct sock *sk,
 		return;
 	}
 	++bbr->full_bw_cnt;
+	bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
 }
 
 /* If pipe is probably full, drain the queue and then enter steady-state. */
@@ -850,6 +852,7 @@ static void bbr_init(struct sock *sk)
 	bbr->restore_cwnd = 0;
 	bbr->round_start = 0;
 	bbr->idle_restart = 0;
+	bbr->full_bw_reached = 0;
 	bbr->full_bw = 0;
 	bbr->full_bw_cnt = 0;
 	bbr->cycle_mstamp = 0;
@@ -871,6 +874,11 @@ static u32 bbr_sndbuf_expand(struct sock *sk)
  */
 static u32 bbr_undo_cwnd(struct sock *sk)
 {
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
+	bbr->full_bw_cnt = 0;
+	bbr_reset_lt_bw_sampling(sk);
 	return tcp_sk(sk)->snd_cwnd;
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 734cfc8ff76e..45f750e85714 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 	u32 new_sample = tp->rcv_rtt_est.rtt_us;
 	long m = sample;
 
-	if (m == 0)
-		m = 1;
-
 	if (new_sample != 0) {
 		/* If we sample in larger samples in the non-timestamp
 		 * case, we could grossly overestimate the RTT especially
@@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
 		return;
 	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
+	if (!delta_us)
+		delta_us = 1;
 	tcp_rcv_rtt_update(tp, delta_us, 1);
 
 new_measure:
@@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 	    (TCP_SKB_CB(skb)->end_seq -
 	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-		u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+		u32 delta_us;
 
+		if (!delta)
+			delta = 1;
+		delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 		tcp_rcv_rtt_update(tp, delta_us, 0);
 	}
 }
@@ -579,6 +581,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	int time;
 	int copied;
 
+	tcp_mstamp_refresh(tp);
 	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
 		return;
@@ -1941,6 +1944,8 @@ void tcp_enter_loss(struct sock *sk)
 	if (is_reneg) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
 		tp->sacked_out = 0;
+		/* Mark SACK reneging until we recover from this loss event. */
+		tp->is_sack_reneg = 1;
 	}
 	tcp_clear_all_retrans_hints(tp);
 
@@ -2326,6 +2331,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	}
 	tp->snd_cwnd_stamp = tcp_jiffies32;
 	tp->undo_marker = 0;
+	tp->rack.advanced = 1; /* Force RACK to re-exam losses */
 }
 
 static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2364,6 +2370,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 		return true;
 	}
 	tcp_set_ca_state(sk, TCP_CA_Open);
+	tp->is_sack_reneg = 0;
 	return false;
 }
 
@@ -2397,8 +2404,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 			NET_INC_STATS(sock_net(sk),
 					LINUX_MIB_TCPSPURIOUSRTOS);
 		inet_csk(sk)->icsk_retransmits = 0;
-		if (frto_undo || tcp_is_sack(tp))
+		if (frto_undo || tcp_is_sack(tp)) {
 			tcp_set_ca_state(sk, TCP_CA_Open);
+			tp->is_sack_reneg = 0;
+		}
 		return true;
 	}
 	return false;
@@ -3495,6 +3504,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	struct tcp_sacktag_state sack_state;
 	struct rate_sample rs = { .prior_delivered = 0 };
 	u32 prior_snd_una = tp->snd_una;
+	bool is_sack_reneg = tp->is_sack_reneg;
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
@@ -3611,7 +3621,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	delivered = tp->delivered - delivered;	/* freshly ACKed or SACKed */
 	lost = tp->lost - lost;			/* freshly marked lost */
-	tcp_rate_gen(sk, delivered, lost, sack_state.rate);
+	tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
 	tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
 	tcp_xmit_recovery(sk, rexmit);
 	return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c6bc0c4d19c6..94e28350f420 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
 			req->ts_recent,
 			0,
-			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
 					  AF_INET),
 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 			ip_hdr(skb)->tos);
@@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_filter);
 
+static void tcp_v4_restore_cb(struct sk_buff *skb)
+{
+	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
+		sizeof(struct inet_skb_parm));
+}
+
+static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
+			   const struct tcphdr *th)
+{
+	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+	 * barrier() makes sure compiler wont play fool^Waliasing games.
+	 */
+	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+		sizeof(struct inet_skb_parm));
+	barrier();
+
+	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+				    skb->len - th->doff * 4);
+	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
+	TCP_SKB_CB(skb)->sacked = 0;
+	TCP_SKB_CB(skb)->has_rxtstamp =
+			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+}
+
 /*
  *	From tcp_input.c
  */
@@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 	th = (const struct tcphdr *)skb->data;
 	iph = ip_hdr(skb);
-	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
-	 * barrier() makes sure compiler wont play fool^Waliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
-		sizeof(struct inet_skb_parm));
-	barrier();
-
-	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
-	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
-				    skb->len - th->doff * 4);
-	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
-	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
-	TCP_SKB_CB(skb)->sacked = 0;
-	TCP_SKB_CB(skb)->has_rxtstamp =
-			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
-
 lookup:
 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
 			       th->dest, sdif, &refcounted);
@@ -1679,14 +1689,19 @@ process:
 		sock_hold(sk);
 		refcounted = true;
 		nsk = NULL;
-		if (!tcp_filter(sk, skb))
+		if (!tcp_filter(sk, skb)) {
+			th = (const struct tcphdr *)skb->data;
+			iph = ip_hdr(skb);
+			tcp_v4_fill_cb(skb, iph, th);
 			nsk = tcp_check_req(sk, skb, req, false);
+		}
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
 		}
 		if (nsk == sk) {
 			reqsk_put(req);
+			tcp_v4_restore_cb(skb);
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v4_send_reset(nsk, skb);
 			goto discard_and_relse;
@@ -1712,6 +1727,7 @@ process:
 		goto discard_and_relse;
 	th = (const struct tcphdr *)skb->data;
 	iph = ip_hdr(skb);
+	tcp_v4_fill_cb(skb, iph, th);
 
 	skb->dev = NULL;
 
@@ -1742,6 +1758,8 @@ no_tcp_socket:
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	tcp_v4_fill_cb(skb, iph, th);
+
 	if (tcp_checksum_complete(skb)) {
 csum_error:
 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
@@ -1768,6 +1786,8 @@ do_time_wait:
 		goto discard_it;
 	}
 
+	tcp_v4_fill_cb(skb, iph, th);
+
 	if (tcp_checksum_complete(skb)) {
 		inet_twsk_put(inet_twsk(sk));
 		goto csum_error;
@@ -1784,6 +1804,7 @@ do_time_wait:
 		if (sk2) {
 			inet_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
+			tcp_v4_restore_cb(skb);
 			refcounted = false;
 			goto process;
 		}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e36eff0403f4..b079b619b60c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		if (state == TCP_TIME_WAIT)
 			timeo = TCP_TIMEWAIT_LEN;
 
+		/* tw_timer is pinned, so we need to make sure BH are disabled
+		 * in following section, otherwise timer handler could run before
+		 * we complete the initialization.
+		 */
+		local_bh_disable();
 		inet_twsk_schedule(tw, timeo);
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
 		inet_twsk_put(tw);
+		local_bh_enable();
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up.  We've got bigger problems than
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 3330a370d306..c61240e43923 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 
 /* Update the connection delivery information and generate a rate sample. */
 void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
-		  struct rate_sample *rs)
+		  bool is_sack_reneg, struct rate_sample *rs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 snd_us, ack_us;
@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 
 	rs->acked_sacked = delivered;	/* freshly ACKed or SACKed */
 	rs->losses = lost;		/* freshly marked lost */
-	/* Return an invalid sample if no timing information is available. */
-	if (!rs->prior_mstamp) {
+	/* Return an invalid sample if no timing information is available or
+	 * in recovery from loss with SACK reneging. Rate samples taken during
+	 * a SACK reneging event may overestimate bw by including packets that
+	 * were SACKed before the reneg.
+	 */
+	if (!rs->prior_mstamp || is_sack_reneg) {
 		rs->delivered = -1;
 		rs->interval_us = -1;
 		return;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index d3ea89020c69..3a81720ac0c4 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 	 * to queuing or delayed ACKs.
 	 */
 	reo_wnd = 1000;
-	if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
+	if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) &&
+	    min_rtt != ~0U) {
 		reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
 		reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
 	}
@@ -79,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 		 */
 		remaining = tp->rack.rtt_us + reo_wnd -
 			    tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
-		if (remaining < 0) {
+		if (remaining <= 0) {
 			tcp_rack_mark_skb_lost(sk, skb);
 			list_del_init(&skb->tcp_tsorted_anchor);
 		} else {
-			/* Record maximum wait time (+1 to avoid 0) */
-			*reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
+			/* Record maximum wait time */
+			*reo_timeout = max_t(u32, *reo_timeout, remaining);
 		}
 	}
 }
@@ -116,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 {
 	u32 rtt_us;
 
-	if (tp->rack.mstamp &&
-	    !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
-				 end_seq, tp->rack.end_seq))
-		return;
-
 	rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
-	if (sacked & TCPCB_RETRANS) {
+	if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
 		/* If the sacked packet was retransmitted, it's ambiguous
 		 * whether the retransmission or the original (or the prior
 		 * retransmission) was sacked.
@@ -133,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 		 * so it's at least one RTT (i.e., retransmission is at least
 		 * an RTT later).
 		 */
-		if (rtt_us < tcp_min_rtt(tp))
-			return;
+		return;
 	}
-	tp->rack.rtt_us = rtt_us;
-	tp->rack.mstamp = xmit_time;
-	tp->rack.end_seq = end_seq;
 	tp->rack.advanced = 1;
+	tp->rack.rtt_us = rtt_us;
+	if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
+				end_seq, tp->rack.end_seq)) {
+		tp->rack.mstamp = xmit_time;
+		tp->rack.end_seq = end_seq;
+	}
 }
 
 /* We have waited long enough to accommodate reordering. Mark the expired
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 16df6dd44b98..968fda198376 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -264,6 +264,7 @@ void tcp_delack_timer_handler(struct sock *sk)
 			icsk->icsk_ack.pingpong = 0;
 			icsk->icsk_ack.ato = TCP_ATO_MIN;
 		}
+		tcp_mstamp_refresh(tcp_sk(sk));
 		tcp_send_ack(sk);
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
 	}
@@ -632,6 +633,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
 		goto out;
 	}
 
+	tcp_mstamp_refresh(tp);
 	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
 		if (tp->linger2 >= 0) {
 			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3d3092adf1d2..db84f523656d 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -904,7 +904,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 	if (t->parms.collect_md) {
 		tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
 		if (!tun_dst)
-			return 0;
+			goto drop;
 	}
 	ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
 			    log_ecn_error);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fc6d7d143f2c..844642682b83 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
 }
 
 static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
-	int type, struct mld2_grec **ppgr)
+	int type, struct mld2_grec **ppgr, unsigned int mtu)
 {
-	struct net_device *dev = pmc->idev->dev;
 	struct mld2_report *pmr;
 	struct mld2_grec *pgr;
 
-	if (!skb)
-		skb = mld_newpack(pmc->idev, dev->mtu);
-	if (!skb)
-		return NULL;
+	if (!skb) {
+		skb = mld_newpack(pmc->idev, mtu);
+		if (!skb)
+			return NULL;
+	}
 	pgr = skb_put(skb, sizeof(struct mld2_grec));
 	pgr->grec_type = type;
 	pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	struct mld2_grec *pgr = NULL;
 	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
 	int scount, stotal, first, isquery, truncate;
+	unsigned int mtu;
 
 	if (pmc->mca_flags & MAF_NOREPORT)
 		return skb;
 
+	mtu = READ_ONCE(dev->mtu);
+	if (mtu < IPV6_MIN_MTU)
+		return skb;
+
 	isquery = type == MLD2_MODE_IS_INCLUDE ||
 		  type == MLD2_MODE_IS_EXCLUDE;
 	truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
 			if (skb)
 				mld_sendpack(skb);
-			skb = mld_newpack(idev, dev->mtu);
+			skb = mld_newpack(idev, mtu);
 		}
 	}
 	first = 1;
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 			pgr->grec_nsrcs = htons(scount);
 			if (skb)
 				mld_sendpack(skb);
-			skb = mld_newpack(idev, dev->mtu);
+			skb = mld_newpack(idev, mtu);
 			first = 1;
 			scount = 0;
 		}
 		if (first) {
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 			first = 0;
 		}
 		if (!skb)
@@ -1814,7 +1819,7 @@ empty_source:
 				mld_sendpack(skb);
 				skb = NULL; /* add_grhead will get a new one */
 			}
-			skb = add_grhead(skb, pmc, type, &pgr);
+			skb = add_grhead(skb, pmc, type, &pgr, mtu);
 		}
 	}
 	if (pgr)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f06e25065a34..1d7ae9366335 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -458,7 +458,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 				if (!xt_find_jump_offset(offsets, newpos,
 							 newinfo->number))
 					return 0;
-				e = entry0 + newpos;
 			} else {
 				/* ... this is a fallthru */
 				newpos = pos + e->next_offset;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 2b1a15846f9a..92c0047e7e33 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
33 33
34 if (range->flags & NF_NAT_RANGE_MAP_IPS) 34 if (range->flags & NF_NAT_RANGE_MAP_IPS)
35 return -EINVAL; 35 return -EINVAL;
36 return 0; 36 return nf_ct_netns_get(par->net, par->family);
37}
38
39static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
40{
41 nf_ct_netns_put(par->net, par->family);
37} 42}
38 43
39static struct xt_target masquerade_tg6_reg __read_mostly = { 44static struct xt_target masquerade_tg6_reg __read_mostly = {
40 .name = "MASQUERADE", 45 .name = "MASQUERADE",
41 .family = NFPROTO_IPV6, 46 .family = NFPROTO_IPV6,
42 .checkentry = masquerade_tg6_checkentry, 47 .checkentry = masquerade_tg6_checkentry,
48 .destroy = masquerade_tg6_destroy,
43 .target = masquerade_tg6, 49 .target = masquerade_tg6,
44 .targetsize = sizeof(struct nf_nat_range), 50 .targetsize = sizeof(struct nf_nat_range),
45 .table = "nat", 51 .table = "nat",
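
The MASQUERADE change pairs an acquire in .checkentry with a release in a new .destroy callback, so the per-netns conntrack dependency lives exactly as long as the rule does. A sketch of the pairing under the standard xt_target callbacks; the target name is hypothetical:

static int my_tg6_checkentry(const struct xt_tgchk_param *par)
{
        /* Pin conntrack for this netns while the rule exists; a
         * non-zero return fails rule insertion. */
        return nf_ct_netns_get(par->net, par->family);
}

static void my_tg6_destroy(const struct xt_tgdtor_param *par)
{
        /* Must mirror every successful checkentry. */
        nf_ct_netns_put(par->net, par->family);
}
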
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d60ddcb0bfe2..d7dc23c1b2ca 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
1098 ipip6_tunnel_link(sitn, t); 1098 ipip6_tunnel_link(sitn, t);
1099 t->parms.iph.ttl = p->iph.ttl; 1099 t->parms.iph.ttl = p->iph.ttl;
1100 t->parms.iph.tos = p->iph.tos; 1100 t->parms.iph.tos = p->iph.tos;
1101 t->parms.iph.frag_off = p->iph.frag_off;
1101 if (t->parms.link != p->link || t->fwmark != fwmark) { 1102 if (t->parms.link != p->link || t->fwmark != fwmark) {
1102 t->parms.link = p->link; 1103 t->parms.link = p->link;
1103 t->fwmark = fwmark; 1104 t->fwmark = fwmark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6bb98c93edfe..7178476b3d2f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
994 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 994 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
995 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 995 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
996 req->ts_recent, sk->sk_bound_dev_if, 996 req->ts_recent, sk->sk_bound_dev_if,
997 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 997 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
998 0, 0); 998 0, 0);
999} 999}
1000 1000
@@ -1454,7 +1454,6 @@ process:
1454 struct sock *nsk; 1454 struct sock *nsk;
1455 1455
1456 sk = req->rsk_listener; 1456 sk = req->rsk_listener;
1457 tcp_v6_fill_cb(skb, hdr, th);
1458 if (tcp_v6_inbound_md5_hash(sk, skb)) { 1457 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1459 sk_drops_add(sk, skb); 1458 sk_drops_add(sk, skb);
1460 reqsk_put(req); 1459 reqsk_put(req);
@@ -1467,8 +1466,12 @@ process:
1467 sock_hold(sk); 1466 sock_hold(sk);
1468 refcounted = true; 1467 refcounted = true;
1469 nsk = NULL; 1468 nsk = NULL;
1470 if (!tcp_filter(sk, skb)) 1469 if (!tcp_filter(sk, skb)) {
1470 th = (const struct tcphdr *)skb->data;
1471 hdr = ipv6_hdr(skb);
1472 tcp_v6_fill_cb(skb, hdr, th);
1471 nsk = tcp_check_req(sk, skb, req, false); 1473 nsk = tcp_check_req(sk, skb, req, false);
1474 }
1472 if (!nsk) { 1475 if (!nsk) {
1473 reqsk_put(req); 1476 reqsk_put(req);
1474 goto discard_and_relse; 1477 goto discard_and_relse;
@@ -1492,8 +1495,6 @@ process:
1492 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1495 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1493 goto discard_and_relse; 1496 goto discard_and_relse;
1494 1497
1495 tcp_v6_fill_cb(skb, hdr, th);
1496
1497 if (tcp_v6_inbound_md5_hash(sk, skb)) 1498 if (tcp_v6_inbound_md5_hash(sk, skb))
1498 goto discard_and_relse; 1499 goto discard_and_relse;
1499 1500
@@ -1501,6 +1502,7 @@ process:
1501 goto discard_and_relse; 1502 goto discard_and_relse;
1502 th = (const struct tcphdr *)skb->data; 1503 th = (const struct tcphdr *)skb->data;
1503 hdr = ipv6_hdr(skb); 1504 hdr = ipv6_hdr(skb);
1505 tcp_v6_fill_cb(skb, hdr, th);
1504 1506
1505 skb->dev = NULL; 1507 skb->dev = NULL;
1506 1508
@@ -1590,7 +1592,6 @@ do_time_wait:
1590 tcp_v6_timewait_ack(sk, skb); 1592 tcp_v6_timewait_ack(sk, skb);
1591 break; 1593 break;
1592 case TCP_TW_RST: 1594 case TCP_TW_RST:
1593 tcp_v6_restore_cb(skb);
1594 tcp_v6_send_reset(sk, skb); 1595 tcp_v6_send_reset(sk, skb);
1595 inet_twsk_deschedule_put(inet_twsk(sk)); 1596 inet_twsk_deschedule_put(inet_twsk(sk));
1596 goto discard_it; 1597 goto discard_it;
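
Taken together, the tcp_ipv6.c hunks move every tcp_v6_fill_cb() call to after tcp_filter(): the filter can modify skb->data, so previously cached header pointers are re-derived and the control block is filled only afterwards (which is also why the TCP_TW_RST path no longer needs tcp_v6_restore_cb()). The new request-socket path, restated with comments:

if (!tcp_filter(sk, skb)) {
        /* skb->data may have moved inside tcp_filter(): re-derive
         * both headers rather than reusing the stale pointers. */
        th = (const struct tcphdr *)skb->data;
        hdr = ipv6_hdr(skb);
        tcp_v6_fill_cb(skb, hdr, th);   /* cache into cb[] only now */
        nsk = tcp_check_req(sk, skb, req, false);
}
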
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 0b750a22c4b9..d4e98f20fc2a 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1625,60 +1625,30 @@ static struct proto kcm_proto = {
1625}; 1625};
1626 1626
1627/* Clone a kcm socket. */ 1627/* Clone a kcm socket. */
1628static int kcm_clone(struct socket *osock, struct kcm_clone *info, 1628static struct file *kcm_clone(struct socket *osock)
1629 struct socket **newsockp)
1630{ 1629{
1631 struct socket *newsock; 1630 struct socket *newsock;
1632 struct sock *newsk; 1631 struct sock *newsk;
1633 struct file *newfile;
1634 int err, newfd;
1635 1632
1636 err = -ENFILE;
1637 newsock = sock_alloc(); 1633 newsock = sock_alloc();
1638 if (!newsock) 1634 if (!newsock)
1639 goto out; 1635 return ERR_PTR(-ENFILE);
1640 1636
1641 newsock->type = osock->type; 1637 newsock->type = osock->type;
1642 newsock->ops = osock->ops; 1638 newsock->ops = osock->ops;
1643 1639
1644 __module_get(newsock->ops->owner); 1640 __module_get(newsock->ops->owner);
1645 1641
1646 newfd = get_unused_fd_flags(0);
1647 if (unlikely(newfd < 0)) {
1648 err = newfd;
1649 goto out_fd_fail;
1650 }
1651
1652 newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
1653 if (IS_ERR(newfile)) {
1654 err = PTR_ERR(newfile);
1655 goto out_sock_alloc_fail;
1656 }
1657
1658 newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, 1642 newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
1659 &kcm_proto, true); 1643 &kcm_proto, true);
1660 if (!newsk) { 1644 if (!newsk) {
1661 err = -ENOMEM; 1645 sock_release(newsock);
1662 goto out_sk_alloc_fail; 1646 return ERR_PTR(-ENOMEM);
1663 } 1647 }
1664
1665 sock_init_data(newsock, newsk); 1648 sock_init_data(newsock, newsk);
1666 init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); 1649 init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
1667 1650
1668 fd_install(newfd, newfile); 1651 return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
1669 *newsockp = newsock;
1670 info->fd = newfd;
1671
1672 return 0;
1673
1674out_sk_alloc_fail:
1675 fput(newfile);
1676out_sock_alloc_fail:
1677 put_unused_fd(newfd);
1678out_fd_fail:
1679 sock_release(newsock);
1680out:
1681 return err;
1682} 1652}
1683 1653
1684static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1654static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1708,17 +1678,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1708 } 1678 }
1709 case SIOCKCMCLONE: { 1679 case SIOCKCMCLONE: {
1710 struct kcm_clone info; 1680 struct kcm_clone info;
1711 struct socket *newsock = NULL; 1681 struct file *file;
1712 1682
1713 err = kcm_clone(sock, &info, &newsock); 1683 info.fd = get_unused_fd_flags(0);
1714 if (!err) { 1684 if (unlikely(info.fd < 0))
1715 if (copy_to_user((void __user *)arg, &info, 1685 return info.fd;
1716 sizeof(info))) {
1717 err = -EFAULT;
1718 sys_close(info.fd);
1719 }
1720 }
1721 1686
1687 file = kcm_clone(sock);
1688 if (IS_ERR(file)) {
1689 put_unused_fd(info.fd);
1690 return PTR_ERR(file);
1691 }
1692 if (copy_to_user((void __user *)arg, &info,
1693 sizeof(info))) {
1694 put_unused_fd(info.fd);
1695 fput(file);
1696 return -EFAULT;
1697 }
1698 fd_install(info.fd, file);
1699 err = 0;
1722 break; 1700 break;
1723 } 1701 }
1724 default: 1702 default:
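
The reworked SIOCKCMCLONE handler follows the usual reserve/commit descriptor pattern: the fd number is reserved first, all fallible steps (socket cloning, the copy_to_user() report) happen next, and fd_install() runs last, because once installed the descriptor is visible to userspace and can no longer be unwound. A condensed sketch, with info and arg as in the handler above:

info.fd = get_unused_fd_flags(0);       /* 1. reserve a number        */
if (info.fd < 0)
        return info.fd;

file = kcm_clone(sock);                 /* 2. fallible work           */
if (IS_ERR(file)) {
        put_unused_fd(info.fd);         /*    release the bare number */
        return PTR_ERR(file);
}

if (copy_to_user((void __user *)arg, &info, sizeof(info))) {
        put_unused_fd(info.fd);         /* 3. fallible reporting      */
        fput(file);                     /*    also drop the file      */
        return -EFAULT;
}

fd_install(info.fd, file);              /* 4. commit: cannot fail     */
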
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 41f5e48f8021..1621b6ab17ba 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -291,13 +291,14 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
291 int i; 291 int i;
292 292
293 mutex_lock(&sta->ampdu_mlme.mtx); 293 mutex_lock(&sta->ampdu_mlme.mtx);
294 for (i = 0; i < IEEE80211_NUM_TIDS; i++) { 294 for (i = 0; i < IEEE80211_NUM_TIDS; i++)
295 ___ieee80211_stop_tx_ba_session(sta, i, reason);
296 ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, 295 ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
297 WLAN_REASON_QSTA_LEAVE_QBSS, 296 WLAN_REASON_QSTA_LEAVE_QBSS,
298 reason != AGG_STOP_DESTROY_STA && 297 reason != AGG_STOP_DESTROY_STA &&
299 reason != AGG_STOP_PEER_REQUEST); 298 reason != AGG_STOP_PEER_REQUEST);
300 } 299
300 for (i = 0; i < IEEE80211_NUM_TIDS; i++)
301 ___ieee80211_stop_tx_ba_session(sta, i, reason);
301 mutex_unlock(&sta->ampdu_mlme.mtx); 302 mutex_unlock(&sta->ampdu_mlme.mtx);
302 303
303 /* stopping might queue the work again - so cancel only afterwards */ 304 /* stopping might queue the work again - so cancel only afterwards */
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 4f7826d7b47c..4394463a0c2e 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
797 struct mesh_path *mpath; 797 struct mesh_path *mpath;
798 u8 ttl, flags, hopcount; 798 u8 ttl, flags, hopcount;
799 const u8 *orig_addr; 799 const u8 *orig_addr;
800 u32 orig_sn, metric, metric_txsta, interval; 800 u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval;
801 bool root_is_gate; 801 bool root_is_gate;
802 802
803 ttl = rann->rann_ttl; 803 ttl = rann->rann_ttl;
@@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
808 interval = le32_to_cpu(rann->rann_interval); 808 interval = le32_to_cpu(rann->rann_interval);
809 hopcount = rann->rann_hopcount; 809 hopcount = rann->rann_hopcount;
810 hopcount++; 810 hopcount++;
811 metric = le32_to_cpu(rann->rann_metric); 811 orig_metric = le32_to_cpu(rann->rann_metric);
812 812
813 /* Ignore our own RANNs */ 813 /* Ignore our own RANNs */
814 if (ether_addr_equal(orig_addr, sdata->vif.addr)) 814 if (ether_addr_equal(orig_addr, sdata->vif.addr))
@@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
825 return; 825 return;
826 } 826 }
827 827
828 metric_txsta = airtime_link_metric_get(local, sta); 828 last_hop_metric = airtime_link_metric_get(local, sta);
829 new_metric = orig_metric + last_hop_metric;
830 if (new_metric < orig_metric)
831 new_metric = MAX_METRIC;
829 832
830 mpath = mesh_path_lookup(sdata, orig_addr); 833 mpath = mesh_path_lookup(sdata, orig_addr);
831 if (!mpath) { 834 if (!mpath) {
@@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
838 } 841 }
839 842
840 if (!(SN_LT(mpath->sn, orig_sn)) && 843 if (!(SN_LT(mpath->sn, orig_sn)) &&
841 !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { 844 !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) {
842 rcu_read_unlock(); 845 rcu_read_unlock();
843 return; 846 return;
844 } 847 }
@@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
856 } 859 }
857 860
858 mpath->sn = orig_sn; 861 mpath->sn = orig_sn;
859 mpath->rann_metric = metric + metric_txsta; 862 mpath->rann_metric = new_metric;
860 mpath->is_root = true; 863 mpath->is_root = true;
861 /* Recording RANNs sender address to send individually 864 /* Recording RANNs sender address to send individually
862 * addressed PREQs destined for root mesh STA */ 865 * addressed PREQs destined for root mesh STA */
@@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
876 mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, 879 mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
877 orig_sn, 0, NULL, 0, broadcast_addr, 880 orig_sn, 0, NULL, 0, broadcast_addr,
878 hopcount, ttl, interval, 881 hopcount, ttl, interval,
879 metric + metric_txsta, 0, sdata); 882 new_metric, 0, sdata);
880 } 883 }
881 884
882 rcu_read_unlock(); 885 rcu_read_unlock();
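
The RANN rework replaces the ambiguous metric/metric_txsta pair with explicit orig/last-hop/new names and saturates the addition, since a rann_metric near U32_MAX could otherwise wrap and masquerade as an excellent path. A standalone model of the clamp (MAX_METRIC is 0xffffffff in mesh_hwmp; the helper itself is hypothetical):

#include <stdint.h>

#define MAX_METRIC 0xffffffffU

static uint32_t metric_add(uint32_t orig_metric, uint32_t last_hop_metric)
{
        uint32_t new_metric = orig_metric + last_hop_metric;

        /* Unsigned overflow wraps; a sum smaller than an operand
         * proves it, so saturate instead of advertising a tiny metric. */
        if (new_metric < orig_metric)
                new_metric = MAX_METRIC;
        return new_metric;
}
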
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 04460440d731..c244691deab9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -895,7 +895,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
895 struct ieee80211_hdr_3addr *nullfunc; 895 struct ieee80211_hdr_3addr *nullfunc;
896 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 896 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
897 897
898 skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); 898 skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
899 if (!skb) 899 if (!skb)
900 return; 900 return;
901 901
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7b8154474b9e..3160954fc406 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4438,13 +4438,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
4438EXPORT_SYMBOL(ieee80211_pspoll_get); 4438EXPORT_SYMBOL(ieee80211_pspoll_get);
4439 4439
4440struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, 4440struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
4441 struct ieee80211_vif *vif) 4441 struct ieee80211_vif *vif,
4442 bool qos_ok)
4442{ 4443{
4443 struct ieee80211_hdr_3addr *nullfunc; 4444 struct ieee80211_hdr_3addr *nullfunc;
4444 struct ieee80211_sub_if_data *sdata; 4445 struct ieee80211_sub_if_data *sdata;
4445 struct ieee80211_if_managed *ifmgd; 4446 struct ieee80211_if_managed *ifmgd;
4446 struct ieee80211_local *local; 4447 struct ieee80211_local *local;
4447 struct sk_buff *skb; 4448 struct sk_buff *skb;
4449 bool qos = false;
4448 4450
4449 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) 4451 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
4450 return NULL; 4452 return NULL;
@@ -4453,7 +4455,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
4453 ifmgd = &sdata->u.mgd; 4455 ifmgd = &sdata->u.mgd;
4454 local = sdata->local; 4456 local = sdata->local;
4455 4457
4456 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); 4458 if (qos_ok) {
4459 struct sta_info *sta;
4460
4461 rcu_read_lock();
4462 sta = sta_info_get(sdata, ifmgd->bssid);
4463 qos = sta && sta->sta.wme;
4464 rcu_read_unlock();
4465 }
4466
4467 skb = dev_alloc_skb(local->hw.extra_tx_headroom +
4468 sizeof(*nullfunc) + 2);
4457 if (!skb) 4469 if (!skb)
4458 return NULL; 4470 return NULL;
4459 4471
@@ -4463,6 +4475,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
4463 nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | 4475 nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
4464 IEEE80211_STYPE_NULLFUNC | 4476 IEEE80211_STYPE_NULLFUNC |
4465 IEEE80211_FCTL_TODS); 4477 IEEE80211_FCTL_TODS);
4478 if (qos) {
4479 __le16 qos = cpu_to_le16(7);
4480
4481 BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC |
4482 IEEE80211_STYPE_NULLFUNC) !=
4483 IEEE80211_STYPE_QOS_NULLFUNC);
4484 nullfunc->frame_control |=
4485 cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC);
4486 skb->priority = 7;
4487 skb_set_queue_mapping(skb, IEEE80211_AC_VO);
4488 skb_put_data(skb, &qos, sizeof(qos));
4489 }
4490
4466 memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); 4491 memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN);
4467 memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); 4492 memcpy(nullfunc->addr2, vif->addr, ETH_ALEN);
4468 memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); 4493 memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN);
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index cf1bf2605c10..dc6347342e34 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -103,7 +103,6 @@ struct bitstr {
103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} 103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} 104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} 105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
107static unsigned int get_len(struct bitstr *bs); 106static unsigned int get_len(struct bitstr *bs);
108static unsigned int get_bit(struct bitstr *bs); 107static unsigned int get_bit(struct bitstr *bs);
109static unsigned int get_bits(struct bitstr *bs, unsigned int b); 108static unsigned int get_bits(struct bitstr *bs, unsigned int b);
@@ -165,6 +164,19 @@ static unsigned int get_len(struct bitstr *bs)
165 return v; 164 return v;
166} 165}
167 166
167static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
168{
169 bits += bs->bit;
170 bytes += bits / BITS_PER_BYTE;
171 if (bits % BITS_PER_BYTE > 0)
172 bytes++;
173
174 if (bs->cur + bytes > bs->end)
175 return 1;
176
177 return 0;
178}
179
168/****************************************************************************/ 180/****************************************************************************/
169static unsigned int get_bit(struct bitstr *bs) 181static unsigned int get_bit(struct bitstr *bs)
170{ 182{
@@ -279,8 +291,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
279 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 291 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
280 292
281 INC_BIT(bs); 293 INC_BIT(bs);
282 294 if (nf_h323_error_boundary(bs, 0, 0))
283 CHECK_BOUND(bs, 0); 295 return H323_ERROR_BOUND;
284 return H323_ERROR_NONE; 296 return H323_ERROR_NONE;
285} 297}
286 298
@@ -293,11 +305,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
293 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 305 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
294 306
295 BYTE_ALIGN(bs); 307 BYTE_ALIGN(bs);
296 CHECK_BOUND(bs, 1); 308 if (nf_h323_error_boundary(bs, 1, 0))
309 return H323_ERROR_BOUND;
310
297 len = *bs->cur++; 311 len = *bs->cur++;
298 bs->cur += len; 312 bs->cur += len;
313 if (nf_h323_error_boundary(bs, 0, 0))
314 return H323_ERROR_BOUND;
299 315
300 CHECK_BOUND(bs, 0);
301 return H323_ERROR_NONE; 316 return H323_ERROR_NONE;
302} 317}
303 318
@@ -319,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
319 bs->cur += 2; 334 bs->cur += 2;
320 break; 335 break;
321 case CONS: /* 64K < Range < 4G */ 336 case CONS: /* 64K < Range < 4G */
337 if (nf_h323_error_boundary(bs, 0, 2))
338 return H323_ERROR_BOUND;
322 len = get_bits(bs, 2) + 1; 339 len = get_bits(bs, 2) + 1;
323 BYTE_ALIGN(bs); 340 BYTE_ALIGN(bs);
324 if (base && (f->attr & DECODE)) { /* timeToLive */ 341 if (base && (f->attr & DECODE)) { /* timeToLive */
@@ -330,7 +347,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
330 break; 347 break;
331 case UNCO: 348 case UNCO:
332 BYTE_ALIGN(bs); 349 BYTE_ALIGN(bs);
333 CHECK_BOUND(bs, 2); 350 if (nf_h323_error_boundary(bs, 2, 0))
351 return H323_ERROR_BOUND;
334 len = get_len(bs); 352 len = get_len(bs);
335 bs->cur += len; 353 bs->cur += len;
336 break; 354 break;
@@ -341,7 +359,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
341 359
342 PRINT("\n"); 360 PRINT("\n");
343 361
344 CHECK_BOUND(bs, 0); 362 if (nf_h323_error_boundary(bs, 0, 0))
363 return H323_ERROR_BOUND;
345 return H323_ERROR_NONE; 364 return H323_ERROR_NONE;
346} 365}
347 366
@@ -357,7 +376,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
357 INC_BITS(bs, f->sz); 376 INC_BITS(bs, f->sz);
358 } 377 }
359 378
360 CHECK_BOUND(bs, 0); 379 if (nf_h323_error_boundary(bs, 0, 0))
380 return H323_ERROR_BOUND;
361 return H323_ERROR_NONE; 381 return H323_ERROR_NONE;
362} 382}
363 383
@@ -375,12 +395,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
375 len = f->lb; 395 len = f->lb;
376 break; 396 break;
377 case WORD: /* 2-byte length */ 397 case WORD: /* 2-byte length */
378 CHECK_BOUND(bs, 2); 398 if (nf_h323_error_boundary(bs, 2, 0))
399 return H323_ERROR_BOUND;
379 len = (*bs->cur++) << 8; 400 len = (*bs->cur++) << 8;
380 len += (*bs->cur++) + f->lb; 401 len += (*bs->cur++) + f->lb;
381 break; 402 break;
382 case SEMI: 403 case SEMI:
383 CHECK_BOUND(bs, 2); 404 if (nf_h323_error_boundary(bs, 2, 0))
405 return H323_ERROR_BOUND;
384 len = get_len(bs); 406 len = get_len(bs);
385 break; 407 break;
386 default: 408 default:
@@ -391,7 +413,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
391 bs->cur += len >> 3; 413 bs->cur += len >> 3;
392 bs->bit = len & 7; 414 bs->bit = len & 7;
393 415
394 CHECK_BOUND(bs, 0); 416 if (nf_h323_error_boundary(bs, 0, 0))
417 return H323_ERROR_BOUND;
395 return H323_ERROR_NONE; 418 return H323_ERROR_NONE;
396} 419}
397 420
@@ -404,12 +427,15 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
404 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 427 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
405 428
406 /* 2 <= Range <= 255 */ 429 /* 2 <= Range <= 255 */
430 if (nf_h323_error_boundary(bs, 0, f->sz))
431 return H323_ERROR_BOUND;
407 len = get_bits(bs, f->sz) + f->lb; 432 len = get_bits(bs, f->sz) + f->lb;
408 433
409 BYTE_ALIGN(bs); 434 BYTE_ALIGN(bs);
410 INC_BITS(bs, (len << 2)); 435 INC_BITS(bs, (len << 2));
411 436
412 CHECK_BOUND(bs, 0); 437 if (nf_h323_error_boundary(bs, 0, 0))
438 return H323_ERROR_BOUND;
413 return H323_ERROR_NONE; 439 return H323_ERROR_NONE;
414} 440}
415 441
@@ -440,15 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
440 break; 466 break;
441 case BYTE: /* Range == 256 */ 467 case BYTE: /* Range == 256 */
442 BYTE_ALIGN(bs); 468 BYTE_ALIGN(bs);
443 CHECK_BOUND(bs, 1); 469 if (nf_h323_error_boundary(bs, 1, 0))
470 return H323_ERROR_BOUND;
444 len = (*bs->cur++) + f->lb; 471 len = (*bs->cur++) + f->lb;
445 break; 472 break;
446 case SEMI: 473 case SEMI:
447 BYTE_ALIGN(bs); 474 BYTE_ALIGN(bs);
448 CHECK_BOUND(bs, 2); 475 if (nf_h323_error_boundary(bs, 2, 0))
476 return H323_ERROR_BOUND;
449 len = get_len(bs) + f->lb; 477 len = get_len(bs) + f->lb;
450 break; 478 break;
451 default: /* 2 <= Range <= 255 */ 479 default: /* 2 <= Range <= 255 */
480 if (nf_h323_error_boundary(bs, 0, f->sz))
481 return H323_ERROR_BOUND;
452 len = get_bits(bs, f->sz) + f->lb; 482 len = get_bits(bs, f->sz) + f->lb;
453 BYTE_ALIGN(bs); 483 BYTE_ALIGN(bs);
454 break; 484 break;
@@ -458,7 +488,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
458 488
459 PRINT("\n"); 489 PRINT("\n");
460 490
461 CHECK_BOUND(bs, 0); 491 if (nf_h323_error_boundary(bs, 0, 0))
492 return H323_ERROR_BOUND;
462 return H323_ERROR_NONE; 493 return H323_ERROR_NONE;
463} 494}
464 495
@@ -473,10 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
473 switch (f->sz) { 504 switch (f->sz) {
474 case BYTE: /* Range == 256 */ 505 case BYTE: /* Range == 256 */
475 BYTE_ALIGN(bs); 506 BYTE_ALIGN(bs);
476 CHECK_BOUND(bs, 1); 507 if (nf_h323_error_boundary(bs, 1, 0))
508 return H323_ERROR_BOUND;
477 len = (*bs->cur++) + f->lb; 509 len = (*bs->cur++) + f->lb;
478 break; 510 break;
479 default: /* 2 <= Range <= 255 */ 511 default: /* 2 <= Range <= 255 */
512 if (nf_h323_error_boundary(bs, 0, f->sz))
513 return H323_ERROR_BOUND;
480 len = get_bits(bs, f->sz) + f->lb; 514 len = get_bits(bs, f->sz) + f->lb;
481 BYTE_ALIGN(bs); 515 BYTE_ALIGN(bs);
482 break; 516 break;
@@ -484,7 +518,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
484 518
485 bs->cur += len << 1; 519 bs->cur += len << 1;
486 520
487 CHECK_BOUND(bs, 0); 521 if (nf_h323_error_boundary(bs, 0, 0))
522 return H323_ERROR_BOUND;
488 return H323_ERROR_NONE; 523 return H323_ERROR_NONE;
489} 524}
490 525
@@ -503,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
503 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; 538 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
504 539
505 /* Extensible? */ 540 /* Extensible? */
541 if (nf_h323_error_boundary(bs, 0, 1))
542 return H323_ERROR_BOUND;
506 ext = (f->attr & EXT) ? get_bit(bs) : 0; 543 ext = (f->attr & EXT) ? get_bit(bs) : 0;
507 544
508 /* Get fields bitmap */ 545 /* Get fields bitmap */
546 if (nf_h323_error_boundary(bs, 0, f->sz))
547 return H323_ERROR_BOUND;
509 bmp = get_bitmap(bs, f->sz); 548 bmp = get_bitmap(bs, f->sz);
510 if (base) 549 if (base)
511 *(unsigned int *)base = bmp; 550 *(unsigned int *)base = bmp;
@@ -525,9 +564,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
525 564
526 /* Decode */ 565 /* Decode */
527 if (son->attr & OPEN) { /* Open field */ 566 if (son->attr & OPEN) { /* Open field */
528 CHECK_BOUND(bs, 2); 567 if (nf_h323_error_boundary(bs, 2, 0))
568 return H323_ERROR_BOUND;
529 len = get_len(bs); 569 len = get_len(bs);
530 CHECK_BOUND(bs, len); 570 if (nf_h323_error_boundary(bs, len, 0))
571 return H323_ERROR_BOUND;
531 if (!base || !(son->attr & DECODE)) { 572 if (!base || !(son->attr & DECODE)) {
532 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, 573 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
533 " ", son->name); 574 " ", son->name);
@@ -555,8 +596,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
555 return H323_ERROR_NONE; 596 return H323_ERROR_NONE;
556 597
557 /* Get the extension bitmap */ 598 /* Get the extension bitmap */
599 if (nf_h323_error_boundary(bs, 0, 7))
600 return H323_ERROR_BOUND;
558 bmp2_len = get_bits(bs, 7) + 1; 601 bmp2_len = get_bits(bs, 7) + 1;
559 CHECK_BOUND(bs, (bmp2_len + 7) >> 3); 602 if (nf_h323_error_boundary(bs, 0, bmp2_len))
603 return H323_ERROR_BOUND;
560 bmp2 = get_bitmap(bs, bmp2_len); 604 bmp2 = get_bitmap(bs, bmp2_len);
561 bmp |= bmp2 >> f->sz; 605 bmp |= bmp2 >> f->sz;
562 if (base) 606 if (base)
@@ -567,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
567 for (opt = 0; opt < bmp2_len; opt++, i++, son++) { 611 for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
568 /* Check Range */ 612 /* Check Range */
569 if (i >= f->ub) { /* Newer Version? */ 613 if (i >= f->ub) { /* Newer Version? */
570 CHECK_BOUND(bs, 2); 614 if (nf_h323_error_boundary(bs, 2, 0))
615 return H323_ERROR_BOUND;
571 len = get_len(bs); 616 len = get_len(bs);
572 CHECK_BOUND(bs, len); 617 if (nf_h323_error_boundary(bs, len, 0))
618 return H323_ERROR_BOUND;
573 bs->cur += len; 619 bs->cur += len;
574 continue; 620 continue;
575 } 621 }
@@ -583,9 +629,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
583 if (!((0x80000000 >> opt) & bmp2)) /* Not present */ 629 if (!((0x80000000 >> opt) & bmp2)) /* Not present */
584 continue; 630 continue;
585 631
586 CHECK_BOUND(bs, 2); 632 if (nf_h323_error_boundary(bs, 2, 0))
633 return H323_ERROR_BOUND;
587 len = get_len(bs); 634 len = get_len(bs);
588 CHECK_BOUND(bs, len); 635 if (nf_h323_error_boundary(bs, len, 0))
636 return H323_ERROR_BOUND;
589 if (!base || !(son->attr & DECODE)) { 637 if (!base || !(son->attr & DECODE)) {
590 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", 638 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
591 son->name); 639 son->name);
@@ -623,22 +671,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
623 switch (f->sz) { 671 switch (f->sz) {
624 case BYTE: 672 case BYTE:
625 BYTE_ALIGN(bs); 673 BYTE_ALIGN(bs);
626 CHECK_BOUND(bs, 1); 674 if (nf_h323_error_boundary(bs, 1, 0))
675 return H323_ERROR_BOUND;
627 count = *bs->cur++; 676 count = *bs->cur++;
628 break; 677 break;
629 case WORD: 678 case WORD:
630 BYTE_ALIGN(bs); 679 BYTE_ALIGN(bs);
631 CHECK_BOUND(bs, 2); 680 if (nf_h323_error_boundary(bs, 2, 0))
681 return H323_ERROR_BOUND;
632 count = *bs->cur++; 682 count = *bs->cur++;
633 count <<= 8; 683 count <<= 8;
634 count += *bs->cur++; 684 count += *bs->cur++;
635 break; 685 break;
636 case SEMI: 686 case SEMI:
637 BYTE_ALIGN(bs); 687 BYTE_ALIGN(bs);
638 CHECK_BOUND(bs, 2); 688 if (nf_h323_error_boundary(bs, 2, 0))
689 return H323_ERROR_BOUND;
639 count = get_len(bs); 690 count = get_len(bs);
640 break; 691 break;
641 default: 692 default:
693 if (nf_h323_error_boundary(bs, 0, f->sz))
694 return H323_ERROR_BOUND;
642 count = get_bits(bs, f->sz); 695 count = get_bits(bs, f->sz);
643 break; 696 break;
644 } 697 }
@@ -658,8 +711,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
658 for (i = 0; i < count; i++) { 711 for (i = 0; i < count; i++) {
659 if (son->attr & OPEN) { 712 if (son->attr & OPEN) {
660 BYTE_ALIGN(bs); 713 BYTE_ALIGN(bs);
714 if (nf_h323_error_boundary(bs, 2, 0))
715 return H323_ERROR_BOUND;
661 len = get_len(bs); 716 len = get_len(bs);
662 CHECK_BOUND(bs, len); 717 if (nf_h323_error_boundary(bs, len, 0))
718 return H323_ERROR_BOUND;
663 if (!base || !(son->attr & DECODE)) { 719 if (!base || !(son->attr & DECODE)) {
664 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, 720 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
665 " ", son->name); 721 " ", son->name);
@@ -710,11 +766,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
710 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; 766 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
711 767
712 /* Decode the choice index number */ 768 /* Decode the choice index number */
769 if (nf_h323_error_boundary(bs, 0, 1))
770 return H323_ERROR_BOUND;
713 if ((f->attr & EXT) && get_bit(bs)) { 771 if ((f->attr & EXT) && get_bit(bs)) {
714 ext = 1; 772 ext = 1;
773 if (nf_h323_error_boundary(bs, 0, 7))
774 return H323_ERROR_BOUND;
715 type = get_bits(bs, 7) + f->lb; 775 type = get_bits(bs, 7) + f->lb;
716 } else { 776 } else {
717 ext = 0; 777 ext = 0;
778 if (nf_h323_error_boundary(bs, 0, f->sz))
779 return H323_ERROR_BOUND;
718 type = get_bits(bs, f->sz); 780 type = get_bits(bs, f->sz);
719 if (type >= f->lb) 781 if (type >= f->lb)
720 return H323_ERROR_RANGE; 782 return H323_ERROR_RANGE;
@@ -727,8 +789,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
727 /* Check Range */ 789 /* Check Range */
728 if (type >= f->ub) { /* Newer version? */ 790 if (type >= f->ub) { /* Newer version? */
729 BYTE_ALIGN(bs); 791 BYTE_ALIGN(bs);
792 if (nf_h323_error_boundary(bs, 2, 0))
793 return H323_ERROR_BOUND;
730 len = get_len(bs); 794 len = get_len(bs);
731 CHECK_BOUND(bs, len); 795 if (nf_h323_error_boundary(bs, len, 0))
796 return H323_ERROR_BOUND;
732 bs->cur += len; 797 bs->cur += len;
733 return H323_ERROR_NONE; 798 return H323_ERROR_NONE;
734 } 799 }
@@ -742,8 +807,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
742 807
743 if (ext || (son->attr & OPEN)) { 808 if (ext || (son->attr & OPEN)) {
744 BYTE_ALIGN(bs); 809 BYTE_ALIGN(bs);
810 if (nf_h323_error_boundary(bs, 2, 0))
811 return H323_ERROR_BOUND;
745 len = get_len(bs); 812 len = get_len(bs);
746 CHECK_BOUND(bs, len); 813 if (nf_h323_error_boundary(bs, len, 0))
814 return H323_ERROR_BOUND;
747 if (!base || !(son->attr & DECODE)) { 815 if (!base || !(son->attr & DECODE)) {
748 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", 816 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
749 son->name); 817 son->name);
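
nf_h323_error_boundary() generalizes the old byte-only CHECK_BOUND macro to bit granularity: the pending bit offset is added in, converted to whole bytes, and any partial byte rounds up before the bounds comparison. A standalone model of the arithmetic, tracking a remaining-byte count instead of the kernel's cur/end pointers:

#include <stddef.h>

#define BITS_PER_BYTE 8

/* Returns 1 when reading `bytes` whole bytes plus `bits` extra bits,
 * starting `bit` bits into the current byte, would overrun the
 * `remaining` bytes of input -- mirroring the helper's return value. */
static int would_overrun(size_t remaining, size_t bit,
                         size_t bytes, size_t bits)
{
        bits += bit;                    /* bits already consumed  */
        bytes += bits / BITS_PER_BYTE;  /* whole bytes needed     */
        if (bits % BITS_PER_BYTE > 0)
                bytes++;                /* partial byte rounds up */
        return bytes > remaining;
}
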
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59c08997bfdf..382d49792f42 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,6 @@
45#include <net/netfilter/nf_conntrack_zones.h> 45#include <net/netfilter/nf_conntrack_zones.h>
46#include <net/netfilter/nf_conntrack_timestamp.h> 46#include <net/netfilter/nf_conntrack_timestamp.h>
47#include <net/netfilter/nf_conntrack_labels.h> 47#include <net/netfilter/nf_conntrack_labels.h>
48#include <net/netfilter/nf_conntrack_seqadj.h>
49#include <net/netfilter/nf_conntrack_synproxy.h> 48#include <net/netfilter/nf_conntrack_synproxy.h>
50#ifdef CONFIG_NF_NAT_NEEDED 49#ifdef CONFIG_NF_NAT_NEEDED
51#include <net/netfilter/nf_nat_core.h> 50#include <net/netfilter/nf_nat_core.h>
@@ -1566,9 +1565,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
1566static int ctnetlink_change_timeout(struct nf_conn *ct, 1565static int ctnetlink_change_timeout(struct nf_conn *ct,
1567 const struct nlattr * const cda[]) 1566 const struct nlattr * const cda[])
1568{ 1567{
1569 u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); 1568 u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
1570 1569
1571 ct->timeout = nfct_time_stamp + timeout * HZ; 1570 if (timeout > INT_MAX)
1571 timeout = INT_MAX;
1572 ct->timeout = nfct_time_stamp + (u32)timeout;
1572 1573
1573 if (test_bit(IPS_DYING_BIT, &ct->status)) 1574 if (test_bit(IPS_DYING_BIT, &ct->status))
1574 return -ETIME; 1575 return -ETIME;
@@ -1768,6 +1769,7 @@ ctnetlink_create_conntrack(struct net *net,
1768 int err = -EINVAL; 1769 int err = -EINVAL;
1769 struct nf_conntrack_helper *helper; 1770 struct nf_conntrack_helper *helper;
1770 struct nf_conn_tstamp *tstamp; 1771 struct nf_conn_tstamp *tstamp;
1772 u64 timeout;
1771 1773
1772 ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); 1774 ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
1773 if (IS_ERR(ct)) 1775 if (IS_ERR(ct))
@@ -1776,7 +1778,10 @@ ctnetlink_create_conntrack(struct net *net,
1776 if (!cda[CTA_TIMEOUT]) 1778 if (!cda[CTA_TIMEOUT])
1777 goto err1; 1779 goto err1;
1778 1780
1779 ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; 1781 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
1782 if (timeout > INT_MAX)
1783 timeout = INT_MAX;
1784 ct->timeout = (u32)timeout + nfct_time_stamp;
1780 1785
1781 rcu_read_lock(); 1786 rcu_read_lock();
1782 if (cda[CTA_HELP]) { 1787 if (cda[CTA_HELP]) {
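
Both conntrack-netlink timeout sites now widen the 32-bit second count from CTA_TIMEOUT to 64 bits before multiplying by HZ, and cap the product at INT_MAX jiffies: previously seconds * HZ could overflow 32 bits and produce a timestamp in the past. A standalone model of the computation (HZ fixed at 250 only for illustration; the real value is a build-time choice):

#include <stdint.h>
#include <limits.h>

#define HZ 250  /* illustrative only */

static uint32_t clamped_timeout(uint32_t seconds)
{
        uint64_t timeout = (uint64_t)seconds * HZ;  /* no 32-bit wrap */

        if (timeout > INT_MAX)
                timeout = INT_MAX;      /* longest representable delay */
        return (uint32_t)timeout;       /* added to nfct_time_stamp    */
}
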
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b12fc07111d0..37ef35b861f2 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct,
1039 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && 1039 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1040 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) 1040 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1041 timeout = timeouts[TCP_CONNTRACK_UNACK]; 1041 timeout = timeouts[TCP_CONNTRACK_UNACK];
1042 else if (ct->proto.tcp.last_win == 0 &&
1043 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1044 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1042 else 1045 else
1043 timeout = timeouts[new_state]; 1046 timeout = timeouts[new_state];
1044 spin_unlock_bh(&ct->lock); 1047 spin_unlock_bh(&ct->lock);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d8327b43e4dc..10798b357481 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5847,6 +5847,12 @@ static int __net_init nf_tables_init_net(struct net *net)
5847 return 0; 5847 return 0;
5848} 5848}
5849 5849
5850static void __net_exit nf_tables_exit_net(struct net *net)
5851{
5852 WARN_ON_ONCE(!list_empty(&net->nft.af_info));
5853 WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
5854}
5855
5850int __nft_release_basechain(struct nft_ctx *ctx) 5856int __nft_release_basechain(struct nft_ctx *ctx)
5851{ 5857{
5852 struct nft_rule *rule, *nr; 5858 struct nft_rule *rule, *nr;
@@ -5917,6 +5923,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
5917 5923
5918static struct pernet_operations nf_tables_net_ops = { 5924static struct pernet_operations nf_tables_net_ops = {
5919 .init = nf_tables_init_net, 5925 .init = nf_tables_init_net,
5926 .exit = nf_tables_exit_net,
5920}; 5927};
5921 5928
5922static int __init nf_tables_module_init(void) 5929static int __init nf_tables_module_init(void)
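
This is the first of several hunks (see also nfnetlink_log, nfnetlink_queue and x_tables below) that give a subsystem's pernet_operations an .exit handler whose only job is to assert that per-namespace lists drained before the namespace dies, turning silent object leaks into one-shot warnings. The general shape, with hypothetical subsystem names:

static void __net_exit my_subsys_exit_net(struct net *net)
{
        /* Everything registered in .init or at runtime must be gone
         * by now; a leftover entry is a refcount or teardown bug. */
        WARN_ON_ONCE(!list_empty(&net->mysubsys.obj_list));
}

static struct pernet_operations my_subsys_net_ops = {
        .init = my_subsys_init_net,     /* as before          */
        .exit = my_subsys_exit_net,     /* new leak assertion */
};
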
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 41628b393673..d33ce6d5ebce 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/capability.h>
20#include <net/netlink.h> 21#include <net/netlink.h>
21#include <net/sock.h> 22#include <net/sock.h>
22 23
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
407 struct nfnl_cthelper *nlcth; 408 struct nfnl_cthelper *nlcth;
408 int ret = 0; 409 int ret = 0;
409 410
411 if (!capable(CAP_NET_ADMIN))
412 return -EPERM;
413
410 if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) 414 if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
411 return -EINVAL; 415 return -EINVAL;
412 416
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
611 struct nfnl_cthelper *nlcth; 615 struct nfnl_cthelper *nlcth;
612 bool tuple_set = false; 616 bool tuple_set = false;
613 617
618 if (!capable(CAP_NET_ADMIN))
619 return -EPERM;
620
614 if (nlh->nlmsg_flags & NLM_F_DUMP) { 621 if (nlh->nlmsg_flags & NLM_F_DUMP) {
615 struct netlink_dump_control c = { 622 struct netlink_dump_control c = {
616 .dump = nfnl_cthelper_dump_table, 623 .dump = nfnl_cthelper_dump_table,
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
678 struct nfnl_cthelper *nlcth, *n; 685 struct nfnl_cthelper *nlcth, *n;
679 int j = 0, ret; 686 int j = 0, ret;
680 687
688 if (!capable(CAP_NET_ADMIN))
689 return -EPERM;
690
681 if (tb[NFCTH_NAME]) 691 if (tb[NFCTH_NAME])
682 helper_name = nla_data(tb[NFCTH_NAME]); 692 helper_name = nla_data(tb[NFCTH_NAME]);
683 693
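
Each nfnetlink_cthelper request handler (and the xt_osf callbacks below) now rejects callers without CAP_NET_ADMIN up front. Note it is capable(), checked against the initial user namespace, rather than ns_capable(), presumably because helper registration is global rather than per-netns state. Sketch of the guard in a hypothetical handler:

static int my_nfnl_new(struct net *net, struct sock *nfnl,
                       struct sk_buff *skb, const struct nlmsghdr *nlh,
                       const struct nlattr * const tb[])
{
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;  /* refuse before touching attributes */
        /* ... attribute parsing and registration ... */
        return 0;
}
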
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e5afab86381c..e955bec0acc6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net)
1093 1093
1094static void __net_exit nfnl_log_net_exit(struct net *net) 1094static void __net_exit nfnl_log_net_exit(struct net *net)
1095{ 1095{
1096 struct nfnl_log_net *log = nfnl_log_pernet(net);
1097 unsigned int i;
1098
1096#ifdef CONFIG_PROC_FS 1099#ifdef CONFIG_PROC_FS
1097 remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); 1100 remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
1098#endif 1101#endif
1099 nf_log_unset(net, &nfulnl_logger); 1102 nf_log_unset(net, &nfulnl_logger);
1103 for (i = 0; i < INSTANCE_BUCKETS; i++)
1104 WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
1100} 1105}
1101 1106
1102static struct pernet_operations nfnl_log_net_ops = { 1107static struct pernet_operations nfnl_log_net_ops = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a16356cacec3..c09b36755ed7 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net)
1512 1512
1513static void __net_exit nfnl_queue_net_exit(struct net *net) 1513static void __net_exit nfnl_queue_net_exit(struct net *net)
1514{ 1514{
1515 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1516 unsigned int i;
1517
1515 nf_unregister_queue_handler(net); 1518 nf_unregister_queue_handler(net);
1516#ifdef CONFIG_PROC_FS 1519#ifdef CONFIG_PROC_FS
1517 remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); 1520 remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
1518#endif 1521#endif
1522 for (i = 0; i < INSTANCE_BUCKETS; i++)
1523 WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
1519} 1524}
1520 1525
1521static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) 1526static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a0a93d987a3b..47ec1046ad11 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
214 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, 214 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
215 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, 215 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
216 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, 216 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
217 [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
218 [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
217}; 219};
218 220
219static int nft_exthdr_init(const struct nft_ctx *ctx, 221static int nft_exthdr_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a77dd514297c..55802e97f906 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net)
1729 return 0; 1729 return 0;
1730} 1730}
1731 1731
1732static void __net_exit xt_net_exit(struct net *net)
1733{
1734 int i;
1735
1736 for (i = 0; i < NFPROTO_NUMPROTO; i++)
1737 WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
1738}
1739
1732static struct pernet_operations xt_net_ops = { 1740static struct pernet_operations xt_net_ops = {
1733 .init = xt_net_init, 1741 .init = xt_net_init,
1742 .exit = xt_net_exit,
1734}; 1743};
1735 1744
1736static int __init xt_init(void) 1745static int __init xt_init(void)
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 041da0d9c06f..1f7fbd3c7e5a 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
27{ 27{
28 struct sock_fprog_kern program; 28 struct sock_fprog_kern program;
29 29
30 if (len > XT_BPF_MAX_NUM_INSTR)
31 return -EINVAL;
32
30 program.len = len; 33 program.len = len;
31 program.filter = insns; 34 program.filter = insns;
32 35
@@ -55,6 +58,9 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
55 mm_segment_t oldfs = get_fs(); 58 mm_segment_t oldfs = get_fs();
56 int retval, fd; 59 int retval, fd;
57 60
61 if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
62 return -EINVAL;
63
58 set_fs(KERNEL_DS); 64 set_fs(KERNEL_DS);
59 fd = bpf_obj_get_user(path, 0); 65 fd = bpf_obj_get_user(path, 0);
60 set_fs(oldfs); 66 set_fs(oldfs);
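
Both xt_bpf guards bound userspace-supplied data before it is used: the instruction count is capped ahead of any allocation, and the pinned-program path must contain a NUL inside its fixed buffer, which strnlen() returning the full buffer size disproves. A standalone model, assuming the uapi constants from linux/netfilter/xt_bpf.h:

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <linux/netfilter/xt_bpf.h> /* XT_BPF_MAX_NUM_INSTR, XT_BPF_PATH_MAX */

static int check_bpf_request(uint16_t num_instr, const char *path)
{
        if (num_instr > XT_BPF_MAX_NUM_INSTR)
                return -EINVAL;         /* oversized filter program */

        /* No terminator within the buffer means an unterminated,
         * untrusted string: reject rather than read past the end. */
        if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
                return -EINVAL;

        return 0;
}
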
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 36e14b1f061d..a34f314a8c23 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21 21
22#include <linux/capability.h>
22#include <linux/if.h> 23#include <linux/if.h>
23#include <linux/inetdevice.h> 24#include <linux/inetdevice.h>
24#include <linux/ip.h> 25#include <linux/ip.h>
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
70 struct xt_osf_finger *kf = NULL, *sf; 71 struct xt_osf_finger *kf = NULL, *sf;
71 int err = 0; 72 int err = 0;
72 73
74 if (!capable(CAP_NET_ADMIN))
75 return -EPERM;
76
73 if (!osf_attrs[OSF_ATTR_FINGER]) 77 if (!osf_attrs[OSF_ATTR_FINGER])
74 return -EINVAL; 78 return -EINVAL;
75 79
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
115 struct xt_osf_finger *sf; 119 struct xt_osf_finger *sf;
116 int err = -ENOENT; 120 int err = -ENOENT;
117 121
122 if (!capable(CAP_NET_ADMIN))
123 return -EPERM;
124
118 if (!osf_attrs[OSF_ATTR_FINGER]) 125 if (!osf_attrs[OSF_ATTR_FINGER])
119 return -EINVAL; 126 return -EINVAL;
120 127
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b9e0ee4e22f5..79cc1bf36e4a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
253 struct sock *sk = skb->sk; 253 struct sock *sk = skb->sk;
254 int ret = -ENOMEM; 254 int ret = -ENOMEM;
255 255
256 if (!net_eq(dev_net(dev), sock_net(sk)))
257 return 0;
258
256 dev_hold(dev); 259 dev_hold(dev);
257 260
258 if (is_vmalloc_addr(skb->head)) 261 if (is_vmalloc_addr(skb->head))
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 99cfafc2a139..ef38e5aecd28 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -308,7 +308,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
308 const struct dp_upcall_info *upcall_info, 308 const struct dp_upcall_info *upcall_info,
309 uint32_t cutlen) 309 uint32_t cutlen)
310{ 310{
311 unsigned short gso_type = skb_shinfo(skb)->gso_type; 311 unsigned int gso_type = skb_shinfo(skb)->gso_type;
312 struct sw_flow_key later_key; 312 struct sw_flow_key later_key;
313 struct sk_buff *segs, *nskb; 313 struct sk_buff *segs, *nskb;
314 int err; 314 int err;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index dc424798ba6f..624ea74353dd 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2241,14 +2241,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2241 2241
2242#define MAX_ACTIONS_BUFSIZE (32 * 1024) 2242#define MAX_ACTIONS_BUFSIZE (32 * 1024)
2243 2243
2244static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) 2244static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2245{ 2245{
2246 struct sw_flow_actions *sfa; 2246 struct sw_flow_actions *sfa;
2247 2247
2248 if (size > MAX_ACTIONS_BUFSIZE) { 2248 WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2249 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
2250 return ERR_PTR(-EINVAL);
2251 }
2252 2249
2253 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 2250 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2254 if (!sfa) 2251 if (!sfa)
@@ -2321,12 +2318,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2321 new_acts_size = ksize(*sfa) * 2; 2318 new_acts_size = ksize(*sfa) * 2;
2322 2319
2323 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 2320 if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2324 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 2321 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
2322 OVS_NLERR(log, "Flow action size exceeds max %u",
2323 MAX_ACTIONS_BUFSIZE);
2325 return ERR_PTR(-EMSGSIZE); 2324 return ERR_PTR(-EMSGSIZE);
2325 }
2326 new_acts_size = MAX_ACTIONS_BUFSIZE; 2326 new_acts_size = MAX_ACTIONS_BUFSIZE;
2327 } 2327 }
2328 2328
2329 acts = nla_alloc_flow_actions(new_acts_size, log); 2329 acts = nla_alloc_flow_actions(new_acts_size);
2330 if (IS_ERR(acts)) 2330 if (IS_ERR(acts))
2331 return (void *)acts; 2331 return (void *)acts;
2332 2332
@@ -3059,7 +3059,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3059{ 3059{
3060 int err; 3060 int err;
3061 3061
3062 *sfa = nla_alloc_flow_actions(nla_len(attr), log); 3062 *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3063 if (IS_ERR(*sfa)) 3063 if (IS_ERR(*sfa))
3064 return PTR_ERR(*sfa); 3064 return PTR_ERR(*sfa);
3065 3065
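
The flow_netlink rework moves size policing to the edges: the one caller that takes a length from userspace clamps it with min(), the doubling-growth path reports -EMSGSIZE (now with a log message) when even the capped buffer cannot fit, and the allocator demotes its own check to WARN_ON_ONCE, an invariant rather than a user-reachable error. A standalone model of the growth policy (returns 0 for the -EMSGSIZE case; assumes next_offset never exceeds the cap, as in the kernel code):

#include <stddef.h>

#define MAX_ACTIONS_BUFSIZE (32 * 1024)

static size_t grow_actions_size(size_t cur_size, size_t next_offset,
                                size_t req_size)
{
        size_t new_size = cur_size * 2;         /* amortized doubling */

        if (new_size > MAX_ACTIONS_BUFSIZE) {
                if (MAX_ACTIONS_BUFSIZE - next_offset < req_size)
                        return 0;               /* even the cap can't fit */
                new_size = MAX_ACTIONS_BUFSIZE; /* clamp the doubling */
        }
        return new_size;
}
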
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 737092ca9b4e..da215e5c1399 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1687,7 +1687,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1687 atomic_long_set(&rollover->num, 0); 1687 atomic_long_set(&rollover->num, 0);
1688 atomic_long_set(&rollover->num_huge, 0); 1688 atomic_long_set(&rollover->num_huge, 0);
1689 atomic_long_set(&rollover->num_failed, 0); 1689 atomic_long_set(&rollover->num_failed, 0);
1690 po->rollover = rollover;
1691 } 1690 }
1692 1691
1693 if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { 1692 if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
@@ -1745,6 +1744,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1745 if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { 1744 if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1746 __dev_remove_pack(&po->prot_hook); 1745 __dev_remove_pack(&po->prot_hook);
1747 po->fanout = match; 1746 po->fanout = match;
1747 po->rollover = rollover;
1748 rollover = NULL;
1748 refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); 1749 refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
1749 __fanout_link(sk, po); 1750 __fanout_link(sk, po);
1750 err = 0; 1751 err = 0;
@@ -1758,10 +1759,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1758 } 1759 }
1759 1760
1760out: 1761out:
1761 if (err && rollover) { 1762 kfree(rollover);
1762 kfree_rcu(rollover, rcu);
1763 po->rollover = NULL;
1764 }
1765 mutex_unlock(&fanout_mutex); 1763 mutex_unlock(&fanout_mutex);
1766 return err; 1764 return err;
1767} 1765}
@@ -1785,11 +1783,6 @@ static struct packet_fanout *fanout_release(struct sock *sk)
1785 list_del(&f->list); 1783 list_del(&f->list);
1786 else 1784 else
1787 f = NULL; 1785 f = NULL;
1788
1789 if (po->rollover) {
1790 kfree_rcu(po->rollover, rcu);
1791 po->rollover = NULL;
1792 }
1793 } 1786 }
1794 mutex_unlock(&fanout_mutex); 1787 mutex_unlock(&fanout_mutex);
1795 1788
@@ -3029,6 +3022,7 @@ static int packet_release(struct socket *sock)
3029 synchronize_net(); 3022 synchronize_net();
3030 3023
3031 if (f) { 3024 if (f) {
3025 kfree(po->rollover);
3032 fanout_release_data(f); 3026 fanout_release_data(f);
3033 kfree(f); 3027 kfree(f);
3034 } 3028 }
@@ -3097,6 +3091,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3097 if (need_rehook) { 3091 if (need_rehook) {
3098 if (po->running) { 3092 if (po->running) {
3099 rcu_read_unlock(); 3093 rcu_read_unlock();
3094 /* prevents packet_notifier() from calling
3095 * register_prot_hook()
3096 */
3097 po->num = 0;
3100 __unregister_prot_hook(sk, true); 3098 __unregister_prot_hook(sk, true);
3101 rcu_read_lock(); 3099 rcu_read_lock();
3102 dev_curr = po->prot_hook.dev; 3100 dev_curr = po->prot_hook.dev;
@@ -3105,6 +3103,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3105 dev->ifindex); 3103 dev->ifindex);
3106 } 3104 }
3107 3105
3106 BUG_ON(po->running);
3108 po->num = proto; 3107 po->num = proto;
3109 po->prot_hook.type = proto; 3108 po->prot_hook.type = proto;
3110 3109
@@ -3843,7 +3842,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3843 void *data = &val; 3842 void *data = &val;
3844 union tpacket_stats_u st; 3843 union tpacket_stats_u st;
3845 struct tpacket_rollover_stats rstats; 3844 struct tpacket_rollover_stats rstats;
3846 struct packet_rollover *rollover;
3847 3845
3848 if (level != SOL_PACKET) 3846 if (level != SOL_PACKET)
3849 return -ENOPROTOOPT; 3847 return -ENOPROTOOPT;
@@ -3922,18 +3920,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3922 0); 3920 0);
3923 break; 3921 break;
3924 case PACKET_ROLLOVER_STATS: 3922 case PACKET_ROLLOVER_STATS:
3925 rcu_read_lock(); 3923 if (!po->rollover)
3926 rollover = rcu_dereference(po->rollover);
3927 if (rollover) {
3928 rstats.tp_all = atomic_long_read(&rollover->num);
3929 rstats.tp_huge = atomic_long_read(&rollover->num_huge);
3930 rstats.tp_failed = atomic_long_read(&rollover->num_failed);
3931 data = &rstats;
3932 lv = sizeof(rstats);
3933 }
3934 rcu_read_unlock();
3935 if (!rollover)
3936 return -EINVAL; 3924 return -EINVAL;
3925 rstats.tp_all = atomic_long_read(&po->rollover->num);
3926 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
3927 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
3928 data = &rstats;
3929 lv = sizeof(rstats);
3937 break; 3930 break;
3938 case PACKET_TX_HAS_OFF: 3931 case PACKET_TX_HAS_OFF:
3939 val = po->tp_tx_has_off; 3932 val = po->tp_tx_has_off;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 562fbc155006..a1d2b2319ae9 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -95,7 +95,6 @@ struct packet_fanout {
95 95
96struct packet_rollover { 96struct packet_rollover {
97 int sock; 97 int sock;
98 struct rcu_head rcu;
99 atomic_long_t num; 98 atomic_long_t num;
100 atomic_long_t num_huge; 99 atomic_long_t num_huge;
101 atomic_long_t num_failed; 100 atomic_long_t num_failed;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8886f15abe90..bc2f1e0977d6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
183 long i; 183 long i;
184 int ret; 184 int ret;
185 185
186 if (rs->rs_bound_addr == 0) { 186 if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
187 ret = -ENOTCONN; /* XXX not a great errno */ 187 ret = -ENOTCONN; /* XXX not a great errno */
188 goto out; 188 goto out;
189 } 189 }
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 9b5c46b052fd..dcd818fa837e 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -285,6 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
285 bool upgrade) 285 bool upgrade)
286{ 286{
287 struct rxrpc_conn_parameters cp; 287 struct rxrpc_conn_parameters cp;
288 struct rxrpc_call_params p;
288 struct rxrpc_call *call; 289 struct rxrpc_call *call;
289 struct rxrpc_sock *rx = rxrpc_sk(sock->sk); 290 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
290 int ret; 291 int ret;
@@ -302,6 +303,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
302 if (key && !key->payload.data[0]) 303 if (key && !key->payload.data[0])
303 key = NULL; /* a no-security key */ 304 key = NULL; /* a no-security key */
304 305
306 memset(&p, 0, sizeof(p));
307 p.user_call_ID = user_call_ID;
308 p.tx_total_len = tx_total_len;
309
305 memset(&cp, 0, sizeof(cp)); 310 memset(&cp, 0, sizeof(cp));
306 cp.local = rx->local; 311 cp.local = rx->local;
307 cp.key = key; 312 cp.key = key;
@@ -309,8 +314,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
309 cp.exclusive = false; 314 cp.exclusive = false;
310 cp.upgrade = upgrade; 315 cp.upgrade = upgrade;
311 cp.service_id = srx->srx_service; 316 cp.service_id = srx->srx_service;
312 call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, 317 call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp);
313 gfp);
314 /* The socket has been unlocked. */ 318 /* The socket has been unlocked. */
315 if (!IS_ERR(call)) { 319 if (!IS_ERR(call)) {
316 call->notify_rx = notify_rx; 320 call->notify_rx = notify_rx;
@@ -856,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
856static int rxrpc_release_sock(struct sock *sk) 860static int rxrpc_release_sock(struct sock *sk)
857{ 861{
858 struct rxrpc_sock *rx = rxrpc_sk(sk); 862 struct rxrpc_sock *rx = rxrpc_sk(sk);
863 struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
859 864
860 _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); 865 _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
861 866
@@ -863,6 +868,19 @@ static int rxrpc_release_sock(struct sock *sk)
863 sock_orphan(sk); 868 sock_orphan(sk);
864 sk->sk_shutdown = SHUTDOWN_MASK; 869 sk->sk_shutdown = SHUTDOWN_MASK;
865 870
871 /* We want to kill off all connections from a service socket
872 * as fast as possible because we can't share these; client
873 * sockets, on the other hand, can share an endpoint.
874 */
875 switch (sk->sk_state) {
876 case RXRPC_SERVER_BOUND:
877 case RXRPC_SERVER_BOUND2:
878 case RXRPC_SERVER_LISTENING:
879 case RXRPC_SERVER_LISTEN_DISABLED:
880 rx->local->service_closed = true;
881 break;
882 }
883
866 spin_lock_bh(&sk->sk_receive_queue.lock); 884 spin_lock_bh(&sk->sk_receive_queue.lock);
867 sk->sk_state = RXRPC_CLOSE; 885 sk->sk_state = RXRPC_CLOSE;
868 spin_unlock_bh(&sk->sk_receive_queue.lock); 886 spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -878,6 +896,8 @@ static int rxrpc_release_sock(struct sock *sk)
878 rxrpc_release_calls_on_socket(rx); 896 rxrpc_release_calls_on_socket(rx);
879 flush_workqueue(rxrpc_workqueue); 897 flush_workqueue(rxrpc_workqueue);
880 rxrpc_purge_queue(&sk->sk_receive_queue); 898 rxrpc_purge_queue(&sk->sk_receive_queue);
899 rxrpc_queue_work(&rxnet->service_conn_reaper);
900 rxrpc_queue_work(&rxnet->client_conn_reaper);
881 901
882 rxrpc_put_local(rx->local); 902 rxrpc_put_local(rx->local);
883 rx->local = NULL; 903 rx->local = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index b2151993d384..416688381eb7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -79,17 +79,20 @@ struct rxrpc_net {
 	struct list_head conn_proc_list;	/* List of conns in this namespace for proc */
 	struct list_head service_conns;	/* Service conns in this namespace */
 	rwlock_t conn_lock;	/* Lock for ->conn_proc_list, ->service_conns */
-	struct delayed_work service_conn_reaper;
+	struct work_struct service_conn_reaper;
+	struct timer_list service_conn_reap_timer;
 
 	unsigned int nr_client_conns;
 	unsigned int nr_active_client_conns;
 	bool kill_all_client_conns;
+	bool live;
 	spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
 	spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */
 	struct list_head waiting_client_conns;
 	struct list_head active_client_conns;
 	struct list_head idle_client_conns;
-	struct delayed_work client_conn_reaper;
+	struct work_struct client_conn_reaper;
+	struct timer_list client_conn_reap_timer;
 
 	struct list_head local_endpoints;
 	struct mutex local_mutex;	/* Lock for ->local_endpoints */
@@ -265,6 +268,7 @@ struct rxrpc_local {
 	rwlock_t services_lock;	/* lock for services list */
 	int debug_id;	/* debug ID for printks */
 	bool dead;
+	bool service_closed;	/* Service socket closed */
 	struct sockaddr_rxrpc srx;	/* local address */
 };
 
@@ -338,8 +342,17 @@ enum rxrpc_conn_flag {
 	RXRPC_CONN_DONT_REUSE,	/* Don't reuse this connection */
 	RXRPC_CONN_COUNTED,	/* Counted by rxrpc_nr_client_conns */
 	RXRPC_CONN_PROBING_FOR_UPGRADE,	/* Probing for service upgrade */
+	RXRPC_CONN_FINAL_ACK_0,	/* Need final ACK for channel 0 */
+	RXRPC_CONN_FINAL_ACK_1,	/* Need final ACK for channel 1 */
+	RXRPC_CONN_FINAL_ACK_2,	/* Need final ACK for channel 2 */
+	RXRPC_CONN_FINAL_ACK_3,	/* Need final ACK for channel 3 */
 };
 
+#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) |	\
+				   (1UL << RXRPC_CONN_FINAL_ACK_1) |	\
+				   (1UL << RXRPC_CONN_FINAL_ACK_2) |	\
+				   (1UL << RXRPC_CONN_FINAL_ACK_3))
+
 /*
  * Events that can be raised upon a connection.
  */
@@ -393,6 +406,7 @@ struct rxrpc_connection {
 #define RXRPC_ACTIVE_CHANS_MASK	((1 << RXRPC_MAXCALLS) - 1)
 	struct list_head waiting_calls;	/* Calls waiting for channels */
 	struct rxrpc_channel {
+		unsigned long final_ack_at;	/* Time at which to issue final ACK */
 		struct rxrpc_call __rcu *call;	/* Active call */
 		u32 call_id;	/* ID of current call */
 		u32 call_counter;	/* Call ID counter */
@@ -404,6 +418,7 @@ struct rxrpc_connection {
 	};
 	} channels[RXRPC_MAXCALLS];
 
+	struct timer_list timer;	/* Conn event timer */
 	struct work_struct processor;	/* connection event processor */
 	union {
 		struct rb_node client_node;	/* Node in local->client_conns */
@@ -457,9 +472,10 @@ enum rxrpc_call_flag {
 enum rxrpc_call_event {
 	RXRPC_CALL_EV_ACK,	/* need to generate ACK */
 	RXRPC_CALL_EV_ABORT,	/* need to generate abort */
-	RXRPC_CALL_EV_TIMER,	/* Timer expired */
 	RXRPC_CALL_EV_RESEND,	/* Tx resend required */
 	RXRPC_CALL_EV_PING,	/* Ping send required */
+	RXRPC_CALL_EV_EXPIRED,	/* Expiry occurred */
+	RXRPC_CALL_EV_ACK_LOST,	/* ACK may be lost, send ping */
 };
 
 /*
@@ -503,10 +519,16 @@ struct rxrpc_call {
 	struct rxrpc_peer *peer;	/* Peer record for remote address */
 	struct rxrpc_sock __rcu *socket;	/* socket responsible */
 	struct mutex user_mutex;	/* User access mutex */
-	ktime_t ack_at;	/* When deferred ACK needs to happen */
-	ktime_t resend_at;	/* When next resend needs to happen */
-	ktime_t ping_at;	/* When next to send a ping */
-	ktime_t expire_at;	/* When the call times out */
+	unsigned long ack_at;	/* When deferred ACK needs to happen */
+	unsigned long ack_lost_at;	/* When ACK is figured as lost */
+	unsigned long resend_at;	/* When next resend needs to happen */
+	unsigned long ping_at;	/* When next to send a ping */
+	unsigned long keepalive_at;	/* When next to send a keepalive ping */
+	unsigned long expect_rx_by;	/* When we expect to get a packet by */
+	unsigned long expect_req_by;	/* When we expect to get a request DATA packet by */
+	unsigned long expect_term_by;	/* When we expect call termination by */
+	u32 next_rx_timo;	/* Timeout for next Rx packet (jif) */
+	u32 next_req_timo;	/* Timeout for next Rx request packet (jif) */
 	struct timer_list timer;	/* Combined event timer */
 	struct work_struct processor;	/* Event processor */
 	rxrpc_notify_rx_t notify_rx;	/* kernel service Rx notification function */
@@ -609,6 +631,8 @@ struct rxrpc_call {
 	ktime_t acks_latest_ts;	/* Timestamp of latest ACK received */
 	rxrpc_serial_t acks_latest;	/* serial number of latest ACK received */
 	rxrpc_seq_t acks_lowest_nak;	/* Lowest NACK in the buffer (or ==tx_hard_ack) */
+	rxrpc_seq_t acks_lost_top;	/* tx_top at the time lost-ack ping sent */
+	rxrpc_serial_t acks_lost_ping;	/* Serial number of probe ACK */
 };
 
 /*
@@ -632,6 +656,35 @@ struct rxrpc_ack_summary {
 	u8 cumulative_acks;
 };
 
+/*
+ * sendmsg() cmsg-specified parameters.
+ */
+enum rxrpc_command {
+	RXRPC_CMD_SEND_DATA,	/* send data message */
+	RXRPC_CMD_SEND_ABORT,	/* request abort generation */
+	RXRPC_CMD_ACCEPT,	/* [server] accept incoming call */
+	RXRPC_CMD_REJECT_BUSY,	/* [server] reject a call as busy */
+};
+
+struct rxrpc_call_params {
+	s64 tx_total_len;	/* Total Tx data length (if send data) */
+	unsigned long user_call_ID;	/* User's call ID */
+	struct {
+		u32 hard;	/* Maximum lifetime (sec) */
+		u32 idle;	/* Max time since last data packet (msec) */
+		u32 normal;	/* Max time since last call packet (msec) */
+	} timeouts;
+	u8 nr_timeouts;	/* Number of timeouts specified */
+};
+
+struct rxrpc_send_params {
+	struct rxrpc_call_params call;
+	u32 abort_code;	/* Abort code to Tx (if abort) */
+	enum rxrpc_command command : 8;	/* The command to implement */
+	bool exclusive;	/* Shared or exclusive call */
+	bool upgrade;	/* If the connection is upgradeable */
+};
+
 #include <trace/events/rxrpc.h>
 
 /*
@@ -657,12 +710,19 @@ int rxrpc_reject_call(struct rxrpc_sock *);
 /*
  * call_event.c
  */
-void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
-void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
 void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
 		       enum rxrpc_propose_ack_trace);
 void rxrpc_process_call(struct work_struct *);
 
+static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+					   unsigned long expire_at,
+					   unsigned long now,
+					   enum rxrpc_timer_trace why)
+{
+	trace_rxrpc_timer(call, why, now);
+	timer_reduce(&call->timer, expire_at);
+}
+
 /*
  * call_object.c
  */
@@ -672,11 +732,11 @@ extern unsigned int rxrpc_max_call_lifetime;
 extern struct kmem_cache *rxrpc_call_jar;
 
 struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
-struct rxrpc_call *rxrpc_alloc_call(gfp_t);
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t);
 struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
 					 struct rxrpc_conn_parameters *,
 					 struct sockaddr_rxrpc *,
-					 unsigned long, s64, gfp_t);
+					 struct rxrpc_call_params *, gfp_t);
 int rxrpc_retry_client_call(struct rxrpc_sock *,
 			    struct rxrpc_call *,
 			    struct rxrpc_conn_parameters *,
@@ -803,8 +863,8 @@ static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
  */
 extern unsigned int rxrpc_max_client_connections;
 extern unsigned int rxrpc_reap_client_connections;
-extern unsigned int rxrpc_conn_idle_client_expiry;
-extern unsigned int rxrpc_conn_idle_client_fast_expiry;
+extern unsigned long rxrpc_conn_idle_client_expiry;
+extern unsigned long rxrpc_conn_idle_client_fast_expiry;
 extern struct idr rxrpc_client_conn_ids;
 
 void rxrpc_destroy_client_conn_ids(void);
@@ -825,6 +885,7 @@ void rxrpc_process_connection(struct work_struct *);
  * conn_object.c
  */
 extern unsigned int rxrpc_connection_expiry;
+extern unsigned int rxrpc_closed_conn_expiry;
 
 struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
 struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
@@ -861,6 +922,12 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
 		rxrpc_put_service_conn(conn);
 }
 
+static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
+					   unsigned long expire_at)
+{
+	timer_reduce(&conn->timer, expire_at);
+}
+
 /*
  * conn_service.c
  */
@@ -930,13 +997,13 @@ static inline void rxrpc_queue_local(struct rxrpc_local *local)
  * misc.c
  */
 extern unsigned int rxrpc_max_backlog __read_mostly;
-extern unsigned int rxrpc_requested_ack_delay;
-extern unsigned int rxrpc_soft_ack_delay;
-extern unsigned int rxrpc_idle_ack_delay;
+extern unsigned long rxrpc_requested_ack_delay;
+extern unsigned long rxrpc_soft_ack_delay;
+extern unsigned long rxrpc_idle_ack_delay;
 extern unsigned int rxrpc_rx_window_size;
 extern unsigned int rxrpc_rx_mtu;
 extern unsigned int rxrpc_rx_jumbo_max;
-extern unsigned int rxrpc_resend_timeout;
+extern unsigned long rxrpc_resend_timeout;
 
 extern const s8 rxrpc_ack_priority[];
 
@@ -954,7 +1021,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
 /*
  * output.c
  */
-int rxrpc_send_ack_packet(struct rxrpc_call *, bool);
+int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *);
 int rxrpc_send_abort_packet(struct rxrpc_call *);
 int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
 void rxrpc_reject_packets(struct rxrpc_local *);
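
Aside (illustration only, not part of the patch): the per-channel FINAL_ACK flag bits and the mask defined in this header compose in a simple way -- one bit per channel, starting at RXRPC_CONN_FINAL_ACK_0, with the mask covering all four. A minimal user-space sketch; the bit positions below are arbitrary placeholders, not the kernel's actual values.

    #include <stdio.h>

    enum conn_flag {
        CONN_FINAL_ACK_0 = 3,   /* bit numbers are hypothetical here */
        CONN_FINAL_ACK_1,
        CONN_FINAL_ACK_2,
        CONN_FINAL_ACK_3,
    };

    #define CONN_FINAL_ACK_MASK ((1UL << CONN_FINAL_ACK_0) | \
                                 (1UL << CONN_FINAL_ACK_1) | \
                                 (1UL << CONN_FINAL_ACK_2) | \
                                 (1UL << CONN_FINAL_ACK_3))

    int main(void)
    {
        unsigned long flags = 0;

        /* Channel 2 owes the peer a final ACK. */
        flags |= 1UL << (CONN_FINAL_ACK_0 + 2);

        /* The event processor tests all four channels in one go. */
        if (flags & CONN_FINAL_ACK_MASK)
            printf("at least one final ACK pending (flags=%#lx)\n", flags);
        return 0;
    }
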
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index cbd1701e813a..3028298ca561 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
 	/* Now it gets complicated, because calls get registered with the
 	 * socket here, particularly if a user ID is preassigned by the user.
 	 */
-	call = rxrpc_alloc_call(gfp);
+	call = rxrpc_alloc_call(rx, gfp);
 	if (!call)
 		return -ENOMEM;
 	call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 3574508baf9a..ad2ab1103189 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -22,80 +22,6 @@
 #include "ar-internal.h"
 
 /*
- * Set the timer
- */
-void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
-		       ktime_t now)
-{
-	unsigned long t_j, now_j = jiffies;
-	ktime_t t;
-	bool queue = false;
-
-	if (call->state < RXRPC_CALL_COMPLETE) {
-		t = call->expire_at;
-		if (!ktime_after(t, now)) {
-			trace_rxrpc_timer(call, why, now, now_j);
-			queue = true;
-			goto out;
-		}
-
-		if (!ktime_after(call->resend_at, now)) {
-			call->resend_at = call->expire_at;
-			if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
-				queue = true;
-		} else if (ktime_before(call->resend_at, t)) {
-			t = call->resend_at;
-		}
-
-		if (!ktime_after(call->ack_at, now)) {
-			call->ack_at = call->expire_at;
-			if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
-				queue = true;
-		} else if (ktime_before(call->ack_at, t)) {
-			t = call->ack_at;
-		}
-
-		if (!ktime_after(call->ping_at, now)) {
-			call->ping_at = call->expire_at;
-			if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
-				queue = true;
-		} else if (ktime_before(call->ping_at, t)) {
-			t = call->ping_at;
-		}
-
-		t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now)));
-		t_j += jiffies;
-
-		/* We have to make sure that the calculated jiffies value falls
-		 * at or after the nsec value, or we may loop ceaselessly
-		 * because the timer times out, but we haven't reached the nsec
-		 * timeout yet.
-		 */
-		t_j++;
-
-		if (call->timer.expires != t_j || !timer_pending(&call->timer)) {
-			mod_timer(&call->timer, t_j);
-			trace_rxrpc_timer(call, why, now, now_j);
-		}
-	}
-
-out:
-	if (queue)
-		rxrpc_queue_call(call);
-}
-
-/*
- * Set the timer
- */
-void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
-		     ktime_t now)
-{
-	read_lock_bh(&call->state_lock);
-	__rxrpc_set_timer(call, why, now);
-	read_unlock_bh(&call->state_lock);
-}
-
-/*
  * Propose a PING ACK be sent.
  */
 static void rxrpc_propose_ping(struct rxrpc_call *call,
@@ -106,12 +32,13 @@ static void rxrpc_propose_ping(struct rxrpc_call *call,
 	    !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
 		rxrpc_queue_call(call);
 	} else {
-		ktime_t now = ktime_get_real();
-		ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay);
+		unsigned long now = jiffies;
+		unsigned long ping_at = now + rxrpc_idle_ack_delay;
 
-		if (ktime_before(ping_at, call->ping_at)) {
-			call->ping_at = ping_at;
-			rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now);
+		if (time_before(ping_at, call->ping_at)) {
+			WRITE_ONCE(call->ping_at, ping_at);
+			rxrpc_reduce_call_timer(call, ping_at, now,
+						rxrpc_timer_set_for_ping);
 		}
 	}
 }
@@ -125,8 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
 			       enum rxrpc_propose_ack_trace why)
 {
 	enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
-	unsigned int expiry = rxrpc_soft_ack_delay;
-	ktime_t now, ack_at;
+	unsigned long expiry = rxrpc_soft_ack_delay;
 	s8 prior = rxrpc_ack_priority[ack_reason];
 
 	/* Pings are handled specially because we don't want to accidentally
@@ -190,11 +116,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
 	    background)
 		rxrpc_queue_call(call);
 	} else {
-		now = ktime_get_real();
-		ack_at = ktime_add_ms(now, expiry);
-		if (ktime_before(ack_at, call->ack_at)) {
-			call->ack_at = ack_at;
-			rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now);
+		unsigned long now = jiffies, ack_at;
+
+		if (call->peer->rtt_usage > 0)
+			ack_at = nsecs_to_jiffies(call->peer->rtt);
+		else
+			ack_at = expiry;
+
+		ack_at += now;
+		if (time_before(ack_at, call->ack_at)) {
+			WRITE_ONCE(call->ack_at, ack_at);
+			rxrpc_reduce_call_timer(call, ack_at, now,
+						rxrpc_timer_set_for_ack);
 		}
 	}
 
@@ -227,18 +160,28 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
 /*
  * Perform retransmission of NAK'd and unack'd packets.
  */
-static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
+static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
 {
 	struct rxrpc_skb_priv *sp;
 	struct sk_buff *skb;
+	unsigned long resend_at;
 	rxrpc_seq_t cursor, seq, top;
-	ktime_t max_age, oldest, ack_ts;
+	ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo;
 	int ix;
 	u8 annotation, anno_type, retrans = 0, unacked = 0;
 
 	_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
 
-	max_age = ktime_sub_ms(now, rxrpc_resend_timeout);
+	if (call->peer->rtt_usage > 1)
+		timeout = ns_to_ktime(call->peer->rtt * 3 / 2);
+	else
+		timeout = ms_to_ktime(rxrpc_resend_timeout);
+	min_timeo = ns_to_ktime((1000000000 / HZ) * 4);
+	if (ktime_before(timeout, min_timeo))
+		timeout = min_timeo;
+
+	now = ktime_get_real();
+	max_age = ktime_sub(now, timeout);
 
 	spin_lock_bh(&call->lock);
 
@@ -282,7 +225,9 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
 			       ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
 	}
 
-	call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
+	resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now)));
+	resend_at += jiffies + rxrpc_resend_timeout;
+	WRITE_ONCE(call->resend_at, resend_at);
 
 	if (unacked)
 		rxrpc_congestion_timeout(call);
@@ -292,14 +237,15 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
 	 * retransmitting data.
 	 */
 	if (!retrans) {
-		rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
+		rxrpc_reduce_call_timer(call, resend_at, now,
+					rxrpc_timer_set_for_resend);
 		spin_unlock_bh(&call->lock);
 		ack_ts = ktime_sub(now, call->acks_latest_ts);
 		if (ktime_to_ns(ack_ts) < call->peer->rtt)
 			goto out;
 		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
 				  rxrpc_propose_ack_ping_for_lost_ack);
-		rxrpc_send_ack_packet(call, true);
+		rxrpc_send_ack_packet(call, true, NULL);
 		goto out;
 	}
 
@@ -364,7 +310,8 @@ void rxrpc_process_call(struct work_struct *work)
 {
 	struct rxrpc_call *call =
 		container_of(work, struct rxrpc_call, processor);
-	ktime_t now;
+	rxrpc_serial_t *send_ack;
+	unsigned long now, next, t;
 
 	rxrpc_see_call(call);
 
@@ -384,22 +331,89 @@ recheck_state:
 		goto out_put;
 	}
 
-	now = ktime_get_real();
-	if (ktime_before(call->expire_at, now)) {
+	/* Work out if any timeouts tripped */
+	now = jiffies;
+	t = READ_ONCE(call->expect_rx_by);
+	if (time_after_eq(now, t)) {
+		trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
+		set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+	}
+
+	t = READ_ONCE(call->expect_req_by);
+	if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
+	    time_after_eq(now, t)) {
+		trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
+		set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+	}
+
+	t = READ_ONCE(call->expect_term_by);
+	if (time_after_eq(now, t)) {
+		trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
+		set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+	}
+
+	t = READ_ONCE(call->ack_at);
+	if (time_after_eq(now, t)) {
+		trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
+		cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET);
+		set_bit(RXRPC_CALL_EV_ACK, &call->events);
+	}
+
+	t = READ_ONCE(call->ack_lost_at);
+	if (time_after_eq(now, t)) {
+		trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now);
+		cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET);
+		set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
+	}
+
+	t = READ_ONCE(call->keepalive_at);
+	if (time_after_eq(now, t)) {
+		trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
+		cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true,
+				  rxrpc_propose_ack_ping_for_keepalive);
+		set_bit(RXRPC_CALL_EV_PING, &call->events);
+	}
+
+	t = READ_ONCE(call->ping_at);
+	if (time_after_eq(now, t)) {
+		trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
+		cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
+		set_bit(RXRPC_CALL_EV_PING, &call->events);
+	}
+
+	t = READ_ONCE(call->resend_at);
+	if (time_after_eq(now, t)) {
+		trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
+		cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
+		set_bit(RXRPC_CALL_EV_RESEND, &call->events);
+	}
+
+	/* Process events */
+	if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) {
 		rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
 		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
 		goto recheck_state;
 	}
 
-	if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) {
+	send_ack = NULL;
+	if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
+		call->acks_lost_top = call->tx_top;
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+				  rxrpc_propose_ack_ping_for_lost_ack);
+		send_ack = &call->acks_lost_ping;
+	}
+
+	if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
+	    send_ack) {
 		if (call->ackr_reason) {
-			rxrpc_send_ack_packet(call, false);
+			rxrpc_send_ack_packet(call, false, send_ack);
 			goto recheck_state;
 		}
 	}
 
 	if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) {
-		rxrpc_send_ack_packet(call, true);
+		rxrpc_send_ack_packet(call, true, NULL);
 		goto recheck_state;
 	}
 
@@ -408,7 +422,24 @@ recheck_state:
 		goto recheck_state;
 	}
 
-	rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
+	/* Make sure the timer is restarted */
+	next = call->expect_rx_by;
+
+#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
+
+	set(call->expect_req_by);
+	set(call->expect_term_by);
+	set(call->ack_at);
+	set(call->ack_lost_at);
+	set(call->resend_at);
+	set(call->keepalive_at);
+	set(call->ping_at);
+
+	now = jiffies;
+	if (time_after_eq(now, next))
+		goto recheck_state;
+
+	rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
 
 	/* other events may have been raised since we started checking */
 	if (call->events && call->state < RXRPC_CALL_COMPLETE) {
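
Aside (illustration only, not part of the patch): the deadline pattern used throughout rxrpc_process_call() above -- test a jiffies timestamp with a wrap-safe comparison, atomically push it to the far future so it fires exactly once, then re-arm the timer for the earliest survivor -- can be modeled in user-space C. MAX_OFFSET and the function names below are hypothetical stand-ins for MAX_JIFFY_OFFSET and the kernel helpers.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_OFFSET (~0UL >> 1)

    /* Wrap-safe: true if a is at or after b, even across overflow. */
    static bool time_after_eq(unsigned long a, unsigned long b)
    {
        return (long)(a - b) >= 0;
    }

    /* Fire a deadline at most once: push it to now + MAX_OFFSET on expiry. */
    static bool expire_once(_Atomic unsigned long *deadline, unsigned long now)
    {
        unsigned long t = atomic_load(deadline);

        if (!time_after_eq(now, t))
            return false;
        /* Compare-and-swap so a concurrent re-arm isn't clobbered. */
        atomic_compare_exchange_strong(deadline, &t, now + MAX_OFFSET);
        return true;
    }

    int main(void)
    {
        _Atomic unsigned long ack_at = 100, resend_at = 500;
        unsigned long now = 200, next;

        if (expire_once(&ack_at, now))
            printf("ACK timeout tripped\n");        /* fires */
        if (expire_once(&resend_at, now))
            printf("resend timeout tripped\n");     /* does not */

        /* Recompute the earliest remaining deadline, like the set() macro. */
        next = atomic_load(&ack_at);
        if ((long)(atomic_load(&resend_at) - next) < 0)
            next = atomic_load(&resend_at);
        printf("re-arm timer for t=%lu\n", next);   /* prints 500 */
        return 0;
    }
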
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 994dc2df57e4..0b2db38dd32d 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -51,10 +51,14 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
 
 	_enter("%d", call->debug_id);
 
-	if (call->state < RXRPC_CALL_COMPLETE)
-		rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real());
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
+		rxrpc_queue_call(call);
+	}
 }
 
+static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
+
 /*
  * find an extant server call
  * - called in process context with IRQs enabled
@@ -95,7 +99,7 @@ found_extant_call:
 /*
  * allocate a new call
  */
-struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
 {
 	struct rxrpc_call *call;
 
@@ -114,6 +118,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 		goto nomem_2;
 
 	mutex_init(&call->user_mutex);
+
+	/* Prevent lockdep reporting a deadlock false positive between the afs
+	 * filesystem and sys_sendmsg() via the mmap sem.
+	 */
+	if (rx->sk.sk_kern_sock)
+		lockdep_set_class(&call->user_mutex,
+				  &rxrpc_call_user_mutex_lock_class_key);
+
 	timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
 	INIT_WORK(&call->processor, &rxrpc_process_call);
 	INIT_LIST_HEAD(&call->link);
@@ -128,6 +140,8 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 	atomic_set(&call->usage, 1);
 	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
 	call->tx_total_len = -1;
+	call->next_rx_timo = 20 * HZ;
+	call->next_req_timo = 1 * HZ;
 
 	memset(&call->sock_node, 0xed, sizeof(call->sock_node));
 
@@ -150,7 +164,8 @@ nomem:
 /*
  * Allocate a new client call.
  */
-static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
+static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
+						  struct sockaddr_rxrpc *srx,
 						  gfp_t gfp)
 {
 	struct rxrpc_call *call;
@@ -158,7 +173,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
 
 	_enter("");
 
-	call = rxrpc_alloc_call(gfp);
+	call = rxrpc_alloc_call(rx, gfp);
 	if (!call)
 		return ERR_PTR(-ENOMEM);
 	call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
@@ -177,15 +192,17 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
  */
 static void rxrpc_start_call_timer(struct rxrpc_call *call)
 {
-	ktime_t now = ktime_get_real(), expire_at;
-
-	expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime);
-	call->expire_at = expire_at;
-	call->ack_at = expire_at;
-	call->ping_at = expire_at;
-	call->resend_at = expire_at;
-	call->timer.expires = jiffies + LONG_MAX / 2;
-	rxrpc_set_timer(call, rxrpc_timer_begin, now);
+	unsigned long now = jiffies;
+	unsigned long j = now + MAX_JIFFY_OFFSET;
+
+	call->ack_at = j;
+	call->ack_lost_at = j;
+	call->resend_at = j;
+	call->ping_at = j;
+	call->expect_rx_by = j;
+	call->expect_req_by = j;
+	call->expect_term_by = j;
+	call->timer.expires = now;
 }
 
 /*
@@ -196,8 +213,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
 struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 					 struct rxrpc_conn_parameters *cp,
 					 struct sockaddr_rxrpc *srx,
-					 unsigned long user_call_ID,
-					 s64 tx_total_len,
+					 struct rxrpc_call_params *p,
 					 gfp_t gfp)
 	__releases(&rx->sk.sk_lock.slock)
 {
@@ -207,18 +223,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 	const void *here = __builtin_return_address(0);
 	int ret;
 
-	_enter("%p,%lx", rx, user_call_ID);
+	_enter("%p,%lx", rx, p->user_call_ID);
 
-	call = rxrpc_alloc_client_call(srx, gfp);
+	call = rxrpc_alloc_client_call(rx, srx, gfp);
 	if (IS_ERR(call)) {
 		release_sock(&rx->sk);
 		_leave(" = %ld", PTR_ERR(call));
 		return call;
 	}
 
-	call->tx_total_len = tx_total_len;
+	call->tx_total_len = p->tx_total_len;
 	trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
-			 here, (const void *)user_call_ID);
+			 here, (const void *)p->user_call_ID);
 
 	/* We need to protect a partially set up call against the user as we
 	 * will be acting outside the socket lock.
@@ -234,16 +250,16 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 		parent = *pp;
 		xcall = rb_entry(parent, struct rxrpc_call, sock_node);
 
-		if (user_call_ID < xcall->user_call_ID)
+		if (p->user_call_ID < xcall->user_call_ID)
 			pp = &(*pp)->rb_left;
-		else if (user_call_ID > xcall->user_call_ID)
+		else if (p->user_call_ID > xcall->user_call_ID)
 			pp = &(*pp)->rb_right;
 		else
 			goto error_dup_user_ID;
 	}
 
 	rcu_assign_pointer(call->socket, rx);
-	call->user_call_ID = user_call_ID;
+	call->user_call_ID = p->user_call_ID;
 	__set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
 	rxrpc_get_call(call, rxrpc_call_got_userid);
 	rb_link_node(&call->sock_node, parent, pp);
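
Aside (illustration only, not part of the patch): the whole call_object.c conversion rests on the contract of timer_reduce() -- a shared timer may be pulled earlier by any path without locking, but never pushed later. A user-space model of just that contract, with the wrap-safe comparison the jiffies macros use; the struct and function names are hypothetical, and this sketch only mimics the kernel primitive's behaviour, not its implementation.

    #include <stdio.h>

    struct simple_timer {
        unsigned long expires;
        int pending;
    };

    static int time_before(unsigned long a, unsigned long b)
    {
        return (long)(a - b) < 0; /* wrap-safe, like the jiffies macros */
    }

    /* Arm the timer only if the new deadline is earlier (or it is idle). */
    static void timer_reduce_model(struct simple_timer *t, unsigned long expiry)
    {
        if (!t->pending || time_before(expiry, t->expires)) {
            t->expires = expiry;
            t->pending = 1;
        }
    }

    int main(void)
    {
        struct simple_timer t = { .expires = 0, .pending = 0 };

        timer_reduce_model(&t, 1000); /* arms at 1000 */
        timer_reduce_model(&t, 2000); /* ignored: later than 1000 */
        timer_reduce_model(&t, 500);  /* pulled forward to 500 */
        printf("timer expires at %lu\n", t.expires); /* prints 500 */
        return 0;
    }
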
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 5f9624bd311c..7f74ca3059f8 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -85,8 +85,8 @@
 
 __read_mostly unsigned int rxrpc_max_client_connections = 1000;
 __read_mostly unsigned int rxrpc_reap_client_connections = 900;
-__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
-__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
+__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
 
 /*
  * We use machine-unique IDs for our client connections.
@@ -554,6 +554,11 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
 
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
 
+	/* Cancel the final ACK on the previous call if it hasn't been sent yet
+	 * as the DATA packet will implicitly ACK it.
+	 */
+	clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
+
 	write_lock_bh(&call->state_lock);
 	if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
 		call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
@@ -686,7 +691,7 @@ int rxrpc_connect_call(struct rxrpc_call *call,
 
 	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
 
-	rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work);
+	rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
 	rxrpc_cull_active_client_conns(rxnet);
 
 	ret = rxrpc_get_client_conn(call, cp, srx, gfp);
@@ -752,6 +757,18 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
 }
 
 /*
+ * Set the reap timer.
+ */
+static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
+{
+	unsigned long now = jiffies;
+	unsigned long reap_at = now + rxrpc_conn_idle_client_expiry;
+
+	if (rxnet->live)
+		timer_reduce(&rxnet->client_conn_reap_timer, reap_at);
+}
+
+/*
  * Disconnect a client call.
  */
 void rxrpc_disconnect_client_call(struct rxrpc_call *call)
@@ -813,6 +830,19 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 		goto out_2;
 	}
 
+	/* Schedule the final ACK to be transmitted in a short while so that it
+	 * can be skipped if we find a follow-on call. The first DATA packet
+	 * of the follow on call will implicitly ACK this call.
+	 */
+	if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+		unsigned long final_ack_at = jiffies + 2;
+
+		WRITE_ONCE(chan->final_ack_at, final_ack_at);
+		smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */
+		set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
+		rxrpc_reduce_conn_timer(conn, final_ack_at);
+	}
+
 	/* Things are more complex and we need the cache lock. We might be
 	 * able to simply idle the conn or it might now be lurking on the wait
 	 * list. It might even get moved back to the active list whilst we're
@@ -878,9 +908,7 @@ idle_connection:
 		list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
 		if (rxnet->idle_client_conns.next == &conn->cache_link &&
 		    !rxnet->kill_all_client_conns)
-			queue_delayed_work(rxrpc_workqueue,
-					   &rxnet->client_conn_reaper,
-					   rxrpc_conn_idle_client_expiry);
+			rxrpc_set_client_reap_timer(rxnet);
 	} else {
 		trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
 		conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
@@ -1018,8 +1046,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work)
 {
 	struct rxrpc_connection *conn;
 	struct rxrpc_net *rxnet =
-		container_of(to_delayed_work(work),
-			     struct rxrpc_net, client_conn_reaper);
+		container_of(work, struct rxrpc_net, client_conn_reaper);
 	unsigned long expiry, conn_expires_at, now;
 	unsigned int nr_conns;
 	bool did_discard = false;
@@ -1061,6 +1088,8 @@ next:
 	expiry = rxrpc_conn_idle_client_expiry;
 	if (nr_conns > rxrpc_reap_client_connections)
 		expiry = rxrpc_conn_idle_client_fast_expiry;
+	if (conn->params.local->service_closed)
+		expiry = rxrpc_closed_conn_expiry * HZ;
 
 	conn_expires_at = conn->idle_timestamp + expiry;
 
@@ -1096,9 +1125,8 @@ not_yet_expired:
  */
 	_debug("not yet");
 	if (!rxnet->kill_all_client_conns)
-		queue_delayed_work(rxrpc_workqueue,
-				   &rxnet->client_conn_reaper,
-				   conn_expires_at - now);
+		timer_reduce(&rxnet->client_conn_reap_timer,
+			     conn_expires_at);
 
 out:
 	spin_unlock(&rxnet->client_conn_cache_lock);
@@ -1118,9 +1146,9 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
 	rxnet->kill_all_client_conns = true;
 	spin_unlock(&rxnet->client_conn_cache_lock);
 
-	cancel_delayed_work(&rxnet->client_conn_reaper);
+	del_timer_sync(&rxnet->client_conn_reap_timer);
 
-	if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0))
+	if (!rxrpc_queue_work(&rxnet->client_conn_reaper))
 		_debug("destroy: queue failed");
 
 	_leave("");
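
Aside (illustration only, not part of the patch): the final-ACK deferral protocol from the hunks above -- a disconnecting call publishes a per-channel deadline, then sets the channel's flag bit; a follow-on call cancels it by clearing the bit before sending DATA -- modeled in user-space C. The release/acquire pair below stands in loosely for the kernel's smp_wmb()/smp_rmb() pairing; all names and the two-tick delay are taken from the patch but the types are hypothetical.

    #include <stdatomic.h>
    #include <stdio.h>

    struct channel {
        unsigned long final_ack_at;          /* deadline, published below */
        _Atomic unsigned int final_ack_flag; /* acts as the conn flag bit */
    };

    static void disconnect_call(struct channel *chan, unsigned long now)
    {
        chan->final_ack_at = now + 2;        /* a couple of ticks from now */
        /* Publish the deadline before the flag becomes visible. */
        atomic_store_explicit(&chan->final_ack_flag, 1, memory_order_release);
    }

    static void activate_followon_call(struct channel *chan)
    {
        /* New DATA implicitly ACKs the old call: cancel the deferred ACK. */
        atomic_store(&chan->final_ack_flag, 0);
    }

    static void conn_timer_fired(struct channel *chan, unsigned long now)
    {
        if (atomic_load_explicit(&chan->final_ack_flag,
                                 memory_order_acquire) &&
            (long)(now - chan->final_ack_at) >= 0 &&
            atomic_exchange(&chan->final_ack_flag, 0))
            printf("send final ACK for channel\n");
    }

    int main(void)
    {
        struct channel chan = { 0 };

        disconnect_call(&chan, 100);
        conn_timer_fired(&chan, 103);        /* deadline passed: ACK sent */

        disconnect_call(&chan, 200);
        activate_followon_call(&chan);       /* channel reused before deadline */
        conn_timer_fired(&chan, 203);        /* nothing to send */
        return 0;
    }
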
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 59a51a56e7c8..4ca11be6be3c 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -24,31 +24,28 @@
  * Retransmit terminal ACK or ABORT of the previous call.
  */
 static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
-				       struct sk_buff *skb)
+				       struct sk_buff *skb,
+				       unsigned int channel)
 {
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
 	struct rxrpc_channel *chan;
 	struct msghdr msg;
-	struct kvec iov;
+	struct kvec iov[3];
 	struct {
 		struct rxrpc_wire_header whdr;
 		union {
-			struct {
-				__be32 code;
-			} abort;
-			struct {
-				struct rxrpc_ackpacket ack;
-				u8 padding[3];
-				struct rxrpc_ackinfo info;
-			};
+			__be32 abort_code;
+			struct rxrpc_ackpacket ack;
 		};
 	} __attribute__((packed)) pkt;
+	struct rxrpc_ackinfo ack_info;
 	size_t len;
-	u32 serial, mtu, call_id;
+	int ioc;
+	u32 serial, mtu, call_id, padding;
 
 	_enter("%d", conn->debug_id);
 
-	chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK];
+	chan = &conn->channels[channel];
 
 	/* If the last call got moved on whilst we were waiting to run, just
 	 * ignore this packet.
@@ -56,7 +53,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	call_id = READ_ONCE(chan->last_call);
 	/* Sync with __rxrpc_disconnect_call() */
 	smp_rmb();
-	if (call_id != sp->hdr.callNumber)
+	if (skb && call_id != sp->hdr.callNumber)
 		return;
 
 	msg.msg_name = &conn->params.peer->srx.transport;
@@ -65,9 +62,16 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	msg.msg_controllen = 0;
 	msg.msg_flags = 0;
 
-	pkt.whdr.epoch = htonl(sp->hdr.epoch);
-	pkt.whdr.cid = htonl(sp->hdr.cid);
-	pkt.whdr.callNumber = htonl(sp->hdr.callNumber);
+	iov[0].iov_base = &pkt;
+	iov[0].iov_len = sizeof(pkt.whdr);
+	iov[1].iov_base = &padding;
+	iov[1].iov_len = 3;
+	iov[2].iov_base = &ack_info;
+	iov[2].iov_len = sizeof(ack_info);
+
+	pkt.whdr.epoch = htonl(conn->proto.epoch);
+	pkt.whdr.cid = htonl(conn->proto.cid);
+	pkt.whdr.callNumber = htonl(call_id);
 	pkt.whdr.seq = 0;
 	pkt.whdr.type = chan->last_type;
 	pkt.whdr.flags = conn->out_clientflag;
@@ -79,27 +83,35 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	len = sizeof(pkt.whdr);
 	switch (chan->last_type) {
 	case RXRPC_PACKET_TYPE_ABORT:
-		pkt.abort.code = htonl(chan->last_abort);
-		len += sizeof(pkt.abort);
+		pkt.abort_code = htonl(chan->last_abort);
+		iov[0].iov_len += sizeof(pkt.abort_code);
+		len += sizeof(pkt.abort_code);
+		ioc = 1;
 		break;
 
 	case RXRPC_PACKET_TYPE_ACK:
 		mtu = conn->params.peer->if_mtu;
 		mtu -= conn->params.peer->hdrsize;
 		pkt.ack.bufferSpace = 0;
-		pkt.ack.maxSkew = htons(skb->priority);
-		pkt.ack.firstPacket = htonl(chan->last_seq);
-		pkt.ack.previousPacket = htonl(chan->last_seq - 1);
-		pkt.ack.serial = htonl(sp->hdr.serial);
-		pkt.ack.reason = RXRPC_ACK_DUPLICATE;
+		pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
+		pkt.ack.firstPacket = htonl(chan->last_seq + 1);
+		pkt.ack.previousPacket = htonl(chan->last_seq);
+		pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
+		pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
 		pkt.ack.nAcks = 0;
-		pkt.info.rxMTU = htonl(rxrpc_rx_mtu);
-		pkt.info.maxMTU = htonl(mtu);
-		pkt.info.rwind = htonl(rxrpc_rx_window_size);
-		pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+		ack_info.rxMTU = htonl(rxrpc_rx_mtu);
+		ack_info.maxMTU = htonl(mtu);
+		ack_info.rwind = htonl(rxrpc_rx_window_size);
+		ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
 		pkt.whdr.flags |= RXRPC_SLOW_START_OK;
-		len += sizeof(pkt.ack) + sizeof(pkt.info);
+		padding = 0;
+		iov[0].iov_len += sizeof(pkt.ack);
+		len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+		ioc = 3;
 		break;
+
+	default:
+		return;
 	}
 
 	/* Resync with __rxrpc_disconnect_call() and check that the last call
@@ -109,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	if (READ_ONCE(chan->last_call) != call_id)
 		return;
 
-	iov.iov_base = &pkt;
-	iov.iov_len = len;
-
 	serial = atomic_inc_return(&conn->serial);
 	pkt.whdr.serial = htonl(serial);
 
@@ -126,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 		break;
 	}
 
-	kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+	kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
 	_leave("");
 	return;
 }
@@ -272,7 +281,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_DATA:
 	case RXRPC_PACKET_TYPE_ACK:
-		rxrpc_conn_retransmit_call(conn, skb);
+		rxrpc_conn_retransmit_call(conn, skb,
+					   sp->hdr.cid & RXRPC_CHANNELMASK);
 		return 0;
 
 	case RXRPC_PACKET_TYPE_BUSY:
@@ -379,6 +389,48 @@ abort:
 }
 
 /*
+ * Process delayed final ACKs that we haven't subsumed into a subsequent call.
+ */
+static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn)
+{
+	unsigned long j = jiffies, next_j;
+	unsigned int channel;
+	bool set;
+
+again:
+	next_j = j + LONG_MAX;
+	set = false;
+	for (channel = 0; channel < RXRPC_MAXCALLS; channel++) {
+		struct rxrpc_channel *chan = &conn->channels[channel];
+		unsigned long ack_at;
+
+		if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
+			continue;
+
+		smp_rmb(); /* vs rxrpc_disconnect_client_call */
+		ack_at = READ_ONCE(chan->final_ack_at);
+
+		if (time_before(j, ack_at)) {
+			if (time_before(ack_at, next_j)) {
+				next_j = ack_at;
+				set = true;
+			}
+			continue;
+		}
+
+		if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel,
+				       &conn->flags))
+			rxrpc_conn_retransmit_call(conn, NULL, channel);
+	}
+
+	j = jiffies;
+	if (time_before_eq(next_j, j))
+		goto again;
+	if (set)
+		rxrpc_reduce_conn_timer(conn, next_j);
+}
+
+/*
  * connection-level event processor
  */
 void rxrpc_process_connection(struct work_struct *work)
@@ -394,6 +446,10 @@ void rxrpc_process_connection(struct work_struct *work)
 	if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
 		rxrpc_secure_connection(conn);
 
+	/* Process delayed ACKs whose time has come. */
+	if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+		rxrpc_process_delayed_final_acks(conn);
+
 	/* go through the conn-level event packets, releasing the ref on this
 	 * connection that each one has when we've finished with it */
 	while ((skb = skb_dequeue(&conn->rx_queue))) {
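
Aside (illustration only, not part of the patch): the reworked retransmit path above replaces one on-stack struct full of explicit padding with a three-slot gathered write. The same layout can be shown with plain writev() instead of kernel_sendmsg(): slot 0 carries the header plus ACK body, slot 1 three bytes of padding to align the trailer, slot 2 the ACK info. All struct and field names below are hypothetical simplifications of the rxrpc wire structures.

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/uio.h>
    #include <unistd.h>

    struct wire_hdr { uint32_t cid, call_id; uint8_t type; } __attribute__((packed));
    struct ack_body { uint8_t reason, n_acks; } __attribute__((packed));
    struct ack_info { uint32_t rx_mtu, rwind; } __attribute__((packed));

    int main(void)
    {
        struct {
            struct wire_hdr whdr;
            struct ack_body ack;
        } __attribute__((packed)) pkt;
        struct ack_info info;
        uint32_t padding = 0;
        struct iovec iov[3];
        size_t len;

        memset(&pkt, 0, sizeof(pkt));
        memset(&info, 0, sizeof(info));
        pkt.ack.reason = 2; /* hypothetical ACK-reason value */

        iov[0].iov_base = &pkt;     iov[0].iov_len = sizeof(pkt);
        iov[1].iov_base = &padding; iov[1].iov_len = 3; /* align the trailer */
        iov[2].iov_base = &info;    iov[2].iov_len = sizeof(info);
        len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;

        /* Write to stdout in place of a UDP socket. */
        if (writev(STDOUT_FILENO, iov, 3) == (ssize_t)len)
            fprintf(stderr, "sent %zu bytes in 3 fragments\n", len);
        return 0;
    }
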
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index fe575798592f..c628351eb900 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -20,10 +20,19 @@
 /*
  * Time till a connection expires after last use (in seconds).
  */
-unsigned int rxrpc_connection_expiry = 10 * 60;
+unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60;
+unsigned int __read_mostly rxrpc_closed_conn_expiry = 10;
 
 static void rxrpc_destroy_connection(struct rcu_head *);
 
+static void rxrpc_connection_timer(struct timer_list *timer)
+{
+	struct rxrpc_connection *conn =
+		container_of(timer, struct rxrpc_connection, timer);
+
+	rxrpc_queue_conn(conn);
+}
+
 /*
  * allocate a new connection
  */
@@ -38,6 +47,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
 		INIT_LIST_HEAD(&conn->cache_link);
 		spin_lock_init(&conn->channel_lock);
 		INIT_LIST_HEAD(&conn->waiting_calls);
+		timer_setup(&conn->timer, &rxrpc_connection_timer, 0);
 		INIT_WORK(&conn->processor, &rxrpc_process_connection);
 		INIT_LIST_HEAD(&conn->proc_link);
 		INIT_LIST_HEAD(&conn->link);
@@ -301,21 +311,29 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
 }
 
 /*
+ * Set the service connection reap timer.
+ */
+static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
+					 unsigned long reap_at)
+{
+	if (rxnet->live)
+		timer_reduce(&rxnet->service_conn_reap_timer, reap_at);
+}
+
+/*
  * Release a service connection
  */
 void rxrpc_put_service_conn(struct rxrpc_connection *conn)
 {
-	struct rxrpc_net *rxnet;
 	const void *here = __builtin_return_address(0);
 	int n;
 
 	n = atomic_dec_return(&conn->usage);
 	trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
 	ASSERTCMP(n, >=, 0);
-	if (n == 0) {
-		rxnet = conn->params.local->rxnet;
-		rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0);
-	}
+	if (n == 1)
+		rxrpc_set_service_reap_timer(conn->params.local->rxnet,
+					     jiffies + rxrpc_connection_expiry);
 }
 
 /*
@@ -332,6 +350,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
 
 	_net("DESTROY CONN %d", conn->debug_id);
 
+	del_timer_sync(&conn->timer);
 	rxrpc_purge_queue(&conn->rx_queue);
 
 	conn->security->clear(conn);
@@ -351,17 +370,15 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
 {
 	struct rxrpc_connection *conn, *_p;
 	struct rxrpc_net *rxnet =
-		container_of(to_delayed_work(work),
-			     struct rxrpc_net, service_conn_reaper);
-	unsigned long reap_older_than, earliest, idle_timestamp, now;
+		container_of(work, struct rxrpc_net, service_conn_reaper);
+	unsigned long expire_at, earliest, idle_timestamp, now;
 
 	LIST_HEAD(graveyard);
 
 	_enter("");
 
 	now = jiffies;
-	reap_older_than = now - rxrpc_connection_expiry * HZ;
-	earliest = ULONG_MAX;
+	earliest = now + MAX_JIFFY_OFFSET;
 
 	write_lock(&rxnet->conn_lock);
 	list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
@@ -371,15 +388,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
 		if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
 			continue;
 
-		idle_timestamp = READ_ONCE(conn->idle_timestamp);
-		_debug("reap CONN %d { u=%d,t=%ld }",
-		       conn->debug_id, atomic_read(&conn->usage),
-		       (long)reap_older_than - (long)idle_timestamp);
-
-		if (time_after(idle_timestamp, reap_older_than)) {
-			if (time_before(idle_timestamp, earliest))
-				earliest = idle_timestamp;
-			continue;
+		if (rxnet->live) {
+			idle_timestamp = READ_ONCE(conn->idle_timestamp);
+			expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
+			if (conn->params.local->service_closed)
+				expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
+
+			_debug("reap CONN %d { u=%d,t=%ld }",
+			       conn->debug_id, atomic_read(&conn->usage),
+			       (long)expire_at - (long)now);
+
+			if (time_before(now, expire_at)) {
+				if (time_before(expire_at, earliest))
+					earliest = expire_at;
+				continue;
+			}
 		}
 
 		/* The usage count sits at 1 whilst the object is unused on the
@@ -387,6 +410,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
 		 */
 		if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
 			continue;
+		trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0);
 
 		if (rxrpc_conn_is_client(conn))
 			BUG();
@@ -397,11 +421,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
 	}
 	write_unlock(&rxnet->conn_lock);
 
-	if (earliest != ULONG_MAX) {
-		_debug("reschedule reaper %ld", (long) earliest - now);
+	if (earliest != now + MAX_JIFFY_OFFSET) {
+		_debug("reschedule reaper %ld", (long)earliest - (long)now);
 		ASSERT(time_after(earliest, now));
-		rxrpc_queue_delayed_work(&rxnet->client_conn_reaper,
-					 earliest - now);
+		rxrpc_set_service_reap_timer(rxnet, earliest);
 	}
 
 	while (!list_empty(&graveyard)) {
@@ -429,9 +452,8 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
 
 	rxrpc_destroy_all_client_connections(rxnet);
 
-	rxrpc_connection_expiry = 0;
-	cancel_delayed_work(&rxnet->client_conn_reaper);
-	rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0);
+	del_timer_sync(&rxnet->service_conn_reap_timer);
+	rxrpc_queue_work(&rxnet->service_conn_reaper);
 	flush_workqueue(rxrpc_workqueue);
 
 	write_lock(&rxnet->conn_lock);
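
Aside (illustration only, not part of the patch): the reaper's earliest-deadline scan above, modeled in user-space C. Idle connections past their expiry are claimed by dropping a usage count from 1 to 0 with a compare-and-swap; the soonest surviving deadline re-arms the reap timer. The expiry values and struct layout here are hypothetical.

    #include <stdatomic.h>
    #include <stdio.h>

    #define MAX_OFFSET (~0UL >> 1)

    struct conn {
        _Atomic int usage;              /* 1 == idle and reapable */
        unsigned long idle_timestamp;
        int service_closed;
    };

    int main(void)
    {
        struct conn conns[3] = {
            { 1, 100, 0 }, { 1, 900, 0 }, { 1, 400, 1 },
        };
        unsigned long now = 1000, expiry = 600, closed_expiry = 10;
        unsigned long earliest = now + MAX_OFFSET;

        for (int i = 0; i < 3; i++) {
            unsigned long expire_at = conns[i].idle_timestamp +
                (conns[i].service_closed ? closed_expiry : expiry);

            if ((long)(now - expire_at) < 0) {      /* not yet expired */
                if ((long)(expire_at - earliest) < 0)
                    earliest = expire_at;
                continue;
            }
            int one = 1;
            /* Claim the conn only if nothing else took a reference. */
            if (atomic_compare_exchange_strong(&conns[i].usage, &one, 0))
                printf("reaped conn %d\n", i);
        }
        if (earliest != now + MAX_OFFSET)
            printf("re-arm reap timer for t=%lu\n", earliest);
        return 0;
    }
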
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1b592073ec96..6fc61400337f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -318,16 +318,18 @@ bad_state:
 static bool rxrpc_receiving_reply(struct rxrpc_call *call)
 {
 	struct rxrpc_ack_summary summary = { 0 };
+	unsigned long now, timo;
 	rxrpc_seq_t top = READ_ONCE(call->tx_top);
 
 	if (call->ackr_reason) {
 		spin_lock_bh(&call->lock);
 		call->ackr_reason = 0;
-		call->resend_at = call->expire_at;
-		call->ack_at = call->expire_at;
 		spin_unlock_bh(&call->lock);
-		rxrpc_set_timer(call, rxrpc_timer_init_for_reply,
-				ktime_get_real());
+		now = jiffies;
+		timo = now + MAX_JIFFY_OFFSET;
+		WRITE_ONCE(call->resend_at, timo);
+		WRITE_ONCE(call->ack_at, timo);
+		trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
 	}
 
 	if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
@@ -437,6 +439,19 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
 	if (state >= RXRPC_CALL_COMPLETE)
 		return;
 
+	if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
+		unsigned long timo = READ_ONCE(call->next_req_timo);
+		unsigned long now, expect_req_by;
+
+		if (timo) {
+			now = jiffies;
+			expect_req_by = now + timo;
+			WRITE_ONCE(call->expect_req_by, expect_req_by);
+			rxrpc_reduce_call_timer(call, expect_req_by, now,
+						rxrpc_timer_set_for_idle);
+		}
+	}
+
 	/* Received data implicitly ACKs all of the request packets we sent
 	 * when we're acting as a client.
 	 */
@@ -616,6 +631,43 @@ found:
616} 631}
617 632
618/* 633/*
634 * Process the response to a ping that we sent to find out if we lost an ACK.
635 *
636 * If we got back a ping response that indicates a lower tx_top than what we
637 * had at the time of the ping transmission, we adjudge all the DATA packets
638 * sent between the response tx_top and the ping-time tx_top to have been lost.
639 */
640static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call)
641{
642 rxrpc_seq_t top, bottom, seq;
643 bool resend = false;
644
645 spin_lock_bh(&call->lock);
646
647 bottom = call->tx_hard_ack + 1;
648 top = call->acks_lost_top;
649 if (before(bottom, top)) {
650 for (seq = bottom; before_eq(seq, top); seq++) {
651 int ix = seq & RXRPC_RXTX_BUFF_MASK;
652 u8 annotation = call->rxtx_annotations[ix];
653 u8 anno_type = annotation & RXRPC_TX_ANNO_MASK;
654
655 if (anno_type != RXRPC_TX_ANNO_UNACK)
656 continue;
657 annotation &= ~RXRPC_TX_ANNO_MASK;
658 annotation |= RXRPC_TX_ANNO_RETRANS;
659 call->rxtx_annotations[ix] = annotation;
660 resend = true;
661 }
662 }
663
664 spin_unlock_bh(&call->lock);
665
666 if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
667 rxrpc_queue_call(call);
668}
669
670/*
619 * Process a ping response. 671 * Process a ping response.
620 */ 672 */
621static void rxrpc_input_ping_response(struct rxrpc_call *call, 673static void rxrpc_input_ping_response(struct rxrpc_call *call,
@@ -630,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call,
630 smp_rmb(); 682 smp_rmb();
631 ping_serial = call->ping_serial; 683 ping_serial = call->ping_serial;
632 684
685 if (orig_serial == call->acks_lost_ping)
686 rxrpc_input_check_for_lost_ack(call);
687
633 if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || 688 if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
634 before(orig_serial, ping_serial)) 689 before(orig_serial, ping_serial))
635 return; 690 return;
@@ -908,9 +963,20 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
908 struct sk_buff *skb, u16 skew) 963 struct sk_buff *skb, u16 skew)
909{ 964{
910 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 965 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
966 unsigned long timo;
911 967
912 _enter("%p,%p", call, skb); 968 _enter("%p,%p", call, skb);
913 969
970 timo = READ_ONCE(call->next_rx_timo);
971 if (timo) {
972 unsigned long now = jiffies, expect_rx_by;
973
974 expect_rx_by = jiffies + timo;
975 WRITE_ONCE(call->expect_rx_by, expect_rx_by);
976 rxrpc_reduce_call_timer(call, expect_rx_by, now,
977 rxrpc_timer_set_for_normal);
978 }
979
914 switch (sp->hdr.type) { 980 switch (sp->hdr.type) {
915 case RXRPC_PACKET_TYPE_DATA: 981 case RXRPC_PACKET_TYPE_DATA:
916 rxrpc_input_data(call, skb, skew); 982 rxrpc_input_data(call, skb, skew);
@@ -1147,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
1147 goto reupgrade; 1213 goto reupgrade;
1148 conn->service_id = sp->hdr.serviceId; 1214 conn->service_id = sp->hdr.serviceId;
1149 } 1215 }
1150 1216
1151 if (sp->hdr.callNumber == 0) { 1217 if (sp->hdr.callNumber == 0) {
1152 /* Connection-level packet */ 1218 /* Connection-level packet */
1153 _debug("CONN %p {%d}", conn, conn->debug_id); 1219 _debug("CONN %p {%d}", conn, conn->debug_id);
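
The lost-ACK walk above relies on wraparound-safe serial-number comparisons (before()/before_eq()) so that a window spanning the 32-bit sequence wrap is still traversed correctly. The following is a minimal standalone C sketch of that pattern; the buffer size, mask, and annotation values are illustrative stand-ins, not the kernel's definitions:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BUFF_SIZE 64                    /* must be a power of two */
    #define BUFF_MASK (BUFF_SIZE - 1)
    #define ANNO_UNACK   1
    #define ANNO_RETRANS 2

    /* Serial arithmetic: comparisons stay valid across the 2^32 wrap. */
    static bool seq_before(uint32_t a, uint32_t b)    { return (int32_t)(a - b) < 0; }
    static bool seq_before_eq(uint32_t a, uint32_t b) { return (int32_t)(a - b) <= 0; }

    int main(void)
    {
            uint8_t anno[BUFF_SIZE] = { 0 };
            uint32_t hard_ack = 0xfffffffe, lost_top = 0x00000003; /* spans the wrap */
            bool resend = false;
            uint32_t seq;

            /* Pretend everything in (hard_ack, lost_top] is unacked. */
            for (seq = hard_ack + 1; seq_before_eq(seq, lost_top); seq++)
                    anno[seq & BUFF_MASK] = ANNO_UNACK;

            /* The lost-ACK pass: flag every unacked slot for retransmission. */
            if (seq_before(hard_ack + 1, lost_top)) {
                    for (seq = hard_ack + 1; seq_before_eq(seq, lost_top); seq++) {
                            if (anno[seq & BUFF_MASK] != ANNO_UNACK)
                                    continue;
                            anno[seq & BUFF_MASK] = ANNO_RETRANS;
                            resend = true;
                    }
            }
            printf("resend needed: %s\n", resend ? "yes" : "no");
            return 0;
    }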
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 1a2d4b112064..c1d9e7fd7448 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -21,33 +21,28 @@
 unsigned int rxrpc_max_backlog __read_mostly = 10;
 
 /*
- * Maximum lifetime of a call (in mx).
- */
-unsigned int rxrpc_max_call_lifetime = 60 * 1000;
-
-/*
  * How long to wait before scheduling ACK generation after seeing a
- * packet with RXRPC_REQUEST_ACK set (in ms).
+ * packet with RXRPC_REQUEST_ACK set (in jiffies).
  */
-unsigned int rxrpc_requested_ack_delay = 1;
+unsigned long rxrpc_requested_ack_delay = 1;
 
 /*
- * How long to wait before scheduling an ACK with subtype DELAY (in ms).
+ * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
  *
  * We use this when we've received new data packets.  If those packets aren't
  * all consumed within this time we will send a DELAY ACK if an ACK was not
  * requested to let the sender know it doesn't need to resend.
  */
-unsigned int rxrpc_soft_ack_delay = 1 * 1000;
+unsigned long rxrpc_soft_ack_delay = HZ;
 
 /*
- * How long to wait before scheduling an ACK with subtype IDLE (in ms).
+ * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
  *
  * We use this when we've consumed some previously soft-ACK'd packets when
  * further packets aren't immediately received to decide when to send an IDLE
  * ACK let the other end know that it can free up its Tx buffer space.
  */
-unsigned int rxrpc_idle_ack_delay = 0.5 * 1000;
+unsigned long rxrpc_idle_ack_delay = HZ / 2;
 
 /*
  * Receive window size in packets.  This indicates the maximum number of
@@ -75,7 +70,7 @@ unsigned int rxrpc_rx_jumbo_max = 4;
 /*
  * Time till packet resend (in milliseconds).
  */
-unsigned int rxrpc_resend_timeout = 4 * 1000;
+unsigned long rxrpc_resend_timeout = 4 * HZ;
 
 const s8 rxrpc_ack_priority[] = {
	[0] = 0,
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 7edceb8522f5..f18c9248e0d4 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -14,6 +14,24 @@
 
 unsigned int rxrpc_net_id;
 
+static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
+{
+	struct rxrpc_net *rxnet =
+		container_of(timer, struct rxrpc_net, client_conn_reap_timer);
+
+	if (rxnet->live)
+		rxrpc_queue_work(&rxnet->client_conn_reaper);
+}
+
+static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
+{
+	struct rxrpc_net *rxnet =
+		container_of(timer, struct rxrpc_net, service_conn_reap_timer);
+
+	if (rxnet->live)
+		rxrpc_queue_work(&rxnet->service_conn_reaper);
+}
+
 /*
  * Initialise a per-network namespace record.
  */
@@ -22,6 +40,7 @@ static __net_init int rxrpc_init_net(struct net *net)
	struct rxrpc_net *rxnet = rxrpc_net(net);
	int ret;
 
+	rxnet->live = true;
	get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
	rxnet->epoch |= RXRPC_RANDOM_EPOCH;
 
@@ -31,8 +50,10 @@ static __net_init int rxrpc_init_net(struct net *net)
	INIT_LIST_HEAD(&rxnet->conn_proc_list);
	INIT_LIST_HEAD(&rxnet->service_conns);
	rwlock_init(&rxnet->conn_lock);
-	INIT_DELAYED_WORK(&rxnet->service_conn_reaper,
-			  rxrpc_service_connection_reaper);
+	INIT_WORK(&rxnet->service_conn_reaper,
+		  rxrpc_service_connection_reaper);
+	timer_setup(&rxnet->service_conn_reap_timer,
+		    rxrpc_service_conn_reap_timeout, 0);
 
	rxnet->nr_client_conns = 0;
	rxnet->nr_active_client_conns = 0;
@@ -42,8 +63,10 @@ static __net_init int rxrpc_init_net(struct net *net)
	INIT_LIST_HEAD(&rxnet->waiting_client_conns);
	INIT_LIST_HEAD(&rxnet->active_client_conns);
	INIT_LIST_HEAD(&rxnet->idle_client_conns);
-	INIT_DELAYED_WORK(&rxnet->client_conn_reaper,
-			  rxrpc_discard_expired_client_conns);
+	INIT_WORK(&rxnet->client_conn_reaper,
+		  rxrpc_discard_expired_client_conns);
+	timer_setup(&rxnet->client_conn_reap_timer,
+		    rxrpc_client_conn_reap_timeout, 0);
 
	INIT_LIST_HEAD(&rxnet->local_endpoints);
	mutex_init(&rxnet->local_mutex);
@@ -60,6 +83,7 @@ static __net_init int rxrpc_init_net(struct net *net)
	return 0;
 
 err_proc:
+	rxnet->live = false;
	return ret;
 }
 
@@ -70,6 +94,7 @@ static __net_exit void rxrpc_exit_net(struct net *net)
 {
	struct rxrpc_net *rxnet = rxrpc_net(net);
 
+	rxnet->live = false;
	rxrpc_destroy_all_calls(rxnet);
	rxrpc_destroy_all_connections(rxnet);
	rxrpc_destroy_all_locals(rxnet);
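
These hunks replace delayed work with a plain timer whose callback queues ordinary work, gated by a `live` flag so nothing is requeued during namespace teardown. A minimal kernel-style sketch of that pattern, with illustrative names (`my_ns`, `my_reap_timeout`) rather than the rxrpc ones:

    #include <linux/timer.h>
    #include <linux/workqueue.h>

    struct my_ns {
            bool                    live;
            struct work_struct      reaper;
            struct timer_list       reap_timer;
    };

    static void my_reap_timeout(struct timer_list *timer)
    {
            struct my_ns *ns = container_of(timer, struct my_ns, reap_timer);

            /* Only kick the work while the namespace is still live. */
            if (ns->live)
                    schedule_work(&ns->reaper);     /* the work re-arms the timer */
    }

    static void my_ns_init(struct my_ns *ns, work_func_t reap_fn)
    {
            ns->live = true;
            INIT_WORK(&ns->reaper, reap_fn);
            timer_setup(&ns->reap_timer, my_reap_timeout, 0);
    }

Teardown then mirrors rxrpc_destroy_all_connections() above: clear `live`, del_timer_sync() the timer, queue the work once more, and flush the workqueue.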
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f47659c7b224..42410e910aff 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -33,6 +33,24 @@ struct rxrpc_abort_buffer {
 };
 
 /*
+ * Arrange for a keepalive ping a certain time after we last transmitted.  This
+ * lets the far side know we're still interested in this call and helps keep
+ * the route through any intervening firewall open.
+ *
+ * Receiving a response to the ping will prevent the ->expect_rx_by timer from
+ * expiring.
+ */
+static void rxrpc_set_keepalive(struct rxrpc_call *call)
+{
+	unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
+
+	keepalive_at += now;
+	WRITE_ONCE(call->keepalive_at, keepalive_at);
+	rxrpc_reduce_call_timer(call, keepalive_at, now,
+				rxrpc_timer_set_for_keepalive);
+}
+
+/*
  * Fill out an ACK packet.
  */
 static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
@@ -95,7 +113,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
 /*
  * Send an ACK call packet.
  */
-int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
+int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
+			  rxrpc_serial_t *_serial)
 {
	struct rxrpc_connection *conn = NULL;
	struct rxrpc_ack_buffer *pkt;
@@ -165,6 +184,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
			  ntohl(pkt->ack.firstPacket),
			  ntohl(pkt->ack.serial),
			  pkt->ack.reason, pkt->ack.nAcks);
+	if (_serial)
+		*_serial = serial;
 
	if (ping) {
		call->ping_serial = serial;
@@ -202,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
			call->ackr_seen = top;
		spin_unlock_bh(&call->lock);
	}
+
+	rxrpc_set_keepalive(call);
 }
 
 out:
@@ -323,7 +346,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
	 * ACKs if a DATA packet appears to have been lost.
	 */
	if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
-	    (retrans ||
+	    (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
+	     retrans ||
	     call->cong_mode == RXRPC_CALL_SLOW_START ||
	     (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
	     ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
@@ -370,8 +394,23 @@ done:
		if (whdr.flags & RXRPC_REQUEST_ACK) {
			call->peer->rtt_last_req = now;
			trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
+			if (call->peer->rtt_usage > 1) {
+				unsigned long nowj = jiffies, ack_lost_at;
+
+				ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
+				if (ack_lost_at < 1)
+					ack_lost_at = 1;
+
+				ack_lost_at += nowj;
+				WRITE_ONCE(call->ack_lost_at, ack_lost_at);
+				rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
+							rxrpc_timer_set_for_lost_ack);
+			}
		}
	}
+
+	rxrpc_set_keepalive(call);
+
	_leave(" = %d [%u]", ret, call->peer->maxdata);
	return ret;
 
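
When a transmitted DATA packet requests an ACK and at least two RTT samples exist, the hunk above arms a lost-ACK timer at twice the measured RTT, clamped to a minimum of one jiffy so a sub-jiffy RTT still produces a future expiry. A userspace sketch of just that arithmetic, with HZ = 100 as an assumed illustrative value:

    #include <stdio.h>

    #define HZ 100UL

    /* Simplified analogue of the kernel's nsecs_to_jiffies() (no rounding). */
    static unsigned long nsecs_to_jiffies(unsigned long long ns)
    {
            return ns / (1000000000ULL / HZ);
    }

    int main(void)
    {
            unsigned long long rtt_ns = 1200000;    /* 1.2 ms measured RTT */
            unsigned long now = 5000;               /* pretend current jiffies */
            unsigned long ack_lost_at = nsecs_to_jiffies(2 * rtt_ns);

            if (ack_lost_at < 1)
                    ack_lost_at = 1;        /* never arm a timer in the past */
            ack_lost_at += now;

            /* 2.4 ms rounds down to 0 jiffies at HZ=100, so the clamp fires. */
            printf("ack_lost_at = now + %lu jiffies\n", ack_lost_at - now);
            return 0;
    }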
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 8510a98b87e1..cc21e8db25b0 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -144,11 +144,13 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
	trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top);
	ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
 
+#if 0 // TODO: May want to transmit final ACK under some circumstances anyway
	if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
				  rxrpc_propose_ack_terminal_ack);
-		rxrpc_send_ack_packet(call, false);
+		rxrpc_send_ack_packet(call, false, NULL);
	}
+#endif
 
	write_lock_bh(&call->state_lock);
 
@@ -161,7 +163,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
	case RXRPC_CALL_SERVER_RECV_REQUEST:
		call->tx_phase = true;
		call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
-		call->ack_at = call->expire_at;
+		call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
		write_unlock_bh(&call->state_lock);
		rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true,
				  rxrpc_propose_ack_processing_op);
@@ -217,10 +219,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
		    after_eq(top, call->ackr_seen + 2) ||
		    (hard_ack == top && after(hard_ack, call->ackr_consumed)))
			rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
-					  true, false,
+					  true, true,
					  rxrpc_propose_ack_rotate_rx);
-		if (call->ackr_reason)
-			rxrpc_send_ack_packet(call, false);
+		if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
+			rxrpc_send_ack_packet(call, false, NULL);
	}
 }
 
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 7d2595582c09..09f2a3e05221 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -21,22 +21,6 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-enum rxrpc_command {
-	RXRPC_CMD_SEND_DATA,		/* send data message */
-	RXRPC_CMD_SEND_ABORT,		/* request abort generation */
-	RXRPC_CMD_ACCEPT,		/* [server] accept incoming call */
-	RXRPC_CMD_REJECT_BUSY,		/* [server] reject a call as busy */
-};
-
-struct rxrpc_send_params {
-	s64 tx_total_len;		/* Total Tx data length (if send data) */
-	unsigned long user_call_ID;	/* User's call ID */
-	u32 abort_code;			/* Abort code to Tx (if abort) */
-	enum rxrpc_command command : 8;	/* The command to implement */
-	bool exclusive;			/* Shared or exclusive call */
-	bool upgrade;			/* If the connection is upgradeable */
-};
-
 /*
  * Wait for space to appear in the Tx queue or a signal to occur.
  */
@@ -174,6 +158,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
			       rxrpc_notify_end_tx_t notify_end_tx)
 {
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	unsigned long now;
	rxrpc_seq_t seq = sp->hdr.seq;
	int ret, ix;
	u8 annotation = RXRPC_TX_ANNO_UNACK;
@@ -213,11 +198,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
		break;
	case RXRPC_CALL_SERVER_ACK_REQUEST:
		call->state = RXRPC_CALL_SERVER_SEND_REPLY;
-		call->ack_at = call->expire_at;
+		now = jiffies;
+		WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET);
		if (call->ackr_reason == RXRPC_ACK_DELAY)
			call->ackr_reason = 0;
-		__rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply,
-				  ktime_get_real());
+		trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
		if (!last)
			break;
		/* Fall through */
@@ -239,14 +224,19 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
		_debug("need instant resend %d", ret);
		rxrpc_instant_resend(call, ix);
	} else {
-		ktime_t now = ktime_get_real(), resend_at;
+		unsigned long now = jiffies, resend_at;
 
-		resend_at = ktime_add_ms(now, rxrpc_resend_timeout);
-
-		if (ktime_before(resend_at, call->resend_at)) {
-			call->resend_at = resend_at;
-			rxrpc_set_timer(call, rxrpc_timer_set_for_send, now);
-		}
+		if (call->peer->rtt_usage > 1)
+			resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2);
+		else
+			resend_at = rxrpc_resend_timeout;
+		if (resend_at < 1)
+			resend_at = 1;
+
+		resend_at += now;
+		WRITE_ONCE(call->resend_at, resend_at);
+		rxrpc_reduce_call_timer(call, resend_at, now,
+					rxrpc_timer_set_for_send);
	}
 
	rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
@@ -295,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
	do {
		/* Check to see if there's a ping ACK to reply to. */
		if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
-			rxrpc_send_ack_packet(call, false);
+			rxrpc_send_ack_packet(call, false, NULL);
 
		if (!skb) {
			size_t size, chunk, max, space;
@@ -480,11 +470,11 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
			if (msg->msg_flags & MSG_CMSG_COMPAT) {
				if (len != sizeof(u32))
					return -EINVAL;
-				p->user_call_ID = *(u32 *)CMSG_DATA(cmsg);
+				p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg);
			} else {
				if (len != sizeof(unsigned long))
					return -EINVAL;
-				p->user_call_ID = *(unsigned long *)
+				p->call.user_call_ID = *(unsigned long *)
					CMSG_DATA(cmsg);
			}
			got_user_ID = true;
@@ -522,11 +512,24 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
		break;
 
	case RXRPC_TX_LENGTH:
-		if (p->tx_total_len != -1 || len != sizeof(__s64))
+		if (p->call.tx_total_len != -1 || len != sizeof(__s64))
+			return -EINVAL;
+		p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
+		if (p->call.tx_total_len < 0)
			return -EINVAL;
-		p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
-		if (p->tx_total_len < 0)
+		break;
+
+	case RXRPC_SET_CALL_TIMEOUT:
+		if (len & 3 || len < 4 || len > 12)
			return -EINVAL;
+		memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len);
+		p->call.nr_timeouts = len / 4;
+		if (p->call.timeouts.hard > INT_MAX / HZ)
+			return -ERANGE;
+		if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000)
+			return -ERANGE;
+		if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000)
+			return -ERANGE;
		break;
 
	default:
@@ -536,7 +539,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
 
	if (!got_user_ID)
		return -EINVAL;
-	if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
+	if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
		return -EINVAL;
	_leave(" = 0");
	return 0;
@@ -576,8 +579,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
	cp.exclusive		= rx->exclusive | p->exclusive;
	cp.upgrade		= p->upgrade;
	cp.service_id		= srx->srx_service;
-	call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID,
-				     p->tx_total_len, GFP_KERNEL);
+	call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL);
	/* The socket is now unlocked */
 
	_leave(" = %p\n", call);
@@ -594,15 +596,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 {
	enum rxrpc_call_state state;
	struct rxrpc_call *call;
+	unsigned long now, j;
	int ret;
 
	struct rxrpc_send_params p = {
-		.tx_total_len = -1,
-		.user_call_ID = 0,
-		.abort_code = 0,
-		.command = RXRPC_CMD_SEND_DATA,
-		.exclusive = false,
-		.upgrade = true,
+		.call.tx_total_len = -1,
+		.call.user_call_ID = 0,
+		.call.nr_timeouts = 0,
+		.abort_code = 0,
+		.command = RXRPC_CMD_SEND_DATA,
+		.exclusive = false,
+		.upgrade = false,
	};
 
	_enter("");
@@ -615,15 +619,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
		ret = -EINVAL;
		if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
			goto error_release_sock;
-		call = rxrpc_accept_call(rx, p.user_call_ID, NULL);
+		call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL);
		/* The socket is now unlocked. */
		if (IS_ERR(call))
			return PTR_ERR(call);
-		rxrpc_put_call(call, rxrpc_call_put);
-		return 0;
+		ret = 0;
+		goto out_put_unlock;
	}
 
-	call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID);
+	call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID);
	if (!call) {
		ret = -EBADSLT;
		if (p.command != RXRPC_CMD_SEND_DATA)
@@ -653,14 +657,39 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
			goto error_put;
		}
 
-		if (p.tx_total_len != -1) {
+		if (p.call.tx_total_len != -1) {
			ret = -EINVAL;
			if (call->tx_total_len != -1 ||
			    call->tx_pending ||
			    call->tx_top != 0)
				goto error_put;
-			call->tx_total_len = p.tx_total_len;
+			call->tx_total_len = p.call.tx_total_len;
+		}
+	}
+
+	switch (p.call.nr_timeouts) {
+	case 3:
+		j = msecs_to_jiffies(p.call.timeouts.normal);
+		if (p.call.timeouts.normal > 0 && j == 0)
+			j = 1;
+		WRITE_ONCE(call->next_rx_timo, j);
+		/* Fall through */
+	case 2:
+		j = msecs_to_jiffies(p.call.timeouts.idle);
+		if (p.call.timeouts.idle > 0 && j == 0)
+			j = 1;
+		WRITE_ONCE(call->next_req_timo, j);
+		/* Fall through */
+	case 1:
+		if (p.call.timeouts.hard > 0) {
+			j = msecs_to_jiffies(p.call.timeouts.hard);
+			now = jiffies;
+			j += now;
+			WRITE_ONCE(call->expect_term_by, j);
+			rxrpc_reduce_call_timer(call, j, now,
+						rxrpc_timer_set_for_hard);
		}
+		break;
	}
 
	state = READ_ONCE(call->state);
@@ -689,6 +718,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
		ret = rxrpc_send_data(rx, call, msg, len, NULL);
	}
 
+out_put_unlock:
	mutex_unlock(&call->user_mutex);
 error_put:
	rxrpc_put_call(call, rxrpc_call_put);
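
The new RXRPC_SET_CALL_TIMEOUT control message accepts 4, 8, or 12 bytes: one to three 32-bit millisecond values giving the hard, idle, and normal timeouts in that order. A hedged userspace sketch of attaching it alongside the call ID on sendmsg(); it assumes a <linux/rxrpc.h> uapi header exporting SOL_RXRPC, RXRPC_USER_CALL_ID, and RXRPC_SET_CALL_TIMEOUT, and elides error handling:

    #include <string.h>
    #include <sys/socket.h>
    #include <linux/rxrpc.h>

    static ssize_t send_with_timeouts(int fd, const void *buf, size_t len,
                                      unsigned long call_id)
    {
            unsigned int timeouts[3] = { 30000, 5000, 10000 }; /* hard/idle/normal, ms */
            char control[CMSG_SPACE(sizeof(call_id)) + CMSG_SPACE(sizeof(timeouts))];
            struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
            struct msghdr msg = {
                    .msg_iov        = &iov,
                    .msg_iovlen     = 1,
                    .msg_control    = control,
                    .msg_controllen = sizeof(control),
            };
            struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

            memset(control, 0, sizeof(control));
            cmsg->cmsg_level = SOL_RXRPC;
            cmsg->cmsg_type  = RXRPC_USER_CALL_ID;
            cmsg->cmsg_len   = CMSG_LEN(sizeof(call_id));
            memcpy(CMSG_DATA(cmsg), &call_id, sizeof(call_id));

            cmsg = CMSG_NXTHDR(&msg, cmsg);
            cmsg->cmsg_level = SOL_RXRPC;
            cmsg->cmsg_type  = RXRPC_SET_CALL_TIMEOUT;
            cmsg->cmsg_len   = CMSG_LEN(sizeof(timeouts)); /* 12 bytes = all three */
            memcpy(CMSG_DATA(cmsg), timeouts, sizeof(timeouts));

            return sendmsg(fd, &msg, 0);
    }

Passing only 4 bytes would set just the hard timeout; 8 bytes sets hard and idle, matching the nr_timeouts = len / 4 parsing in rxrpc_sendmsg_cmsg() above.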
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 34c706d2f79c..4a7af7aff37d 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -21,6 +21,8 @@ static const unsigned int four = 4;
 static const unsigned int thirtytwo = 32;
 static const unsigned int n_65535 = 65535;
 static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
+static const unsigned long one_jiffy = 1;
+static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
 
 /*
  * RxRPC operating parameters.
@@ -29,64 +31,60 @@ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
  * information on the individual parameters.
  */
 static struct ctl_table rxrpc_sysctl_table[] = {
-	/* Values measured in milliseconds */
+	/* Values measured in milliseconds but used in jiffies */
	{
		.procname	= "req_ack_delay",
		.data		= &rxrpc_requested_ack_delay,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= (void *)&zero,
+		.proc_handler	= proc_doulongvec_ms_jiffies_minmax,
+		.extra1		= (void *)&one_jiffy,
+		.extra2		= (void *)&max_jiffies,
	},
	{
		.procname	= "soft_ack_delay",
		.data		= &rxrpc_soft_ack_delay,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= (void *)&one,
+		.proc_handler	= proc_doulongvec_ms_jiffies_minmax,
+		.extra1		= (void *)&one_jiffy,
+		.extra2		= (void *)&max_jiffies,
	},
	{
		.procname	= "idle_ack_delay",
		.data		= &rxrpc_idle_ack_delay,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= (void *)&one,
-	},
-	{
-		.procname	= "resend_timeout",
-		.data		= &rxrpc_resend_timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= (void *)&one,
+		.proc_handler	= proc_doulongvec_ms_jiffies_minmax,
+		.extra1		= (void *)&one_jiffy,
+		.extra2		= (void *)&max_jiffies,
	},
	{
		.procname	= "idle_conn_expiry",
		.data		= &rxrpc_conn_idle_client_expiry,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec_ms_jiffies,
-		.extra1		= (void *)&one,
+		.proc_handler	= proc_doulongvec_ms_jiffies_minmax,
+		.extra1		= (void *)&one_jiffy,
+		.extra2		= (void *)&max_jiffies,
	},
	{
		.procname	= "idle_conn_fast_expiry",
		.data		= &rxrpc_conn_idle_client_fast_expiry,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec_ms_jiffies,
-		.extra1		= (void *)&one,
+		.proc_handler	= proc_doulongvec_ms_jiffies_minmax,
+		.extra1		= (void *)&one_jiffy,
+		.extra2		= (void *)&max_jiffies,
	},
-
-	/* Values measured in seconds but used in jiffies */
	{
-		.procname	= "max_call_lifetime",
-		.data		= &rxrpc_max_call_lifetime,
-		.maxlen		= sizeof(unsigned int),
+		.procname	= "resend_timeout",
+		.data		= &rxrpc_resend_timeout,
+		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= (void *)&one,
+		.proc_handler	= proc_doulongvec_ms_jiffies_minmax,
+		.extra1		= (void *)&one_jiffy,
+		.extra2		= (void *)&max_jiffies,
	},
 
	/* Non-time values */
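
These sysctl entries now store jiffies internally but still read and write milliseconds at the /proc interface, with the new one_jiffy/max_jiffies bounds applied on write. A small userspace sketch of the ms-to-jiffies round trip the handler performs; HZ = 250 is illustrative and the conversion here truncates rather than rounding up as the kernel's msecs_to_jiffies() does:

    #include <stdio.h>

    #define HZ 250UL
    #define MAX_JIFFY_OFFSET ((~0UL >> 1) - 1)

    static unsigned long ms_to_jiffies(unsigned long ms) { return ms * HZ / 1000; }
    static unsigned long jiffies_to_ms(unsigned long j)  { return j * 1000 / HZ; }

    int main(void)
    {
            unsigned long soft_ack_delay = HZ;      /* stored in jiffies */

            /* A write of "4000" (ms) through the sysctl: */
            unsigned long j = ms_to_jiffies(4000);
            if (j < 1)
                    j = 1;                  /* extra1 = one_jiffy */
            if (j > MAX_JIFFY_OFFSET)
                    j = MAX_JIFFY_OFFSET;   /* extra2 = max_jiffies */
            soft_ack_delay = j;

            /* A read reports milliseconds again: */
            printf("soft_ack_delay = %lums\n", jiffies_to_ms(soft_ack_delay));
            return 0;
    }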
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 1e3f10e5da99..6445184b2759 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -22,7 +22,6 @@
 #include <net/pkt_sched.h>
 #include <uapi/linux/tc_act/tc_ife.h>
 #include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
 
 static int skbmark_encode(struct sk_buff *skb, void *skbdata,
			  struct tcf_meta_info *e)
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 2ea1f26c9e96..7221437ca3a6 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -22,7 +22,6 @@
 #include <net/pkt_sched.h>
 #include <uapi/linux/tc_act/tc_ife.h>
 #include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
 
 static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
			     struct tcf_meta_info *e)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 8b5abcd2f32f..9438969290a6 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
	return ret;
 }
 
-static void tcf_sample_cleanup_rcu(struct rcu_head *rcu)
+static void tcf_sample_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu);
+	struct tcf_sample *s = to_sample(a);
	struct psample_group *psample_group;
 
-	psample_group = rcu_dereference_protected(s->psample_group, 1);
+	psample_group = rtnl_dereference(s->psample_group);
	RCU_INIT_POINTER(s->psample_group, NULL);
	psample_group_put(psample_group);
 }
 
-static void tcf_sample_cleanup(struct tc_action *a, int bind)
-{
-	struct tcf_sample *s = to_sample(a);
-
-	call_rcu(&s->rcu, tcf_sample_cleanup_rcu);
-}
-
 static bool tcf_sample_dev_ok_push(struct net_device *dev)
 {
	switch (dev->type) {
@@ -264,7 +257,6 @@ static int __init sample_init_module(void)
 
 static void __exit sample_cleanup_module(void)
 {
-	rcu_barrier();
	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 7d97f612c9b9..b91ea03e3afa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,7 +23,6 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
-#include <linux/err.h>
 #include <linux/slab.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -336,7 +335,8 @@ static void tcf_block_put_final(struct work_struct *work)
	struct tcf_chain *chain, *tmp;
 
	rtnl_lock();
-	/* Only chain 0 should be still here. */
+
+	/* At this point, all the chains should have refcnt == 1. */
	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
		tcf_chain_put(chain);
	rtnl_unlock();
@@ -344,15 +344,23 @@ static void tcf_block_put_final(struct work_struct *work)
 }
 
 /* XXX: Standalone actions are not allowed to jump to any chain, and bound
- * actions should be all removed after flushing. However, filters are now
- * destroyed in tc filter workqueue with RTNL lock, they can not race here.
+ * actions should be all removed after flushing.
  */
 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
 {
-	struct tcf_chain *chain, *tmp;
+	struct tcf_chain *chain;
 
-	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+	if (!block)
+		return;
+	/* Hold a refcnt for all chains, except 0, so that they don't disappear
+	 * while we are iterating.
+	 */
+	list_for_each_entry(chain, &block->chain_list, list)
+		if (chain->index)
+			tcf_chain_hold(chain);
+
+	list_for_each_entry(chain, &block->chain_list, list)
		tcf_chain_flush(chain);
 
	tcf_block_offload_unbind(block, q, ei);
@@ -371,8 +379,6 @@ void tcf_block_put(struct tcf_block *block)
 {
	struct tcf_block_ext_info ei = {0, };
 
-	if (!block)
-		return;
	tcf_block_put_ext(block, block->q, &ei);
 }
 
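
The tcf_block_put_ext() change above pins every non-zero chain with an extra reference before flushing anything, so flushing one chain cannot free the next one mid-iteration. A generic userspace C sketch of that hold-then-operate pattern; the singly linked chain and bare refcount are stand-ins for the tcf_chain machinery:

    struct chain {
            struct chain *next;
            int index;
            int refcnt;
    };

    static void chain_hold(struct chain *c)  { c->refcnt++; }
    static void chain_flush(struct chain *c) { (void)c; /* may drop refs it holds */ }

    static void block_put(struct chain *head)
    {
            struct chain *c;

            /* First pass: pin everything except chain 0 so no flush in the
             * second pass can free a successor we still need to visit.
             */
            for (c = head; c; c = c->next)
                    if (c->index)
                            chain_hold(c);

            for (c = head; c; c = c->next)
                    chain_flush(c);
    }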
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a9f3e317055c..6fe798c2df1a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -258,11 +258,8 @@ static int cls_bpf_init(struct tcf_proto *tp)
	return 0;
 }
 
-static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
+static void cls_bpf_free_parms(struct cls_bpf_prog *prog)
 {
-	tcf_exts_destroy(&prog->exts);
-	tcf_exts_put_net(&prog->exts);
-
	if (cls_bpf_is_ebpf(prog))
		bpf_prog_put(prog->filter);
	else
@@ -270,6 +267,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
 
	kfree(prog->bpf_name);
	kfree(prog->bpf_ops);
+}
+
+static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
+{
+	tcf_exts_destroy(&prog->exts);
+	tcf_exts_put_net(&prog->exts);
+
+	cls_bpf_free_parms(prog);
	kfree(prog);
 }
 
@@ -514,12 +519,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
		goto errout_idr;
 
	ret = cls_bpf_offload(tp, prog, oldprog);
-	if (ret) {
-		if (!oldprog)
-			idr_remove_ext(&head->handle_idr, prog->handle);
-		__cls_bpf_delete_prog(prog);
-		return ret;
-	}
+	if (ret)
+		goto errout_parms;
 
	if (!tc_in_hw(prog->gen_flags))
		prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
@@ -537,6 +538,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
	*arg = prog;
	return 0;
 
+errout_parms:
+	cls_bpf_free_parms(prog);
 errout_idr:
	if (!oldprog)
		idr_remove_ext(&head->handle_idr, prog->handle);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ac152b4f4247..507859cdd1cb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -45,7 +45,6 @@
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
-#include <linux/netdevice.h>
 #include <linux/idr.h>
 
 struct tc_u_knode {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b6c4f536876b..0f1eab99ff4e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
+	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
+		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 6361be7881f1..525eb3a6d625 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1158,9 +1158,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;
 
+	err = tcf_block_get(&q->link.block, &q->link.filter_list, sch);
+	if (err)
+		goto put_rtab;
+
	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
-		goto put_rtab;
+		goto put_block;
 
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
@@ -1194,6 +1198,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
	cbq_addprio(q, &q->link);
	return 0;
 
+put_block:
+	tcf_block_put(q->link.block);
+
 put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index b30a2c70bd48..531250fceb9e 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -369,6 +369,9 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 
	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
 
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+		return -EINVAL;
+
	if (ctl->limit > CHOKE_MAX_QUEUE)
		return -EINVAL;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3839cbbdc32b..cd1b200acae7 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
@@ -277,6 +278,8 @@ unsigned long dev_trans_start(struct net_device *dev)
 
	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);
+	else if (netif_is_macvlan(dev))
+		dev = macvlan_dev_real_dev(dev);
	res = netdev_get_tx_queue(dev, 0)->trans_start;
	for (i = 1; i < dev->num_tx_queues; i++) {
		val = netdev_get_tx_queue(dev, i)->trans_start;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 17c7130454bd..bc30f9186ac6 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -356,6 +356,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
	struct gred_sched *table = qdisc_priv(sch);
	struct gred_sched_data *q = table->tab[dp];
 
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+		return -EINVAL;
+
	if (!q) {
		table->tab[dp] = q = *prealloc;
		*prealloc = NULL;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5ecc38f35d47..fc1286f499c1 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -68,6 +68,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
	struct net_device *dev = qdisc_dev(sch);
	int err;
 
+	net_inc_ingress_queue();
+
	mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
 
	q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -78,7 +80,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
	if (err)
		return err;
 
-	net_inc_ingress_queue();
	sch->flags |= TCQ_F_CPUSTATS;
 
	return 0;
@@ -172,6 +173,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
	struct net_device *dev = qdisc_dev(sch);
	int err;
 
+	net_inc_ingress_queue();
+	net_inc_egress_queue();
+
	mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
 
	q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -190,18 +194,11 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 
	err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
	if (err)
-		goto err_egress_block_get;
-
-	net_inc_ingress_queue();
-	net_inc_egress_queue();
+		return err;
 
	sch->flags |= TCQ_F_CPUSTATS;
 
	return 0;
-
-err_egress_block_get:
-	tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
-	return err;
 }
 
 static void clsact_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7f8ea9e297c3..f0747eb87dc4 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
		.handle = sch->handle,
		.parent = sch->parent,
	};
+	int err;
 
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;
@@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable)
		opt.command = TC_RED_DESTROY;
	}
 
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+
+	if (!err && enable)
+		sch->flags |= TCQ_F_OFFLOADED;
+	else
+		sch->flags &= ~TCQ_F_OFFLOADED;
+
+	return err;
 }
 
 static void red_destroy(struct Qdisc *sch)
@@ -212,6 +220,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
 
	ctl = nla_data(tb[TCA_RED_PARMS]);
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+		return -EINVAL;
 
	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
@@ -272,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
	return red_change(sch, opt);
 }
 
-static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
 {
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
@@ -284,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
			.stats.qstats = &sch->qstats,
		},
	};
-	int err;
 
-	opt->flags &= ~TC_RED_OFFLOADED;
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return 0;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
-					    &hw_stats);
-	if (err == -EOPNOTSUPP)
+	if (!(sch->flags & TCQ_F_OFFLOADED))
		return 0;
 
-	if (!err)
-		opt->flags |= TC_RED_OFFLOADED;
-
-	return err;
+	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+					     &hw_stats);
 }
 
 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -317,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
	int err;
 
	sch->qstats.backlog = q->qdisc->qstats.backlog;
-	err = red_dump_offload(sch, &opt);
+	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;
 
@@ -345,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};
 
-	if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
+	if (sch->flags & TCQ_F_OFFLOADED) {
		struct red_stats hw_stats = {0};
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
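
Instead of re-probing ndo_setup_tc at dump time, the qdisc now records whether the offload actually took in a flag (TCQ_F_OFFLOADED) when the enable call succeeds, and the stats/dump paths just test the flag. A compact C sketch of that record-then-test pattern; the struct, flag value, and setup callback are illustrative stand-ins:

    #include <stdbool.h>

    #define TCQ_F_OFFLOADED 0x200   /* illustrative flag bit */

    struct qdisc { unsigned int flags; };

    static int qdisc_offload(struct qdisc *sch, bool enable,
                             int (*setup)(bool enable))
    {
            int err = setup(enable);

            /* Remember the outcome: set only on successful enable. */
            if (!err && enable)
                    sch->flags |= TCQ_F_OFFLOADED;
            else
                    sch->flags &= ~TCQ_F_OFFLOADED;
            return err;
    }

    static bool qdisc_offload_active(const struct qdisc *sch)
    {
            return sch->flags & TCQ_F_OFFLOADED;    /* no driver round trip */
    }

This is also what makes the new TCA_HW_OFFLOAD attribute in tc_fill_qdisc() cheap: userspace gets the cached state rather than triggering another driver call per dump.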
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 890f4a4564e7..930e5bd26d3d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -639,6 +639,9 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
	if (ctl->divisor &&
	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
		return -EINVAL;
+	if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
+					ctl_v1->Wlog))
+		return -EINVAL;
	if (ctl_v1 && ctl_v1->qth_min) {
		p = kmalloc(sizeof(*p), GFP_KERNEL);
		if (!p)
@@ -724,6 +727,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
	int i;
	int err;
 
+	q->sch = sch;
	timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
 
	err = tcf_block_get(&q->block, &q->filter_list, sch);
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7b261afc47b9..7f8baa48e7c2 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
	msg->send_failed = 0;
	msg->send_error = 0;
	msg->can_delay = 1;
+	msg->abandoned = 0;
	msg->expires_at = 0;
	INIT_LIST_HEAD(&msg->chunks);
 }
@@ -304,6 +305,13 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
	if (!chunk->asoc->peer.prsctp_capable)
		return 0;
 
+	if (chunk->msg->abandoned)
+		return 1;
+
+	if (!chunk->has_tsn &&
+	    !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG))
+		return 0;
+
	if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
	    time_after(jiffies, chunk->msg->expires_at)) {
		struct sctp_stream_out *streamout =
@@ -316,6 +324,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
			chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
			streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
		}
+		chunk->msg->abandoned = 1;
		return 1;
	} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
		   chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
@@ -324,10 +333,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 
		chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
		streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+		chunk->msg->abandoned = 1;
		return 1;
	} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
		   chunk->msg->expires_at &&
		   time_after(jiffies, chunk->msg->expires_at)) {
+		chunk->msg->abandoned = 1;
		return 1;
	}
	/* PRIO policy is processed by sendmsg, not here */
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4db012aa25f7..7d67feeeffc1 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
	list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
		struct sctp_stream_out *streamout;
 
-		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+		if (!chk->msg->abandoned &&
+		    (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		     chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
			continue;
 
+		chk->msg->abandoned = 1;
		list_del_init(&chk->transmitted_list);
		sctp_insert_list(&asoc->outqueue.abandoned,
				 &chk->transmitted_list);
@@ -377,7 +379,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
		asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
		streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 
-		if (!chk->tsn_gap_acked) {
+		if (queue != &asoc->outqueue.retransmit &&
+		    !chk->tsn_gap_acked) {
			if (chk->transport)
				chk->transport->flight_size -=
						sctp_data_size(chk);
@@ -403,10 +406,13 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
	q->sched->unsched_all(&asoc->stream);
 
	list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
-		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+		if (!chk->msg->abandoned &&
+		    (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) ||
+		     !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		     chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
			continue;
 
+		chk->msg->abandoned = 1;
		sctp_sched_dequeue_common(q, chk);
		asoc->sent_cnt_removable--;
		asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
@@ -1434,7 +1440,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
			/* If this chunk has not been acked, stop
			 * considering it as 'outstanding'.
			 */
-			if (!tchunk->tsn_gap_acked) {
+			if (transmitted_queue != &q->retransmit &&
+			    !tchunk->tsn_gap_acked) {
				if (tchunk->transport)
					tchunk->transport->flight_size -=
						sctp_data_size(tchunk);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f5172c21349b..6a38c2503649 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1499,6 +1499,7 @@ static __init int sctp_init(void)
	INIT_LIST_HEAD(&sctp_address_families);
	sctp_v4_pf_init();
	sctp_v6_pf_init();
+	sctp_sched_ops_init();
 
	status = register_pernet_subsys(&sctp_defaults_ops);
	if (status)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3204a9b29407..3253f724a995 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -188,13 +188,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
188 list_for_each_entry(chunk, &t->transmitted, transmitted_list) 188 list_for_each_entry(chunk, &t->transmitted, transmitted_list)
189 cb(chunk); 189 cb(chunk);
190 190
191 list_for_each_entry(chunk, &q->retransmit, list) 191 list_for_each_entry(chunk, &q->retransmit, transmitted_list)
192 cb(chunk); 192 cb(chunk);
193 193
194 list_for_each_entry(chunk, &q->sacked, list) 194 list_for_each_entry(chunk, &q->sacked, transmitted_list)
195 cb(chunk); 195 cb(chunk);
196 196
197 list_for_each_entry(chunk, &q->abandoned, list) 197 list_for_each_entry(chunk, &q->abandoned, transmitted_list)
198 cb(chunk); 198 cb(chunk);
199 199
200 list_for_each_entry(chunk, &q->out_chunk_list, list) 200 list_for_each_entry(chunk, &q->out_chunk_list, list)
@@ -3891,13 +3891,17 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
3891 struct sctp_association *asoc; 3891 struct sctp_association *asoc;
3892 int retval = -EINVAL; 3892 int retval = -EINVAL;
3893 3893
3894 if (optlen < sizeof(struct sctp_reset_streams)) 3894 if (optlen < sizeof(*params))
3895 return -EINVAL; 3895 return -EINVAL;
3896 3896
3897 params = memdup_user(optval, optlen); 3897 params = memdup_user(optval, optlen);
3898 if (IS_ERR(params)) 3898 if (IS_ERR(params))
3899 return PTR_ERR(params); 3899 return PTR_ERR(params);
3900 3900
3901 if (params->srs_number_streams * sizeof(__u16) >
3902 optlen - sizeof(*params))
3903 goto out;
3904
3901 asoc = sctp_id2assoc(sk, params->srs_assoc_id); 3905 asoc = sctp_id2assoc(sk, params->srs_assoc_id);
3902 if (!asoc) 3906 if (!asoc)
3903 goto out; 3907 goto out;
@@ -5080,7 +5084,6 @@ static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *p
5080 *newfile = sock_alloc_file(newsock, 0, NULL); 5084 *newfile = sock_alloc_file(newsock, 0, NULL);
5081 if (IS_ERR(*newfile)) { 5085 if (IS_ERR(*newfile)) {
5082 put_unused_fd(retval); 5086 put_unused_fd(retval);
5083 sock_release(newsock);
5084 retval = PTR_ERR(*newfile); 5087 retval = PTR_ERR(*newfile);
5085 *newfile = NULL; 5088 *newfile = NULL;
5086 return retval; 5089 return retval;
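Besides switching the sacked/abandoned/retransmit walks in sctp_for_each_tx_datachunk() to the list member those queues are actually linked on (transmitted_list), the setsockopt hunk adds the missing sanity check on a variable-length option: the user-supplied stream count has to fit inside the bytes that were really copied in, or the later walk over srs_stream_list[] reads past the allocation. A stand-alone sketch of that check, with a simplified stand-in for struct sctp_reset_streams:

/* build: cc -o optcheck optcheck.c */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct reset_streams {               /* simplified stand-in */
	uint16_t srs_number_streams;
	uint16_t srs_stream_list[];  /* srs_number_streams entries */
};

static int validate(const struct reset_streams *p, size_t optlen)
{
	if (optlen < sizeof(*p))
		return -1;
	/* the check from the patch: count * element size must not
	 * exceed the bytes remaining after the fixed header */
	if ((size_t)p->srs_number_streams * sizeof(uint16_t) >
	    optlen - sizeof(*p))
		return -1;
	return 0;
}

int main(void)
{
	size_t optlen = sizeof(struct reset_streams) + 2 * sizeof(uint16_t);
	struct reset_streams *p = calloc(1, optlen);

	p->srs_number_streams = 2;
	printf("honest count: %d\n", validate(p, optlen));
	p->srs_number_streams = 1000;    /* claims more than was copied */
	printf("lying count:  %d\n", validate(p, optlen));
	free(p);
	return 0;
}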
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index a11db21dc8a0..76ea66be0bbe 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -64,7 +64,7 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
64 */ 64 */
65 65
66 /* Mark as failed send. */ 66 /* Mark as failed send. */
67 sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); 67 sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM);
68 if (asoc->peer.prsctp_capable && 68 if (asoc->peer.prsctp_capable &&
69 SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) 69 SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
70 asoc->sent_cnt_removable--; 70 asoc->sent_cnt_removable--;
@@ -254,6 +254,30 @@ static int sctp_send_reconf(struct sctp_association *asoc,
254 return retval; 254 return retval;
255} 255}
256 256
257static bool sctp_stream_outq_is_empty(struct sctp_stream *stream,
258 __u16 str_nums, __be16 *str_list)
259{
260 struct sctp_association *asoc;
261 __u16 i;
262
263 asoc = container_of(stream, struct sctp_association, stream);
264 if (!asoc->outqueue.out_qlen)
265 return true;
266
267 if (!str_nums)
268 return false;
269
270 for (i = 0; i < str_nums; i++) {
271 __u16 sid = ntohs(str_list[i]);
272
273 if (stream->out[sid].ext &&
274 !list_empty(&stream->out[sid].ext->outq))
275 return false;
276 }
277
278 return true;
279}
280
257int sctp_send_reset_streams(struct sctp_association *asoc, 281int sctp_send_reset_streams(struct sctp_association *asoc,
258 struct sctp_reset_streams *params) 282 struct sctp_reset_streams *params)
259{ 283{
@@ -317,6 +341,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
317 for (i = 0; i < str_nums; i++) 341 for (i = 0; i < str_nums; i++)
318 nstr_list[i] = htons(str_list[i]); 342 nstr_list[i] = htons(str_list[i]);
319 343
344 if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) {
345 retval = -EAGAIN;
346 goto out;
347 }
348
320 chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); 349 chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in);
321 350
322 kfree(nstr_list); 351 kfree(nstr_list);
@@ -377,6 +406,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
377 if (asoc->strreset_outstanding) 406 if (asoc->strreset_outstanding)
378 return -EINPROGRESS; 407 return -EINPROGRESS;
379 408
409 if (!sctp_outq_is_empty(&asoc->outqueue))
410 return -EAGAIN;
411
380 chunk = sctp_make_strreset_tsnreq(asoc); 412 chunk = sctp_make_strreset_tsnreq(asoc);
381 if (!chunk) 413 if (!chunk)
382 return -ENOMEM; 414 return -ENOMEM;
@@ -563,7 +595,7 @@ struct sctp_chunk *sctp_process_strreset_outreq(
563 flags = SCTP_STREAM_RESET_INCOMING_SSN; 595 flags = SCTP_STREAM_RESET_INCOMING_SSN;
564 } 596 }
565 597
566 nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2; 598 nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
567 if (nums) { 599 if (nums) {
568 str_p = outreq->list_of_streams; 600 str_p = outreq->list_of_streams;
569 for (i = 0; i < nums; i++) { 601 for (i = 0; i < nums; i++) {
@@ -627,7 +659,7 @@ struct sctp_chunk *sctp_process_strreset_inreq(
627 goto out; 659 goto out;
628 } 660 }
629 661
630 nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2; 662 nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16);
631 str_p = inreq->list_of_streams; 663 str_p = inreq->list_of_streams;
632 for (i = 0; i < nums; i++) { 664 for (i = 0; i < nums; i++) {
633 if (ntohs(str_p[i]) >= stream->outcnt) { 665 if (ntohs(str_p[i]) >= stream->outcnt) {
@@ -636,6 +668,12 @@ struct sctp_chunk *sctp_process_strreset_inreq(
636 } 668 }
637 } 669 }
638 670
671 if (!sctp_stream_outq_is_empty(stream, nums, str_p)) {
672 result = SCTP_STRRESET_IN_PROGRESS;
673 asoc->strreset_inseq--;
674 goto err;
675 }
676
639 chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); 677 chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0);
640 if (!chunk) 678 if (!chunk)
641 goto out; 679 goto out;
@@ -687,12 +725,18 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
687 i = asoc->strreset_inseq - request_seq - 1; 725 i = asoc->strreset_inseq - request_seq - 1;
688 result = asoc->strreset_result[i]; 726 result = asoc->strreset_result[i];
689 if (result == SCTP_STRRESET_PERFORMED) { 727 if (result == SCTP_STRRESET_PERFORMED) {
690 next_tsn = asoc->next_tsn; 728 next_tsn = asoc->ctsn_ack_point + 1;
691 init_tsn = 729 init_tsn =
692 sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; 730 sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1;
693 } 731 }
694 goto err; 732 goto err;
695 } 733 }
734
735 if (!sctp_outq_is_empty(&asoc->outqueue)) {
736 result = SCTP_STRRESET_IN_PROGRESS;
737 goto err;
738 }
739
696 asoc->strreset_inseq++; 740 asoc->strreset_inseq++;
697 741
698 if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) 742 if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
@@ -703,9 +747,10 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
703 goto out; 747 goto out;
704 } 748 }
705 749
706 /* G3: The same processing as though a SACK chunk with no gap report 750 /* G4: The same processing as though a FWD-TSN chunk (as defined in
707 * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were 751 * [RFC3758]) with all streams affected and a new cumulative TSN
708 * received MUST be performed. 752 * ACK of the Receiver's Next TSN minus 1 were received MUST be
753 * performed.
709 */ 754 */
710 max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); 755 max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
711 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); 756 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
@@ -720,10 +765,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
720 sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, 765 sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
721 init_tsn, GFP_ATOMIC); 766 init_tsn, GFP_ATOMIC);
722 767
723 /* G4: The same processing as though a FWD-TSN chunk (as defined in 768 /* G3: The same processing as though a SACK chunk with no gap report
724 * [RFC3758]) with all streams affected and a new cumulative TSN 769 * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
725 * ACK of the Receiver's Next TSN minus 1 were received MUST be 770 * received MUST be performed.
726 * performed.
727 */ 771 */
728 sctp_outq_free(&asoc->outqueue); 772 sctp_outq_free(&asoc->outqueue);
729 773
@@ -927,7 +971,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
927 971
928 outreq = (struct sctp_strreset_outreq *)req; 972 outreq = (struct sctp_strreset_outreq *)req;
929 str_p = outreq->list_of_streams; 973 str_p = outreq->list_of_streams;
930 nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2; 974 nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) /
975 sizeof(__u16);
931 976
932 if (result == SCTP_STRRESET_PERFORMED) { 977 if (result == SCTP_STRRESET_PERFORMED) {
933 if (nums) { 978 if (nums) {
@@ -956,7 +1001,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
956 1001
957 inreq = (struct sctp_strreset_inreq *)req; 1002 inreq = (struct sctp_strreset_inreq *)req;
958 str_p = inreq->list_of_streams; 1003 str_p = inreq->list_of_streams;
959 nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; 1004 nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) /
1005 sizeof(__u16);
960 1006
961 *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, 1007 *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
962 nums, str_p, GFP_ATOMIC); 1008 nums, str_p, GFP_ATOMIC);
@@ -975,6 +1021,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
975 if (result == SCTP_STRRESET_PERFORMED) { 1021 if (result == SCTP_STRRESET_PERFORMED) {
976 __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( 1022 __u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
977 &asoc->peer.tsn_map); 1023 &asoc->peer.tsn_map);
1024 LIST_HEAD(temp);
978 1025
979 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); 1026 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
980 sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); 1027 sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
@@ -983,7 +1030,13 @@ struct sctp_chunk *sctp_process_strreset_resp(
983 SCTP_TSN_MAP_INITIAL, 1030 SCTP_TSN_MAP_INITIAL,
984 stsn, GFP_ATOMIC); 1031 stsn, GFP_ATOMIC);
985 1032
1033 /* Clean up sacked and abandoned queues only. As the
1034 * out_chunk_list may not be empty, splice it to temp,
1035 * then get it back after sctp_outq_free is done.
1036 */
1037 list_splice_init(&asoc->outqueue.out_chunk_list, &temp);
986 sctp_outq_free(&asoc->outqueue); 1038 sctp_outq_free(&asoc->outqueue);
1039 list_splice_init(&temp, &asoc->outqueue.out_chunk_list);
987 1040
988 asoc->next_tsn = rtsn; 1041 asoc->next_tsn = rtsn;
989 asoc->ctsn_ack_point = asoc->next_tsn - 1; 1042 asoc->ctsn_ack_point = asoc->next_tsn - 1;
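The comment in the last hunk describes the trick worth remembering here: sctp_outq_free() tears down every queue, but after a TSN reset only the sacked and abandoned queues should go, so the still-unsent out_chunk_list is spliced onto a temporary head first and spliced back afterwards. The same move in plain, stand-alone C (a hand-rolled list instead of the kernel's list_head, destructive cleanup standing in for sctp_outq_free()):

/* build: cc -o splice splice.c */
#include <stdio.h>
#include <stdlib.h>

struct chunk { int tsn; struct chunk *next; };
struct outq  { struct chunk *out_chunk_list; };

static void outq_free(struct outq *q)    /* frees everything it finds */
{
	while (q->out_chunk_list) {
		struct chunk *c = q->out_chunk_list;
		q->out_chunk_list = c->next;
		free(c);
	}
}

int main(void)
{
	struct outq q = { NULL };

	for (int tsn = 3; tsn >= 1; tsn--) {     /* three unsent chunks */
		struct chunk *c = malloc(sizeof(*c));
		c->tsn = tsn;
		c->next = q.out_chunk_list;
		q.out_chunk_list = c;
	}

	struct chunk *temp = q.out_chunk_list;   /* splice to temp ... */
	q.out_chunk_list = NULL;

	outq_free(&q);                           /* destructive cleanup now
	                                          * sees an empty list */

	q.out_chunk_list = temp;                 /* ... and get it back */
	for (struct chunk *c = q.out_chunk_list; c; c = c->next)
		printf("kept chunk %d\n", c->tsn);

	outq_free(&q);
	return 0;
}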
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 0b83ec51e43b..d8c162a4089c 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -119,16 +119,27 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
119 .unsched_all = sctp_sched_fcfs_unsched_all, 119 .unsched_all = sctp_sched_fcfs_unsched_all,
120}; 120};
121 121
122static void sctp_sched_ops_fcfs_init(void)
123{
124 sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs);
125}
126
122/* API to other parts of the stack */ 127/* API to other parts of the stack */
123 128
124extern struct sctp_sched_ops sctp_sched_prio; 129static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1];
125extern struct sctp_sched_ops sctp_sched_rr;
126 130
127static struct sctp_sched_ops *sctp_sched_ops[] = { 131void sctp_sched_ops_register(enum sctp_sched_type sched,
128 &sctp_sched_fcfs, 132 struct sctp_sched_ops *sched_ops)
129 &sctp_sched_prio, 133{
130 &sctp_sched_rr, 134 sctp_sched_ops[sched] = sched_ops;
131}; 135}
136
137void sctp_sched_ops_init(void)
138{
139 sctp_sched_ops_fcfs_init();
140 sctp_sched_ops_prio_init();
141 sctp_sched_ops_rr_init();
142}
132 143
133int sctp_sched_set_sched(struct sctp_association *asoc, 144int sctp_sched_set_sched(struct sctp_association *asoc,
134 enum sctp_sched_type sched) 145 enum sctp_sched_type sched)
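Together with the sctp_sched_ops_init() call added to sctp_init() above, this replaces the extern-plus-static-initializer table with explicit registration: each scheduler keeps its ops struct static to its own file (see the stream_sched_prio.c and stream_sched_rr.c hunks below) and registers it into an array indexed by the enum, so the table can no longer drift out of step with declaration order. A condensed model of the pattern, with illustrative types:

/* build: cc -o schedops schedops.c */
#include <stdio.h>

enum sched_type { SS_FCFS, SS_PRIO, SS_RR, SS_MAX = SS_RR };

struct sched_ops { const char *name; };

static struct sched_ops *sched_ops[SS_MAX + 1];

static void sched_ops_register(enum sched_type t, struct sched_ops *ops)
{
	sched_ops[t] = ops;
}

/* in the kernel each of these lives in its own file, static there */
static struct sched_ops fcfs_ops = { "fcfs" };
static struct sched_ops prio_ops = { "prio" };
static struct sched_ops rr_ops   = { "rr" };

static void sched_ops_init(void)         /* called once at protocol init */
{
	sched_ops_register(SS_FCFS, &fcfs_ops);
	sched_ops_register(SS_PRIO, &prio_ops);
	sched_ops_register(SS_RR, &rr_ops);
}

int main(void)
{
	sched_ops_init();
	for (int t = SS_FCFS; t <= SS_MAX; t++)
		printf("scheduler %d -> %s\n", t, sched_ops[t]->name);
	return 0;
}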
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 384dbf3c8760..7997d35dd0fd 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -333,7 +333,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
333 sctp_sched_prio_unsched(soute); 333 sctp_sched_prio_unsched(soute);
334} 334}
335 335
336struct sctp_sched_ops sctp_sched_prio = { 336static struct sctp_sched_ops sctp_sched_prio = {
337 .set = sctp_sched_prio_set, 337 .set = sctp_sched_prio_set,
338 .get = sctp_sched_prio_get, 338 .get = sctp_sched_prio_get,
339 .init = sctp_sched_prio_init, 339 .init = sctp_sched_prio_init,
@@ -345,3 +345,8 @@ struct sctp_sched_ops sctp_sched_prio = {
345 .sched_all = sctp_sched_prio_sched_all, 345 .sched_all = sctp_sched_prio_sched_all,
346 .unsched_all = sctp_sched_prio_unsched_all, 346 .unsched_all = sctp_sched_prio_unsched_all,
347}; 347};
348
349void sctp_sched_ops_prio_init(void)
350{
351 sctp_sched_ops_register(SCTP_SS_PRIO, &sctp_sched_prio);
352}
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
index 7612a438c5b9..1155692448f1 100644
--- a/net/sctp/stream_sched_rr.c
+++ b/net/sctp/stream_sched_rr.c
@@ -187,7 +187,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
187 sctp_sched_rr_unsched(stream, soute); 187 sctp_sched_rr_unsched(stream, soute);
188} 188}
189 189
190struct sctp_sched_ops sctp_sched_rr = { 190static struct sctp_sched_ops sctp_sched_rr = {
191 .set = sctp_sched_rr_set, 191 .set = sctp_sched_rr_set,
192 .get = sctp_sched_rr_get, 192 .get = sctp_sched_rr_get,
193 .init = sctp_sched_rr_init, 193 .init = sctp_sched_rr_init,
@@ -199,3 +199,8 @@ struct sctp_sched_ops sctp_sched_rr = {
199 .sched_all = sctp_sched_rr_sched_all, 199 .sched_all = sctp_sched_rr_sched_all,
200 .unsched_all = sctp_sched_rr_unsched_all, 200 .unsched_all = sctp_sched_rr_unsched_all,
201}; 201};
202
203void sctp_sched_ops_rr_init(void)
204{
205 sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr);
206}
diff --git a/net/socket.c b/net/socket.c
index 42d8e9c9ccd5..05f361faec45 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -406,8 +406,10 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
406 name.len = strlen(name.name); 406 name.len = strlen(name.name);
407 } 407 }
408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); 408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
409 if (unlikely(!path.dentry)) 409 if (unlikely(!path.dentry)) {
410 sock_release(sock);
410 return ERR_PTR(-ENOMEM); 411 return ERR_PTR(-ENOMEM);
412 }
411 path.mnt = mntget(sock_mnt); 413 path.mnt = mntget(sock_mnt);
412 414
413 d_instantiate(path.dentry, SOCK_INODE(sock)); 415 d_instantiate(path.dentry, SOCK_INODE(sock));
@@ -415,9 +417,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
415 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, 417 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
416 &socket_file_ops); 418 &socket_file_ops);
417 if (IS_ERR(file)) { 419 if (IS_ERR(file)) {
418 /* drop dentry, keep inode */ 420 /* drop dentry, keep inode for a bit */
419 ihold(d_inode(path.dentry)); 421 ihold(d_inode(path.dentry));
420 path_put(&path); 422 path_put(&path);
423 /* ... and now kill it properly */
424 sock_release(sock);
421 return file; 425 return file;
422 } 426 }
423 427
@@ -1330,19 +1334,9 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1330 1334
1331 retval = sock_create(family, type, protocol, &sock); 1335 retval = sock_create(family, type, protocol, &sock);
1332 if (retval < 0) 1336 if (retval < 0)
1333 goto out; 1337 return retval;
1334
1335 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1336 if (retval < 0)
1337 goto out_release;
1338
1339out:
1340 /* It may be already another descriptor 8) Not kernel problem. */
1341 return retval;
1342 1338
1343out_release: 1339 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1344 sock_release(sock);
1345 return retval;
1346} 1340}
1347 1341
1348/* 1342/*
@@ -1366,87 +1360,72 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1366 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1360 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1367 1361
1368 /* 1362 /*
1363 * reserve descriptors and make sure we won't fail
1364 * to return them to userland.
1365 */
1366 fd1 = get_unused_fd_flags(flags);
1367 if (unlikely(fd1 < 0))
1368 return fd1;
1369
1370 fd2 = get_unused_fd_flags(flags);
1371 if (unlikely(fd2 < 0)) {
1372 put_unused_fd(fd1);
1373 return fd2;
1374 }
1375
1376 err = put_user(fd1, &usockvec[0]);
1377 if (err)
1378 goto out;
1379
1380 err = put_user(fd2, &usockvec[1]);
1381 if (err)
1382 goto out;
1383
1384 /*
1369 * Obtain the first socket and check if the underlying protocol 1385 * Obtain the first socket and check if the underlying protocol
1370 * supports the socketpair call. 1386 * supports the socketpair call.
1371 */ 1387 */
1372 1388
1373 err = sock_create(family, type, protocol, &sock1); 1389 err = sock_create(family, type, protocol, &sock1);
1374 if (err < 0) 1390 if (unlikely(err < 0))
1375 goto out; 1391 goto out;
1376 1392
1377 err = sock_create(family, type, protocol, &sock2); 1393 err = sock_create(family, type, protocol, &sock2);
1378 if (err < 0) 1394 if (unlikely(err < 0)) {
1379 goto out_release_1; 1395 sock_release(sock1);
1380 1396 goto out;
1381 err = sock1->ops->socketpair(sock1, sock2);
1382 if (err < 0)
1383 goto out_release_both;
1384
1385 fd1 = get_unused_fd_flags(flags);
1386 if (unlikely(fd1 < 0)) {
1387 err = fd1;
1388 goto out_release_both;
1389 } 1397 }
1390 1398
1391 fd2 = get_unused_fd_flags(flags); 1399 err = sock1->ops->socketpair(sock1, sock2);
1392 if (unlikely(fd2 < 0)) { 1400 if (unlikely(err < 0)) {
1393 err = fd2; 1401 sock_release(sock2);
1394 goto out_put_unused_1; 1402 sock_release(sock1);
1403 goto out;
1395 } 1404 }
1396 1405
1397 newfile1 = sock_alloc_file(sock1, flags, NULL); 1406 newfile1 = sock_alloc_file(sock1, flags, NULL);
1398 if (IS_ERR(newfile1)) { 1407 if (IS_ERR(newfile1)) {
1399 err = PTR_ERR(newfile1); 1408 err = PTR_ERR(newfile1);
1400 goto out_put_unused_both; 1409 sock_release(sock2);
1410 goto out;
1401 } 1411 }
1402 1412
1403 newfile2 = sock_alloc_file(sock2, flags, NULL); 1413 newfile2 = sock_alloc_file(sock2, flags, NULL);
1404 if (IS_ERR(newfile2)) { 1414 if (IS_ERR(newfile2)) {
1405 err = PTR_ERR(newfile2); 1415 err = PTR_ERR(newfile2);
1406 goto out_fput_1; 1416 fput(newfile1);
1417 goto out;
1407 } 1418 }
1408 1419
1409 err = put_user(fd1, &usockvec[0]);
1410 if (err)
1411 goto out_fput_both;
1412
1413 err = put_user(fd2, &usockvec[1]);
1414 if (err)
1415 goto out_fput_both;
1416
1417 audit_fd_pair(fd1, fd2); 1420 audit_fd_pair(fd1, fd2);
1418 1421
1419 fd_install(fd1, newfile1); 1422 fd_install(fd1, newfile1);
1420 fd_install(fd2, newfile2); 1423 fd_install(fd2, newfile2);
1421 /* fd1 and fd2 may be already another descriptors.
1422 * Not kernel problem.
1423 */
1424
1425 return 0; 1424 return 0;
1426 1425
1427out_fput_both: 1426out:
1428 fput(newfile2);
1429 fput(newfile1);
1430 put_unused_fd(fd2);
1431 put_unused_fd(fd1);
1432 goto out;
1433
1434out_fput_1:
1435 fput(newfile1);
1436 put_unused_fd(fd2);
1437 put_unused_fd(fd1);
1438 sock_release(sock2);
1439 goto out;
1440
1441out_put_unused_both:
1442 put_unused_fd(fd2); 1427 put_unused_fd(fd2);
1443out_put_unused_1:
1444 put_unused_fd(fd1); 1428 put_unused_fd(fd1);
1445out_release_both:
1446 sock_release(sock2);
1447out_release_1:
1448 sock_release(sock1);
1449out:
1450 return err; 1429 return err;
1451} 1430}
1452 1431
@@ -1562,7 +1541,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1562 if (IS_ERR(newfile)) { 1541 if (IS_ERR(newfile)) {
1563 err = PTR_ERR(newfile); 1542 err = PTR_ERR(newfile);
1564 put_unused_fd(newfd); 1543 put_unused_fd(newfd);
1565 sock_release(newsock);
1566 goto out_put; 1544 goto out_put;
1567 } 1545 }
1568 1546
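The common thread in the socket.c changes: sock_alloc_file() now consumes the socket on every failure, so its callers (accept4() here, the sctp peeloff path above) drop their own sock_release() calls instead of double-releasing, and socketpair() reserves both descriptors and copies them out to userspace before creating anything, leaving each later failure with exactly one obvious undo instead of the old ladder of goto labels. A tiny stand-alone model of the ownership rule, with illustrative stand-ins:

/* build: cc -o consume consume.c */
#include <stdio.h>
#include <stdlib.h>

struct sock { int id; };
struct file { struct sock *sk; };

/* on success the file owns the sock; on failure the sock is released
 * right here, mirroring the sock_release() moved into sock_alloc_file() */
static struct file *alloc_file(struct sock *sk, int fail)
{
	if (fail) {
		free(sk);
		return NULL;
	}
	struct file *f = malloc(sizeof(*f));
	f->sk = sk;
	return f;
}

int main(void)
{
	struct sock *sk = malloc(sizeof(*sk));
	struct file *f = alloc_file(sk, 1);

	if (!f) {
		/* before the patch a caller would release sk here too,
		 * which is now a double free waiting to happen */
		fprintf(stderr, "alloc_file failed; sock already gone\n");
		return 1;
	}
	free(f->sk);
	free(f);
	return 0;
}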
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index c4778cae58ef..444380f968f1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
231 goto out_free_groups; 231 goto out_free_groups;
232 creds->cr_group_info->gid[i] = kgid; 232 creds->cr_group_info->gid[i] = kgid;
233 } 233 }
234 groups_sort(creds->cr_group_info);
234 235
235 return 0; 236 return 0;
236out_free_groups: 237out_free_groups:
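This groups_sort() call (and the matching ones in svcauth_gss.c and svcauth_unix.c below) exists because kernel group-membership checks binary-search the gid array, so whoever fills a group_info must sort it before use. The same contract in userspace miniature, with qsort()/bsearch() standing in for groups_sort()/groups_search():

/* build: cc -o gids gids.c */
#include <stdio.h>
#include <stdlib.h>

static int gid_cmp(const void *a, const void *b)
{
	unsigned ga = *(const unsigned *)a, gb = *(const unsigned *)b;
	return (ga > gb) - (ga < gb);
}

static int in_group(unsigned gid, const unsigned *gids, size_t n)
{
	return bsearch(&gid, gids, n, sizeof(*gids), gid_cmp) != NULL;
}

int main(void)
{
	unsigned gids[] = { 1000, 4, 27, 100 };  /* as parsed: unsorted */
	size_t n = sizeof(gids) / sizeof(*gids);

	/* skipping this sort is the bug the patch fixes: bsearch() on an
	 * unsorted array can miss entries that are really there */
	qsort(gids, n, sizeof(*gids), gid_cmp);

	printf("member of 27: %d\n", in_group(27, gids, n));
	printf("member of 42: %d\n", in_group(42, gids, n));
	return 0;
}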
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 73165e9ca5bf..26531193fce4 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -264,7 +264,7 @@ out:
264 return status; 264 return status;
265} 265}
266 266
267static struct cache_detail rsi_cache_template = { 267static const struct cache_detail rsi_cache_template = {
268 .owner = THIS_MODULE, 268 .owner = THIS_MODULE,
269 .hash_size = RSI_HASHMAX, 269 .hash_size = RSI_HASHMAX,
270 .name = "auth.rpcsec.init", 270 .name = "auth.rpcsec.init",
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
481 goto out; 481 goto out;
482 rsci.cred.cr_group_info->gid[i] = kgid; 482 rsci.cred.cr_group_info->gid[i] = kgid;
483 } 483 }
484 groups_sort(rsci.cred.cr_group_info);
484 485
485 /* mech name */ 486 /* mech name */
486 len = qword_get(&mesg, buf, mlen); 487 len = qword_get(&mesg, buf, mlen);
@@ -524,7 +525,7 @@ out:
524 return status; 525 return status;
525} 526}
526 527
527static struct cache_detail rsc_cache_template = { 528static const struct cache_detail rsc_cache_template = {
528 .owner = THIS_MODULE, 529 .owner = THIS_MODULE,
529 .hash_size = RSC_HASHMAX, 530 .hash_size = RSC_HASHMAX,
530 .name = "auth.rpcsec.context", 531 .name = "auth.rpcsec.context",
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 79d55d949d9a..e68943895be4 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1674,7 +1674,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)
1674} 1674}
1675EXPORT_SYMBOL_GPL(cache_unregister_net); 1675EXPORT_SYMBOL_GPL(cache_unregister_net);
1676 1676
1677struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) 1677struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net)
1678{ 1678{
1679 struct cache_detail *cd; 1679 struct cache_detail *cd;
1680 int i; 1680 int i;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a801da812f86..e2a4184f3c5d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1841,6 +1841,7 @@ call_bind_status(struct rpc_task *task)
1841 case -ECONNABORTED: 1841 case -ECONNABORTED:
1842 case -ENOTCONN: 1842 case -ENOTCONN:
1843 case -EHOSTDOWN: 1843 case -EHOSTDOWN:
1844 case -ENETDOWN:
1844 case -EHOSTUNREACH: 1845 case -EHOSTUNREACH:
1845 case -ENETUNREACH: 1846 case -ENETUNREACH:
1846 case -ENOBUFS: 1847 case -ENOBUFS:
@@ -1917,6 +1918,7 @@ call_connect_status(struct rpc_task *task)
1917 /* fall through */ 1918 /* fall through */
1918 case -ECONNRESET: 1919 case -ECONNRESET:
1919 case -ECONNABORTED: 1920 case -ECONNABORTED:
1921 case -ENETDOWN:
1920 case -ENETUNREACH: 1922 case -ENETUNREACH:
1921 case -EHOSTUNREACH: 1923 case -EHOSTUNREACH:
1922 case -EADDRINUSE: 1924 case -EADDRINUSE:
@@ -2022,6 +2024,7 @@ call_transmit_status(struct rpc_task *task)
2022 */ 2024 */
2023 case -ECONNREFUSED: 2025 case -ECONNREFUSED:
2024 case -EHOSTDOWN: 2026 case -EHOSTDOWN:
2027 case -ENETDOWN:
2025 case -EHOSTUNREACH: 2028 case -EHOSTUNREACH:
2026 case -ENETUNREACH: 2029 case -ENETUNREACH:
2027 case -EPERM: 2030 case -EPERM:
@@ -2071,6 +2074,7 @@ call_bc_transmit(struct rpc_task *task)
2071 switch (task->tk_status) { 2074 switch (task->tk_status) {
2072 case 0: 2075 case 0:
2073 /* Success */ 2076 /* Success */
2077 case -ENETDOWN:
2074 case -EHOSTDOWN: 2078 case -EHOSTDOWN:
2075 case -EHOSTUNREACH: 2079 case -EHOSTUNREACH:
2076 case -ENETUNREACH: 2080 case -ENETUNREACH:
@@ -2139,6 +2143,7 @@ call_status(struct rpc_task *task)
2139 task->tk_status = 0; 2143 task->tk_status = 0;
2140 switch(status) { 2144 switch(status) {
2141 case -EHOSTDOWN: 2145 case -EHOSTDOWN:
2146 case -ENETDOWN:
2142 case -EHOSTUNREACH: 2147 case -EHOSTUNREACH:
2143 case -ENETUNREACH: 2148 case -ENETUNREACH:
2144 case -EPERM: 2149 case -EPERM:
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index f81eaa8e0888..af7f28fb8102 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
520 ug.gi->gid[i] = kgid; 520 ug.gi->gid[i] = kgid;
521 } 521 }
522 522
523 groups_sort(ug.gi);
523 ugp = unix_gid_lookup(cd, uid); 524 ugp = unix_gid_lookup(cd, uid);
524 if (ugp) { 525 if (ugp) {
525 struct cache_head *ch; 526 struct cache_head *ch;
@@ -569,7 +570,7 @@ static int unix_gid_show(struct seq_file *m,
569 return 0; 570 return 0;
570} 571}
571 572
572static struct cache_detail unix_gid_cache_template = { 573static const struct cache_detail unix_gid_cache_template = {
573 .owner = THIS_MODULE, 574 .owner = THIS_MODULE,
574 .hash_size = GID_HASHMAX, 575 .hash_size = GID_HASHMAX,
575 .name = "auth.unix.gid", 576 .name = "auth.unix.gid",
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
819 kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); 820 kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
820 cred->cr_group_info->gid[i] = kgid; 821 cred->cr_group_info->gid[i] = kgid;
821 } 822 }
823 groups_sort(cred->cr_group_info);
822 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { 824 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
823 *authp = rpc_autherr_badverf; 825 *authp = rpc_autherr_badverf;
824 return SVC_DENIED; 826 return SVC_DENIED;
@@ -862,7 +864,7 @@ struct auth_ops svcauth_unix = {
862 .set_client = svcauth_unix_set_client, 864 .set_client = svcauth_unix_set_client,
863}; 865};
864 866
865static struct cache_detail ip_map_cache_template = { 867static const struct cache_detail ip_map_cache_template = {
866 .owner = THIS_MODULE, 868 .owner = THIS_MODULE,
867 .hash_size = IP_HASHMAX, 869 .hash_size = IP_HASHMAX,
868 .name = "auth.unix.ip", 870 .name = "auth.unix.ip",
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 333b9d697ae5..33b74fd84051 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task)
1001{ 1001{
1002 struct rpc_rqst *req = task->tk_rqstp; 1002 struct rpc_rqst *req = task->tk_rqstp;
1003 struct rpc_xprt *xprt = req->rq_xprt; 1003 struct rpc_xprt *xprt = req->rq_xprt;
1004 unsigned int connect_cookie;
1004 int status, numreqs; 1005 int status, numreqs;
1005 1006
1006 dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 1007 dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task)
1024 } else if (!req->rq_bytes_sent) 1025 } else if (!req->rq_bytes_sent)
1025 return; 1026 return;
1026 1027
1028 connect_cookie = xprt->connect_cookie;
1027 req->rq_xtime = ktime_get(); 1029 req->rq_xtime = ktime_get();
1028 status = xprt->ops->send_request(task); 1030 status = xprt->ops->send_request(task);
1029 trace_xprt_transmit(xprt, req->rq_xid, status); 1031 trace_xprt_transmit(xprt, req->rq_xid, status);
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task)
1047 xprt->stat.bklog_u += xprt->backlog.qlen; 1049 xprt->stat.bklog_u += xprt->backlog.qlen;
1048 xprt->stat.sending_u += xprt->sending.qlen; 1050 xprt->stat.sending_u += xprt->sending.qlen;
1049 xprt->stat.pending_u += xprt->pending.qlen; 1051 xprt->stat.pending_u += xprt->pending.qlen;
1052 spin_unlock_bh(&xprt->transport_lock);
1050 1053
1051 /* Don't race with disconnect */ 1054 req->rq_connect_cookie = connect_cookie;
1052 if (!xprt_connected(xprt)) 1055 if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
1053 task->tk_status = -ENOTCONN;
1054 else {
1055 /* 1056 /*
1056 * Sleep on the pending queue since 1057 * Sleep on the pending queue if we're expecting a reply.
1057 * we're expecting a reply. 1058 * The spinlock ensures atomicity between the test of
1059 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
1058 */ 1060 */
1059 if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) 1061 spin_lock(&xprt->recv_lock);
1062 if (!req->rq_reply_bytes_recvd) {
1060 rpc_sleep_on(&xprt->pending, task, xprt_timer); 1063 rpc_sleep_on(&xprt->pending, task, xprt_timer);
1061 req->rq_connect_cookie = xprt->connect_cookie; 1064 /*
1065 * Send an extra queue wakeup call if the
1066 * connection was dropped in case the call to
1067 * rpc_sleep_on() raced.
1068 */
1069 if (!xprt_connected(xprt))
1070 xprt_wake_pending_tasks(xprt, -ENOTCONN);
1071 }
1072 spin_unlock(&xprt->recv_lock);
1062 } 1073 }
1063 spin_unlock_bh(&xprt->transport_lock);
1064} 1074}
1065 1075
1066static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) 1076static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
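The xprt_transmit() rework is a classic lost-wakeup repair: the connect cookie is sampled before the send, the task is queued on the pending list under recv_lock with the "did a reply already land?" test repeated inside that lock, and an extra wakeup is issued if the transport dropped in the meantime. The underlying rule, shown with a condition variable in stand-alone C (the names here are illustrative, not the kernel's):

/* build: cc -pthread -o wakeup wakeup.c */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int reply_received;

static void *receiver(void *arg)
{
	pthread_mutex_lock(&lock);       /* same lock the sleeper takes */
	reply_received = 1;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, receiver, NULL);

	pthread_mutex_lock(&lock);
	/* the test is repeated under the lock; testing it outside and
	 * then sleeping is exactly the race the recv_lock dance closes */
	while (!reply_received)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	printf("reply observed, no lost wakeup\n");
	return 0;
}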
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ed34dc0f144c..a3f2ab283aeb 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1408,11 +1408,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", 1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
1409 __func__, rep, req, be32_to_cpu(rep->rr_xid)); 1409 __func__, rep, req, be32_to_cpu(rep->rr_xid));
1410 1410
1411 if (list_empty(&req->rl_registered) && 1411 queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
1412 !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
1413 rpcrdma_complete_rqst(rep);
1414 else
1415 queue_work(rpcrdma_receive_wq, &rep->rr_work);
1416 return; 1412 return;
1417 1413
1418out_badstatus: 1414out_badstatus:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 646c24494ea7..6ee1ad8978f3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,6 +52,7 @@
52#include <linux/slab.h> 52#include <linux/slab.h>
53#include <linux/seq_file.h> 53#include <linux/seq_file.h>
54#include <linux/sunrpc/addr.h> 54#include <linux/sunrpc/addr.h>
55#include <linux/smp.h>
55 56
56#include "xprt_rdma.h" 57#include "xprt_rdma.h"
57 58
@@ -656,6 +657,7 @@ xprt_rdma_allocate(struct rpc_task *task)
656 task->tk_pid, __func__, rqst->rq_callsize, 657 task->tk_pid, __func__, rqst->rq_callsize,
657 rqst->rq_rcvsize, req); 658 rqst->rq_rcvsize, req);
658 659
660 req->rl_cpu = smp_processor_id();
659 req->rl_connect_cookie = 0; /* our reserved value */ 661 req->rl_connect_cookie = 0; /* our reserved value */
660 rpcrdma_set_xprtdata(rqst, req); 662 rpcrdma_set_xprtdata(rqst, req);
661 rqst->rq_buffer = req->rl_sendbuf->rg_base; 663 rqst->rq_buffer = req->rl_sendbuf->rg_base;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 710b3f77db82..8607c029c0dd 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void)
83 struct workqueue_struct *recv_wq; 83 struct workqueue_struct *recv_wq;
84 84
85 recv_wq = alloc_workqueue("xprtrdma_receive", 85 recv_wq = alloc_workqueue("xprtrdma_receive",
86 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI, 86 WQ_MEM_RECLAIM | WQ_HIGHPRI,
87 0); 87 0);
88 if (!recv_wq) 88 if (!recv_wq)
89 return -ENOMEM; 89 return -ENOMEM;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 51686d9eac5f..1342f743f1c4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -342,6 +342,7 @@ enum {
342struct rpcrdma_buffer; 342struct rpcrdma_buffer;
343struct rpcrdma_req { 343struct rpcrdma_req {
344 struct list_head rl_list; 344 struct list_head rl_list;
345 int rl_cpu;
345 unsigned int rl_connect_cookie; 346 unsigned int rl_connect_cookie;
346 struct rpcrdma_buffer *rl_buffer; 347 struct rpcrdma_buffer *rl_buffer;
347 struct rpcrdma_rep *rl_reply; 348 struct rpcrdma_rep *rl_reply;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9cc850c2719e..6d0cc3b8f932 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2440,7 +2440,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2440 */ 2440 */
2441 case -ECONNREFUSED: 2441 case -ECONNREFUSED:
2442 case -ECONNRESET: 2442 case -ECONNRESET:
2443 case -ENETDOWN:
2443 case -ENETUNREACH: 2444 case -ENETUNREACH:
2445 case -EHOSTUNREACH:
2444 case -EADDRINUSE: 2446 case -EADDRINUSE:
2445 case -ENOBUFS: 2447 case -ENOBUFS:
2446 /* 2448 /*
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 12777cac638a..95fec2c057d6 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -497,6 +497,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
497 while ((skb = skb_peek(defq))) { 497 while ((skb = skb_peek(defq))) {
498 hdr = buf_msg(skb); 498 hdr = buf_msg(skb);
499 mtyp = msg_type(hdr); 499 mtyp = msg_type(hdr);
500 blks = msg_blocks(hdr);
500 deliver = true; 501 deliver = true;
501 ack = false; 502 ack = false;
502 update = false; 503 update = false;
@@ -546,7 +547,6 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
546 if (!update) 547 if (!update)
547 continue; 548 continue;
548 549
549 blks = msg_blocks(hdr);
550 tipc_group_update_rcv_win(grp, blks, node, port, xmitq); 550 tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
551 } 551 }
552 return; 552 return;
diff --git a/net/tipc/server.c b/net/tipc/server.c
index acaef80fb88c..d60c30342327 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -314,6 +314,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
314 newcon->usr_data = s->tipc_conn_new(newcon->conid); 314 newcon->usr_data = s->tipc_conn_new(newcon->conid);
315 if (!newcon->usr_data) { 315 if (!newcon->usr_data) {
316 sock_release(newsock); 316 sock_release(newsock);
317 conn_put(newcon);
317 return -ENOMEM; 318 return -ENOMEM;
318 } 319 }
319 320
@@ -511,7 +512,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
511 s = con->server; 512 s = con->server;
512 scbr = s->tipc_conn_new(*conid); 513 scbr = s->tipc_conn_new(*conid);
513 if (!scbr) { 514 if (!scbr) {
514 tipc_close_conn(con); 515 conn_put(con);
515 return false; 516 return false;
516 } 517 }
517 518
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d18c0caa92b..41127d0b925e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
1140 __skb_dequeue(arrvq); 1140 __skb_dequeue(arrvq);
1141 __skb_queue_tail(inputq, skb); 1141 __skb_queue_tail(inputq, skb);
1142 } 1142 }
1143 refcount_dec(&skb->users); 1143 kfree_skb(skb);
1144 spin_unlock_bh(&inputq->lock); 1144 spin_unlock_bh(&inputq->lock);
1145 continue; 1145 continue;
1146 } 1146 }
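Both TIPC fixes are reference-count hygiene. In server.c the error paths now drop the reference they hold with conn_put(), instead of leaking it (tipc_accept_from_sock) or tearing the whole connection down (tipc_topsrv_kern_subscr); in socket.c a bare refcount_dec(&skb->users) becomes kfree_skb(), since decrementing never frees and the buffer simply leaks once the count hits zero. The contract in miniature:

/* build: cc -o refs refs.c */
#include <stdio.h>
#include <stdlib.h>

struct obj { int refs; };

static struct obj *obj_get(struct obj *o) { o->refs++; return o; }

static void obj_put(struct obj *o)       /* kfree_skb()-style put */
{
	if (--o->refs == 0) {
		printf("freed at zero\n");
		free(o);
	}
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	o->refs = 1;
	obj_get(o);   /* e.g. the reference taken for a new connection */

	/* a bare o->refs-- here would leave refs at 0 with the object
	 * still allocated: the leak the kfree_skb() change fixes */
	obj_put(o);   /* error path: balance the get */
	obj_put(o);   /* last reference: actually freed */
	return 0;
}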
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f..3deabcab4882 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
371 goto rcu_out; 371 goto rcu_out;
372 } 372 }
373 373
374 tipc_rcv(sock_net(sk), skb, b);
375 rcu_read_unlock();
376 return 0;
377
378rcu_out: 374rcu_out:
379 rcu_read_unlock(); 375 rcu_read_unlock();
380out: 376out:
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 5583df708b8c..a827547aa102 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -487,7 +487,7 @@ static void hvs_release(struct vsock_sock *vsk)
487 487
488 lock_sock(sk); 488 lock_sock(sk);
489 489
490 sk->sk_state = SS_DISCONNECTING; 490 sk->sk_state = TCP_CLOSING;
491 vsock_remove_sock(vsk); 491 vsock_remove_sock(vsk);
492 492
493 release_sock(sk); 493 release_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 391775e3575c..a7a73ffe675b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -797,11 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk)
797 797
798 /* We should not be sending anymore since the peer won't be 798 /* We should not be sending anymore since the peer won't be
799 * there to receive, but we can still receive if there is data 799 * there to receive, but we can still receive if there is data
800 * left in our consume queue. 800 * left in our consume queue. If the local endpoint is a host,
801 * we can't call vsock_stream_has_data, since that may block,
802 * but a host endpoint can't read data once the VM has
803 * detached, so there is no available data in that case.
801 */ 804 */
802 if (vsock_stream_has_data(vsk) <= 0) { 805 if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
803 sk->sk_state = TCP_CLOSE; 806 vsock_stream_has_data(vsk) <= 0) {
804
805 if (sk->sk_state == TCP_SYN_SENT) { 807 if (sk->sk_state == TCP_SYN_SENT) {
806 /* The peer may detach from a queue pair while 808 /* The peer may detach from a queue pair while
807 * we are still in the connecting state, i.e., 809 * we are still in the connecting state, i.e.,
@@ -811,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk)
811 * event like a reset. 813 * event like a reset.
812 */ 814 */
813 815
816 sk->sk_state = TCP_CLOSE;
814 sk->sk_err = ECONNRESET; 817 sk->sk_err = ECONNRESET;
815 sk->sk_error_report(sk); 818 sk->sk_error_report(sk);
816 return; 819 return;
817 } 820 }
821 sk->sk_state = TCP_CLOSE;
818 } 822 }
819 sk->sk_state_change(sk); 823 sk->sk_state_change(sk);
820 } 824 }
@@ -2144,7 +2148,7 @@ module_exit(vmci_transport_exit);
2144 2148
2145MODULE_AUTHOR("VMware, Inc."); 2149MODULE_AUTHOR("VMware, Inc.");
2146MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); 2150MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2147MODULE_VERSION("1.0.4.0-k"); 2151MODULE_VERSION("1.0.5.0-k");
2148MODULE_LICENSE("GPL v2"); 2152MODULE_LICENSE("GPL v2");
2149MODULE_ALIAS("vmware_vsock"); 2153MODULE_ALIAS("vmware_vsock");
2150MODULE_ALIAS_NETPROTO(PF_VSOCK); 2154MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index da91bb547db3..1abcc4fc4df1 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -20,6 +20,10 @@ config CFG80211
20 tristate "cfg80211 - wireless configuration API" 20 tristate "cfg80211 - wireless configuration API"
21 depends on RFKILL || !RFKILL 21 depends on RFKILL || !RFKILL
22 select FW_LOADER 22 select FW_LOADER
23 # may need to update this when certificates are changed and are
24 # using a different algorithm, though right now they shouldn't
25 # (this is here rather than below to allow it to be a module)
26 select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS
23 ---help--- 27 ---help---
24 cfg80211 is the Linux wireless LAN (802.11) configuration API. 28 cfg80211 is the Linux wireless LAN (802.11) configuration API.
25 Enable this if you have a wireless device. 29 Enable this if you have a wireless device.
@@ -113,6 +117,9 @@ config CFG80211_EXTRA_REGDB_KEYDIR
113 certificates like in the kernel sources (net/wireless/certs/) 117 certificates like in the kernel sources (net/wireless/certs/)
114 that shall be accepted for a signed regulatory database. 118 that shall be accepted for a signed regulatory database.
115 119
120 Note that you need to also select the correct CRYPTO_<hash> modules
121 for your certificates, and if cfg80211 is built-in they also must be.
122
116config CFG80211_REG_CELLULAR_HINTS 123config CFG80211_REG_CELLULAR_HINTS
117 bool "cfg80211 regulatory support for cellular base station hints" 124 bool "cfg80211 regulatory support for cellular base station hints"
118 depends on CFG80211_CERTIFICATION_ONUS 125 depends on CFG80211_CERTIFICATION_ONUS
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 278d979c211a..d7d6cb00c47b 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -25,17 +25,45 @@ endif
25 25
26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) 26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
27 @$(kecho) " GEN $@" 27 @$(kecho) " GEN $@"
28 @echo '#include "reg.h"' > $@ 28 @(set -e; \
29 @echo 'const u8 shipped_regdb_certs[] = {' >> $@ 29 allf=""; \
30 @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done 30 for f in $^ ; do \
31 @echo '};' >> $@ 31 # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
32 @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ 32 thisf=$$(od -An -v -tx1 < $$f | \
33 sed -e 's/ /\n/g' | \
34 sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
35 sed -e 's/^/0x/;s/$$/,/'); \
36 # file should not be empty - maybe command substitution failed? \
37 test ! -z "$$thisf";\
38 allf=$$allf$$thisf;\
39 done; \
40 ( \
41 echo '#include "reg.h"'; \
42 echo 'const u8 shipped_regdb_certs[] = {'; \
43 echo "$$allf"; \
44 echo '};'; \
45 echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
46 ) >> $@)
33 47
34$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ 48$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
35 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) 49 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
36 @$(kecho) " GEN $@" 50 @$(kecho) " GEN $@"
37 @echo '#include "reg.h"' > $@ 51 @(set -e; \
38 @echo 'const u8 extra_regdb_certs[] = {' >> $@ 52 allf=""; \
39 @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done 53 for f in $^ ; do \
40 @echo '};' >> $@ 54 # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
41 @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ 55 thisf=$$(od -An -v -tx1 < $$f | \
56 sed -e 's/ /\n/g' | \
57 sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
58 sed -e 's/^/0x/;s/$$/,/'); \
59 # file should not be empty - maybe command substitution failed? \
60 test ! -z "$$thisf";\
61 allf=$$allf$$thisf;\
62 done; \
63 ( \
64 echo '#include "reg.h"'; \
65 echo 'const u8 extra_regdb_certs[] = {'; \
66 echo "$$allf"; \
67 echo '};'; \
68 echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
69 ) >> $@)
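The rewritten certificate rules drop the build-time dependency on hexdump, which is apparently not guaranteed on every build host (od and sed come with coreutils), and run the whole generation inside a set -e subshell so a failing step, such as an empty od result, aborts the recipe before anything is written instead of leaving a half-generated shipped-certs.c or extra-certs.c behind. The od|sed pipeline produces the same "0x%.2x," lines the old hexdump invocation did; a stand-alone C equivalent of that byte formatting, for reference:

/* build: cc -o certhex certhex.c ; usage: ./certhex < cert.x509 */
#include <stdio.h>

int main(void)
{
	int c;

	/* one "0xXX," per input byte, matching
	 * hexdump -v -e '1/1 "0x%.2x," "\n"' */
	while ((c = getchar()) != EOF)
		printf("0x%.2x,\n", c);
	return 0;
}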