aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de> 2017-03-09 06:06:41 -0500
committer: Thomas Gleixner <tglx@linutronix.de> 2017-03-09 06:06:41 -0500
commit: 920c634aff6cb66e7f352668521eb1313897e93c (patch)
tree: 0f2e2eb15756fdd93c8ea47f9080fc3c1abeeae6 /net
parent: b28ace12661fbcfd90959c1e84ff5a85113a82a1 (diff)
parent: 4b9de5da7e120c7f02395da729f0ec77ce7a6044 (diff)
Merge tag 'irq-fixes-4.11-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms into irq/urgent
Pull irqchip/irqdomain updates for 4.11-rc2 from Marc Zyngier:
 - irqchip/crossbar: Some type tidying up
 - irqchip/gicv3-its: Workaround for a Qualcomm erratum
 - irqdomain: Compile for systems that don't use CONFIG_IRQ_DOMAIN
Fixed up minor conflict in the crossbar driver.
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c20
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/batman-adv/fragmentation.c20
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bluetooth/cmtp/capi.c2
-rw-r--r--net/bluetooth/hci_request.c2
-rw-r--r--net/bluetooth/l2cap_sock.c1
-rw-r--r--net/bluetooth/rfcomm/sock.c1
-rw-r--r--net/bluetooth/sco.c1
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_sysfs_br.c1
-rw-r--r--net/bridge/br_sysfs_if.c1
-rw-r--r--net/bridge/br_vlan.c2
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/ceph/cls_lock_client.c14
-rw-r--r--net/ceph/crush/crush.c5
-rw-r--r--net/ceph/crush/mapper.c227
-rw-r--r--net/ceph/crypto.c2
-rw-r--r--net/ceph/messenger.c44
-rw-r--r--net/ceph/osd_client.c130
-rw-r--r--net/ceph/osdmap.c101
-rw-r--r--net/ceph/snapshot.c2
-rw-r--r--net/core/dev.c111
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/net-sysfs.c1
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netclassid_cgroup.c2
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/scm.c1
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/stream.c1
-rw-r--r--net/dccp/input.c10
-rw-r--r--net/dccp/minisocks.c5
-rw-r--r--net/dccp/output.c1
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/dns_resolver/dns_query.c6
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/netfilter.c7
-rw-r--r--net/ipv4/tcp.c15
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv6/addrconf.c23
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c1
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/irda/af_irda.c1
-rw-r--r--net/irda/ircomm/ircomm_tty.c2
-rw-r--r--net/irda/irnet/irnet_ppp.c3
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/mac80211/agg-rx.c3
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/pm.c1
-rw-r--r--net/mac80211/rx.c31
-rw-r--r--net/mac80211/sta_info.c4
-rw-r--r--net/mac80211/sta_info.h8
-rw-r--r--net/mac80211/status.c7
-rw-r--r--net/mac802154/llsec.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c2
-rw-r--r--net/netfilter/nf_tables_api.c133
-rw-r--r--net/netfilter/nft_set_rbtree.c9
-rw-r--r--net/netfilter/xt_owner.c2
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/llcp_sock.c1
-rw-r--r--net/openvswitch/actions.c3
-rw-r--r--net/openvswitch/conntrack.c1
-rw-r--r--net/packet/af_packet.c8
-rw-r--r--net/phonet/pep.c1
-rw-r--r--net/phonet/socket.c2
-rw-r--r--net/rds/ib.c10
-rw-r--r--net/rds/ib_mr.h2
-rw-r--r--net/rds/page.c29
-rw-r--r--net/rds/rds.h9
-rw-r--r--net/rds/tcp.c6
-rw-r--r--net/rds/transport.c4
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/af_rxrpc.c12
-rw-r--r--net/rxrpc/ar-internal.h1
-rw-r--r--net/rxrpc/call_accept.c48
-rw-r--r--net/rxrpc/call_object.c18
-rw-r--r--net/rxrpc/conn_client.c2
-rw-r--r--net/rxrpc/input.c1
-rw-r--r--net/rxrpc/recvmsg.c41
-rw-r--r--net/rxrpc/sendmsg.c60
-rw-r--r--net/sched/em_meta.c1
-rw-r--r--net/sctp/input.c3
-rw-r--r--net/sctp/socket.c1
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/smc/smc_clc.c2
-rw-r--r--net/smc/smc_close.c2
-rw-r--r--net/smc/smc_rx.c2
-rw-r--r--net/smc/smc_tx.c2
-rw-r--r--net/strparser/strparser.c1
-rw-r--r--net/sunrpc/auth.c16
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/auth_null.c3
-rw-r--r--net/sunrpc/auth_unix.c18
-rw-r--r--net/sunrpc/cache.c121
-rw-r--r--net/sunrpc/clnt.c51
-rw-r--r--net/sunrpc/debugfs.c35
-rw-r--r--net/sunrpc/svc.c26
-rw-r--r--net/sunrpc/svcauth_unix.c4
-rw-r--r--net/sunrpc/svcsock.c1
-rw-r--r--net/sunrpc/xdr.c34
-rw-r--r--net/sunrpc/xprt.c2
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c5
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c11
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c17
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c299
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c20
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c22
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c69
-rw-r--r--net/sunrpc/xprtrdma/transport.c6
-rw-r--r--net/sunrpc/xprtrdma/verbs.c96
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h30
-rw-r--r--net/sunrpc/xprtsock.c94
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/vmw_vsock/af_vsock.c1
-rw-r--r--net/vmw_vsock/virtio_transport.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c1
-rw-r--r--net/x25/af_x25.c2
128 files changed, 1390 insertions, 1018 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 3fc94a49ccd5..3ce672af1596 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -32,7 +32,7 @@
32#include <linux/idr.h> 32#include <linux/idr.h>
33#include <linux/mutex.h> 33#include <linux/mutex.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/sched.h> 35#include <linux/sched/signal.h>
36#include <linux/uaccess.h> 36#include <linux/uaccess.h>
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <net/9p/9p.h> 38#include <net/9p/9p.h>
@@ -1101,7 +1101,7 @@ void p9_client_begin_disconnect(struct p9_client *clnt)
1101EXPORT_SYMBOL(p9_client_begin_disconnect); 1101EXPORT_SYMBOL(p9_client_begin_disconnect);
1102 1102
1103struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, 1103struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
1104 char *uname, kuid_t n_uname, char *aname) 1104 const char *uname, kuid_t n_uname, const char *aname)
1105{ 1105{
1106 int err = 0; 1106 int err = 0;
1107 struct p9_req_t *req; 1107 struct p9_req_t *req;
@@ -1149,7 +1149,7 @@ error:
1149EXPORT_SYMBOL(p9_client_attach); 1149EXPORT_SYMBOL(p9_client_attach);
1150 1150
1151struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, 1151struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
1152 char **wnames, int clone) 1152 const unsigned char * const *wnames, int clone)
1153{ 1153{
1154 int err; 1154 int err;
1155 struct p9_client *clnt; 1155 struct p9_client *clnt;
@@ -1271,7 +1271,7 @@ error:
1271} 1271}
1272EXPORT_SYMBOL(p9_client_open); 1272EXPORT_SYMBOL(p9_client_open);
1273 1273
1274int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, 1274int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode,
1275 kgid_t gid, struct p9_qid *qid) 1275 kgid_t gid, struct p9_qid *qid)
1276{ 1276{
1277 int err = 0; 1277 int err = 0;
@@ -1316,7 +1316,7 @@ error:
1316} 1316}
1317EXPORT_SYMBOL(p9_client_create_dotl); 1317EXPORT_SYMBOL(p9_client_create_dotl);
1318 1318
1319int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, 1319int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
1320 char *extension) 1320 char *extension)
1321{ 1321{
1322 int err; 1322 int err;
@@ -1361,8 +1361,8 @@ error:
1361} 1361}
1362EXPORT_SYMBOL(p9_client_fcreate); 1362EXPORT_SYMBOL(p9_client_fcreate);
1363 1363
1364int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, 1364int p9_client_symlink(struct p9_fid *dfid, const char *name,
1365 struct p9_qid *qid) 1365 const char *symtgt, kgid_t gid, struct p9_qid *qid)
1366{ 1366{
1367 int err = 0; 1367 int err = 0;
1368 struct p9_client *clnt; 1368 struct p9_client *clnt;
@@ -1395,7 +1395,7 @@ error:
1395} 1395}
1396EXPORT_SYMBOL(p9_client_symlink); 1396EXPORT_SYMBOL(p9_client_symlink);
1397 1397
1398int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) 1398int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newname)
1399{ 1399{
1400 struct p9_client *clnt; 1400 struct p9_client *clnt;
1401 struct p9_req_t *req; 1401 struct p9_req_t *req;
@@ -2117,7 +2117,7 @@ error:
2117} 2117}
2118EXPORT_SYMBOL(p9_client_readdir); 2118EXPORT_SYMBOL(p9_client_readdir);
2119 2119
2120int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, 2120int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
2121 dev_t rdev, kgid_t gid, struct p9_qid *qid) 2121 dev_t rdev, kgid_t gid, struct p9_qid *qid)
2122{ 2122{
2123 int err; 2123 int err;
@@ -2148,7 +2148,7 @@ error:
2148} 2148}
2149EXPORT_SYMBOL(p9_client_mknod_dotl); 2149EXPORT_SYMBOL(p9_client_mknod_dotl);
2150 2150
2151int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, 2151int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
2152 kgid_t gid, struct p9_qid *qid) 2152 kgid_t gid, struct p9_qid *qid)
2153{ 2153{
2154 int err; 2154 int err;
diff --git a/net/atm/common.c b/net/atm/common.c
index a3ca922d307b..9613381f5db0 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -13,7 +13,7 @@
13#include <linux/errno.h> /* error codes */ 13#include <linux/errno.h> /* error codes */
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/sched.h> 16#include <linux/sched/signal.h>
17#include <linux/time.h> /* struct timeval */ 17#include <linux/time.h> /* struct timeval */
18#include <linux/skbuff.h> 18#include <linux/skbuff.h>
19#include <linux/bitops.h> 19#include <linux/bitops.h>
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 878563a8354d..db9794ec61d8 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -10,7 +10,7 @@
10#include <linux/kernel.h> /* printk */ 10#include <linux/kernel.h> /* printk */
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/wait.h> 12#include <linux/wait.h>
13#include <linux/sched.h> /* jiffies and HZ */ 13#include <linux/sched/signal.h>
14#include <linux/fcntl.h> /* O_NONBLOCK */ 14#include <linux/fcntl.h> /* O_NONBLOCK */
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/atm.h> /* ATM stuff */ 16#include <linux/atm.h> /* ATM stuff */
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 90fcf5fc2e0a..a8e42cedf1db 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -20,7 +20,7 @@
20#include <linux/socket.h> 20#include <linux/socket.h>
21#include <linux/in.h> 21#include <linux/in.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23#include <linux/sched.h> 23#include <linux/sched/signal.h>
24#include <linux/timer.h> 24#include <linux/timer.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/sockios.h> 26#include <linux/sockios.h>
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index ead18ca836de..11a23fd6e1a0 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -239,8 +239,10 @@ err_unlock:
239 spin_unlock_bh(&chain->lock); 239 spin_unlock_bh(&chain->lock);
240 240
241err: 241err:
242 if (!ret) 242 if (!ret) {
243 kfree(frag_entry_new); 243 kfree(frag_entry_new);
244 kfree_skb(skb);
245 }
244 246
245 return ret; 247 return ret;
246} 248}
@@ -313,7 +315,7 @@ free:
313 * 315 *
314 * There are three possible outcomes: 1) Packet is merged: Return true and 316 * There are three possible outcomes: 1) Packet is merged: Return true and
315 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb 317 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
316 * to NULL; 3) Error: Return false and leave skb as is. 318 * to NULL; 3) Error: Return false and free skb.
317 * 319 *
318 * Return: true when packet is merged or buffered, false when skb is not not 320 * Return: true when packet is merged or buffered, false when skb is not not
319 * used. 321 * used.
@@ -338,9 +340,9 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb,
338 goto out_err; 340 goto out_err;
339 341
340out: 342out:
341 *skb = skb_out;
342 ret = true; 343 ret = true;
343out_err: 344out_err:
345 *skb = skb_out;
344 return ret; 346 return ret;
345} 347}
346 348
@@ -499,6 +501,12 @@ int batadv_frag_send_packet(struct sk_buff *skb,
499 501
500 /* Eat and send fragments from the tail of skb */ 502 /* Eat and send fragments from the tail of skb */
501 while (skb->len > max_fragment_size) { 503 while (skb->len > max_fragment_size) {
504 /* The initial check in this function should cover this case */
505 if (unlikely(frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)) {
506 ret = -EINVAL;
507 goto put_primary_if;
508 }
509
502 skb_fragment = batadv_frag_create(skb, &frag_header, mtu); 510 skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
503 if (!skb_fragment) { 511 if (!skb_fragment) {
504 ret = -ENOMEM; 512 ret = -ENOMEM;
@@ -515,12 +523,6 @@ int batadv_frag_send_packet(struct sk_buff *skb,
515 } 523 }
516 524
517 frag_header.no++; 525 frag_header.no++;
518
519 /* The initial check in this function should cover this case */
520 if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
521 ret = -EINVAL;
522 goto put_primary_if;
523 }
524 } 526 }
525 527
526 /* Make room for the fragment header. */ 528 /* Make room for the fragment header. */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 8f64a5c01345..66b25e410a41 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -402,7 +402,7 @@ struct batadv_gw_node {
402 struct rcu_head rcu; 402 struct rcu_head rcu;
403}; 403};
404 404
405DECLARE_EWMA(throughput, 1024, 8) 405DECLARE_EWMA(throughput, 10, 8)
406 406
407/** 407/**
408 * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor 408 * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index cfb2faba46de..69e1f7d362a8 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -27,6 +27,8 @@
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/debugfs.h> 28#include <linux/debugfs.h>
29#include <linux/stringify.h> 29#include <linux/stringify.h>
30#include <linux/sched/signal.h>
31
30#include <asm/ioctls.h> 32#include <asm/ioctls.h>
31 33
32#include <net/bluetooth/bluetooth.h> 34#include <net/bluetooth/bluetooth.h>
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 46ac686c8911..bb308224099c 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -26,7 +26,7 @@
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/errno.h> 27#include <linux/errno.h>
28#include <linux/kernel.h> 28#include <linux/kernel.h>
29#include <linux/sched.h> 29#include <linux/sched/signal.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/poll.h> 31#include <linux/poll.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 1015d9c8d97d..b5faff458d8b 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -21,6 +21,8 @@
21 SOFTWARE IS DISCLAIMED. 21 SOFTWARE IS DISCLAIMED.
22*/ 22*/
23 23
24#include <linux/sched/signal.h>
25
24#include <net/bluetooth/bluetooth.h> 26#include <net/bluetooth/bluetooth.h>
25#include <net/bluetooth/hci_core.h> 27#include <net/bluetooth/hci_core.h>
26#include <net/bluetooth/mgmt.h> 28#include <net/bluetooth/mgmt.h>
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a8ba752732c9..f307b145ea54 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -29,6 +29,7 @@
29 29
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/export.h> 31#include <linux/export.h>
32#include <linux/sched/signal.h>
32 33
33#include <net/bluetooth/bluetooth.h> 34#include <net/bluetooth/bluetooth.h>
34#include <net/bluetooth/hci_core.h> 35#include <net/bluetooth/hci_core.h>
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 7511df72347f..aa1a814ceddc 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -27,6 +27,7 @@
27 27
28#include <linux/export.h> 28#include <linux/export.h>
29#include <linux/debugfs.h> 29#include <linux/debugfs.h>
30#include <linux/sched/signal.h>
30 31
31#include <net/bluetooth/bluetooth.h> 32#include <net/bluetooth/bluetooth.h>
32#include <net/bluetooth/hci_core.h> 33#include <net/bluetooth/hci_core.h>
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3125ce670c2f..e4e9a2da1e7e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -27,6 +27,7 @@
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/debugfs.h> 28#include <linux/debugfs.h>
29#include <linux/seq_file.h> 29#include <linux/seq_file.h>
30#include <linux/sched/signal.h>
30 31
31#include <net/bluetooth/bluetooth.h> 32#include <net/bluetooth/bluetooth.h>
32#include <net/bluetooth/hci_core.h> 33#include <net/bluetooth/hci_core.h>
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6bfac29318f2..902af6ba481c 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
186 /* Do not flood unicast traffic to ports that turn it off */ 186 /* Do not flood unicast traffic to ports that turn it off */
187 if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) 187 if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD))
188 continue; 188 continue;
189 /* Do not flood if mc off, except for traffic we originate */
189 if (pkt_type == BR_PKT_MULTICAST && 190 if (pkt_type == BR_PKT_MULTICAST &&
190 !(p->flags & BR_MCAST_FLOOD)) 191 !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev)
191 continue; 192 continue;
192 193
193 /* Do not flood to ports that enable proxy ARP */ 194 /* Do not flood to ports that enable proxy ARP */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 0f4034934d56..0b5dd607444c 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -19,6 +19,7 @@
19#include <linux/rtnetlink.h> 19#include <linux/rtnetlink.h>
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/times.h> 21#include <linux/times.h>
22#include <linux/sched/signal.h>
22 23
23#include "br_private.h" 24#include "br_private.h"
24 25
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 05e8946ccc03..79aee759aba5 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -17,6 +17,7 @@
17#include <linux/if_bridge.h> 17#include <linux/if_bridge.h>
18#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
19#include <linux/spinlock.h> 19#include <linux/spinlock.h>
20#include <linux/sched/signal.h>
20 21
21#include "br_private.h" 22#include "br_private.h"
22 23
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 62e68c0dc687..b838213c408e 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -997,10 +997,10 @@ err_vlan_add:
997 RCU_INIT_POINTER(p->vlgrp, NULL); 997 RCU_INIT_POINTER(p->vlgrp, NULL);
998 synchronize_rcu(); 998 synchronize_rcu();
999 vlan_tunnel_deinit(vg); 999 vlan_tunnel_deinit(vg);
1000err_vlan_enabled:
1001err_tunnel_init: 1000err_tunnel_init:
1002 rhashtable_destroy(&vg->vlan_hash); 1001 rhashtable_destroy(&vg->vlan_hash);
1003err_rhtbl: 1002err_rhtbl:
1003err_vlan_enabled:
1004 kfree(vg); 1004 kfree(vg);
1005 1005
1006 goto out; 1006 goto out;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 92cbbd2afddb..adcad344c843 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -9,7 +9,7 @@
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/sched.h> 12#include <linux/sched/signal.h>
13#include <linux/spinlock.h> 13#include <linux/spinlock.h>
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/list.h> 15#include <linux/list.h>
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 50f040fdb2a9..b9233b990399 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -69,8 +69,8 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
69 dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n", 69 dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n",
70 __func__, lock_name, type, cookie, tag, desc, flags); 70 __func__, lock_name, type, cookie, tag, desc, flags);
71 ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock", 71 ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock",
72 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 72 CEPH_OSD_FLAG_WRITE, lock_op_page,
73 lock_op_page, lock_op_buf_size, NULL, NULL); 73 lock_op_buf_size, NULL, NULL);
74 74
75 dout("%s: status %d\n", __func__, ret); 75 dout("%s: status %d\n", __func__, ret);
76 __free_page(lock_op_page); 76 __free_page(lock_op_page);
@@ -117,8 +117,8 @@ int ceph_cls_unlock(struct ceph_osd_client *osdc,
117 117
118 dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie); 118 dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie);
119 ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock", 119 ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock",
120 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 120 CEPH_OSD_FLAG_WRITE, unlock_op_page,
121 unlock_op_page, unlock_op_buf_size, NULL, NULL); 121 unlock_op_buf_size, NULL, NULL);
122 122
123 dout("%s: status %d\n", __func__, ret); 123 dout("%s: status %d\n", __func__, ret);
124 __free_page(unlock_op_page); 124 __free_page(unlock_op_page);
@@ -170,8 +170,8 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc,
170 dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name, 170 dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name,
171 cookie, ENTITY_NAME(*locker)); 171 cookie, ENTITY_NAME(*locker));
172 ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock", 172 ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock",
173 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 173 CEPH_OSD_FLAG_WRITE, break_op_page,
174 break_op_page, break_op_buf_size, NULL, NULL); 174 break_op_buf_size, NULL, NULL);
175 175
176 dout("%s: status %d\n", __func__, ret); 176 dout("%s: status %d\n", __func__, ret);
177 __free_page(break_op_page); 177 __free_page(break_op_page);
@@ -278,7 +278,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
278 int get_info_op_buf_size; 278 int get_info_op_buf_size;
279 int name_len = strlen(lock_name); 279 int name_len = strlen(lock_name);
280 struct page *get_info_op_page, *reply_page; 280 struct page *get_info_op_page, *reply_page;
281 size_t reply_len; 281 size_t reply_len = PAGE_SIZE;
282 void *p, *end; 282 void *p, *end;
283 int ret; 283 int ret;
284 284
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 80d7c3a97cb8..5bf94c04f645 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -45,7 +45,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
45 45
46void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b) 46void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
47{ 47{
48 kfree(b->h.perm);
49 kfree(b->h.items); 48 kfree(b->h.items);
50 kfree(b); 49 kfree(b);
51} 50}
@@ -54,14 +53,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
54{ 53{
55 kfree(b->item_weights); 54 kfree(b->item_weights);
56 kfree(b->sum_weights); 55 kfree(b->sum_weights);
57 kfree(b->h.perm);
58 kfree(b->h.items); 56 kfree(b->h.items);
59 kfree(b); 57 kfree(b);
60} 58}
61 59
62void crush_destroy_bucket_tree(struct crush_bucket_tree *b) 60void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
63{ 61{
64 kfree(b->h.perm);
65 kfree(b->h.items); 62 kfree(b->h.items);
66 kfree(b->node_weights); 63 kfree(b->node_weights);
67 kfree(b); 64 kfree(b);
@@ -71,7 +68,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
71{ 68{
72 kfree(b->straws); 69 kfree(b->straws);
73 kfree(b->item_weights); 70 kfree(b->item_weights);
74 kfree(b->h.perm);
75 kfree(b->h.items); 71 kfree(b->h.items);
76 kfree(b); 72 kfree(b);
77} 73}
@@ -79,7 +75,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
79void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b) 75void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
80{ 76{
81 kfree(b->item_weights); 77 kfree(b->item_weights);
82 kfree(b->h.perm);
83 kfree(b->h.items); 78 kfree(b->h.items);
84 kfree(b); 79 kfree(b);
85} 80}
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 130ab407c5ec..b5cd8c21bfdf 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -54,7 +54,6 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size
54 return -1; 54 return -1;
55} 55}
56 56
57
58/* 57/*
59 * bucket choose methods 58 * bucket choose methods
60 * 59 *
@@ -72,59 +71,60 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size
72 * Since this is expensive, we optimize for the r=0 case, which 71 * Since this is expensive, we optimize for the r=0 case, which
73 * captures the vast majority of calls. 72 * captures the vast majority of calls.
74 */ 73 */
75static int bucket_perm_choose(struct crush_bucket *bucket, 74static int bucket_perm_choose(const struct crush_bucket *bucket,
75 struct crush_work_bucket *work,
76 int x, int r) 76 int x, int r)
77{ 77{
78 unsigned int pr = r % bucket->size; 78 unsigned int pr = r % bucket->size;
79 unsigned int i, s; 79 unsigned int i, s;
80 80
81 /* start a new permutation if @x has changed */ 81 /* start a new permutation if @x has changed */
82 if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) { 82 if (work->perm_x != (__u32)x || work->perm_n == 0) {
83 dprintk("bucket %d new x=%d\n", bucket->id, x); 83 dprintk("bucket %d new x=%d\n", bucket->id, x);
84 bucket->perm_x = x; 84 work->perm_x = x;
85 85
86 /* optimize common r=0 case */ 86 /* optimize common r=0 case */
87 if (pr == 0) { 87 if (pr == 0) {
88 s = crush_hash32_3(bucket->hash, x, bucket->id, 0) % 88 s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
89 bucket->size; 89 bucket->size;
90 bucket->perm[0] = s; 90 work->perm[0] = s;
91 bucket->perm_n = 0xffff; /* magic value, see below */ 91 work->perm_n = 0xffff; /* magic value, see below */
92 goto out; 92 goto out;
93 } 93 }
94 94
95 for (i = 0; i < bucket->size; i++) 95 for (i = 0; i < bucket->size; i++)
96 bucket->perm[i] = i; 96 work->perm[i] = i;
97 bucket->perm_n = 0; 97 work->perm_n = 0;
98 } else if (bucket->perm_n == 0xffff) { 98 } else if (work->perm_n == 0xffff) {
99 /* clean up after the r=0 case above */ 99 /* clean up after the r=0 case above */
100 for (i = 1; i < bucket->size; i++) 100 for (i = 1; i < bucket->size; i++)
101 bucket->perm[i] = i; 101 work->perm[i] = i;
102 bucket->perm[bucket->perm[0]] = 0; 102 work->perm[work->perm[0]] = 0;
103 bucket->perm_n = 1; 103 work->perm_n = 1;
104 } 104 }
105 105
106 /* calculate permutation up to pr */ 106 /* calculate permutation up to pr */
107 for (i = 0; i < bucket->perm_n; i++) 107 for (i = 0; i < work->perm_n; i++)
108 dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]); 108 dprintk(" perm_choose have %d: %d\n", i, work->perm[i]);
109 while (bucket->perm_n <= pr) { 109 while (work->perm_n <= pr) {
110 unsigned int p = bucket->perm_n; 110 unsigned int p = work->perm_n;
111 /* no point in swapping the final entry */ 111 /* no point in swapping the final entry */
112 if (p < bucket->size - 1) { 112 if (p < bucket->size - 1) {
113 i = crush_hash32_3(bucket->hash, x, bucket->id, p) % 113 i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
114 (bucket->size - p); 114 (bucket->size - p);
115 if (i) { 115 if (i) {
116 unsigned int t = bucket->perm[p + i]; 116 unsigned int t = work->perm[p + i];
117 bucket->perm[p + i] = bucket->perm[p]; 117 work->perm[p + i] = work->perm[p];
118 bucket->perm[p] = t; 118 work->perm[p] = t;
119 } 119 }
120 dprintk(" perm_choose swap %d with %d\n", p, p+i); 120 dprintk(" perm_choose swap %d with %d\n", p, p+i);
121 } 121 }
122 bucket->perm_n++; 122 work->perm_n++;
123 } 123 }
124 for (i = 0; i < bucket->size; i++) 124 for (i = 0; i < bucket->size; i++)
125 dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]); 125 dprintk(" perm_choose %d: %d\n", i, work->perm[i]);
126 126
127 s = bucket->perm[pr]; 127 s = work->perm[pr];
128out: 128out:
129 dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id, 129 dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
130 bucket->size, x, r, pr, s); 130 bucket->size, x, r, pr, s);
@@ -132,14 +132,14 @@ out:
132} 132}
133 133
134/* uniform */ 134/* uniform */
135static int bucket_uniform_choose(struct crush_bucket_uniform *bucket, 135static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket,
136 int x, int r) 136 struct crush_work_bucket *work, int x, int r)
137{ 137{
138 return bucket_perm_choose(&bucket->h, x, r); 138 return bucket_perm_choose(&bucket->h, work, x, r);
139} 139}
140 140
141/* list */ 141/* list */
142static int bucket_list_choose(struct crush_bucket_list *bucket, 142static int bucket_list_choose(const struct crush_bucket_list *bucket,
143 int x, int r) 143 int x, int r)
144{ 144{
145 int i; 145 int i;
@@ -155,8 +155,9 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
155 w *= bucket->sum_weights[i]; 155 w *= bucket->sum_weights[i];
156 w = w >> 16; 156 w = w >> 16;
157 /*dprintk(" scaled %llx\n", w);*/ 157 /*dprintk(" scaled %llx\n", w);*/
158 if (w < bucket->item_weights[i]) 158 if (w < bucket->item_weights[i]) {
159 return bucket->h.items[i]; 159 return bucket->h.items[i];
160 }
160 } 161 }
161 162
162 dprintk("bad list sums for bucket %d\n", bucket->h.id); 163 dprintk("bad list sums for bucket %d\n", bucket->h.id);
@@ -192,7 +193,7 @@ static int terminal(int x)
192 return x & 1; 193 return x & 1;
193} 194}
194 195
195static int bucket_tree_choose(struct crush_bucket_tree *bucket, 196static int bucket_tree_choose(const struct crush_bucket_tree *bucket,
196 int x, int r) 197 int x, int r)
197{ 198{
198 int n; 199 int n;
@@ -224,7 +225,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
224 225
225/* straw */ 226/* straw */
226 227
227static int bucket_straw_choose(struct crush_bucket_straw *bucket, 228static int bucket_straw_choose(const struct crush_bucket_straw *bucket,
228 int x, int r) 229 int x, int r)
229{ 230{
230 __u32 i; 231 __u32 i;
@@ -301,7 +302,7 @@ static __u64 crush_ln(unsigned int xin)
301 * 302 *
302 */ 303 */
303 304
304static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, 305static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
305 int x, int r) 306 int x, int r)
306{ 307{
307 unsigned int i, high = 0; 308 unsigned int i, high = 0;
@@ -344,37 +345,42 @@ static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
344 high_draw = draw; 345 high_draw = draw;
345 } 346 }
346 } 347 }
348
347 return bucket->h.items[high]; 349 return bucket->h.items[high];
348} 350}
349 351
350 352
351static int crush_bucket_choose(struct crush_bucket *in, int x, int r) 353static int crush_bucket_choose(const struct crush_bucket *in,
354 struct crush_work_bucket *work,
355 int x, int r)
352{ 356{
353 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); 357 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
354 BUG_ON(in->size == 0); 358 BUG_ON(in->size == 0);
355 switch (in->alg) { 359 switch (in->alg) {
356 case CRUSH_BUCKET_UNIFORM: 360 case CRUSH_BUCKET_UNIFORM:
357 return bucket_uniform_choose((struct crush_bucket_uniform *)in, 361 return bucket_uniform_choose(
358 x, r); 362 (const struct crush_bucket_uniform *)in,
363 work, x, r);
359 case CRUSH_BUCKET_LIST: 364 case CRUSH_BUCKET_LIST:
360 return bucket_list_choose((struct crush_bucket_list *)in, 365 return bucket_list_choose((const struct crush_bucket_list *)in,
361 x, r); 366 x, r);
362 case CRUSH_BUCKET_TREE: 367 case CRUSH_BUCKET_TREE:
363 return bucket_tree_choose((struct crush_bucket_tree *)in, 368 return bucket_tree_choose((const struct crush_bucket_tree *)in,
364 x, r); 369 x, r);
365 case CRUSH_BUCKET_STRAW: 370 case CRUSH_BUCKET_STRAW:
366 return bucket_straw_choose((struct crush_bucket_straw *)in, 371 return bucket_straw_choose(
367 x, r); 372 (const struct crush_bucket_straw *)in,
373 x, r);
368 case CRUSH_BUCKET_STRAW2: 374 case CRUSH_BUCKET_STRAW2:
369 return bucket_straw2_choose((struct crush_bucket_straw2 *)in, 375 return bucket_straw2_choose(
370 x, r); 376 (const struct crush_bucket_straw2 *)in,
377 x, r);
371 default: 378 default:
372 dprintk("unknown bucket %d alg %d\n", in->id, in->alg); 379 dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
373 return in->items[0]; 380 return in->items[0];
374 } 381 }
375} 382}
376 383
377
378/* 384/*
379 * true if device is marked "out" (failed, fully offloaded) 385 * true if device is marked "out" (failed, fully offloaded)
380 * of the cluster 386 * of the cluster
@@ -416,7 +422,8 @@ static int is_out(const struct crush_map *map,
416 * @parent_r: r value passed from the parent 422 * @parent_r: r value passed from the parent
417 */ 423 */
418static int crush_choose_firstn(const struct crush_map *map, 424static int crush_choose_firstn(const struct crush_map *map,
419 struct crush_bucket *bucket, 425 struct crush_work *work,
426 const struct crush_bucket *bucket,
420 const __u32 *weight, int weight_max, 427 const __u32 *weight, int weight_max,
421 int x, int numrep, int type, 428 int x, int numrep, int type,
422 int *out, int outpos, 429 int *out, int outpos,
@@ -434,7 +441,7 @@ static int crush_choose_firstn(const struct crush_map *map,
434 int rep; 441 int rep;
435 unsigned int ftotal, flocal; 442 unsigned int ftotal, flocal;
436 int retry_descent, retry_bucket, skip_rep; 443 int retry_descent, retry_bucket, skip_rep;
437 struct crush_bucket *in = bucket; 444 const struct crush_bucket *in = bucket;
438 int r; 445 int r;
439 int i; 446 int i;
440 int item = 0; 447 int item = 0;
@@ -473,9 +480,13 @@ static int crush_choose_firstn(const struct crush_map *map,
473 if (local_fallback_retries > 0 && 480 if (local_fallback_retries > 0 &&
474 flocal >= (in->size>>1) && 481 flocal >= (in->size>>1) &&
475 flocal > local_fallback_retries) 482 flocal > local_fallback_retries)
476 item = bucket_perm_choose(in, x, r); 483 item = bucket_perm_choose(
484 in, work->work[-1-in->id],
485 x, r);
477 else 486 else
478 item = crush_bucket_choose(in, x, r); 487 item = crush_bucket_choose(
488 in, work->work[-1-in->id],
489 x, r);
479 if (item >= map->max_devices) { 490 if (item >= map->max_devices) {
480 dprintk(" bad item %d\n", item); 491 dprintk(" bad item %d\n", item);
481 skip_rep = 1; 492 skip_rep = 1;
@@ -518,19 +529,21 @@ static int crush_choose_firstn(const struct crush_map *map,
518 sub_r = r >> (vary_r-1); 529 sub_r = r >> (vary_r-1);
519 else 530 else
520 sub_r = 0; 531 sub_r = 0;
521 if (crush_choose_firstn(map, 532 if (crush_choose_firstn(
522 map->buckets[-1-item], 533 map,
523 weight, weight_max, 534 work,
524 x, stable ? 1 : outpos+1, 0, 535 map->buckets[-1-item],
525 out2, outpos, count, 536 weight, weight_max,
526 recurse_tries, 0, 537 x, stable ? 1 : outpos+1, 0,
527 local_retries, 538 out2, outpos, count,
528 local_fallback_retries, 539 recurse_tries, 0,
529 0, 540 local_retries,
530 vary_r, 541 local_fallback_retries,
531 stable, 542 0,
532 NULL, 543 vary_r,
533 sub_r) <= outpos) 544 stable,
545 NULL,
546 sub_r) <= outpos)
534 /* didn't get leaf */ 547 /* didn't get leaf */
535 reject = 1; 548 reject = 1;
536 } else { 549 } else {
@@ -539,14 +552,12 @@ static int crush_choose_firstn(const struct crush_map *map,
539 } 552 }
540 } 553 }
541 554
542 if (!reject) { 555 if (!reject && !collide) {
543 /* out? */ 556 /* out? */
544 if (itemtype == 0) 557 if (itemtype == 0)
545 reject = is_out(map, weight, 558 reject = is_out(map, weight,
546 weight_max, 559 weight_max,
547 item, x); 560 item, x);
548 else
549 reject = 0;
550 } 561 }
551 562
552reject: 563reject:
@@ -600,7 +611,8 @@ reject:
600 * 611 *
601 */ 612 */
602static void crush_choose_indep(const struct crush_map *map, 613static void crush_choose_indep(const struct crush_map *map,
603 struct crush_bucket *bucket, 614 struct crush_work *work,
615 const struct crush_bucket *bucket,
604 const __u32 *weight, int weight_max, 616 const __u32 *weight, int weight_max,
605 int x, int left, int numrep, int type, 617 int x, int left, int numrep, int type,
606 int *out, int outpos, 618 int *out, int outpos,
@@ -610,7 +622,7 @@ static void crush_choose_indep(const struct crush_map *map,
610 int *out2, 622 int *out2,
611 int parent_r) 623 int parent_r)
612{ 624{
613 struct crush_bucket *in = bucket; 625 const struct crush_bucket *in = bucket;
614 int endpos = outpos + left; 626 int endpos = outpos + left;
615 int rep; 627 int rep;
616 unsigned int ftotal; 628 unsigned int ftotal;
@@ -678,7 +690,9 @@ static void crush_choose_indep(const struct crush_map *map,
678 break; 690 break;
679 } 691 }
680 692
681 item = crush_bucket_choose(in, x, r); 693 item = crush_bucket_choose(
694 in, work->work[-1-in->id],
695 x, r);
682 if (item >= map->max_devices) { 696 if (item >= map->max_devices) {
683 dprintk(" bad item %d\n", item); 697 dprintk(" bad item %d\n", item);
684 out[rep] = CRUSH_ITEM_NONE; 698 out[rep] = CRUSH_ITEM_NONE;
@@ -724,13 +738,15 @@ static void crush_choose_indep(const struct crush_map *map,
724 738
725 if (recurse_to_leaf) { 739 if (recurse_to_leaf) {
726 if (item < 0) { 740 if (item < 0) {
727 crush_choose_indep(map, 741 crush_choose_indep(
728 map->buckets[-1-item], 742 map,
729 weight, weight_max, 743 work,
730 x, 1, numrep, 0, 744 map->buckets[-1-item],
731 out2, rep, 745 weight, weight_max,
732 recurse_tries, 0, 746 x, 1, numrep, 0,
733 0, NULL, r); 747 out2, rep,
748 recurse_tries, 0,
749 0, NULL, r);
734 if (out2[rep] == CRUSH_ITEM_NONE) { 750 if (out2[rep] == CRUSH_ITEM_NONE) {
735 /* placed nothing; no leaf */ 751 /* placed nothing; no leaf */
736 break; 752 break;
@@ -781,6 +797,53 @@ static void crush_choose_indep(const struct crush_map *map,
781#endif 797#endif
782} 798}
783 799
800
801/*
802 * This takes a chunk of memory and sets it up to be a shiny new
803 * working area for a CRUSH placement computation. It must be called
804 * on any newly allocated memory before passing it in to
805 * crush_do_rule. It may be used repeatedly after that, so long as the
806 * map has not changed. If the map /has/ changed, you must make sure
807 * the working size is no smaller than what was allocated and re-run
808 * crush_init_workspace.
809 *
810 * If you do retain the working space between calls to crush, make it
811 * thread-local.
812 */
813void crush_init_workspace(const struct crush_map *map, void *v)
814{
815 struct crush_work *w = v;
816 __s32 b;
817
818 /*
819 * We work by moving through the available space and setting
820 * values and pointers as we go.
821 *
822 * It's a bit like Forth's use of the 'allot' word since we
823 * set the pointer first and then reserve the space for it to
824 * point to by incrementing the point.
825 */
826 v += sizeof(struct crush_work *);
827 w->work = v;
828 v += map->max_buckets * sizeof(struct crush_work_bucket *);
829 for (b = 0; b < map->max_buckets; ++b) {
830 if (!map->buckets[b])
831 continue;
832
833 w->work[b] = v;
834 switch (map->buckets[b]->alg) {
835 default:
836 v += sizeof(struct crush_work_bucket);
837 break;
838 }
839 w->work[b]->perm_x = 0;
840 w->work[b]->perm_n = 0;
841 w->work[b]->perm = v;
842 v += map->buckets[b]->size * sizeof(__u32);
843 }
844 BUG_ON(v - (void *)w != map->working_size);
845}
846
784/** 847/**
785 * crush_do_rule - calculate a mapping with the given input and rule 848 * crush_do_rule - calculate a mapping with the given input and rule
786 * @map: the crush_map 849 * @map: the crush_map
@@ -790,24 +853,25 @@ static void crush_choose_indep(const struct crush_map *map,
790 * @result_max: maximum result size 853 * @result_max: maximum result size
791 * @weight: weight vector (for map leaves) 854 * @weight: weight vector (for map leaves)
792 * @weight_max: size of weight vector 855 * @weight_max: size of weight vector
793 * @scratch: scratch vector for private use; must be >= 3 * result_max 856 * @cwin: pointer to at least crush_work_size() bytes of memory
794 */ 857 */
795int crush_do_rule(const struct crush_map *map, 858int crush_do_rule(const struct crush_map *map,
796 int ruleno, int x, int *result, int result_max, 859 int ruleno, int x, int *result, int result_max,
797 const __u32 *weight, int weight_max, 860 const __u32 *weight, int weight_max,
798 int *scratch) 861 void *cwin)
799{ 862{
800 int result_len; 863 int result_len;
801 int *a = scratch; 864 struct crush_work *cw = cwin;
802 int *b = scratch + result_max; 865 int *a = cwin + map->working_size;
803 int *c = scratch + result_max*2; 866 int *b = a + result_max;
867 int *c = b + result_max;
868 int *w = a;
869 int *o = b;
804 int recurse_to_leaf; 870 int recurse_to_leaf;
805 int *w;
806 int wsize = 0; 871 int wsize = 0;
807 int *o;
808 int osize; 872 int osize;
809 int *tmp; 873 int *tmp;
810 struct crush_rule *rule; 874 const struct crush_rule *rule;
811 __u32 step; 875 __u32 step;
812 int i, j; 876 int i, j;
813 int numrep; 877 int numrep;
@@ -835,12 +899,10 @@ int crush_do_rule(const struct crush_map *map,
835 899
836 rule = map->rules[ruleno]; 900 rule = map->rules[ruleno];
837 result_len = 0; 901 result_len = 0;
838 w = a;
839 o = b;
840 902
841 for (step = 0; step < rule->len; step++) { 903 for (step = 0; step < rule->len; step++) {
842 int firstn = 0; 904 int firstn = 0;
843 struct crush_rule_step *curstep = &rule->steps[step]; 905 const struct crush_rule_step *curstep = &rule->steps[step];
844 906
845 switch (curstep->op) { 907 switch (curstep->op) {
846 case CRUSH_RULE_TAKE: 908 case CRUSH_RULE_TAKE:
@@ -936,6 +998,7 @@ int crush_do_rule(const struct crush_map *map,
936 recurse_tries = choose_tries; 998 recurse_tries = choose_tries;
937 osize += crush_choose_firstn( 999 osize += crush_choose_firstn(
938 map, 1000 map,
1001 cw,
939 map->buckets[bno], 1002 map->buckets[bno],
940 weight, weight_max, 1003 weight, weight_max,
941 x, numrep, 1004 x, numrep,
@@ -956,6 +1019,7 @@ int crush_do_rule(const struct crush_map *map,
956 numrep : (result_max-osize)); 1019 numrep : (result_max-osize));
957 crush_choose_indep( 1020 crush_choose_indep(
958 map, 1021 map,
1022 cw,
959 map->buckets[bno], 1023 map->buckets[bno],
960 weight, weight_max, 1024 weight, weight_max,
961 x, out_size, numrep, 1025 x, out_size, numrep,
@@ -997,5 +1061,6 @@ int crush_do_rule(const struct crush_map *map,
997 break; 1061 break;
998 } 1062 }
999 } 1063 }
1064
1000 return result_len; 1065 return result_len;
1001} 1066}
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 292e33bd916e..46008d5ac504 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -3,10 +3,12 @@
3 3
4#include <linux/err.h> 4#include <linux/err.h>
5#include <linux/scatterlist.h> 5#include <linux/scatterlist.h>
6#include <linux/sched.h>
6#include <linux/slab.h> 7#include <linux/slab.h>
7#include <crypto/aes.h> 8#include <crypto/aes.h>
8#include <crypto/skcipher.h> 9#include <crypto/skcipher.h>
9#include <linux/key-type.h> 10#include <linux/key-type.h>
11#include <linux/sched/mm.h>
10 12
11#include <keys/ceph-type.h> 13#include <keys/ceph-type.h>
12#include <keys/user-type.h> 14#include <keys/user-type.h>
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index bad3d4ae43f6..38dcf1eb427d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -520,7 +520,8 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
520 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 520 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
521 int r; 521 int r;
522 522
523 r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); 523 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
524 r = sock_recvmsg(sock, &msg, msg.msg_flags);
524 if (r == -EAGAIN) 525 if (r == -EAGAIN)
525 r = 0; 526 r = 0;
526 return r; 527 return r;
@@ -529,17 +530,20 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
529static int ceph_tcp_recvpage(struct socket *sock, struct page *page, 530static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
530 int page_offset, size_t length) 531 int page_offset, size_t length)
531{ 532{
532 void *kaddr; 533 struct bio_vec bvec = {
533 int ret; 534 .bv_page = page,
535 .bv_offset = page_offset,
536 .bv_len = length
537 };
538 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
539 int r;
534 540
535 BUG_ON(page_offset + length > PAGE_SIZE); 541 BUG_ON(page_offset + length > PAGE_SIZE);
536 542 iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
537 kaddr = kmap(page); 543 r = sock_recvmsg(sock, &msg, msg.msg_flags);
538 BUG_ON(!kaddr); 544 if (r == -EAGAIN)
539 ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length); 545 r = 0;
540 kunmap(page); 546 return r;
541
542 return ret;
543} 547}
544 548
545/* 549/*
@@ -579,18 +583,28 @@ static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
579static int ceph_tcp_sendpage(struct socket *sock, struct page *page, 583static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
580 int offset, size_t size, bool more) 584 int offset, size_t size, bool more)
581{ 585{
586 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
587 struct bio_vec bvec;
582 int ret; 588 int ret;
583 struct kvec iov;
584 589
585 /* sendpage cannot properly handle pages with page_count == 0, 590 /* sendpage cannot properly handle pages with page_count == 0,
586 * we need to fallback to sendmsg if that's the case */ 591 * we need to fallback to sendmsg if that's the case */
587 if (page_count(page) >= 1) 592 if (page_count(page) >= 1)
588 return __ceph_tcp_sendpage(sock, page, offset, size, more); 593 return __ceph_tcp_sendpage(sock, page, offset, size, more);
589 594
590 iov.iov_base = kmap(page) + offset; 595 bvec.bv_page = page;
591 iov.iov_len = size; 596 bvec.bv_offset = offset;
592 ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more); 597 bvec.bv_len = size;
593 kunmap(page); 598
599 if (more)
600 msg.msg_flags |= MSG_MORE;
601 else
602 msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
603
604 iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
605 ret = sock_sendmsg(sock, &msg);
606 if (ret == -EAGAIN)
607 ret = 0;
594 608
595 return ret; 609 return ret;
596} 610}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f3378ba1a828..b65bbf9f45eb 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -460,7 +460,6 @@ static void request_init(struct ceph_osd_request *req)
460 460
461 kref_init(&req->r_kref); 461 kref_init(&req->r_kref);
462 init_completion(&req->r_completion); 462 init_completion(&req->r_completion);
463 init_completion(&req->r_done_completion);
464 RB_CLEAR_NODE(&req->r_node); 463 RB_CLEAR_NODE(&req->r_node);
465 RB_CLEAR_NODE(&req->r_mc_node); 464 RB_CLEAR_NODE(&req->r_mc_node);
466 INIT_LIST_HEAD(&req->r_unsafe_item); 465 INIT_LIST_HEAD(&req->r_unsafe_item);
@@ -672,7 +671,8 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
672 BUG_ON(length > previous); 671 BUG_ON(length > previous);
673 672
674 op->extent.length = length; 673 op->extent.length = length;
675 op->indata_len -= previous - length; 674 if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL)
675 op->indata_len -= previous - length;
676} 676}
677EXPORT_SYMBOL(osd_req_op_extent_update); 677EXPORT_SYMBOL(osd_req_op_extent_update);
678 678
@@ -1636,7 +1636,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
1636 bool need_send = false; 1636 bool need_send = false;
1637 bool promoted = false; 1637 bool promoted = false;
1638 1638
1639 WARN_ON(req->r_tid || req->r_got_reply); 1639 WARN_ON(req->r_tid);
1640 dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); 1640 dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
1641 1641
1642again: 1642again:
@@ -1704,17 +1704,10 @@ promote:
1704 1704
1705static void account_request(struct ceph_osd_request *req) 1705static void account_request(struct ceph_osd_request *req)
1706{ 1706{
1707 unsigned int mask = CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK; 1707 WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK));
1708 WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE)));
1708 1709
1709 if (req->r_flags & CEPH_OSD_FLAG_READ) { 1710 req->r_flags |= CEPH_OSD_FLAG_ONDISK;
1710 WARN_ON(req->r_flags & mask);
1711 req->r_flags |= CEPH_OSD_FLAG_ACK;
1712 } else if (req->r_flags & CEPH_OSD_FLAG_WRITE)
1713 WARN_ON(!(req->r_flags & mask));
1714 else
1715 WARN_ON(1);
1716
1717 WARN_ON(req->r_unsafe_callback && (req->r_flags & mask) != mask);
1718 atomic_inc(&req->r_osdc->num_requests); 1711 atomic_inc(&req->r_osdc->num_requests);
1719} 1712}
1720 1713
@@ -1749,15 +1742,15 @@ static void finish_request(struct ceph_osd_request *req)
1749 1742
1750static void __complete_request(struct ceph_osd_request *req) 1743static void __complete_request(struct ceph_osd_request *req)
1751{ 1744{
1752 if (req->r_callback) 1745 if (req->r_callback) {
1746 dout("%s req %p tid %llu cb %pf result %d\n", __func__, req,
1747 req->r_tid, req->r_callback, req->r_result);
1753 req->r_callback(req); 1748 req->r_callback(req);
1754 else 1749 }
1755 complete_all(&req->r_completion);
1756} 1750}
1757 1751
1758/* 1752/*
1759 * Note that this is open-coded in handle_reply(), which has to deal 1753 * This is open-coded in handle_reply().
1760 * with ack vs commit, dup acks, etc.
1761 */ 1754 */
1762static void complete_request(struct ceph_osd_request *req, int err) 1755static void complete_request(struct ceph_osd_request *req, int err)
1763{ 1756{
@@ -1766,7 +1759,7 @@ static void complete_request(struct ceph_osd_request *req, int err)
1766 req->r_result = err; 1759 req->r_result = err;
1767 finish_request(req); 1760 finish_request(req);
1768 __complete_request(req); 1761 __complete_request(req);
1769 complete_all(&req->r_done_completion); 1762 complete_all(&req->r_completion);
1770 ceph_osdc_put_request(req); 1763 ceph_osdc_put_request(req);
1771} 1764}
1772 1765
@@ -1792,7 +1785,7 @@ static void cancel_request(struct ceph_osd_request *req)
1792 1785
1793 cancel_map_check(req); 1786 cancel_map_check(req);
1794 finish_request(req); 1787 finish_request(req);
1795 complete_all(&req->r_done_completion); 1788 complete_all(&req->r_completion);
1796 ceph_osdc_put_request(req); 1789 ceph_osdc_put_request(req);
1797} 1790}
1798 1791
@@ -2169,7 +2162,6 @@ static void linger_commit_cb(struct ceph_osd_request *req)
2169 mutex_lock(&lreq->lock); 2162 mutex_lock(&lreq->lock);
2170 dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, 2163 dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
2171 lreq->linger_id, req->r_result); 2164 lreq->linger_id, req->r_result);
2172 WARN_ON(!__linger_registered(lreq));
2173 linger_reg_commit_complete(lreq, req->r_result); 2165 linger_reg_commit_complete(lreq, req->r_result);
2174 lreq->committed = true; 2166 lreq->committed = true;
2175 2167
@@ -2785,31 +2777,8 @@ e_inval:
2785} 2777}
2786 2778
2787/* 2779/*
2788 * We are done with @req if 2780 * Handle MOSDOpReply. Set ->r_result and call the callback if it is
2789 * - @m is a safe reply, or 2781 * specified.
2790 * - @m is an unsafe reply and we didn't want a safe one
2791 */
2792static bool done_request(const struct ceph_osd_request *req,
2793 const struct MOSDOpReply *m)
2794{
2795 return (m->result < 0 ||
2796 (m->flags & CEPH_OSD_FLAG_ONDISK) ||
2797 !(req->r_flags & CEPH_OSD_FLAG_ONDISK));
2798}
2799
2800/*
2801 * handle osd op reply. either call the callback if it is specified,
2802 * or do the completion to wake up the waiting thread.
2803 *
2804 * ->r_unsafe_callback is set? yes no
2805 *
2806 * first reply is OK (needed r_cb/r_completion, r_cb/r_completion,
2807 * any or needed/got safe) r_done_completion r_done_completion
2808 *
2809 * first reply is unsafe r_unsafe_cb(true) (nothing)
2810 *
2811 * when we get the safe reply r_unsafe_cb(false), r_cb/r_completion,
2812 * r_done_completion r_done_completion
2813 */ 2782 */
2814static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) 2783static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
2815{ 2784{
@@ -2818,7 +2787,6 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
2818 struct MOSDOpReply m; 2787 struct MOSDOpReply m;
2819 u64 tid = le64_to_cpu(msg->hdr.tid); 2788 u64 tid = le64_to_cpu(msg->hdr.tid);
2820 u32 data_len = 0; 2789 u32 data_len = 0;
2821 bool already_acked;
2822 int ret; 2790 int ret;
2823 int i; 2791 int i;
2824 2792
@@ -2897,50 +2865,22 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
2897 le32_to_cpu(msg->hdr.data_len), req->r_tid); 2865 le32_to_cpu(msg->hdr.data_len), req->r_tid);
2898 goto fail_request; 2866 goto fail_request;
2899 } 2867 }
2900 dout("%s req %p tid %llu acked %d result %d data_len %u\n", __func__, 2868 dout("%s req %p tid %llu result %d data_len %u\n", __func__,
2901 req, req->r_tid, req->r_got_reply, m.result, data_len); 2869 req, req->r_tid, m.result, data_len);
2902
2903 already_acked = req->r_got_reply;
2904 if (!already_acked) {
2905 req->r_result = m.result ?: data_len;
2906 req->r_replay_version = m.replay_version; /* struct */
2907 req->r_got_reply = true;
2908 } else if (!(m.flags & CEPH_OSD_FLAG_ONDISK)) {
2909 dout("req %p tid %llu dup ack\n", req, req->r_tid);
2910 goto out_unlock_session;
2911 }
2912
2913 if (done_request(req, &m)) {
2914 finish_request(req);
2915 if (req->r_linger) {
2916 WARN_ON(req->r_unsafe_callback);
2917 dout("req %p tid %llu cb (locked)\n", req, req->r_tid);
2918 __complete_request(req);
2919 }
2920 }
2921 2870
2871 /*
2872 * Since we only ever request ONDISK, we should only ever get
2873 * one (type of) reply back.
2874 */
2875 WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
2876 req->r_result = m.result ?: data_len;
2877 finish_request(req);
2922 mutex_unlock(&osd->lock); 2878 mutex_unlock(&osd->lock);
2923 up_read(&osdc->lock); 2879 up_read(&osdc->lock);
2924 2880
2925 if (done_request(req, &m)) { 2881 __complete_request(req);
2926 if (already_acked && req->r_unsafe_callback) { 2882 complete_all(&req->r_completion);
2927 dout("req %p tid %llu safe-cb\n", req, req->r_tid); 2883 ceph_osdc_put_request(req);
2928 req->r_unsafe_callback(req, false);
2929 } else if (!req->r_linger) {
2930 dout("req %p tid %llu cb\n", req, req->r_tid);
2931 __complete_request(req);
2932 }
2933 complete_all(&req->r_done_completion);
2934 ceph_osdc_put_request(req);
2935 } else {
2936 if (req->r_unsafe_callback) {
2937 dout("req %p tid %llu unsafe-cb\n", req, req->r_tid);
2938 req->r_unsafe_callback(req, true);
2939 } else {
2940 WARN_ON(1);
2941 }
2942 }
2943
2944 return; 2884 return;
2945 2885
2946fail_request: 2886fail_request:
@@ -3540,7 +3480,7 @@ again:
3540 up_read(&osdc->lock); 3480 up_read(&osdc->lock);
3541 dout("%s waiting on req %p tid %llu last_tid %llu\n", 3481 dout("%s waiting on req %p tid %llu last_tid %llu\n",
3542 __func__, req, req->r_tid, last_tid); 3482 __func__, req, req->r_tid, last_tid);
3543 wait_for_completion(&req->r_done_completion); 3483 wait_for_completion(&req->r_completion);
3544 ceph_osdc_put_request(req); 3484 ceph_osdc_put_request(req);
3545 goto again; 3485 goto again;
3546 } 3486 }
@@ -3599,7 +3539,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
3599 3539
3600 ceph_oid_copy(&lreq->t.base_oid, oid); 3540 ceph_oid_copy(&lreq->t.base_oid, oid);
3601 ceph_oloc_copy(&lreq->t.base_oloc, oloc); 3541 ceph_oloc_copy(&lreq->t.base_oloc, oloc);
3602 lreq->t.flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; 3542 lreq->t.flags = CEPH_OSD_FLAG_WRITE;
3603 lreq->mtime = CURRENT_TIME; 3543 lreq->mtime = CURRENT_TIME;
3604 3544
3605 lreq->reg_req = alloc_linger_request(lreq); 3545 lreq->reg_req = alloc_linger_request(lreq);
@@ -3657,7 +3597,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
3657 3597
3658 ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); 3598 ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
3659 ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); 3599 ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
3660 req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; 3600 req->r_flags = CEPH_OSD_FLAG_WRITE;
3661 req->r_mtime = CURRENT_TIME; 3601 req->r_mtime = CURRENT_TIME;
3662 osd_req_op_watch_init(req, 0, lreq->linger_id, 3602 osd_req_op_watch_init(req, 0, lreq->linger_id,
3663 CEPH_OSD_WATCH_OP_UNWATCH); 3603 CEPH_OSD_WATCH_OP_UNWATCH);
@@ -4022,7 +3962,7 @@ EXPORT_SYMBOL(ceph_osdc_maybe_request_map);
4022 * Execute an OSD class method on an object. 3962 * Execute an OSD class method on an object.
4023 * 3963 *
4024 * @flags: CEPH_OSD_FLAG_* 3964 * @flags: CEPH_OSD_FLAG_*
4025 * @resp_len: out param for reply length 3965 * @resp_len: in/out param for reply length
4026 */ 3966 */
4027int ceph_osdc_call(struct ceph_osd_client *osdc, 3967int ceph_osdc_call(struct ceph_osd_client *osdc,
4028 struct ceph_object_id *oid, 3968 struct ceph_object_id *oid,
@@ -4035,6 +3975,9 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
4035 struct ceph_osd_request *req; 3975 struct ceph_osd_request *req;
4036 int ret; 3976 int ret;
4037 3977
3978 if (req_len > PAGE_SIZE || (resp_page && *resp_len > PAGE_SIZE))
3979 return -E2BIG;
3980
4038 req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); 3981 req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
4039 if (!req) 3982 if (!req)
4040 return -ENOMEM; 3983 return -ENOMEM;
@@ -4053,7 +3996,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
4053 0, false, false); 3996 0, false, false);
4054 if (resp_page) 3997 if (resp_page)
4055 osd_req_op_cls_response_data_pages(req, 0, &resp_page, 3998 osd_req_op_cls_response_data_pages(req, 0, &resp_page,
4056 PAGE_SIZE, 0, false, false); 3999 *resp_len, 0, false, false);
4057 4000
4058 ceph_osdc_start_request(osdc, req, false); 4001 ceph_osdc_start_request(osdc, req, false);
4059 ret = ceph_osdc_wait_request(osdc, req); 4002 ret = ceph_osdc_wait_request(osdc, req);
@@ -4220,8 +4163,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
4220 int page_align = off & ~PAGE_MASK; 4163 int page_align = off & ~PAGE_MASK;
4221 4164
4222 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1, 4165 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
4223 CEPH_OSD_OP_WRITE, 4166 CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
4224 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
4225 snapc, truncate_seq, truncate_size, 4167 snapc, truncate_seq, truncate_size,
4226 true); 4168 true);
4227 if (IS_ERR(req)) 4169 if (IS_ERR(req))
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index d2436880b305..6824c0ec8373 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -153,6 +153,32 @@ bad:
153 return -EINVAL; 153 return -EINVAL;
154} 154}
155 155
156static void crush_finalize(struct crush_map *c)
157{
158 __s32 b;
159
160 /* Space for the array of pointers to per-bucket workspace */
161 c->working_size = sizeof(struct crush_work) +
162 c->max_buckets * sizeof(struct crush_work_bucket *);
163
164 for (b = 0; b < c->max_buckets; b++) {
165 if (!c->buckets[b])
166 continue;
167
168 switch (c->buckets[b]->alg) {
169 default:
170 /*
171 * The base case, permutation variables and
172 * the pointer to the permutation array.
173 */
174 c->working_size += sizeof(struct crush_work_bucket);
175 break;
176 }
177 /* Every bucket has a permutation array. */
178 c->working_size += c->buckets[b]->size * sizeof(__u32);
179 }
180}
181
156static struct crush_map *crush_decode(void *pbyval, void *end) 182static struct crush_map *crush_decode(void *pbyval, void *end)
157{ 183{
158 struct crush_map *c; 184 struct crush_map *c;
@@ -246,10 +272,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
246 b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS); 272 b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
247 if (b->items == NULL) 273 if (b->items == NULL)
248 goto badmem; 274 goto badmem;
249 b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS);
250 if (b->perm == NULL)
251 goto badmem;
252 b->perm_n = 0;
253 275
254 ceph_decode_need(p, end, b->size*sizeof(u32), bad); 276 ceph_decode_need(p, end, b->size*sizeof(u32), bad);
255 for (j = 0; j < b->size; j++) 277 for (j = 0; j < b->size; j++)
@@ -368,6 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
368 dout("crush decode tunable chooseleaf_stable = %d\n", 390 dout("crush decode tunable chooseleaf_stable = %d\n",
369 c->chooseleaf_stable); 391 c->chooseleaf_stable);
370 392
393 crush_finalize(c);
394
371done: 395done:
372 dout("crush_decode success\n"); 396 dout("crush_decode success\n");
373 return c; 397 return c;
@@ -719,7 +743,7 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
719 map->pool_max = -1; 743 map->pool_max = -1;
720 map->pg_temp = RB_ROOT; 744 map->pg_temp = RB_ROOT;
721 map->primary_temp = RB_ROOT; 745 map->primary_temp = RB_ROOT;
722 mutex_init(&map->crush_scratch_mutex); 746 mutex_init(&map->crush_workspace_mutex);
723 747
724 return map; 748 return map;
725} 749}
@@ -753,6 +777,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
753 kfree(map->osd_weight); 777 kfree(map->osd_weight);
754 kfree(map->osd_addr); 778 kfree(map->osd_addr);
755 kfree(map->osd_primary_affinity); 779 kfree(map->osd_primary_affinity);
780 kfree(map->crush_workspace);
756 kfree(map); 781 kfree(map);
757} 782}
758 783
@@ -808,6 +833,31 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
808 return 0; 833 return 0;
809} 834}
810 835
836static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
837{
838 void *workspace;
839 size_t work_size;
840
841 if (IS_ERR(crush))
842 return PTR_ERR(crush);
843
844 work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE);
845 dout("%s work_size %zu bytes\n", __func__, work_size);
846 workspace = kmalloc(work_size, GFP_NOIO);
847 if (!workspace) {
848 crush_destroy(crush);
849 return -ENOMEM;
850 }
851 crush_init_workspace(crush, workspace);
852
853 if (map->crush)
854 crush_destroy(map->crush);
855 kfree(map->crush_workspace);
856 map->crush = crush;
857 map->crush_workspace = workspace;
858 return 0;
859}
860
811#define OSDMAP_WRAPPER_COMPAT_VER 7 861#define OSDMAP_WRAPPER_COMPAT_VER 7
812#define OSDMAP_CLIENT_DATA_COMPAT_VER 1 862#define OSDMAP_CLIENT_DATA_COMPAT_VER 1
813 863
@@ -1214,13 +1264,9 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
1214 1264
1215 /* crush */ 1265 /* crush */
1216 ceph_decode_32_safe(p, end, len, e_inval); 1266 ceph_decode_32_safe(p, end, len, e_inval);
1217 map->crush = crush_decode(*p, min(*p + len, end)); 1267 err = osdmap_set_crush(map, crush_decode(*p, min(*p + len, end)));
1218 if (IS_ERR(map->crush)) { 1268 if (err)
1219 err = PTR_ERR(map->crush);
1220 map->crush = NULL;
1221 goto bad; 1269 goto bad;
1222 }
1223 *p += len;
1224 1270
1225 /* ignore the rest */ 1271 /* ignore the rest */
1226 *p = end; 1272 *p = end;
@@ -1375,7 +1421,6 @@ e_inval:
1375struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, 1421struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1376 struct ceph_osdmap *map) 1422 struct ceph_osdmap *map)
1377{ 1423{
1378 struct crush_map *newcrush = NULL;
1379 struct ceph_fsid fsid; 1424 struct ceph_fsid fsid;
1380 u32 epoch = 0; 1425 u32 epoch = 0;
1381 struct ceph_timespec modified; 1426 struct ceph_timespec modified;
@@ -1414,12 +1459,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1414 /* new crush? */ 1459 /* new crush? */
1415 ceph_decode_32_safe(p, end, len, e_inval); 1460 ceph_decode_32_safe(p, end, len, e_inval);
1416 if (len > 0) { 1461 if (len > 0) {
1417 newcrush = crush_decode(*p, min(*p+len, end)); 1462 err = osdmap_set_crush(map,
1418 if (IS_ERR(newcrush)) { 1463 crush_decode(*p, min(*p + len, end)));
1419 err = PTR_ERR(newcrush); 1464 if (err)
1420 newcrush = NULL;
1421 goto bad; 1465 goto bad;
1422 }
1423 *p += len; 1466 *p += len;
1424 } 1467 }
1425 1468
@@ -1439,12 +1482,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1439 1482
1440 map->epoch++; 1483 map->epoch++;
1441 map->modified = modified; 1484 map->modified = modified;
1442 if (newcrush) {
1443 if (map->crush)
1444 crush_destroy(map->crush);
1445 map->crush = newcrush;
1446 newcrush = NULL;
1447 }
1448 1485
1449 /* new_pools */ 1486 /* new_pools */
1450 err = decode_new_pools(p, end, map); 1487 err = decode_new_pools(p, end, map);
@@ -1505,8 +1542,6 @@ bad:
1505 print_hex_dump(KERN_DEBUG, "osdmap: ", 1542 print_hex_dump(KERN_DEBUG, "osdmap: ",
1506 DUMP_PREFIX_OFFSET, 16, 1, 1543 DUMP_PREFIX_OFFSET, 16, 1,
1507 start, end - start, true); 1544 start, end - start, true);
1508 if (newcrush)
1509 crush_destroy(newcrush);
1510 return ERR_PTR(err); 1545 return ERR_PTR(err);
1511} 1546}
1512 1547
@@ -1942,10 +1977,10 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
1942 1977
1943 BUG_ON(result_max > CEPH_PG_MAX_SIZE); 1978 BUG_ON(result_max > CEPH_PG_MAX_SIZE);
1944 1979
1945 mutex_lock(&map->crush_scratch_mutex); 1980 mutex_lock(&map->crush_workspace_mutex);
1946 r = crush_do_rule(map->crush, ruleno, x, result, result_max, 1981 r = crush_do_rule(map->crush, ruleno, x, result, result_max,
1947 weight, weight_max, map->crush_scratch_ary); 1982 weight, weight_max, map->crush_workspace);
1948 mutex_unlock(&map->crush_scratch_mutex); 1983 mutex_unlock(&map->crush_workspace_mutex);
1949 1984
1950 return r; 1985 return r;
1951} 1986}
@@ -1978,8 +2013,14 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
1978 return; 2013 return;
1979 } 2014 }
1980 2015
1981 len = do_crush(osdmap, ruleno, pps, raw->osds, 2016 if (pi->size > ARRAY_SIZE(raw->osds)) {
1982 min_t(int, pi->size, ARRAY_SIZE(raw->osds)), 2017 pr_err_ratelimited("pool %lld ruleset %d type %d too wide: size %d > %zu\n",
2018 pi->id, pi->crush_ruleset, pi->type, pi->size,
2019 ARRAY_SIZE(raw->osds));
2020 return;
2021 }
2022
2023 len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
1983 osdmap->osd_weight, osdmap->max_osd); 2024 osdmap->osd_weight, osdmap->max_osd);
1984 if (len < 0) { 2025 if (len < 0) {
1985 pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n", 2026 pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
index 154683f5f14c..705414e78ae0 100644
--- a/net/ceph/snapshot.c
+++ b/net/ceph/snapshot.c
@@ -18,8 +18,6 @@
18 * 02110-1301, USA. 18 * 02110-1301, USA.
19 */ 19 */
20 20
21#include <stddef.h>
22
23#include <linux/types.h> 21#include <linux/types.h>
24#include <linux/export.h> 22#include <linux/export.h>
25#include <linux/ceph/libceph.h> 23#include <linux/ceph/libceph.h>
diff --git a/net/core/dev.c b/net/core/dev.c
index 304f2deae5f9..8637b2b71f3d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1698,27 +1698,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
1698static struct static_key netstamp_needed __read_mostly; 1698static struct static_key netstamp_needed __read_mostly;
1699#ifdef HAVE_JUMP_LABEL 1699#ifdef HAVE_JUMP_LABEL
1700static atomic_t netstamp_needed_deferred; 1700static atomic_t netstamp_needed_deferred;
1701static atomic_t netstamp_wanted;
1701static void netstamp_clear(struct work_struct *work) 1702static void netstamp_clear(struct work_struct *work)
1702{ 1703{
1703 int deferred = atomic_xchg(&netstamp_needed_deferred, 0); 1704 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1705 int wanted;
1704 1706
1705 while (deferred--) 1707 wanted = atomic_add_return(deferred, &netstamp_wanted);
1706 static_key_slow_dec(&netstamp_needed); 1708 if (wanted > 0)
1709 static_key_enable(&netstamp_needed);
1710 else
1711 static_key_disable(&netstamp_needed);
1707} 1712}
1708static DECLARE_WORK(netstamp_work, netstamp_clear); 1713static DECLARE_WORK(netstamp_work, netstamp_clear);
1709#endif 1714#endif
1710 1715
1711void net_enable_timestamp(void) 1716void net_enable_timestamp(void)
1712{ 1717{
1718#ifdef HAVE_JUMP_LABEL
1719 int wanted;
1720
1721 while (1) {
1722 wanted = atomic_read(&netstamp_wanted);
1723 if (wanted <= 0)
1724 break;
1725 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
1726 return;
1727 }
1728 atomic_inc(&netstamp_needed_deferred);
1729 schedule_work(&netstamp_work);
1730#else
1713 static_key_slow_inc(&netstamp_needed); 1731 static_key_slow_inc(&netstamp_needed);
1732#endif
1714} 1733}
1715EXPORT_SYMBOL(net_enable_timestamp); 1734EXPORT_SYMBOL(net_enable_timestamp);
1716 1735
1717void net_disable_timestamp(void) 1736void net_disable_timestamp(void)
1718{ 1737{
1719#ifdef HAVE_JUMP_LABEL 1738#ifdef HAVE_JUMP_LABEL
1720 /* net_disable_timestamp() can be called from non process context */ 1739 int wanted;
1721 atomic_inc(&netstamp_needed_deferred); 1740
1741 while (1) {
1742 wanted = atomic_read(&netstamp_wanted);
1743 if (wanted <= 1)
1744 break;
1745 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
1746 return;
1747 }
1748 atomic_dec(&netstamp_needed_deferred);
1722 schedule_work(&netstamp_work); 1749 schedule_work(&netstamp_work);
1723#else 1750#else
1724 static_key_slow_dec(&netstamp_needed); 1751 static_key_slow_dec(&netstamp_needed);
@@ -4884,6 +4911,39 @@ void __napi_schedule(struct napi_struct *n)
4884EXPORT_SYMBOL(__napi_schedule); 4911EXPORT_SYMBOL(__napi_schedule);
4885 4912
4886/** 4913/**
4914 * napi_schedule_prep - check if napi can be scheduled
4915 * @n: napi context
4916 *
4917 * Test if NAPI routine is already running, and if not mark
4918 * it as running. This is used as a condition variable
4919 * insure only one NAPI poll instance runs. We also make
4920 * sure there is no pending NAPI disable.
4921 */
4922bool napi_schedule_prep(struct napi_struct *n)
4923{
4924 unsigned long val, new;
4925
4926 do {
4927 val = READ_ONCE(n->state);
4928 if (unlikely(val & NAPIF_STATE_DISABLE))
4929 return false;
4930 new = val | NAPIF_STATE_SCHED;
4931
4932 /* Sets STATE_MISSED bit if STATE_SCHED was already set
4933 * This was suggested by Alexander Duyck, as compiler
4934 * emits better code than :
4935 * if (val & NAPIF_STATE_SCHED)
4936 * new |= NAPIF_STATE_MISSED;
4937 */
4938 new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
4939 NAPIF_STATE_MISSED;
4940 } while (cmpxchg(&n->state, val, new) != val);
4941
4942 return !(val & NAPIF_STATE_SCHED);
4943}
4944EXPORT_SYMBOL(napi_schedule_prep);
4945
4946/**
4887 * __napi_schedule_irqoff - schedule for receive 4947 * __napi_schedule_irqoff - schedule for receive
4888 * @n: entry to schedule 4948 * @n: entry to schedule
4889 * 4949 *
@@ -4897,7 +4957,7 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
4897 4957
4898bool napi_complete_done(struct napi_struct *n, int work_done) 4958bool napi_complete_done(struct napi_struct *n, int work_done)
4899{ 4959{
4900 unsigned long flags; 4960 unsigned long flags, val, new;
4901 4961
4902 /* 4962 /*
4903 * 1) Don't let napi dequeue from the cpu poll list 4963 * 1) Don't let napi dequeue from the cpu poll list
@@ -4927,7 +4987,27 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
4927 list_del_init(&n->poll_list); 4987 list_del_init(&n->poll_list);
4928 local_irq_restore(flags); 4988 local_irq_restore(flags);
4929 } 4989 }
4930 WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state)); 4990
4991 do {
4992 val = READ_ONCE(n->state);
4993
4994 WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
4995
4996 new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
4997
4998 /* If STATE_MISSED was set, leave STATE_SCHED set,
4999 * because we will call napi->poll() one more time.
5000 * This C code was suggested by Alexander Duyck to help gcc.
5001 */
5002 new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
5003 NAPIF_STATE_SCHED;
5004 } while (cmpxchg(&n->state, val, new) != val);
5005
5006 if (unlikely(val & NAPIF_STATE_MISSED)) {
5007 __napi_schedule(n);
5008 return false;
5009 }
5010
4931 return true; 5011 return true;
4932} 5012}
4933EXPORT_SYMBOL(napi_complete_done); 5013EXPORT_SYMBOL(napi_complete_done);
@@ -4953,6 +5033,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
4953{ 5033{
4954 int rc; 5034 int rc;
4955 5035
5036 /* Busy polling means there is a high chance device driver hard irq
5037 * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
5038 * set in napi_schedule_prep().
5039 * Since we are about to call napi->poll() once more, we can safely
5040 * clear NAPI_STATE_MISSED.
5041 *
5042 * Note: x86 could use a single "lock and ..." instruction
5043 * to perform these two clear_bit()
5044 */
5045 clear_bit(NAPI_STATE_MISSED, &napi->state);
4956 clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state); 5046 clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
4957 5047
4958 local_bh_disable(); 5048 local_bh_disable();
@@ -5088,8 +5178,13 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
5088 struct napi_struct *napi; 5178 struct napi_struct *napi;
5089 5179
5090 napi = container_of(timer, struct napi_struct, timer); 5180 napi = container_of(timer, struct napi_struct, timer);
5091 if (napi->gro_list) 5181
5092 napi_schedule_irqoff(napi); 5182 /* Note : we use a relaxed variant of napi_schedule_prep() not setting
5183 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
5184 */
5185 if (napi->gro_list && !napi_disable_pending(napi) &&
5186 !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
5187 __napi_schedule_irqoff(napi);
5093 5188
5094 return HRTIMER_NORESTART; 5189 return HRTIMER_NORESTART;
5095} 5190}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index be7bab1adcde..aecb2c7241b6 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -24,7 +24,7 @@
24#include <linux/vmalloc.h> 24#include <linux/vmalloc.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/rtnetlink.h> 26#include <linux/rtnetlink.h>
27#include <linux/sched.h> 27#include <linux/sched/signal.h>
28#include <linux/net.h> 28#include <linux/net.h>
29 29
30/* 30/*
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b0c04cf4851d..3945821e9c1f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -15,6 +15,7 @@
15#include <net/switchdev.h> 15#include <net/switchdev.h>
16#include <linux/if_arp.h> 16#include <linux/if_arp.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/sched/signal.h>
18#include <linux/nsproxy.h> 19#include <linux/nsproxy.h>
19#include <net/sock.h> 20#include <net/sock.h>
20#include <net/net_namespace.h> 21#include <net/net_namespace.h>
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3c4bbec39713..652468ff65b7 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -16,6 +16,8 @@
16#include <linux/export.h> 16#include <linux/export.h>
17#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
18#include <linux/net_namespace.h> 18#include <linux/net_namespace.h>
19#include <linux/sched/task.h>
20
19#include <net/sock.h> 21#include <net/sock.h>
20#include <net/netlink.h> 22#include <net/netlink.h>
21#include <net/net_namespace.h> 23#include <net/net_namespace.h>
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 11fce17274f6..6ae56037bb13 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -12,6 +12,8 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/cgroup.h> 13#include <linux/cgroup.h>
14#include <linux/fdtable.h> 14#include <linux/fdtable.h>
15#include <linux/sched/task.h>
16
15#include <net/cls_cgroup.h> 17#include <net/cls_cgroup.h>
16#include <net/sock.h> 18#include <net/sock.h>
17 19
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 756637dc7a57..0f9275ee5595 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -20,6 +20,8 @@
20#include <linux/cgroup.h> 20#include <linux/cgroup.h>
21#include <linux/rcupdate.h> 21#include <linux/rcupdate.h>
22#include <linux/atomic.h> 22#include <linux/atomic.h>
23#include <linux/sched/task.h>
24
23#include <net/rtnetlink.h> 25#include <net/rtnetlink.h>
24#include <net/pkt_cls.h> 26#include <net/pkt_cls.h>
25#include <net/sock.h> 27#include <net/sock.h>
diff --git a/net/core/scm.c b/net/core/scm.c
index b6d83686e149..b1ff8a441748 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -14,6 +14,7 @@
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/sched/user.h>
17#include <linux/mm.h> 18#include <linux/mm.h>
18#include <linux/kernel.h> 19#include <linux/kernel.h>
19#include <linux/stat.h> 20#include <linux/stat.h>
diff --git a/net/core/sock.c b/net/core/sock.c
index e7d74940e863..f6fd79f33097 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1539,11 +1539,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1539 is_charged = sk_filter_charge(newsk, filter); 1539 is_charged = sk_filter_charge(newsk, filter);
1540 1540
1541 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { 1541 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
1542 /* It is still raw copy of parent, so invalidate 1542 sk_free_unlock_clone(newsk);
1543 * destructor and make plain sk_free() */
1544 newsk->sk_destruct = NULL;
1545 bh_unlock_sock(newsk);
1546 sk_free(newsk);
1547 newsk = NULL; 1543 newsk = NULL;
1548 goto out; 1544 goto out;
1549 } 1545 }
@@ -1592,6 +1588,16 @@ out:
1592} 1588}
1593EXPORT_SYMBOL_GPL(sk_clone_lock); 1589EXPORT_SYMBOL_GPL(sk_clone_lock);
1594 1590
1591void sk_free_unlock_clone(struct sock *sk)
1592{
1593 /* It is still raw copy of parent, so invalidate
1594 * destructor and make plain sk_free() */
1595 sk->sk_destruct = NULL;
1596 bh_unlock_sock(sk);
1597 sk_free(sk);
1598}
1599EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
1600
1595void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 1601void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1596{ 1602{
1597 u32 max_segs = 1; 1603 u32 max_segs = 1;
diff --git a/net/core/stream.c b/net/core/stream.c
index f575bcf64af2..20231dbb1da0 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -13,6 +13,7 @@
13 */ 13 */
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/sched/signal.h>
16#include <linux/net.h> 17#include <linux/net.h>
17#include <linux/signal.h> 18#include <linux/signal.h>
18#include <linux/tcp.h> 19#include <linux/tcp.h>
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 8fedc2d49770..4a05d7876850 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
577 struct dccp_sock *dp = dccp_sk(sk); 577 struct dccp_sock *dp = dccp_sk(sk);
578 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 578 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
579 const int old_state = sk->sk_state; 579 const int old_state = sk->sk_state;
580 bool acceptable;
580 int queued = 0; 581 int queued = 0;
581 582
582 /* 583 /*
@@ -603,8 +604,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
603 */ 604 */
604 if (sk->sk_state == DCCP_LISTEN) { 605 if (sk->sk_state == DCCP_LISTEN) {
605 if (dh->dccph_type == DCCP_PKT_REQUEST) { 606 if (dh->dccph_type == DCCP_PKT_REQUEST) {
606 if (inet_csk(sk)->icsk_af_ops->conn_request(sk, 607 /* It is possible that we process SYN packets from backlog,
607 skb) < 0) 608 * so we need to make sure to disable BH right there.
609 */
610 local_bh_disable();
611 acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0;
612 local_bh_enable();
613 if (!acceptable)
608 return 1; 614 return 1;
609 consume_skb(skb); 615 consume_skb(skb);
610 return 0; 616 return 0;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 53eddf99e4f6..e267e6f4c9a5 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -119,10 +119,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
119 * Activate features: initialise CCIDs, sequence windows etc. 119 * Activate features: initialise CCIDs, sequence windows etc.
120 */ 120 */
121 if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { 121 if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
122 /* It is still raw copy of parent, so invalidate 122 sk_free_unlock_clone(newsk);
123 * destructor and make plain sk_free() */
124 newsk->sk_destruct = NULL;
125 sk_free(newsk);
126 return NULL; 123 return NULL;
127 } 124 }
128 dccp_init_xmit_timers(newsk); 125 dccp_init_xmit_timers(newsk);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b66c84db0766..91a15b3c4915 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -14,6 +14,7 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/sched/signal.h>
17 18
18#include <net/inet_sock.h> 19#include <net/inet_sock.h>
19#include <net/sock.h> 20#include <net/sock.h>
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index a90ed67027b0..e6e79eda9763 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -106,7 +106,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
106#include <linux/socket.h> 106#include <linux/socket.h>
107#include <linux/in.h> 107#include <linux/in.h>
108#include <linux/kernel.h> 108#include <linux/kernel.h>
109#include <linux/sched.h> 109#include <linux/sched/signal.h>
110#include <linux/timer.h> 110#include <linux/timer.h>
111#include <linux/string.h> 111#include <linux/string.h>
112#include <linux/sockios.h> 112#include <linux/sockios.h>
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index ecc28cff08ab..af781010753b 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -37,8 +37,10 @@
37 37
38#include <linux/module.h> 38#include <linux/module.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/cred.h>
40#include <linux/dns_resolver.h> 41#include <linux/dns_resolver.h>
41#include <linux/err.h> 42#include <linux/err.h>
43
42#include <keys/dns_resolver-type.h> 44#include <keys/dns_resolver-type.h>
43#include <keys/user-type.h> 45#include <keys/user-type.h>
44 46
@@ -70,7 +72,7 @@ int dns_query(const char *type, const char *name, size_t namelen,
70 const char *options, char **_result, time64_t *_expiry) 72 const char *options, char **_result, time64_t *_expiry)
71{ 73{
72 struct key *rkey; 74 struct key *rkey;
73 const struct user_key_payload *upayload; 75 struct user_key_payload *upayload;
74 const struct cred *saved_cred; 76 const struct cred *saved_cred;
75 size_t typelen, desclen; 77 size_t typelen, desclen;
76 char *desc, *cp; 78 char *desc, *cp;
@@ -141,7 +143,7 @@ int dns_query(const char *type, const char *name, size_t namelen,
141 if (ret) 143 if (ret)
142 goto put; 144 goto put;
143 145
144 upayload = user_key_payload(rkey); 146 upayload = user_key_payload_locked(rkey);
145 len = upayload->datalen; 147 len = upayload->datalen;
146 148
147 ret = -ENOMEM; 149 ret = -ENOMEM;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5d367b7ff542..cebedd545e5e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -32,6 +32,7 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/types.h> 33#include <linux/types.h>
34#include <linux/kernel.h> 34#include <linux/kernel.h>
35#include <linux/sched/signal.h>
35#include <linux/string.h> 36#include <linux/string.h>
36#include <linux/mm.h> 37#include <linux/mm.h>
37#include <linux/socket.h> 38#include <linux/socket.h>
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b39a791f6756..42bfd08109dd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -622,6 +622,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
622 [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 622 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
623 [RTA_ENCAP] = { .type = NLA_NESTED }, 623 [RTA_ENCAP] = { .type = NLA_NESTED },
624 [RTA_UID] = { .type = NLA_U32 }, 624 [RTA_UID] = { .type = NLA_U32 },
625 [RTA_MARK] = { .type = NLA_U32 },
625}; 626};
626 627
627static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 628static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index b3cc1335adbc..c0cc6aa8cfaa 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -23,7 +23,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
23 struct rtable *rt; 23 struct rtable *rt;
24 struct flowi4 fl4 = {}; 24 struct flowi4 fl4 = {};
25 __be32 saddr = iph->saddr; 25 __be32 saddr = iph->saddr;
26 __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; 26 const struct sock *sk = skb_to_full_sk(skb);
27 __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
27 struct net_device *dev = skb_dst(skb)->dev; 28 struct net_device *dev = skb_dst(skb)->dev;
28 unsigned int hh_len; 29 unsigned int hh_len;
29 30
@@ -40,7 +41,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
40 fl4.daddr = iph->daddr; 41 fl4.daddr = iph->daddr;
41 fl4.saddr = saddr; 42 fl4.saddr = saddr;
42 fl4.flowi4_tos = RT_TOS(iph->tos); 43 fl4.flowi4_tos = RT_TOS(iph->tos);
43 fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; 44 fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
44 if (!fl4.flowi4_oif) 45 if (!fl4.flowi4_oif)
45 fl4.flowi4_oif = l3mdev_master_ifindex(dev); 46 fl4.flowi4_oif = l3mdev_master_ifindex(dev);
46 fl4.flowi4_mark = skb->mark; 47 fl4.flowi4_mark = skb->mark;
@@ -61,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
61 xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { 62 xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
62 struct dst_entry *dst = skb_dst(skb); 63 struct dst_entry *dst = skb_dst(skb);
63 skb_dst_set(skb, NULL); 64 skb_dst_set(skb, NULL);
64 dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); 65 dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
65 if (IS_ERR(dst)) 66 if (IS_ERR(dst))
66 return PTR_ERR(dst); 67 return PTR_ERR(dst);
67 skb_dst_set(skb, dst); 68 skb_dst_set(skb, dst);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index da385ae997a3..cf4555581282 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1110,9 +1110,14 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
1110 flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; 1110 flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
1111 err = __inet_stream_connect(sk->sk_socket, msg->msg_name, 1111 err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
1112 msg->msg_namelen, flags, 1); 1112 msg->msg_namelen, flags, 1);
1113 inet->defer_connect = 0; 1113 /* fastopen_req could already be freed in __inet_stream_connect
1114 *copied = tp->fastopen_req->copied; 1114 * if the connection times out or gets rst
1115 tcp_free_fastopen_req(tp); 1115 */
1116 if (tp->fastopen_req) {
1117 *copied = tp->fastopen_req->copied;
1118 tcp_free_fastopen_req(tp);
1119 inet->defer_connect = 0;
1120 }
1116 return err; 1121 return err;
1117} 1122}
1118 1123
@@ -2318,6 +2323,10 @@ int tcp_disconnect(struct sock *sk, int flags)
2318 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); 2323 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2319 __sk_dst_reset(sk); 2324 __sk_dst_reset(sk);
2320 2325
2326 /* Clean up fastopen related fields */
2327 tcp_free_fastopen_req(tp);
2328 inet->defer_connect = 0;
2329
2321 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); 2330 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2322 2331
2323 sk->sk_error_report(sk); 2332 sk->sk_error_report(sk);
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 35b280361cb2..50a0f3e51d5b 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -27,6 +27,8 @@
27#include <linux/kernel.h> 27#include <linux/kernel.h>
28#include <linux/random.h> 28#include <linux/random.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/sched/clock.h>
31
30#include <net/tcp.h> 32#include <net/tcp.h>
31 33
32#define HYSTART_ACK_TRAIN 1 34#define HYSTART_ACK_TRAIN 1
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2c0ff327b6df..39c393cc0fd3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5886,9 +5886,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
5886 if (th->syn) { 5886 if (th->syn) {
5887 if (th->fin) 5887 if (th->fin)
5888 goto discard; 5888 goto discard;
5889 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) 5889 /* It is possible that we process SYN packets from backlog,
5890 return 1; 5890 * so we need to make sure to disable BH right there.
5891 */
5892 local_bh_disable();
5893 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
5894 local_bh_enable();
5891 5895
5896 if (!acceptable)
5897 return 1;
5892 consume_skb(skb); 5898 consume_skb(skb);
5893 return 0; 5899 return 0;
5894 } 5900 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3a2025f5bf2c..363172527e43 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -43,6 +43,7 @@
43#include <linux/errno.h> 43#include <linux/errno.h>
44#include <linux/types.h> 44#include <linux/types.h>
45#include <linux/kernel.h> 45#include <linux/kernel.h>
46#include <linux/sched/signal.h>
46#include <linux/socket.h> 47#include <linux/socket.h>
47#include <linux/sockios.h> 48#include <linux/sockios.h>
48#include <linux/net.h> 49#include <linux/net.h>
@@ -5692,13 +5693,18 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
5692 struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; 5693 struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
5693 struct net *net = (struct net *)ctl->extra2; 5694 struct net *net = (struct net *)ctl->extra2;
5694 5695
5696 if (!rtnl_trylock())
5697 return restart_syscall();
5698
5695 ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 5699 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5696 5700
5697 if (write) { 5701 if (write) {
5698 new_val = *((int *)ctl->data); 5702 new_val = *((int *)ctl->data);
5699 5703
5700 if (check_addr_gen_mode(new_val) < 0) 5704 if (check_addr_gen_mode(new_val) < 0) {
5701 return -EINVAL; 5705 ret = -EINVAL;
5706 goto out;
5707 }
5702 5708
5703 /* request for default */ 5709 /* request for default */
5704 if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) { 5710 if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) {
@@ -5707,20 +5713,23 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
5707 /* request for individual net device */ 5713 /* request for individual net device */
5708 } else { 5714 } else {
5709 if (!idev) 5715 if (!idev)
5710 return ret; 5716 goto out;
5711 5717
5712 if (check_stable_privacy(idev, net, new_val) < 0) 5718 if (check_stable_privacy(idev, net, new_val) < 0) {
5713 return -EINVAL; 5719 ret = -EINVAL;
5720 goto out;
5721 }
5714 5722
5715 if (idev->cnf.addr_gen_mode != new_val) { 5723 if (idev->cnf.addr_gen_mode != new_val) {
5716 idev->cnf.addr_gen_mode = new_val; 5724 idev->cnf.addr_gen_mode = new_val;
5717 rtnl_lock();
5718 addrconf_dev_config(idev->dev); 5725 addrconf_dev_config(idev->dev);
5719 rtnl_unlock();
5720 } 5726 }
5721 } 5727 }
5722 } 5728 }
5723 5729
5730out:
5731 rtnl_unlock();
5732
5724 return ret; 5733 return ret;
5725} 5734}
5726 5735
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 9948b5ce52da..986d4ca38832 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
589 hdr = ipv6_hdr(skb); 589 hdr = ipv6_hdr(skb);
590 fhdr = (struct frag_hdr *)skb_transport_header(skb); 590 fhdr = (struct frag_hdr *)skb_transport_header(skb);
591 591
592 skb_orphan(skb);
592 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, 593 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
593 skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); 594 skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
594 if (fq == NULL) { 595 if (fq == NULL) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f54f4265b37f..229bfcc451ef 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2169,10 +2169,13 @@ int ip6_del_rt(struct rt6_info *rt)
2169static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) 2169static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2170{ 2170{
2171 struct nl_info *info = &cfg->fc_nlinfo; 2171 struct nl_info *info = &cfg->fc_nlinfo;
2172 struct net *net = info->nl_net;
2172 struct sk_buff *skb = NULL; 2173 struct sk_buff *skb = NULL;
2173 struct fib6_table *table; 2174 struct fib6_table *table;
2174 int err; 2175 int err = -ENOENT;
2175 2176
2177 if (rt == net->ipv6.ip6_null_entry)
2178 goto out_put;
2176 table = rt->rt6i_table; 2179 table = rt->rt6i_table;
2177 write_lock_bh(&table->tb6_lock); 2180 write_lock_bh(&table->tb6_lock);
2178 2181
@@ -2184,7 +2187,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2184 if (skb) { 2187 if (skb) {
2185 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 2188 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2186 2189
2187 if (rt6_fill_node(info->nl_net, skb, rt, 2190 if (rt6_fill_node(net, skb, rt,
2188 NULL, NULL, 0, RTM_DELROUTE, 2191 NULL, NULL, 0, RTM_DELROUTE,
2189 info->portid, seq, 0) < 0) { 2192 info->portid, seq, 0) < 0) {
2190 kfree_skb(skb); 2193 kfree_skb(skb);
@@ -2198,17 +2201,18 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2198 rt6i_siblings) { 2201 rt6i_siblings) {
2199 err = fib6_del(sibling, info); 2202 err = fib6_del(sibling, info);
2200 if (err) 2203 if (err)
2201 goto out; 2204 goto out_unlock;
2202 } 2205 }
2203 } 2206 }
2204 2207
2205 err = fib6_del(rt, info); 2208 err = fib6_del(rt, info);
2206out: 2209out_unlock:
2207 write_unlock_bh(&table->tb6_lock); 2210 write_unlock_bh(&table->tb6_lock);
2211out_put:
2208 ip6_rt_put(rt); 2212 ip6_rt_put(rt);
2209 2213
2210 if (skb) { 2214 if (skb) {
2211 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV6_ROUTE, 2215 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2212 info->nlh, gfp_any()); 2216 info->nlh, gfp_any());
2213 } 2217 }
2214 return err; 2218 return err;
@@ -2891,6 +2895,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2891 [RTA_ENCAP] = { .type = NLA_NESTED }, 2895 [RTA_ENCAP] = { .type = NLA_NESTED },
2892 [RTA_EXPIRES] = { .type = NLA_U32 }, 2896 [RTA_EXPIRES] = { .type = NLA_U32 },
2893 [RTA_UID] = { .type = NLA_U32 }, 2897 [RTA_UID] = { .type = NLA_U32 },
2898 [RTA_MARK] = { .type = NLA_U32 },
2894}; 2899};
2895 2900
2896static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2901static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -3627,6 +3632,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3627 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); 3632 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3628 } 3633 }
3629 3634
3635 if (rt == net->ipv6.ip6_null_entry) {
3636 err = rt->dst.error;
3637 ip6_rt_put(rt);
3638 goto errout;
3639 }
3640
3630 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 3641 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3631 if (!skb) { 3642 if (!skb) {
3632 ip6_rt_put(rt); 3643 ip6_rt_put(rt);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index ab254041dab7..81adc29a448d 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -46,6 +46,7 @@
46#include <linux/socket.h> 46#include <linux/socket.h>
47#include <linux/sockios.h> 47#include <linux/sockios.h>
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <linux/sched/signal.h>
49#include <linux/init.h> 50#include <linux/init.h>
50#include <linux/net.h> 51#include <linux/net.h>
51#include <linux/irda.h> 52#include <linux/irda.h>
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 817b1b186aff..f6061c4bb0a8 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -32,7 +32,7 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/sched.h> 35#include <linux/sched/signal.h>
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/termios.h> 37#include <linux/termios.h>
38#include <linux/tty.h> 38#include <linux/tty.h>
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 35dbf3dc3d28..7025dcb853d0 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -13,8 +13,9 @@
13 * 2) as a control channel (write commands, read events) 13 * 2) as a control channel (write commands, read events)
14 */ 14 */
15 15
16#include <linux/sched.h> 16#include <linux/sched/signal.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18
18#include "irnet_ppp.h" /* Private header */ 19#include "irnet_ppp.h" /* Private header */
19/* Please put other headers in irnet.h - Thanks */ 20/* Please put other headers in irnet.h - Thanks */
20 21
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 13190b38f22e..89bbde1081ce 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -17,7 +17,7 @@
17#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/sched.h> 20#include <linux/sched/signal.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <linux/init.h> 23#include <linux/init.h>
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index a646f3481240..309062f3debe 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -24,6 +24,8 @@
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include <linux/syscalls.h> 26#include <linux/syscalls.h>
27#include <linux/sched/signal.h>
28
27#include <net/kcm.h> 29#include <net/kcm.h>
28#include <net/netns/generic.h> 30#include <net/netns/generic.h>
29#include <net/sock.h> 31#include <net/sock.h>
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 5e9296382420..06186d608a27 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -26,6 +26,8 @@
26#include <linux/rtnetlink.h> 26#include <linux/rtnetlink.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/sched/signal.h>
30
29#include <net/llc.h> 31#include <net/llc.h>
30#include <net/llc_sap.h> 32#include <net/llc_sap.h>
31#include <net/llc_pdu.h> 33#include <net/llc_pdu.h>
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 3b5fd4188f2a..4456559cb056 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -85,7 +85,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
85 ht_dbg(sta->sdata, 85 ht_dbg(sta->sdata,
86 "Rx BA session stop requested for %pM tid %u %s reason: %d\n", 86 "Rx BA session stop requested for %pM tid %u %s reason: %d\n",
87 sta->sta.addr, tid, 87 sta->sta.addr, tid,
88 initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", 88 initiator == WLAN_BACK_RECIPIENT ? "recipient" : "initiator",
89 (int)reason); 89 (int)reason);
90 90
91 if (drv_ampdu_action(local, sta->sdata, &params)) 91 if (drv_ampdu_action(local, sta->sdata, &params))
@@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
398 tid_agg_rx->timeout = timeout; 398 tid_agg_rx->timeout = timeout;
399 tid_agg_rx->stored_mpdu_num = 0; 399 tid_agg_rx->stored_mpdu_num = 0;
400 tid_agg_rx->auto_seq = auto_seq; 400 tid_agg_rx->auto_seq = auto_seq;
401 tid_agg_rx->started = false;
401 tid_agg_rx->reorder_buf_filtered = 0; 402 tid_agg_rx->reorder_buf_filtered = 0;
402 status = WLAN_STATUS_SUCCESS; 403 status = WLAN_STATUS_SUCCESS;
403 404
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 159a1a733725..0e718437d080 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -428,7 +428,7 @@ struct ieee80211_sta_tx_tspec {
428 bool downgraded; 428 bool downgraded;
429}; 429};
430 430
431DECLARE_EWMA(beacon_signal, 16, 4) 431DECLARE_EWMA(beacon_signal, 4, 4)
432 432
433struct ieee80211_if_managed { 433struct ieee80211_if_managed {
434 struct timer_list timer; 434 struct timer_list timer;
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index fcba70e57073..953d71e784a9 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -9,6 +9,8 @@
9#include <linux/gfp.h> 9#include <linux/gfp.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <linux/rculist.h>
13
12#include "ieee80211_i.h" 14#include "ieee80211_i.h"
13#include "rate.h" 15#include "rate.h"
14#include "mesh.h" 16#include "mesh.h"
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 28a3a0957c9e..76a8bcd8ef11 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
168 break; 168 break;
169 } 169 }
170 170
171 flush_delayed_work(&sdata->dec_tailroom_needed_wk);
171 drv_remove_interface(local, sdata); 172 drv_remove_interface(local, sdata);
172 } 173 }
173 174
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 50ca3828b124..e48724a6725e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4,7 +4,7 @@
4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
5 * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> 5 * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
6 * Copyright 2013-2014 Intel Mobile Communications GmbH 6 * Copyright 2013-2014 Intel Mobile Communications GmbH
7 * Copyright(c) 2015 - 2016 Intel Deutschland GmbH 7 * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -1034,6 +1034,18 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
1034 buf_size = tid_agg_rx->buf_size; 1034 buf_size = tid_agg_rx->buf_size;
1035 head_seq_num = tid_agg_rx->head_seq_num; 1035 head_seq_num = tid_agg_rx->head_seq_num;
1036 1036
1037 /*
1038 * If the current MPDU's SN is smaller than the SSN, it shouldn't
1039 * be reordered.
1040 */
1041 if (unlikely(!tid_agg_rx->started)) {
1042 if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
1043 ret = false;
1044 goto out;
1045 }
1046 tid_agg_rx->started = true;
1047 }
1048
1037 /* frame with out of date sequence number */ 1049 /* frame with out of date sequence number */
1038 if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { 1050 if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
1039 dev_kfree_skb(skb); 1051 dev_kfree_skb(skb);
@@ -3880,6 +3892,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
3880 stats->last_rate = sta_stats_encode_rate(status); 3892 stats->last_rate = sta_stats_encode_rate(status);
3881 3893
3882 stats->fragments++; 3894 stats->fragments++;
3895 stats->packets++;
3883 3896
3884 if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { 3897 if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
3885 stats->last_signal = status->signal; 3898 stats->last_signal = status->signal;
@@ -4073,15 +4086,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
4073 ieee80211_is_beacon(hdr->frame_control))) 4086 ieee80211_is_beacon(hdr->frame_control)))
4074 ieee80211_scan_rx(local, skb); 4087 ieee80211_scan_rx(local, skb);
4075 4088
4076 if (pubsta) { 4089 if (ieee80211_is_data(fc)) {
4077 rx.sta = container_of(pubsta, struct sta_info, sta);
4078 rx.sdata = rx.sta->sdata;
4079 if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
4080 return;
4081 goto out;
4082 } else if (ieee80211_is_data(fc)) {
4083 struct sta_info *sta, *prev_sta; 4090 struct sta_info *sta, *prev_sta;
4084 4091
4092 if (pubsta) {
4093 rx.sta = container_of(pubsta, struct sta_info, sta);
4094 rx.sdata = rx.sta->sdata;
4095 if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
4096 return;
4097 goto out;
4098 }
4099
4085 prev_sta = NULL; 4100 prev_sta = NULL;
4086 4101
4087 for_each_sta_info(local, hdr->addr2, sta, tmp) { 4102 for_each_sta_info(local, hdr->addr2, sta, tmp) {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4774e663a411..3323a2fb289b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
688 } 688 }
689 689
690 /* No need to do anything if the driver does all */ 690 /* No need to do anything if the driver does all */
691 if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) 691 if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim)
692 return; 692 return;
693 693
694 if (sta->dead) 694 if (sta->dead)
@@ -1264,7 +1264,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
1264 sta_info_recalc_tim(sta); 1264 sta_info_recalc_tim(sta);
1265 1265
1266 ps_dbg(sdata, 1266 ps_dbg(sdata,
1267 "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", 1267 "STA %pM aid %d sending %d filtered/%d PS frames since STA woke up\n",
1268 sta->sta.addr, sta->sta.aid, filtered, buffered); 1268 sta->sta.addr, sta->sta.aid, filtered, buffered);
1269 1269
1270 ieee80211_check_fast_xmit(sta); 1270 ieee80211_check_fast_xmit(sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index dd06ef0b8861..e65cda34d2bc 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -189,6 +189,7 @@ struct tid_ampdu_tx {
189 * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and 189 * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
190 * and ssn. 190 * and ssn.
191 * @removed: this session is removed (but might have been found due to RCU) 191 * @removed: this session is removed (but might have been found due to RCU)
192 * @started: this session has started (head ssn or higher was received)
192 * 193 *
193 * This structure's lifetime is managed by RCU, assignments to 194 * This structure's lifetime is managed by RCU, assignments to
194 * the array holding it must hold the aggregation mutex. 195 * the array holding it must hold the aggregation mutex.
@@ -212,8 +213,9 @@ struct tid_ampdu_rx {
212 u16 ssn; 213 u16 ssn;
213 u16 buf_size; 214 u16 buf_size;
214 u16 timeout; 215 u16 timeout;
215 bool auto_seq; 216 u8 auto_seq:1,
216 bool removed; 217 removed:1,
218 started:1;
217}; 219};
218 220
219/** 221/**
@@ -370,7 +372,7 @@ struct mesh_sta {
370 unsigned int fail_avg; 372 unsigned int fail_avg;
371}; 373};
372 374
373DECLARE_EWMA(signal, 1024, 8) 375DECLARE_EWMA(signal, 10, 8)
374 376
375struct ieee80211_sta_rx_stats { 377struct ieee80211_sta_rx_stats {
376 unsigned long packets; 378 unsigned long packets;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index a3af6e1bfd98..83b8b11f24ea 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
51 struct ieee80211_hdr *hdr = (void *)skb->data; 51 struct ieee80211_hdr *hdr = (void *)skb->data;
52 int ac; 52 int ac;
53 53
54 if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) { 54 if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER |
55 IEEE80211_TX_CTL_AMPDU)) {
55 ieee80211_free_txskb(&local->hw, skb); 56 ieee80211_free_txskb(&local->hw, skb);
56 return; 57 return;
57 } 58 }
@@ -462,9 +463,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
462 unsigned long flags; 463 unsigned long flags;
463 464
464 spin_lock_irqsave(&local->ack_status_lock, flags); 465 spin_lock_irqsave(&local->ack_status_lock, flags);
465 skb = idr_find(&local->ack_status_frames, info->ack_frame_id); 466 skb = idr_remove(&local->ack_status_frames, info->ack_frame_id);
466 if (skb)
467 idr_remove(&local->ack_status_frames, info->ack_frame_id);
468 spin_unlock_irqrestore(&local->ack_status_lock, flags); 467 spin_unlock_irqrestore(&local->ack_status_lock, flags);
469 468
470 if (!skb) 469 if (!skb)
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 6a3e1c2181d3..1e1c9b20bab7 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -18,6 +18,8 @@
18#include <linux/bug.h> 18#include <linux/bug.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/ieee802154.h> 20#include <linux/ieee802154.h>
21#include <linux/rculist.h>
22
21#include <crypto/aead.h> 23#include <crypto/aead.h>
22#include <crypto/skcipher.h> 24#include <crypto/skcipher.h>
23 25
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 24174c520239..0d17894798b5 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1628,8 +1628,6 @@ static int __init nf_conntrack_sip_init(void)
1628 ports[ports_c++] = SIP_PORT; 1628 ports[ports_c++] = SIP_PORT;
1629 1629
1630 for (i = 0; i < ports_c; i++) { 1630 for (i = 0; i < ports_c; i++) {
1631 memset(&sip[i], 0, sizeof(sip[i]));
1632
1633 nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", 1631 nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip",
1634 SIP_PORT, ports[i], i, sip_exp_policy, 1632 SIP_PORT, ports[i], i, sip_exp_policy,
1635 SIP_EXPECT_MAX, 1633 SIP_EXPECT_MAX,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ff7304ae58ac..5e0ccfd5bb37 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -461,16 +461,15 @@ nla_put_failure:
461 return -1; 461 return -1;
462} 462}
463 463
464static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) 464static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
465{ 465{
466 struct sk_buff *skb; 466 struct sk_buff *skb;
467 int err; 467 int err;
468 468
469 if (!ctx->report && 469 if (!ctx->report &&
470 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) 470 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
471 return 0; 471 return;
472 472
473 err = -ENOBUFS;
474 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 473 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
475 if (skb == NULL) 474 if (skb == NULL)
476 goto err; 475 goto err;
@@ -482,14 +481,11 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
482 goto err; 481 goto err;
483 } 482 }
484 483
485 err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, 484 nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
486 ctx->report, GFP_KERNEL); 485 ctx->report, GFP_KERNEL);
486 return;
487err: 487err:
488 if (err < 0) { 488 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
489 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
490 err);
491 }
492 return err;
493} 489}
494 490
495static int nf_tables_dump_tables(struct sk_buff *skb, 491static int nf_tables_dump_tables(struct sk_buff *skb,
@@ -1050,16 +1046,15 @@ nla_put_failure:
1050 return -1; 1046 return -1;
1051} 1047}
1052 1048
1053static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) 1049static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
1054{ 1050{
1055 struct sk_buff *skb; 1051 struct sk_buff *skb;
1056 int err; 1052 int err;
1057 1053
1058 if (!ctx->report && 1054 if (!ctx->report &&
1059 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) 1055 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
1060 return 0; 1056 return;
1061 1057
1062 err = -ENOBUFS;
1063 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 1058 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
1064 if (skb == NULL) 1059 if (skb == NULL)
1065 goto err; 1060 goto err;
@@ -1072,14 +1067,11 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
1072 goto err; 1067 goto err;
1073 } 1068 }
1074 1069
1075 err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, 1070 nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1076 ctx->report, GFP_KERNEL); 1071 ctx->report, GFP_KERNEL);
1072 return;
1077err: 1073err:
1078 if (err < 0) { 1074 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
1079 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1080 err);
1081 }
1082 return err;
1083} 1075}
1084 1076
1085static int nf_tables_dump_chains(struct sk_buff *skb, 1077static int nf_tables_dump_chains(struct sk_buff *skb,
@@ -1934,18 +1926,16 @@ nla_put_failure:
1934 return -1; 1926 return -1;
1935} 1927}
1936 1928
1937static int nf_tables_rule_notify(const struct nft_ctx *ctx, 1929static void nf_tables_rule_notify(const struct nft_ctx *ctx,
1938 const struct nft_rule *rule, 1930 const struct nft_rule *rule, int event)
1939 int event)
1940{ 1931{
1941 struct sk_buff *skb; 1932 struct sk_buff *skb;
1942 int err; 1933 int err;
1943 1934
1944 if (!ctx->report && 1935 if (!ctx->report &&
1945 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) 1936 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
1946 return 0; 1937 return;
1947 1938
1948 err = -ENOBUFS;
1949 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 1939 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
1950 if (skb == NULL) 1940 if (skb == NULL)
1951 goto err; 1941 goto err;
@@ -1958,14 +1948,11 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
1958 goto err; 1948 goto err;
1959 } 1949 }
1960 1950
1961 err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, 1951 nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1962 ctx->report, GFP_KERNEL); 1952 ctx->report, GFP_KERNEL);
1953 return;
1963err: 1954err:
1964 if (err < 0) { 1955 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
1965 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1966 err);
1967 }
1968 return err;
1969} 1956}
1970 1957
1971struct nft_rule_dump_ctx { 1958struct nft_rule_dump_ctx {
@@ -2696,9 +2683,9 @@ nla_put_failure:
2696 return -1; 2683 return -1;
2697} 2684}
2698 2685
2699static int nf_tables_set_notify(const struct nft_ctx *ctx, 2686static void nf_tables_set_notify(const struct nft_ctx *ctx,
2700 const struct nft_set *set, 2687 const struct nft_set *set, int event,
2701 int event, gfp_t gfp_flags) 2688 gfp_t gfp_flags)
2702{ 2689{
2703 struct sk_buff *skb; 2690 struct sk_buff *skb;
2704 u32 portid = ctx->portid; 2691 u32 portid = ctx->portid;
@@ -2706,9 +2693,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
2706 2693
2707 if (!ctx->report && 2694 if (!ctx->report &&
2708 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) 2695 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
2709 return 0; 2696 return;
2710 2697
2711 err = -ENOBUFS;
2712 skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags); 2698 skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
2713 if (skb == NULL) 2699 if (skb == NULL)
2714 goto err; 2700 goto err;
@@ -2719,12 +2705,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
2719 goto err; 2705 goto err;
2720 } 2706 }
2721 2707
2722 err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, 2708 nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report,
2723 ctx->report, gfp_flags); 2709 gfp_flags);
2710 return;
2724err: 2711err:
2725 if (err < 0) 2712 nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
2726 nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
2727 return err;
2728} 2713}
2729 2714
2730static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) 2715static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
@@ -3504,10 +3489,10 @@ nla_put_failure:
3504 return -1; 3489 return -1;
3505} 3490}
3506 3491
3507static int nf_tables_setelem_notify(const struct nft_ctx *ctx, 3492static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
3508 const struct nft_set *set, 3493 const struct nft_set *set,
3509 const struct nft_set_elem *elem, 3494 const struct nft_set_elem *elem,
3510 int event, u16 flags) 3495 int event, u16 flags)
3511{ 3496{
3512 struct net *net = ctx->net; 3497 struct net *net = ctx->net;
3513 u32 portid = ctx->portid; 3498 u32 portid = ctx->portid;
@@ -3515,9 +3500,8 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
3515 int err; 3500 int err;
3516 3501
3517 if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) 3502 if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
3518 return 0; 3503 return;
3519 3504
3520 err = -ENOBUFS;
3521 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 3505 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3522 if (skb == NULL) 3506 if (skb == NULL)
3523 goto err; 3507 goto err;
@@ -3529,12 +3513,11 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
3529 goto err; 3513 goto err;
3530 } 3514 }
3531 3515
3532 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, 3516 nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
3533 GFP_KERNEL); 3517 GFP_KERNEL);
3518 return;
3534err: 3519err:
3535 if (err < 0) 3520 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
3536 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
3537 return err;
3538} 3521}
3539 3522
3540static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, 3523static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
@@ -4476,18 +4459,17 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
4476 return nft_delobj(&ctx, obj); 4459 return nft_delobj(&ctx, obj);
4477} 4460}
4478 4461
4479int nft_obj_notify(struct net *net, struct nft_table *table, 4462void nft_obj_notify(struct net *net, struct nft_table *table,
4480 struct nft_object *obj, u32 portid, u32 seq, int event, 4463 struct nft_object *obj, u32 portid, u32 seq, int event,
4481 int family, int report, gfp_t gfp) 4464 int family, int report, gfp_t gfp)
4482{ 4465{
4483 struct sk_buff *skb; 4466 struct sk_buff *skb;
4484 int err; 4467 int err;
4485 4468
4486 if (!report && 4469 if (!report &&
4487 !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) 4470 !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
4488 return 0; 4471 return;
4489 4472
4490 err = -ENOBUFS;
4491 skb = nlmsg_new(NLMSG_GOODSIZE, gfp); 4473 skb = nlmsg_new(NLMSG_GOODSIZE, gfp);
4492 if (skb == NULL) 4474 if (skb == NULL)
4493 goto err; 4475 goto err;
@@ -4499,21 +4481,18 @@ int nft_obj_notify(struct net *net, struct nft_table *table,
4499 goto err; 4481 goto err;
4500 } 4482 }
4501 4483
4502 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); 4484 nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp);
4485 return;
4503err: 4486err:
4504 if (err < 0) { 4487 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
4505 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
4506 }
4507 return err;
4508} 4488}
4509EXPORT_SYMBOL_GPL(nft_obj_notify); 4489EXPORT_SYMBOL_GPL(nft_obj_notify);
4510 4490
4511static int nf_tables_obj_notify(const struct nft_ctx *ctx, 4491static void nf_tables_obj_notify(const struct nft_ctx *ctx,
4512 struct nft_object *obj, int event) 4492 struct nft_object *obj, int event)
4513{ 4493{
4514 return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, 4494 nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
4515 ctx->seq, event, ctx->afi->family, ctx->report, 4495 ctx->afi->family, ctx->report, GFP_KERNEL);
4516 GFP_KERNEL);
4517} 4496}
4518 4497
4519static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, 4498static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
@@ -4543,7 +4522,8 @@ nla_put_failure:
4543 return -EMSGSIZE; 4522 return -EMSGSIZE;
4544} 4523}
4545 4524
4546static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) 4525static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
4526 int event)
4547{ 4527{
4548 struct nlmsghdr *nlh = nlmsg_hdr(skb); 4528 struct nlmsghdr *nlh = nlmsg_hdr(skb);
4549 struct sk_buff *skb2; 4529 struct sk_buff *skb2;
@@ -4551,9 +4531,8 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
4551 4531
4552 if (nlmsg_report(nlh) && 4532 if (nlmsg_report(nlh) &&
4553 !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) 4533 !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
4554 return 0; 4534 return;
4555 4535
4556 err = -ENOBUFS;
4557 skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 4536 skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
4558 if (skb2 == NULL) 4537 if (skb2 == NULL)
4559 goto err; 4538 goto err;
@@ -4565,14 +4544,12 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
4565 goto err; 4544 goto err;
4566 } 4545 }
4567 4546
4568 err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, 4547 nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
4569 NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL); 4548 nlmsg_report(nlh), GFP_KERNEL);
4549 return;
4570err: 4550err:
4571 if (err < 0) { 4551 nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
4572 nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, 4552 -ENOBUFS);
4573 err);
4574 }
4575 return err;
4576} 4553}
4577 4554
4578static int nf_tables_getgen(struct net *net, struct sock *nlsk, 4555static int nf_tables_getgen(struct net *net, struct sock *nlsk,
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 71e8fb886a73..78dfbf9588b3 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -60,11 +60,10 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
60 d = memcmp(this, key, set->klen); 60 d = memcmp(this, key, set->klen);
61 if (d < 0) { 61 if (d < 0) {
62 parent = parent->rb_left; 62 parent = parent->rb_left;
63 /* In case of adjacent ranges, we always see the high 63 if (interval &&
64 * part of the range in first place, before the low one. 64 nft_rbtree_equal(set, this, interval) &&
65 * So don't update interval if the keys are equal. 65 nft_rbtree_interval_end(this) &&
66 */ 66 !nft_rbtree_interval_end(interval))
67 if (interval && nft_rbtree_equal(set, this, interval))
68 continue; 67 continue;
69 interval = rbe; 68 interval = rbe;
70 } else if (d > 0) 69 } else if (d > 0)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 16477df45b3b..3d705c688a27 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -13,6 +13,8 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/file.h> 15#include <linux/file.h>
16#include <linux/cred.h>
17
16#include <net/sock.h> 18#include <net/sock.h>
17#include <net/inet_sock.h> 19#include <net/inet_sock.h>
18#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ed212ffc1d9d..4bbf4526b885 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -17,7 +17,7 @@
17#include <linux/in.h> 17#include <linux/in.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/sched.h> 20#include <linux/sched/signal.h>
21#include <linux/timer.h> 21#include <linux/timer.h>
22#include <linux/string.h> 22#include <linux/string.h>
23#include <linux/sockios.h> 23#include <linux/sockios.h>
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index b9edf5fae6ae..879885b31cce 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -21,6 +21,7 @@
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/nfc.h> 23#include <linux/nfc.h>
24#include <linux/sched/signal.h>
24 25
25#include "nfc.h" 26#include "nfc.h"
26#include "llcp.h" 27#include "llcp.h"
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b1beb2b94ec7..c82301ce3fff 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -796,9 +796,8 @@ static void ovs_fragment(struct net *net, struct vport *vport,
796 unsigned long orig_dst; 796 unsigned long orig_dst;
797 struct rt6_info ovs_rt; 797 struct rt6_info ovs_rt;
798 798
799 if (!v6ops) { 799 if (!v6ops)
800 goto err; 800 goto err;
801 }
802 801
803 prepare_frag(vport, skb, orig_network_offset, 802 prepare_frag(vport, skb, orig_network_offset,
804 ovs_key_mac_proto(key)); 803 ovs_key_mac_proto(key));
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 85cd59526670..e0a87776a010 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -485,7 +485,6 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
485 } else if (key->eth.type == htons(ETH_P_IPV6)) { 485 } else if (key->eth.type == htons(ETH_P_IPV6)) {
486 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 486 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
487 487
488 skb_orphan(skb);
489 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 488 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
490 err = nf_ct_frag6_gather(net, skb, user); 489 err = nf_ct_frag6_gather(net, skb, user);
491 if (err) { 490 if (err) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2bd0d1949312..a0dbe7ca8f72 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3103,7 +3103,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
3103 int addr_len) 3103 int addr_len)
3104{ 3104{
3105 struct sock *sk = sock->sk; 3105 struct sock *sk = sock->sk;
3106 char name[15]; 3106 char name[sizeof(uaddr->sa_data) + 1];
3107 3107
3108 /* 3108 /*
3109 * Check legality 3109 * Check legality
@@ -3111,7 +3111,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
3111 3111
3112 if (addr_len != sizeof(struct sockaddr)) 3112 if (addr_len != sizeof(struct sockaddr))
3113 return -EINVAL; 3113 return -EINVAL;
3114 strlcpy(name, uaddr->sa_data, sizeof(name)); 3114 /* uaddr->sa_data comes from the userspace, it's not guaranteed to be
3115 * zero-terminated.
3116 */
3117 memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
3118 name[sizeof(uaddr->sa_data)] = 0;
3115 3119
3116 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); 3120 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
3117} 3121}
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 8bad5624a27a..222bedcd9575 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -23,6 +23,7 @@
23 */ 23 */
24 24
25#include <linux/kernel.h> 25#include <linux/kernel.h>
26#include <linux/sched/signal.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
27#include <linux/socket.h> 28#include <linux/socket.h>
28#include <net/sock.h> 29#include <net/sock.h>
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index ffd5f2297584..a6c8da3ee893 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -27,6 +27,8 @@
27#include <linux/kernel.h> 27#include <linux/kernel.h>
28#include <linux/net.h> 28#include <linux/net.h>
29#include <linux/poll.h> 29#include <linux/poll.h>
30#include <linux/sched/signal.h>
31
30#include <net/sock.h> 32#include <net/sock.h>
31#include <net/tcp_states.h> 33#include <net/tcp_states.h>
32 34
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 91fe46f1e4cc..7a64c8db81ab 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -45,8 +45,8 @@
45#include "ib.h" 45#include "ib.h"
46#include "ib_mr.h" 46#include "ib_mr.h"
47 47
48unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; 48static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
49unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; 49static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
50unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 50unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
51 51
52module_param(rds_ib_mr_1m_pool_size, int, 0444); 52module_param(rds_ib_mr_1m_pool_size, int, 0444);
@@ -438,16 +438,12 @@ int rds_ib_init(void)
438 if (ret) 438 if (ret)
439 goto out_sysctl; 439 goto out_sysctl;
440 440
441 ret = rds_trans_register(&rds_ib_transport); 441 rds_trans_register(&rds_ib_transport);
442 if (ret)
443 goto out_recv;
444 442
445 rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 443 rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
446 444
447 goto out; 445 goto out;
448 446
449out_recv:
450 rds_ib_recv_exit();
451out_sysctl: 447out_sysctl:
452 rds_ib_sysctl_exit(); 448 rds_ib_sysctl_exit();
453out_ibreg: 449out_ibreg:
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 24c086db4511..5d6e98a79a5e 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -107,8 +107,6 @@ struct rds_ib_mr_pool {
107}; 107};
108 108
109extern struct workqueue_struct *rds_ib_mr_wq; 109extern struct workqueue_struct *rds_ib_mr_wq;
110extern unsigned int rds_ib_mr_1m_pool_size;
111extern unsigned int rds_ib_mr_8k_pool_size;
112extern bool prefer_frmr; 110extern bool prefer_frmr;
113 111
114struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, 112struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
diff --git a/net/rds/page.c b/net/rds/page.c
index e2b5a5832d3d..7cc57e098ddb 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -45,35 +45,6 @@ struct rds_page_remainder {
45static 45static
46DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); 46DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
47 47
48/*
49 * returns 0 on success or -errno on failure.
50 *
51 * We don't have to worry about flush_dcache_page() as this only works
52 * with private pages. If, say, we were to do directed receive to pinned
53 * user pages we'd have to worry more about cache coherence. (Though
54 * the flush_dcache_page() in get_user_pages() would probably be enough).
55 */
56int rds_page_copy_user(struct page *page, unsigned long offset,
57 void __user *ptr, unsigned long bytes,
58 int to_user)
59{
60 unsigned long ret;
61 void *addr;
62
63 addr = kmap(page);
64 if (to_user) {
65 rds_stats_add(s_copy_to_user, bytes);
66 ret = copy_to_user(ptr, addr + offset, bytes);
67 } else {
68 rds_stats_add(s_copy_from_user, bytes);
69 ret = copy_from_user(addr + offset, ptr, bytes);
70 }
71 kunmap(page);
72
73 return ret ? -EFAULT : 0;
74}
75EXPORT_SYMBOL_GPL(rds_page_copy_user);
76
77/** 48/**
78 * rds_page_remainder_alloc - build up regions of a message. 49 * rds_page_remainder_alloc - build up regions of a message.
79 * 50 *
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 07fff73dd4f3..39518ef7af4d 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -798,13 +798,6 @@ static inline int rds_message_verify_checksum(const struct rds_header *hdr)
798/* page.c */ 798/* page.c */
799int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, 799int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
800 gfp_t gfp); 800 gfp_t gfp);
801int rds_page_copy_user(struct page *page, unsigned long offset,
802 void __user *ptr, unsigned long bytes,
803 int to_user);
804#define rds_page_copy_to_user(page, offset, ptr, bytes) \
805 rds_page_copy_user(page, offset, ptr, bytes, 1)
806#define rds_page_copy_from_user(page, offset, ptr, bytes) \
807 rds_page_copy_user(page, offset, ptr, bytes, 0)
808void rds_page_exit(void); 801void rds_page_exit(void);
809 802
810/* recv.c */ 803/* recv.c */
@@ -910,7 +903,7 @@ void rds_connect_path_complete(struct rds_conn_path *conn, int curr);
910void rds_connect_complete(struct rds_connection *conn); 903void rds_connect_complete(struct rds_connection *conn);
911 904
912/* transport.c */ 905/* transport.c */
913int rds_trans_register(struct rds_transport *trans); 906void rds_trans_register(struct rds_transport *trans);
914void rds_trans_unregister(struct rds_transport *trans); 907void rds_trans_unregister(struct rds_transport *trans);
915struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr); 908struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
916void rds_trans_put(struct rds_transport *trans); 909void rds_trans_put(struct rds_transport *trans);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5438f6725092..a973d3b4dff0 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -652,16 +652,12 @@ static int rds_tcp_init(void)
652 if (ret) 652 if (ret)
653 goto out_pernet; 653 goto out_pernet;
654 654
655 ret = rds_trans_register(&rds_tcp_transport); 655 rds_trans_register(&rds_tcp_transport);
656 if (ret)
657 goto out_recv;
658 656
659 rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 657 rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
660 658
661 goto out; 659 goto out;
662 660
663out_recv:
664 rds_tcp_recv_exit();
665out_pernet: 661out_pernet:
666 unregister_pernet_subsys(&rds_tcp_net_ops); 662 unregister_pernet_subsys(&rds_tcp_net_ops);
667out_notifier: 663out_notifier:
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 2ffd3e30c643..0b188dd0a344 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -40,7 +40,7 @@
40static struct rds_transport *transports[RDS_TRANS_COUNT]; 40static struct rds_transport *transports[RDS_TRANS_COUNT];
41static DECLARE_RWSEM(rds_trans_sem); 41static DECLARE_RWSEM(rds_trans_sem);
42 42
43int rds_trans_register(struct rds_transport *trans) 43void rds_trans_register(struct rds_transport *trans)
44{ 44{
45 BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ); 45 BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ);
46 46
@@ -55,8 +55,6 @@ int rds_trans_register(struct rds_transport *trans)
55 } 55 }
56 56
57 up_write(&rds_trans_sem); 57 up_write(&rds_trans_sem);
58
59 return 0;
60} 58}
61EXPORT_SYMBOL_GPL(rds_trans_register); 59EXPORT_SYMBOL_GPL(rds_trans_register);
62 60
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 9ad301c46b88..b8a1df2c9785 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -20,7 +20,7 @@
20#include <linux/in.h> 20#include <linux/in.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23#include <linux/sched.h> 23#include <linux/sched/signal.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/string.h> 26#include <linux/string.h>
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 199b46e93e64..7fb59c3f1542 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -290,10 +290,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
290 cp.exclusive = false; 290 cp.exclusive = false;
291 cp.service_id = srx->srx_service; 291 cp.service_id = srx->srx_service;
292 call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); 292 call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
293 /* The socket has been unlocked. */
293 if (!IS_ERR(call)) 294 if (!IS_ERR(call))
294 call->notify_rx = notify_rx; 295 call->notify_rx = notify_rx;
295 296
296 release_sock(&rx->sk); 297 mutex_unlock(&call->user_mutex);
297 _leave(" = %p", call); 298 _leave(" = %p", call);
298 return call; 299 return call;
299} 300}
@@ -310,7 +311,10 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call);
310void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) 311void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
311{ 312{
312 _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); 313 _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
314
315 mutex_lock(&call->user_mutex);
313 rxrpc_release_call(rxrpc_sk(sock->sk), call); 316 rxrpc_release_call(rxrpc_sk(sock->sk), call);
317 mutex_unlock(&call->user_mutex);
314 rxrpc_put_call(call, rxrpc_call_put_kernel); 318 rxrpc_put_call(call, rxrpc_call_put_kernel);
315} 319}
316EXPORT_SYMBOL(rxrpc_kernel_end_call); 320EXPORT_SYMBOL(rxrpc_kernel_end_call);
@@ -450,14 +454,16 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
450 case RXRPC_SERVER_BOUND: 454 case RXRPC_SERVER_BOUND:
451 case RXRPC_SERVER_LISTENING: 455 case RXRPC_SERVER_LISTENING:
452 ret = rxrpc_do_sendmsg(rx, m, len); 456 ret = rxrpc_do_sendmsg(rx, m, len);
453 break; 457 /* The socket has been unlocked */
458 goto out;
454 default: 459 default:
455 ret = -EINVAL; 460 ret = -EINVAL;
456 break; 461 goto error_unlock;
457 } 462 }
458 463
459error_unlock: 464error_unlock:
460 release_sock(&rx->sk); 465 release_sock(&rx->sk);
466out:
461 _leave(" = %d", ret); 467 _leave(" = %d", ret);
462 return ret; 468 return ret;
463} 469}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 12be432be9b2..26a7b1db1361 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -467,6 +467,7 @@ struct rxrpc_call {
467 struct rxrpc_connection *conn; /* connection carrying call */ 467 struct rxrpc_connection *conn; /* connection carrying call */
468 struct rxrpc_peer *peer; /* Peer record for remote address */ 468 struct rxrpc_peer *peer; /* Peer record for remote address */
469 struct rxrpc_sock __rcu *socket; /* socket responsible */ 469 struct rxrpc_sock __rcu *socket; /* socket responsible */
470 struct mutex user_mutex; /* User access mutex */
470 ktime_t ack_at; /* When deferred ACK needs to happen */ 471 ktime_t ack_at; /* When deferred ACK needs to happen */
471 ktime_t resend_at; /* When next resend needs to happen */ 472 ktime_t resend_at; /* When next resend needs to happen */
472 ktime_t ping_at; /* When next to send a ping */ 473 ktime_t ping_at; /* When next to send a ping */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 7c4c64ab8da2..0ed181f53f32 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -323,6 +323,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
323 * 323 *
324 * If we want to report an error, we mark the skb with the packet type and 324 * If we want to report an error, we mark the skb with the packet type and
325 * abort code and return NULL. 325 * abort code and return NULL.
326 *
327 * The call is returned with the user access mutex held.
326 */ 328 */
327struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, 329struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
328 struct rxrpc_connection *conn, 330 struct rxrpc_connection *conn,
@@ -371,6 +373,18 @@ found_service:
371 trace_rxrpc_receive(call, rxrpc_receive_incoming, 373 trace_rxrpc_receive(call, rxrpc_receive_incoming,
372 sp->hdr.serial, sp->hdr.seq); 374 sp->hdr.serial, sp->hdr.seq);
373 375
376 /* Lock the call to prevent rxrpc_kernel_send/recv_data() and
377 * sendmsg()/recvmsg() inconveniently stealing the mutex once the
378 * notification is generated.
379 *
380 * The BUG should never happen because the kernel should be well
381 * behaved enough not to access the call before the first notification
382 * event and userspace is prevented from doing so until the state is
383 * appropriate.
384 */
385 if (!mutex_trylock(&call->user_mutex))
386 BUG();
387
374 /* Make the call live. */ 388 /* Make the call live. */
375 rxrpc_incoming_call(rx, call, skb); 389 rxrpc_incoming_call(rx, call, skb);
376 conn = call->conn; 390 conn = call->conn;
@@ -429,10 +443,12 @@ out:
429/* 443/*
430 * handle acceptance of a call by userspace 444 * handle acceptance of a call by userspace
431 * - assign the user call ID to the call at the front of the queue 445 * - assign the user call ID to the call at the front of the queue
446 * - called with the socket locked.
432 */ 447 */
433struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, 448struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
434 unsigned long user_call_ID, 449 unsigned long user_call_ID,
435 rxrpc_notify_rx_t notify_rx) 450 rxrpc_notify_rx_t notify_rx)
451 __releases(&rx->sk.sk_lock.slock)
436{ 452{
437 struct rxrpc_call *call; 453 struct rxrpc_call *call;
438 struct rb_node *parent, **pp; 454 struct rb_node *parent, **pp;
@@ -446,6 +462,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
446 462
447 if (list_empty(&rx->to_be_accepted)) { 463 if (list_empty(&rx->to_be_accepted)) {
448 write_unlock(&rx->call_lock); 464 write_unlock(&rx->call_lock);
465 release_sock(&rx->sk);
449 kleave(" = -ENODATA [empty]"); 466 kleave(" = -ENODATA [empty]");
450 return ERR_PTR(-ENODATA); 467 return ERR_PTR(-ENODATA);
451 } 468 }
@@ -470,10 +487,39 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
470 */ 487 */
471 call = list_entry(rx->to_be_accepted.next, 488 call = list_entry(rx->to_be_accepted.next,
472 struct rxrpc_call, accept_link); 489 struct rxrpc_call, accept_link);
490 write_unlock(&rx->call_lock);
491
492 /* We need to gain the mutex from the interrupt handler without
493 * upsetting lockdep, so we have to release it there and take it here.
494 * We are, however, still holding the socket lock, so other accepts
495 * must wait for us and no one can add the user ID behind our backs.
496 */
497 if (mutex_lock_interruptible(&call->user_mutex) < 0) {
498 release_sock(&rx->sk);
499 kleave(" = -ERESTARTSYS");
500 return ERR_PTR(-ERESTARTSYS);
501 }
502
503 write_lock(&rx->call_lock);
473 list_del_init(&call->accept_link); 504 list_del_init(&call->accept_link);
474 sk_acceptq_removed(&rx->sk); 505 sk_acceptq_removed(&rx->sk);
475 rxrpc_see_call(call); 506 rxrpc_see_call(call);
476 507
508 /* Find the user ID insertion point. */
509 pp = &rx->calls.rb_node;
510 parent = NULL;
511 while (*pp) {
512 parent = *pp;
513 call = rb_entry(parent, struct rxrpc_call, sock_node);
514
515 if (user_call_ID < call->user_call_ID)
516 pp = &(*pp)->rb_left;
517 else if (user_call_ID > call->user_call_ID)
518 pp = &(*pp)->rb_right;
519 else
520 BUG();
521 }
522
477 write_lock_bh(&call->state_lock); 523 write_lock_bh(&call->state_lock);
478 switch (call->state) { 524 switch (call->state) {
479 case RXRPC_CALL_SERVER_ACCEPTING: 525 case RXRPC_CALL_SERVER_ACCEPTING:
@@ -499,6 +545,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
499 write_unlock(&rx->call_lock); 545 write_unlock(&rx->call_lock);
500 rxrpc_notify_socket(call); 546 rxrpc_notify_socket(call);
501 rxrpc_service_prealloc(rx, GFP_KERNEL); 547 rxrpc_service_prealloc(rx, GFP_KERNEL);
548 release_sock(&rx->sk);
502 _leave(" = %p{%d}", call, call->debug_id); 549 _leave(" = %p{%d}", call, call->debug_id);
503 return call; 550 return call;
504 551
@@ -515,6 +562,7 @@ id_in_use:
515 write_unlock(&rx->call_lock); 562 write_unlock(&rx->call_lock);
516out: 563out:
517 rxrpc_service_prealloc(rx, GFP_KERNEL); 564 rxrpc_service_prealloc(rx, GFP_KERNEL);
565 release_sock(&rx->sk);
518 _leave(" = %d", ret); 566 _leave(" = %d", ret);
519 return ERR_PTR(ret); 567 return ERR_PTR(ret);
520} 568}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 8b94db3c9b2e..d79cd36987a9 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -115,6 +115,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
115 if (!call->rxtx_annotations) 115 if (!call->rxtx_annotations)
116 goto nomem_2; 116 goto nomem_2;
117 117
118 mutex_init(&call->user_mutex);
118 setup_timer(&call->timer, rxrpc_call_timer_expired, 119 setup_timer(&call->timer, rxrpc_call_timer_expired,
119 (unsigned long)call); 120 (unsigned long)call);
120 INIT_WORK(&call->processor, &rxrpc_process_call); 121 INIT_WORK(&call->processor, &rxrpc_process_call);
@@ -194,14 +195,16 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
194} 195}
195 196
196/* 197/*
197 * set up a call for the given data 198 * Set up a call for the given parameters.
198 * - called in process context with IRQs enabled 199 * - Called with the socket lock held, which it must release.
200 * - If it returns a call, the call's lock will need releasing by the caller.
199 */ 201 */
200struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, 202struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
201 struct rxrpc_conn_parameters *cp, 203 struct rxrpc_conn_parameters *cp,
202 struct sockaddr_rxrpc *srx, 204 struct sockaddr_rxrpc *srx,
203 unsigned long user_call_ID, 205 unsigned long user_call_ID,
204 gfp_t gfp) 206 gfp_t gfp)
207 __releases(&rx->sk.sk_lock.slock)
205{ 208{
206 struct rxrpc_call *call, *xcall; 209 struct rxrpc_call *call, *xcall;
207 struct rb_node *parent, **pp; 210 struct rb_node *parent, **pp;
@@ -212,6 +215,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
212 215
213 call = rxrpc_alloc_client_call(srx, gfp); 216 call = rxrpc_alloc_client_call(srx, gfp);
214 if (IS_ERR(call)) { 217 if (IS_ERR(call)) {
218 release_sock(&rx->sk);
215 _leave(" = %ld", PTR_ERR(call)); 219 _leave(" = %ld", PTR_ERR(call));
216 return call; 220 return call;
217 } 221 }
@@ -219,6 +223,11 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
219 trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), 223 trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
220 here, (const void *)user_call_ID); 224 here, (const void *)user_call_ID);
221 225
226 /* We need to protect a partially set up call against the user as we
227 * will be acting outside the socket lock.
228 */
229 mutex_lock(&call->user_mutex);
230
222 /* Publish the call, even though it is incompletely set up as yet */ 231 /* Publish the call, even though it is incompletely set up as yet */
223 write_lock(&rx->call_lock); 232 write_lock(&rx->call_lock);
224 233
@@ -250,6 +259,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
250 list_add_tail(&call->link, &rxrpc_calls); 259 list_add_tail(&call->link, &rxrpc_calls);
251 write_unlock(&rxrpc_call_lock); 260 write_unlock(&rxrpc_call_lock);
252 261
262 /* From this point on, the call is protected by its own lock. */
263 release_sock(&rx->sk);
264
253 /* Set up or get a connection record and set the protocol parameters, 265 /* Set up or get a connection record and set the protocol parameters,
254 * including channel number and call ID. 266 * including channel number and call ID.
255 */ 267 */
@@ -279,6 +291,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
279 */ 291 */
280error_dup_user_ID: 292error_dup_user_ID:
281 write_unlock(&rx->call_lock); 293 write_unlock(&rx->call_lock);
294 release_sock(&rx->sk);
282 ret = -EEXIST; 295 ret = -EEXIST;
283 296
284error: 297error:
@@ -287,6 +300,7 @@ error:
287 trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), 300 trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
288 here, ERR_PTR(ret)); 301 here, ERR_PTR(ret));
289 rxrpc_release_call(rx, call); 302 rxrpc_release_call(rx, call);
303 mutex_unlock(&call->user_mutex);
290 rxrpc_put_call(call, rxrpc_call_put); 304 rxrpc_put_call(call, rxrpc_call_put);
291 _leave(" = %d", ret); 305 _leave(" = %d", ret);
292 return ERR_PTR(ret); 306 return ERR_PTR(ret);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 40a1ef2adeb4..c3be03e8d098 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -76,6 +76,8 @@
76#include <linux/slab.h> 76#include <linux/slab.h>
77#include <linux/idr.h> 77#include <linux/idr.h>
78#include <linux/timer.h> 78#include <linux/timer.h>
79#include <linux/sched/signal.h>
80
79#include "ar-internal.h" 81#include "ar-internal.h"
80 82
81__read_mostly unsigned int rxrpc_max_client_connections = 1000; 83__read_mostly unsigned int rxrpc_max_client_connections = 1000;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 78ec33477adf..9f4cfa25af7c 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1194,6 +1194,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
1194 goto reject_packet; 1194 goto reject_packet;
1195 } 1195 }
1196 rxrpc_send_ping(call, skb, skew); 1196 rxrpc_send_ping(call, skb, skew);
1197 mutex_unlock(&call->user_mutex);
1197 } 1198 }
1198 1199
1199 rxrpc_input_call_packet(call, skb, skew); 1200 rxrpc_input_call_packet(call, skb, skew);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index f3a688e10843..6491ca46a03f 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -14,6 +14,8 @@
14#include <linux/net.h> 14#include <linux/net.h>
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/export.h> 16#include <linux/export.h>
17#include <linux/sched/signal.h>
18
17#include <net/sock.h> 19#include <net/sock.h>
18#include <net/af_rxrpc.h> 20#include <net/af_rxrpc.h>
19#include "ar-internal.h" 21#include "ar-internal.h"
@@ -487,6 +489,20 @@ try_again:
487 489
488 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); 490 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0);
489 491
492 /* We're going to drop the socket lock, so we need to lock the call
493 * against interference by sendmsg.
494 */
495 if (!mutex_trylock(&call->user_mutex)) {
496 ret = -EWOULDBLOCK;
497 if (flags & MSG_DONTWAIT)
498 goto error_requeue_call;
499 ret = -ERESTARTSYS;
500 if (mutex_lock_interruptible(&call->user_mutex) < 0)
501 goto error_requeue_call;
502 }
503
504 release_sock(&rx->sk);
505
490 if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) 506 if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
491 BUG(); 507 BUG();
492 508
@@ -502,7 +518,7 @@ try_again:
502 &call->user_call_ID); 518 &call->user_call_ID);
503 } 519 }
504 if (ret < 0) 520 if (ret < 0)
505 goto error; 521 goto error_unlock_call;
506 } 522 }
507 523
508 if (msg->msg_name) { 524 if (msg->msg_name) {
@@ -533,12 +549,12 @@ try_again:
533 } 549 }
534 550
535 if (ret < 0) 551 if (ret < 0)
536 goto error; 552 goto error_unlock_call;
537 553
538 if (call->state == RXRPC_CALL_COMPLETE) { 554 if (call->state == RXRPC_CALL_COMPLETE) {
539 ret = rxrpc_recvmsg_term(call, msg); 555 ret = rxrpc_recvmsg_term(call, msg);
540 if (ret < 0) 556 if (ret < 0)
541 goto error; 557 goto error_unlock_call;
542 if (!(flags & MSG_PEEK)) 558 if (!(flags & MSG_PEEK))
543 rxrpc_release_call(rx, call); 559 rxrpc_release_call(rx, call);
544 msg->msg_flags |= MSG_EOR; 560 msg->msg_flags |= MSG_EOR;
@@ -551,8 +567,21 @@ try_again:
551 msg->msg_flags &= ~MSG_MORE; 567 msg->msg_flags &= ~MSG_MORE;
552 ret = copied; 568 ret = copied;
553 569
554error: 570error_unlock_call:
571 mutex_unlock(&call->user_mutex);
555 rxrpc_put_call(call, rxrpc_call_put); 572 rxrpc_put_call(call, rxrpc_call_put);
573 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
574 return ret;
575
576error_requeue_call:
577 if (!(flags & MSG_PEEK)) {
578 write_lock_bh(&rx->recvmsg_lock);
579 list_add(&call->recvmsg_link, &rx->recvmsg_q);
580 write_unlock_bh(&rx->recvmsg_lock);
581 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0);
582 } else {
583 rxrpc_put_call(call, rxrpc_call_put);
584 }
556error_no_call: 585error_no_call:
557 release_sock(&rx->sk); 586 release_sock(&rx->sk);
558 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); 587 trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
@@ -609,7 +638,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
609 iov.iov_len = size - *_offset; 638 iov.iov_len = size - *_offset;
610 iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); 639 iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
611 640
612 lock_sock(sock->sk); 641 mutex_lock(&call->user_mutex);
613 642
614 switch (call->state) { 643 switch (call->state) {
615 case RXRPC_CALL_CLIENT_RECV_REPLY: 644 case RXRPC_CALL_CLIENT_RECV_REPLY:
@@ -648,7 +677,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
648read_phase_complete: 677read_phase_complete:
649 ret = 1; 678 ret = 1;
650out: 679out:
651 release_sock(sock->sk); 680 mutex_unlock(&call->user_mutex);
652 _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); 681 _leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
653 return ret; 682 return ret;
654 683
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 0a6ef217aa8a..bc2d3dcff9de 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -15,6 +15,8 @@
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <linux/export.h> 17#include <linux/export.h>
18#include <linux/sched/signal.h>
19
18#include <net/sock.h> 20#include <net/sock.h>
19#include <net/af_rxrpc.h> 21#include <net/af_rxrpc.h>
20#include "ar-internal.h" 22#include "ar-internal.h"
@@ -59,9 +61,12 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
59 } 61 }
60 62
61 trace_rxrpc_transmit(call, rxrpc_transmit_wait); 63 trace_rxrpc_transmit(call, rxrpc_transmit_wait);
62 release_sock(&rx->sk); 64 mutex_unlock(&call->user_mutex);
63 *timeo = schedule_timeout(*timeo); 65 *timeo = schedule_timeout(*timeo);
64 lock_sock(&rx->sk); 66 if (mutex_lock_interruptible(&call->user_mutex) < 0) {
67 ret = sock_intr_errno(*timeo);
68 break;
69 }
65 } 70 }
66 71
67 remove_wait_queue(&call->waitq, &myself); 72 remove_wait_queue(&call->waitq, &myself);
@@ -171,7 +176,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
171/* 176/*
172 * send data through a socket 177 * send data through a socket
173 * - must be called in process context 178 * - must be called in process context
174 * - caller holds the socket locked 179 * - The caller holds the call user access mutex, but not the socket lock.
175 */ 180 */
176static int rxrpc_send_data(struct rxrpc_sock *rx, 181static int rxrpc_send_data(struct rxrpc_sock *rx,
177 struct rxrpc_call *call, 182 struct rxrpc_call *call,
@@ -437,10 +442,13 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
437 442
438/* 443/*
439 * Create a new client call for sendmsg(). 444 * Create a new client call for sendmsg().
445 * - Called with the socket lock held, which it must release.
446 * - If it returns a call, the call's lock will need releasing by the caller.
440 */ 447 */
441static struct rxrpc_call * 448static struct rxrpc_call *
442rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, 449rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
443 unsigned long user_call_ID, bool exclusive) 450 unsigned long user_call_ID, bool exclusive)
451 __releases(&rx->sk.sk_lock.slock)
444{ 452{
445 struct rxrpc_conn_parameters cp; 453 struct rxrpc_conn_parameters cp;
446 struct rxrpc_call *call; 454 struct rxrpc_call *call;
@@ -450,8 +458,10 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
450 458
451 _enter(""); 459 _enter("");
452 460
453 if (!msg->msg_name) 461 if (!msg->msg_name) {
462 release_sock(&rx->sk);
454 return ERR_PTR(-EDESTADDRREQ); 463 return ERR_PTR(-EDESTADDRREQ);
464 }
455 465
456 key = rx->key; 466 key = rx->key;
457 if (key && !rx->key->payload.data[0]) 467 if (key && !rx->key->payload.data[0])
@@ -464,6 +474,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
464 cp.exclusive = rx->exclusive | exclusive; 474 cp.exclusive = rx->exclusive | exclusive;
465 cp.service_id = srx->srx_service; 475 cp.service_id = srx->srx_service;
466 call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); 476 call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
477 /* The socket is now unlocked */
467 478
468 _leave(" = %p\n", call); 479 _leave(" = %p\n", call);
469 return call; 480 return call;
@@ -475,6 +486,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
475 * - the socket may be either a client socket or a server socket 486 * - the socket may be either a client socket or a server socket
476 */ 487 */
477int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) 488int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
489 __releases(&rx->sk.sk_lock.slock)
478{ 490{
479 enum rxrpc_command cmd; 491 enum rxrpc_command cmd;
480 struct rxrpc_call *call; 492 struct rxrpc_call *call;
@@ -488,12 +500,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
488 ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, 500 ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
489 &exclusive); 501 &exclusive);
490 if (ret < 0) 502 if (ret < 0)
491 return ret; 503 goto error_release_sock;
492 504
493 if (cmd == RXRPC_CMD_ACCEPT) { 505 if (cmd == RXRPC_CMD_ACCEPT) {
506 ret = -EINVAL;
494 if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) 507 if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
495 return -EINVAL; 508 goto error_release_sock;
496 call = rxrpc_accept_call(rx, user_call_ID, NULL); 509 call = rxrpc_accept_call(rx, user_call_ID, NULL);
510 /* The socket is now unlocked. */
497 if (IS_ERR(call)) 511 if (IS_ERR(call))
498 return PTR_ERR(call); 512 return PTR_ERR(call);
499 rxrpc_put_call(call, rxrpc_call_put); 513 rxrpc_put_call(call, rxrpc_call_put);
@@ -502,12 +516,30 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
502 516
503 call = rxrpc_find_call_by_user_ID(rx, user_call_ID); 517 call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
504 if (!call) { 518 if (!call) {
519 ret = -EBADSLT;
505 if (cmd != RXRPC_CMD_SEND_DATA) 520 if (cmd != RXRPC_CMD_SEND_DATA)
506 return -EBADSLT; 521 goto error_release_sock;
507 call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, 522 call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
508 exclusive); 523 exclusive);
524 /* The socket is now unlocked... */
509 if (IS_ERR(call)) 525 if (IS_ERR(call))
510 return PTR_ERR(call); 526 return PTR_ERR(call);
527 /* ... and we have the call lock. */
528 } else {
529 ret = -EBUSY;
530 if (call->state == RXRPC_CALL_UNINITIALISED ||
531 call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
532 call->state == RXRPC_CALL_SERVER_PREALLOC ||
533 call->state == RXRPC_CALL_SERVER_SECURING ||
534 call->state == RXRPC_CALL_SERVER_ACCEPTING)
535 goto error_release_sock;
536
537 ret = mutex_lock_interruptible(&call->user_mutex);
538 release_sock(&rx->sk);
539 if (ret < 0) {
540 ret = -ERESTARTSYS;
541 goto error_put;
542 }
511 } 543 }
512 544
513 _debug("CALL %d USR %lx ST %d on CONN %p", 545 _debug("CALL %d USR %lx ST %d on CONN %p",
@@ -535,9 +567,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
535 ret = rxrpc_send_data(rx, call, msg, len); 567 ret = rxrpc_send_data(rx, call, msg, len);
536 } 568 }
537 569
570 mutex_unlock(&call->user_mutex);
571error_put:
538 rxrpc_put_call(call, rxrpc_call_put); 572 rxrpc_put_call(call, rxrpc_call_put);
539 _leave(" = %d", ret); 573 _leave(" = %d", ret);
540 return ret; 574 return ret;
575
576error_release_sock:
577 release_sock(&rx->sk);
578 return ret;
541} 579}
542 580
543/** 581/**
@@ -562,7 +600,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
562 ASSERTCMP(msg->msg_name, ==, NULL); 600 ASSERTCMP(msg->msg_name, ==, NULL);
563 ASSERTCMP(msg->msg_control, ==, NULL); 601 ASSERTCMP(msg->msg_control, ==, NULL);
564 602
565 lock_sock(sock->sk); 603 mutex_lock(&call->user_mutex);
566 604
567 _debug("CALL %d USR %lx ST %d on CONN %p", 605 _debug("CALL %d USR %lx ST %d on CONN %p",
568 call->debug_id, call->user_call_ID, call->state, call->conn); 606 call->debug_id, call->user_call_ID, call->state, call->conn);
@@ -577,7 +615,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
577 ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); 615 ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
578 } 616 }
579 617
580 release_sock(sock->sk); 618 mutex_unlock(&call->user_mutex);
581 _leave(" = %d", ret); 619 _leave(" = %d", ret);
582 return ret; 620 return ret;
583} 621}
@@ -598,12 +636,12 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
598{ 636{
599 _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); 637 _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
600 638
601 lock_sock(sock->sk); 639 mutex_lock(&call->user_mutex);
602 640
603 if (rxrpc_abort_call(why, call, 0, abort_code, error)) 641 if (rxrpc_abort_call(why, call, 0, abort_code, error))
604 rxrpc_send_abort_packet(call); 642 rxrpc_send_abort_packet(call);
605 643
606 release_sock(sock->sk); 644 mutex_unlock(&call->user_mutex);
607 _leave(""); 645 _leave("");
608} 646}
609 647
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 41c80b6c3906..ae7e4f5b348b 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -63,6 +63,7 @@
63#include <linux/types.h> 63#include <linux/types.h>
64#include <linux/kernel.h> 64#include <linux/kernel.h>
65#include <linux/sched.h> 65#include <linux/sched.h>
66#include <linux/sched/loadavg.h>
66#include <linux/string.h> 67#include <linux/string.h>
67#include <linux/skbuff.h> 68#include <linux/skbuff.h>
68#include <linux/random.h> 69#include <linux/random.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index fc458968fe4b..2a28ab20487f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -884,14 +884,17 @@ int sctp_hash_transport(struct sctp_transport *t)
884 arg.paddr = &t->ipaddr; 884 arg.paddr = &t->ipaddr;
885 arg.lport = htons(t->asoc->base.bind_addr.port); 885 arg.lport = htons(t->asoc->base.bind_addr.port);
886 886
887 rcu_read_lock();
887 list = rhltable_lookup(&sctp_transport_hashtable, &arg, 888 list = rhltable_lookup(&sctp_transport_hashtable, &arg,
888 sctp_hash_params); 889 sctp_hash_params);
889 890
890 rhl_for_each_entry_rcu(transport, tmp, list, node) 891 rhl_for_each_entry_rcu(transport, tmp, list, node)
891 if (transport->asoc->ep == t->asoc->ep) { 892 if (transport->asoc->ep == t->asoc->ep) {
893 rcu_read_unlock();
892 err = -EEXIST; 894 err = -EEXIST;
893 goto out; 895 goto out;
894 } 896 }
897 rcu_read_unlock();
895 898
896 err = rhltable_insert_key(&sctp_transport_hashtable, &arg, 899 err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
897 &t->node, sctp_hash_params); 900 &t->node, sctp_hash_params);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 465a9c8464f9..6f0a9be50f50 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -57,6 +57,7 @@
57#include <linux/kernel.h> 57#include <linux/kernel.h>
58#include <linux/wait.h> 58#include <linux/wait.h>
59#include <linux/time.h> 59#include <linux/time.h>
60#include <linux/sched/signal.h>
60#include <linux/ip.h> 61#include <linux/ip.h>
61#include <linux/capability.h> 62#include <linux/capability.h>
62#include <linux/fcntl.h> 63#include <linux/fcntl.h>
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5d4208ad029e..85837ab90e89 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -27,6 +27,8 @@
27#include <linux/inetdevice.h> 27#include <linux/inetdevice.h>
28#include <linux/workqueue.h> 28#include <linux/workqueue.h>
29#include <linux/in.h> 29#include <linux/in.h>
30#include <linux/sched/signal.h>
31
30#include <net/sock.h> 32#include <net/sock.h>
31#include <net/tcp.h> 33#include <net/tcp.h>
32#include <net/smc.h> 34#include <net/smc.h>
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index cc6b6f8651eb..e41f594a1e1d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -11,6 +11,8 @@
11 11
12#include <linux/in.h> 12#include <linux/in.h>
13#include <linux/if_ether.h> 13#include <linux/if_ether.h>
14#include <linux/sched/signal.h>
15
14#include <net/sock.h> 16#include <net/sock.h>
15#include <net/tcp.h> 17#include <net/tcp.h>
16 18
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 03dfcc6b7661..67a71d170bed 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -9,6 +9,8 @@
9 */ 9 */
10 10
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/sched/signal.h>
13
12#include <net/sock.h> 14#include <net/sock.h>
13 15
14#include "smc.h" 16#include "smc.h"
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 5d1878732f46..c4ef9a4ec569 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -11,6 +11,8 @@
11 11
12#include <linux/net.h> 12#include <linux/net.h>
13#include <linux/rcupdate.h> 13#include <linux/rcupdate.h>
14#include <linux/sched/signal.h>
15
14#include <net/sock.h> 16#include <net/sock.h>
15 17
16#include "smc.h" 18#include "smc.h"
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 6e73b28915ea..69a0013dd25c 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -15,6 +15,8 @@
15#include <linux/net.h> 15#include <linux/net.h>
16#include <linux/rcupdate.h> 16#include <linux/rcupdate.h>
17#include <linux/workqueue.h> 17#include <linux/workqueue.h>
18#include <linux/sched/signal.h>
19
18#include <net/sock.h> 20#include <net/sock.h>
19 21
20#include "smc.h" 22#include "smc.h"
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 41adf362936d..b5c279b22680 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -504,6 +504,7 @@ static int __init strp_mod_init(void)
504 504
505static void __exit strp_mod_exit(void) 505static void __exit strp_mod_exit(void)
506{ 506{
507 destroy_workqueue(strp_wq);
507} 508}
508module_init(strp_mod_init); 509module_init(strp_mod_init);
509module_exit(strp_mod_exit); 510module_exit(strp_mod_exit);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 2bff63a73cf8..d2623b9f23d6 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/cred.h>
11#include <linux/module.h> 12#include <linux/module.h>
12#include <linux/slab.h> 13#include <linux/slab.h>
13#include <linux/errno.h> 14#include <linux/errno.h>
@@ -464,8 +465,10 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
464 * Note that the cred_unused list must be time-ordered. 465 * Note that the cred_unused list must be time-ordered.
465 */ 466 */
466 if (time_in_range(cred->cr_expire, expired, jiffies) && 467 if (time_in_range(cred->cr_expire, expired, jiffies) &&
467 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) 468 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
469 freed = SHRINK_STOP;
468 break; 470 break;
471 }
469 472
470 list_del_init(&cred->cr_lru); 473 list_del_init(&cred->cr_lru);
471 number_cred_unused--; 474 number_cred_unused--;
@@ -520,7 +523,7 @@ static unsigned long
520rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) 523rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
521 524
522{ 525{
523 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; 526 return number_cred_unused * sysctl_vfs_cache_pressure / 100;
524} 527}
525 528
526static void 529static void
@@ -646,9 +649,6 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
646 cred->cr_auth = auth; 649 cred->cr_auth = auth;
647 cred->cr_ops = ops; 650 cred->cr_ops = ops;
648 cred->cr_expire = jiffies; 651 cred->cr_expire = jiffies;
649#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
650 cred->cr_magic = RPCAUTH_CRED_MAGIC;
651#endif
652 cred->cr_uid = acred->uid; 652 cred->cr_uid = acred->uid;
653} 653}
654EXPORT_SYMBOL_GPL(rpcauth_init_cred); 654EXPORT_SYMBOL_GPL(rpcauth_init_cred);
@@ -876,8 +876,12 @@ int __init rpcauth_init_module(void)
876 err = rpc_init_generic_auth(); 876 err = rpc_init_generic_auth();
877 if (err < 0) 877 if (err < 0)
878 goto out2; 878 goto out2;
879 register_shrinker(&rpc_cred_shrinker); 879 err = register_shrinker(&rpc_cred_shrinker);
880 if (err < 0)
881 goto out3;
880 return 0; 882 return 0;
883out3:
884 rpc_destroy_generic_auth();
881out2: 885out2:
882 rpc_destroy_authunix(); 886 rpc_destroy_authunix();
883out1: 887out1:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 153082598522..a54a7a3d28f5 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1489,8 +1489,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1489 case RPC_GSS_PROC_DESTROY: 1489 case RPC_GSS_PROC_DESTROY:
1490 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) 1490 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
1491 goto auth_err; 1491 goto auth_err;
1492 rsci->h.expiry_time = seconds_since_boot(); 1492 /* Delete the entry from the cache_list and call cache_put */
1493 set_bit(CACHE_NEGATIVE, &rsci->h.flags); 1493 sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
1494 if (resv->iov_len + 4 > PAGE_SIZE) 1494 if (resv->iov_len + 4 > PAGE_SIZE)
1495 goto drop; 1495 goto drop;
1496 svc_putnl(resv, RPC_SUCCESS); 1496 svc_putnl(resv, RPC_SUCCESS);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 4d17376b2acb..5f3d527dff65 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -139,7 +139,4 @@ struct rpc_cred null_cred = {
139 .cr_ops = &null_credops, 139 .cr_ops = &null_credops,
140 .cr_count = ATOMIC_INIT(1), 140 .cr_count = ATOMIC_INIT(1),
141 .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, 141 .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
142#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
143 .cr_magic = RPCAUTH_CRED_MAGIC,
144#endif
145}; 142};
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 306fc0f54596..82337e1ec9cd 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -14,12 +14,10 @@
14#include <linux/sunrpc/auth.h> 14#include <linux/sunrpc/auth.h>
15#include <linux/user_namespace.h> 15#include <linux/user_namespace.h>
16 16
17#define NFS_NGROUPS 16
18
19struct unx_cred { 17struct unx_cred {
20 struct rpc_cred uc_base; 18 struct rpc_cred uc_base;
21 kgid_t uc_gid; 19 kgid_t uc_gid;
22 kgid_t uc_gids[NFS_NGROUPS]; 20 kgid_t uc_gids[UNX_NGROUPS];
23}; 21};
24#define uc_uid uc_base.cr_uid 22#define uc_uid uc_base.cr_uid
25 23
@@ -82,13 +80,13 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
82 80
83 if (acred->group_info != NULL) 81 if (acred->group_info != NULL)
84 groups = acred->group_info->ngroups; 82 groups = acred->group_info->ngroups;
85 if (groups > NFS_NGROUPS) 83 if (groups > UNX_NGROUPS)
86 groups = NFS_NGROUPS; 84 groups = UNX_NGROUPS;
87 85
88 cred->uc_gid = acred->gid; 86 cred->uc_gid = acred->gid;
89 for (i = 0; i < groups; i++) 87 for (i = 0; i < groups; i++)
90 cred->uc_gids[i] = acred->group_info->gid[i]; 88 cred->uc_gids[i] = acred->group_info->gid[i];
91 if (i < NFS_NGROUPS) 89 if (i < UNX_NGROUPS)
92 cred->uc_gids[i] = INVALID_GID; 90 cred->uc_gids[i] = INVALID_GID;
93 91
94 return &cred->uc_base; 92 return &cred->uc_base;
@@ -132,12 +130,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
132 130
133 if (acred->group_info != NULL) 131 if (acred->group_info != NULL)
134 groups = acred->group_info->ngroups; 132 groups = acred->group_info->ngroups;
135 if (groups > NFS_NGROUPS) 133 if (groups > UNX_NGROUPS)
136 groups = NFS_NGROUPS; 134 groups = UNX_NGROUPS;
137 for (i = 0; i < groups ; i++) 135 for (i = 0; i < groups ; i++)
138 if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i])) 136 if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
139 return 0; 137 return 0;
140 if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups])) 138 if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups]))
141 return 0; 139 return 0;
142 return 1; 140 return 1;
143} 141}
@@ -166,7 +164,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
166 *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); 164 *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
167 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); 165 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
168 hold = p++; 166 hold = p++;
169 for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++) 167 for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++)
170 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i])); 168 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
171 *hold = htonl(p - hold - 1); /* gid array length */ 169 *hold = htonl(p - hold - 1); /* gid array length */
172 *base = htonl((p - base - 1) << 2); /* cred length */ 170 *base = htonl((p - base - 1) << 2); /* cred length */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f39e3e11f9aa..79d55d949d9a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -362,11 +362,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
362 cache_purge(cd); 362 cache_purge(cd);
363 spin_lock(&cache_list_lock); 363 spin_lock(&cache_list_lock);
364 write_lock(&cd->hash_lock); 364 write_lock(&cd->hash_lock);
365 if (cd->entries) {
366 write_unlock(&cd->hash_lock);
367 spin_unlock(&cache_list_lock);
368 goto out;
369 }
370 if (current_detail == cd) 365 if (current_detail == cd)
371 current_detail = NULL; 366 current_detail = NULL;
372 list_del_init(&cd->others); 367 list_del_init(&cd->others);
@@ -376,9 +371,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
376 /* module must be being unloaded so its safe to kill the worker */ 371 /* module must be being unloaded so its safe to kill the worker */
377 cancel_delayed_work_sync(&cache_cleaner); 372 cancel_delayed_work_sync(&cache_cleaner);
378 } 373 }
379 return;
380out:
381 printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
382} 374}
383EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); 375EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
384 376
@@ -497,13 +489,32 @@ EXPORT_SYMBOL_GPL(cache_flush);
497 489
498void cache_purge(struct cache_detail *detail) 490void cache_purge(struct cache_detail *detail)
499{ 491{
500 time_t now = seconds_since_boot(); 492 struct cache_head *ch = NULL;
501 if (detail->flush_time >= now) 493 struct hlist_head *head = NULL;
502 now = detail->flush_time + 1; 494 struct hlist_node *tmp = NULL;
503 /* 'now' is the maximum value any 'last_refresh' can have */ 495 int i = 0;
504 detail->flush_time = now; 496
505 detail->nextcheck = seconds_since_boot(); 497 write_lock(&detail->hash_lock);
506 cache_flush(); 498 if (!detail->entries) {
499 write_unlock(&detail->hash_lock);
500 return;
501 }
502
503 dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
504 for (i = 0; i < detail->hash_size; i++) {
505 head = &detail->hash_table[i];
506 hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
507 hlist_del_init(&ch->cache_list);
508 detail->entries--;
509
510 set_bit(CACHE_CLEANED, &ch->flags);
511 write_unlock(&detail->hash_lock);
512 cache_fresh_unlocked(ch, detail);
513 cache_put(ch, detail);
514 write_lock(&detail->hash_lock);
515 }
516 }
517 write_unlock(&detail->hash_lock);
507} 518}
508EXPORT_SYMBOL_GPL(cache_purge); 519EXPORT_SYMBOL_GPL(cache_purge);
509 520
@@ -717,7 +728,7 @@ void cache_clean_deferred(void *owner)
717/* 728/*
718 * communicate with user-space 729 * communicate with user-space
719 * 730 *
720 * We have a magic /proc file - /proc/sunrpc/<cachename>/channel. 731 * We have a magic /proc file - /proc/net/rpc/<cachename>/channel.
721 * On read, you get a full request, or block. 732 * On read, you get a full request, or block.
722 * On write, an update request is processed. 733 * On write, an update request is processed.
723 * Poll works if anything to read, and always allows write. 734 * Poll works if anything to read, and always allows write.
@@ -1272,7 +1283,7 @@ EXPORT_SYMBOL_GPL(qword_get);
1272 1283
1273 1284
1274/* 1285/*
1275 * support /proc/sunrpc/cache/$CACHENAME/content 1286 * support /proc/net/rpc/$CACHENAME/content
1276 * as a seqfile. 1287 * as a seqfile.
1277 * We call ->cache_show passing NULL for the item to 1288 * We call ->cache_show passing NULL for the item to
1278 * get a header, then pass each real item in the cache 1289 * get a header, then pass each real item in the cache
@@ -1427,20 +1438,11 @@ static ssize_t read_flush(struct file *file, char __user *buf,
1427 struct cache_detail *cd) 1438 struct cache_detail *cd)
1428{ 1439{
1429 char tbuf[22]; 1440 char tbuf[22];
1430 unsigned long p = *ppos;
1431 size_t len; 1441 size_t len;
1432 1442
1433 snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time)); 1443 len = snprintf(tbuf, sizeof(tbuf), "%lu\n",
1434 len = strlen(tbuf); 1444 convert_to_wallclock(cd->flush_time));
1435 if (p >= len) 1445 return simple_read_from_buffer(buf, count, ppos, tbuf, len);
1436 return 0;
1437 len -= p;
1438 if (len > count)
1439 len = count;
1440 if (copy_to_user(buf, (void*)(tbuf+p), len))
1441 return -EFAULT;
1442 *ppos += len;
1443 return len;
1444} 1446}
1445 1447
1446static ssize_t write_flush(struct file *file, const char __user *buf, 1448static ssize_t write_flush(struct file *file, const char __user *buf,
@@ -1600,21 +1602,12 @@ static const struct file_operations cache_flush_operations_procfs = {
1600 .llseek = no_llseek, 1602 .llseek = no_llseek,
1601}; 1603};
1602 1604
1603static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net) 1605static void remove_cache_proc_entries(struct cache_detail *cd)
1604{ 1606{
1605 struct sunrpc_net *sn; 1607 if (cd->procfs) {
1606 1608 proc_remove(cd->procfs);
1607 if (cd->u.procfs.proc_ent == NULL) 1609 cd->procfs = NULL;
1608 return; 1610 }
1609 if (cd->u.procfs.flush_ent)
1610 remove_proc_entry("flush", cd->u.procfs.proc_ent);
1611 if (cd->u.procfs.channel_ent)
1612 remove_proc_entry("channel", cd->u.procfs.proc_ent);
1613 if (cd->u.procfs.content_ent)
1614 remove_proc_entry("content", cd->u.procfs.proc_ent);
1615 cd->u.procfs.proc_ent = NULL;
1616 sn = net_generic(net, sunrpc_net_id);
1617 remove_proc_entry(cd->name, sn->proc_net_rpc);
1618} 1611}
1619 1612
1620#ifdef CONFIG_PROC_FS 1613#ifdef CONFIG_PROC_FS
@@ -1624,38 +1617,30 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1624 struct sunrpc_net *sn; 1617 struct sunrpc_net *sn;
1625 1618
1626 sn = net_generic(net, sunrpc_net_id); 1619 sn = net_generic(net, sunrpc_net_id);
1627 cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc); 1620 cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
1628 if (cd->u.procfs.proc_ent == NULL) 1621 if (cd->procfs == NULL)
1629 goto out_nomem; 1622 goto out_nomem;
1630 cd->u.procfs.channel_ent = NULL;
1631 cd->u.procfs.content_ent = NULL;
1632 1623
1633 p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, 1624 p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
1634 cd->u.procfs.proc_ent, 1625 cd->procfs, &cache_flush_operations_procfs, cd);
1635 &cache_flush_operations_procfs, cd);
1636 cd->u.procfs.flush_ent = p;
1637 if (p == NULL) 1626 if (p == NULL)
1638 goto out_nomem; 1627 goto out_nomem;
1639 1628
1640 if (cd->cache_request || cd->cache_parse) { 1629 if (cd->cache_request || cd->cache_parse) {
1641 p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, 1630 p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
1642 cd->u.procfs.proc_ent, 1631 cd->procfs, &cache_file_operations_procfs, cd);
1643 &cache_file_operations_procfs, cd);
1644 cd->u.procfs.channel_ent = p;
1645 if (p == NULL) 1632 if (p == NULL)
1646 goto out_nomem; 1633 goto out_nomem;
1647 } 1634 }
1648 if (cd->cache_show) { 1635 if (cd->cache_show) {
1649 p = proc_create_data("content", S_IFREG|S_IRUSR, 1636 p = proc_create_data("content", S_IFREG|S_IRUSR,
1650 cd->u.procfs.proc_ent, 1637 cd->procfs, &content_file_operations_procfs, cd);
1651 &content_file_operations_procfs, cd);
1652 cd->u.procfs.content_ent = p;
1653 if (p == NULL) 1638 if (p == NULL)
1654 goto out_nomem; 1639 goto out_nomem;
1655 } 1640 }
1656 return 0; 1641 return 0;
1657out_nomem: 1642out_nomem:
1658 remove_cache_proc_entries(cd, net); 1643 remove_cache_proc_entries(cd);
1659 return -ENOMEM; 1644 return -ENOMEM;
1660} 1645}
1661#else /* CONFIG_PROC_FS */ 1646#else /* CONFIG_PROC_FS */
@@ -1684,7 +1669,7 @@ EXPORT_SYMBOL_GPL(cache_register_net);
1684 1669
1685void cache_unregister_net(struct cache_detail *cd, struct net *net) 1670void cache_unregister_net(struct cache_detail *cd, struct net *net)
1686{ 1671{
1687 remove_cache_proc_entries(cd, net); 1672 remove_cache_proc_entries(cd);
1688 sunrpc_destroy_cache_detail(cd); 1673 sunrpc_destroy_cache_detail(cd);
1689} 1674}
1690EXPORT_SYMBOL_GPL(cache_unregister_net); 1675EXPORT_SYMBOL_GPL(cache_unregister_net);
@@ -1843,15 +1828,29 @@ int sunrpc_cache_register_pipefs(struct dentry *parent,
1843 struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd); 1828 struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd);
1844 if (IS_ERR(dir)) 1829 if (IS_ERR(dir))
1845 return PTR_ERR(dir); 1830 return PTR_ERR(dir);
1846 cd->u.pipefs.dir = dir; 1831 cd->pipefs = dir;
1847 return 0; 1832 return 0;
1848} 1833}
1849EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); 1834EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs);
1850 1835
1851void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) 1836void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
1852{ 1837{
1853 rpc_remove_cache_dir(cd->u.pipefs.dir); 1838 if (cd->pipefs) {
1854 cd->u.pipefs.dir = NULL; 1839 rpc_remove_cache_dir(cd->pipefs);
1840 cd->pipefs = NULL;
1841 }
1855} 1842}
1856EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); 1843EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
1857 1844
1845void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
1846{
1847 write_lock(&cd->hash_lock);
1848 if (!hlist_unhashed(&h->cache_list)){
1849 hlist_del_init(&h->cache_list);
1850 cd->entries--;
1851 write_unlock(&cd->hash_lock);
1852 cache_put(h, cd);
1853 } else
1854 write_unlock(&cd->hash_lock);
1855}
1856EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1dc9f3bac099..52da3ce54bb5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1453,21 +1453,6 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt)
1453EXPORT_SYMBOL_GPL(rpc_max_bc_payload); 1453EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
1454 1454
1455/** 1455/**
1456 * rpc_get_timeout - Get timeout for transport in units of HZ
1457 * @clnt: RPC client to query
1458 */
1459unsigned long rpc_get_timeout(struct rpc_clnt *clnt)
1460{
1461 unsigned long ret;
1462
1463 rcu_read_lock();
1464 ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval;
1465 rcu_read_unlock();
1466 return ret;
1467}
1468EXPORT_SYMBOL_GPL(rpc_get_timeout);
1469
1470/**
1471 * rpc_force_rebind - force transport to check that remote port is unchanged 1456 * rpc_force_rebind - force transport to check that remote port is unchanged
1472 * @clnt: client to rebind 1457 * @clnt: client to rebind
1473 * 1458 *
@@ -2699,6 +2684,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
2699{ 2684{
2700 struct rpc_xprt_switch *xps; 2685 struct rpc_xprt_switch *xps;
2701 struct rpc_xprt *xprt; 2686 struct rpc_xprt *xprt;
2687 unsigned long connect_timeout;
2702 unsigned long reconnect_timeout; 2688 unsigned long reconnect_timeout;
2703 unsigned char resvport; 2689 unsigned char resvport;
2704 int ret = 0; 2690 int ret = 0;
@@ -2711,6 +2697,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
2711 return -EAGAIN; 2697 return -EAGAIN;
2712 } 2698 }
2713 resvport = xprt->resvport; 2699 resvport = xprt->resvport;
2700 connect_timeout = xprt->connect_timeout;
2714 reconnect_timeout = xprt->max_reconnect_timeout; 2701 reconnect_timeout = xprt->max_reconnect_timeout;
2715 rcu_read_unlock(); 2702 rcu_read_unlock();
2716 2703
@@ -2720,7 +2707,10 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
2720 goto out_put_switch; 2707 goto out_put_switch;
2721 } 2708 }
2722 xprt->resvport = resvport; 2709 xprt->resvport = resvport;
2723 xprt->max_reconnect_timeout = reconnect_timeout; 2710 if (xprt->ops->set_connect_timeout != NULL)
2711 xprt->ops->set_connect_timeout(xprt,
2712 connect_timeout,
2713 reconnect_timeout);
2724 2714
2725 rpc_xprt_switch_set_roundrobin(xps); 2715 rpc_xprt_switch_set_roundrobin(xps);
2726 if (setup) { 2716 if (setup) {
@@ -2737,26 +2727,39 @@ out_put_switch:
2737} 2727}
2738EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt); 2728EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
2739 2729
2730struct connect_timeout_data {
2731 unsigned long connect_timeout;
2732 unsigned long reconnect_timeout;
2733};
2734
2740static int 2735static int
2741rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt, 2736rpc_xprt_set_connect_timeout(struct rpc_clnt *clnt,
2742 struct rpc_xprt *xprt, 2737 struct rpc_xprt *xprt,
2743 void *data) 2738 void *data)
2744{ 2739{
2745 unsigned long timeout = *((unsigned long *)data); 2740 struct connect_timeout_data *timeo = data;
2746 2741
2747 if (timeout < xprt->max_reconnect_timeout) 2742 if (xprt->ops->set_connect_timeout)
2748 xprt->max_reconnect_timeout = timeout; 2743 xprt->ops->set_connect_timeout(xprt,
2744 timeo->connect_timeout,
2745 timeo->reconnect_timeout);
2749 return 0; 2746 return 0;
2750} 2747}
2751 2748
2752void 2749void
2753rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo) 2750rpc_set_connect_timeout(struct rpc_clnt *clnt,
2751 unsigned long connect_timeout,
2752 unsigned long reconnect_timeout)
2754{ 2753{
2754 struct connect_timeout_data timeout = {
2755 .connect_timeout = connect_timeout,
2756 .reconnect_timeout = reconnect_timeout,
2757 };
2755 rpc_clnt_iterate_for_each_xprt(clnt, 2758 rpc_clnt_iterate_for_each_xprt(clnt,
2756 rpc_xprt_cap_max_reconnect_timeout, 2759 rpc_xprt_set_connect_timeout,
2757 &timeo); 2760 &timeout);
2758} 2761}
2759EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); 2762EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
2760 2763
2761void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) 2764void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
2762{ 2765{
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e7b4d93566df..c8fd0b6c1618 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -16,11 +16,6 @@ static struct dentry *rpc_xprt_dir;
16 16
17unsigned int rpc_inject_disconnect; 17unsigned int rpc_inject_disconnect;
18 18
19struct rpc_clnt_iter {
20 struct rpc_clnt *clnt;
21 loff_t pos;
22};
23
24static int 19static int
25tasks_show(struct seq_file *f, void *v) 20tasks_show(struct seq_file *f, void *v)
26{ 21{
@@ -47,12 +42,10 @@ static void *
47tasks_start(struct seq_file *f, loff_t *ppos) 42tasks_start(struct seq_file *f, loff_t *ppos)
48 __acquires(&clnt->cl_lock) 43 __acquires(&clnt->cl_lock)
49{ 44{
50 struct rpc_clnt_iter *iter = f->private; 45 struct rpc_clnt *clnt = f->private;
51 loff_t pos = *ppos; 46 loff_t pos = *ppos;
52 struct rpc_clnt *clnt = iter->clnt;
53 struct rpc_task *task; 47 struct rpc_task *task;
54 48
55 iter->pos = pos + 1;
56 spin_lock(&clnt->cl_lock); 49 spin_lock(&clnt->cl_lock);
57 list_for_each_entry(task, &clnt->cl_tasks, tk_task) 50 list_for_each_entry(task, &clnt->cl_tasks, tk_task)
58 if (pos-- == 0) 51 if (pos-- == 0)
@@ -63,12 +56,10 @@ tasks_start(struct seq_file *f, loff_t *ppos)
63static void * 56static void *
64tasks_next(struct seq_file *f, void *v, loff_t *pos) 57tasks_next(struct seq_file *f, void *v, loff_t *pos)
65{ 58{
66 struct rpc_clnt_iter *iter = f->private; 59 struct rpc_clnt *clnt = f->private;
67 struct rpc_clnt *clnt = iter->clnt;
68 struct rpc_task *task = v; 60 struct rpc_task *task = v;
69 struct list_head *next = task->tk_task.next; 61 struct list_head *next = task->tk_task.next;
70 62
71 ++iter->pos;
72 ++*pos; 63 ++*pos;
73 64
74 /* If there's another task on list, return it */ 65 /* If there's another task on list, return it */
@@ -81,9 +72,7 @@ static void
81tasks_stop(struct seq_file *f, void *v) 72tasks_stop(struct seq_file *f, void *v)
82 __releases(&clnt->cl_lock) 73 __releases(&clnt->cl_lock)
83{ 74{
84 struct rpc_clnt_iter *iter = f->private; 75 struct rpc_clnt *clnt = f->private;
85 struct rpc_clnt *clnt = iter->clnt;
86
87 spin_unlock(&clnt->cl_lock); 76 spin_unlock(&clnt->cl_lock);
88} 77}
89 78
@@ -96,17 +85,13 @@ static const struct seq_operations tasks_seq_operations = {
96 85
97static int tasks_open(struct inode *inode, struct file *filp) 86static int tasks_open(struct inode *inode, struct file *filp)
98{ 87{
99 int ret = seq_open_private(filp, &tasks_seq_operations, 88 int ret = seq_open(filp, &tasks_seq_operations);
100 sizeof(struct rpc_clnt_iter));
101
102 if (!ret) { 89 if (!ret) {
103 struct seq_file *seq = filp->private_data; 90 struct seq_file *seq = filp->private_data;
104 struct rpc_clnt_iter *iter = seq->private; 91 struct rpc_clnt *clnt = seq->private = inode->i_private;
105
106 iter->clnt = inode->i_private;
107 92
108 if (!atomic_inc_not_zero(&iter->clnt->cl_count)) { 93 if (!atomic_inc_not_zero(&clnt->cl_count)) {
109 seq_release_private(inode, filp); 94 seq_release(inode, filp);
110 ret = -EINVAL; 95 ret = -EINVAL;
111 } 96 }
112 } 97 }
@@ -118,10 +103,10 @@ static int
118tasks_release(struct inode *inode, struct file *filp) 103tasks_release(struct inode *inode, struct file *filp)
119{ 104{
120 struct seq_file *seq = filp->private_data; 105 struct seq_file *seq = filp->private_data;
121 struct rpc_clnt_iter *iter = seq->private; 106 struct rpc_clnt *clnt = seq->private;
122 107
123 rpc_release_client(iter->clnt); 108 rpc_release_client(clnt);
124 return seq_release_private(inode, filp); 109 return seq_release(inode, filp);
125} 110}
126 111
127static const struct file_operations tasks_fops = { 112static const struct file_operations tasks_fops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2e22889a8837..a08aeb56b8e4 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -11,7 +11,7 @@
11 */ 11 */
12 12
13#include <linux/linkage.h> 13#include <linux/linkage.h>
14#include <linux/sched.h> 14#include <linux/sched/signal.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/net.h> 16#include <linux/net.h>
17#include <linux/in.h> 17#include <linux/in.h>
@@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
385 for (i = 0; i < progp->pg_nvers; i++) { 385 for (i = 0; i < progp->pg_nvers; i++) {
386 if (progp->pg_vers[i] == NULL) 386 if (progp->pg_vers[i] == NULL)
387 continue; 387 continue;
388 if (progp->pg_vers[i]->vs_hidden == 0) 388 if (!progp->pg_vers[i]->vs_hidden)
389 return 1; 389 return 1;
390 } 390 }
391 } 391 }
@@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net,
976 if (vers->vs_hidden) 976 if (vers->vs_hidden)
977 continue; 977 continue;
978 978
979 /*
980 * Don't register a UDP port if we need congestion
981 * control.
982 */
983 if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
984 continue;
985
979 error = __svc_register(net, progp->pg_name, progp->pg_prog, 986 error = __svc_register(net, progp->pg_name, progp->pg_prog,
980 i, family, proto, port); 987 i, family, proto, port);
981 988
@@ -1169,6 +1176,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1169 !(versp = progp->pg_vers[vers])) 1176 !(versp = progp->pg_vers[vers]))
1170 goto err_bad_vers; 1177 goto err_bad_vers;
1171 1178
1179 /*
1180 * Some protocol versions (namely NFSv4) require some form of
1181 * congestion control. (See RFC 7530 section 3.1 paragraph 2)
1182 * In other words, UDP is not allowed. We mark those when setting
1183 * up the svc_xprt, and verify that here.
1184 *
1185 * The spec is not very clear about what error should be returned
1186 * when someone tries to access a server that is listening on UDP
1187 * for lower versions. RPC_PROG_MISMATCH seems to be the closest
1188 * fit.
1189 */
1190 if (versp->vs_need_cong_ctrl &&
1191 !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
1192 goto err_bad_vers;
1193
1172 procp = versp->vs_proc + proc; 1194 procp = versp->vs_proc + proc;
1173 if (proc >= versp->vs_nproc || !procp->pc_func) 1195 if (proc >= versp->vs_nproc || !procp->pc_func)
1174 goto err_bad_proc; 1196 goto err_bad_proc;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 64af4f034de6..f81eaa8e0888 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -403,7 +403,7 @@ svcauth_unix_info_release(struct svc_xprt *xpt)
403/**************************************************************************** 403/****************************************************************************
404 * auth.unix.gid cache 404 * auth.unix.gid cache
405 * simple cache to map a UID to a list of GIDs 405 * simple cache to map a UID to a list of GIDs
406 * because AUTH_UNIX aka AUTH_SYS has a max of 16 406 * because AUTH_UNIX aka AUTH_SYS has a max of UNX_NGROUPS
407 */ 407 */
408#define GID_HASHBITS 8 408#define GID_HASHBITS 8
409#define GID_HASHMAX (1<<GID_HASHBITS) 409#define GID_HASHMAX (1<<GID_HASHBITS)
@@ -810,7 +810,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
810 cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */ 810 cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
811 cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */ 811 cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
812 slen = svc_getnl(argv); /* gids length */ 812 slen = svc_getnl(argv); /* gids length */
813 if (slen > 16 || (len -= (slen + 2)*4) < 0) 813 if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0)
814 goto badcred; 814 goto badcred;
815 cred->cr_group_info = groups_alloc(slen); 815 cred->cr_group_info = groups_alloc(slen);
816 if (cred->cr_group_info == NULL) 816 if (cred->cr_group_info == NULL)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d227d97f7ad4..8931e33b6541 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1306 svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, 1306 svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
1307 &svsk->sk_xprt, serv); 1307 &svsk->sk_xprt, serv);
1308 set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); 1308 set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
1309 set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
1309 if (sk->sk_state == TCP_LISTEN) { 1310 if (sk->sk_state == TCP_LISTEN) {
1310 dprintk("setting up TCP socket for listening\n"); 1311 dprintk("setting up TCP socket for listening\n");
1311 set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); 1312 set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 7f1071e103ca..1f7082144e01 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1518,3 +1518,37 @@ out:
1518} 1518}
1519EXPORT_SYMBOL_GPL(xdr_process_buf); 1519EXPORT_SYMBOL_GPL(xdr_process_buf);
1520 1520
1521/**
1522 * xdr_stream_decode_string_dup - Decode and duplicate variable length string
1523 * @xdr: pointer to xdr_stream
1524 * @str: location to store pointer to string
1525 * @maxlen: maximum acceptable string length
1526 * @gfp_flags: GFP mask to use
1527 *
1528 * Return values:
1529 * On success, returns length of NUL-terminated string stored in *@ptr
1530 * %-EBADMSG on XDR buffer overflow
1531 * %-EMSGSIZE if the size of the string would exceed @maxlen
1532 * %-ENOMEM on memory allocation failure
1533 */
1534ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
1535 size_t maxlen, gfp_t gfp_flags)
1536{
1537 void *p;
1538 ssize_t ret;
1539
1540 ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
1541 if (ret > 0) {
1542 char *s = kmalloc(ret + 1, gfp_flags);
1543 if (s != NULL) {
1544 memcpy(s, p, ret);
1545 s[ret] = '\0';
1546 *str = s;
1547 return strlen(s);
1548 }
1549 ret = -ENOMEM;
1550 }
1551 *str = NULL;
1552 return ret;
1553}
1554EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9a6be030ca7d..b530a2852ba8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -897,13 +897,11 @@ static void xprt_timer(struct rpc_task *task)
897 return; 897 return;
898 dprintk("RPC: %5u xprt_timer\n", task->tk_pid); 898 dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
899 899
900 spin_lock_bh(&xprt->transport_lock);
901 if (!req->rq_reply_bytes_recvd) { 900 if (!req->rq_reply_bytes_recvd) {
902 if (xprt->ops->timer) 901 if (xprt->ops->timer)
903 xprt->ops->timer(xprt, task); 902 xprt->ops->timer(xprt, task);
904 } else 903 } else
905 task->tk_status = 0; 904 task->tk_status = 0;
906 spin_unlock_bh(&xprt->transport_lock);
907} 905}
908 906
909/** 907/**
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 1ebb09e1ac4f..59e64025ed96 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -310,10 +310,7 @@ fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
310 struct rpcrdma_mw *mw; 310 struct rpcrdma_mw *mw;
311 311
312 while (!list_empty(&req->rl_registered)) { 312 while (!list_empty(&req->rl_registered)) {
313 mw = list_first_entry(&req->rl_registered, 313 mw = rpcrdma_pop_mw(&req->rl_registered);
314 struct rpcrdma_mw, mw_list);
315 list_del_init(&mw->mw_list);
316
317 if (sync) 314 if (sync)
318 fmr_op_recover_mr(mw); 315 fmr_op_recover_mr(mw);
319 else 316 else
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 47bed5333c7f..f81dd93176c0 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -466,8 +466,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
466 struct ib_send_wr *first, **prev, *last, *bad_wr; 466 struct ib_send_wr *first, **prev, *last, *bad_wr;
467 struct rpcrdma_rep *rep = req->rl_reply; 467 struct rpcrdma_rep *rep = req->rl_reply;
468 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 468 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
469 struct rpcrdma_mw *mw, *tmp;
470 struct rpcrdma_frmr *f; 469 struct rpcrdma_frmr *f;
470 struct rpcrdma_mw *mw;
471 int count, rc; 471 int count, rc;
472 472
473 dprintk("RPC: %s: req %p\n", __func__, req); 473 dprintk("RPC: %s: req %p\n", __func__, req);
@@ -534,10 +534,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
534 * them to the free MW list. 534 * them to the free MW list.
535 */ 535 */
536unmap: 536unmap:
537 list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { 537 while (!list_empty(&req->rl_registered)) {
538 mw = rpcrdma_pop_mw(&req->rl_registered);
538 dprintk("RPC: %s: DMA unmapping frmr %p\n", 539 dprintk("RPC: %s: DMA unmapping frmr %p\n",
539 __func__, &mw->frmr); 540 __func__, &mw->frmr);
540 list_del_init(&mw->mw_list);
541 ib_dma_unmap_sg(ia->ri_device, 541 ib_dma_unmap_sg(ia->ri_device,
542 mw->mw_sg, mw->mw_nents, mw->mw_dir); 542 mw->mw_sg, mw->mw_nents, mw->mw_dir);
543 rpcrdma_put_mw(r_xprt, mw); 543 rpcrdma_put_mw(r_xprt, mw);
@@ -571,10 +571,7 @@ frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
571 struct rpcrdma_mw *mw; 571 struct rpcrdma_mw *mw;
572 572
573 while (!list_empty(&req->rl_registered)) { 573 while (!list_empty(&req->rl_registered)) {
574 mw = list_first_entry(&req->rl_registered, 574 mw = rpcrdma_pop_mw(&req->rl_registered);
575 struct rpcrdma_mw, mw_list);
576 list_del_init(&mw->mw_list);
577
578 if (sync) 575 if (sync)
579 frwr_op_recover_mr(mw); 576 frwr_op_recover_mr(mw);
580 else 577 else
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c52e0f2ffe52..a044be2d6ad7 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
125/* The client can send a request inline as long as the RPCRDMA header 125/* The client can send a request inline as long as the RPCRDMA header
126 * plus the RPC call fit under the transport's inline limit. If the 126 * plus the RPC call fit under the transport's inline limit. If the
127 * combined call message size exceeds that limit, the client must use 127 * combined call message size exceeds that limit, the client must use
128 * the read chunk list for this operation. 128 * a Read chunk for this operation.
129 *
130 * A Read chunk is also required if sending the RPC call inline would
131 * exceed this device's max_sge limit.
129 */ 132 */
130static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, 133static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
131 struct rpc_rqst *rqst) 134 struct rpc_rqst *rqst)
132{ 135{
133 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 136 struct xdr_buf *xdr = &rqst->rq_snd_buf;
137 unsigned int count, remaining, offset;
138
139 if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
140 return false;
141
142 if (xdr->page_len) {
143 remaining = xdr->page_len;
144 offset = xdr->page_base & ~PAGE_MASK;
145 count = 0;
146 while (remaining) {
147 remaining -= min_t(unsigned int,
148 PAGE_SIZE - offset, remaining);
149 offset = 0;
150 if (++count > r_xprt->rx_ia.ri_max_send_sges)
151 return false;
152 }
153 }
134 154
135 return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; 155 return true;
136} 156}
137 157
138/* The client can't know how large the actual reply will be. Thus it 158/* The client can't know how large the actual reply will be. Thus it
@@ -186,9 +206,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
186 */ 206 */
187 207
188static int 208static int
189rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, 209rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
190 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, 210 unsigned int pos, enum rpcrdma_chunktype type,
191 bool reminv_expected) 211 struct rpcrdma_mr_seg *seg)
192{ 212{
193 int len, n, p, page_base; 213 int len, n, p, page_base;
194 struct page **ppages; 214 struct page **ppages;
@@ -226,22 +246,21 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
226 if (len && n == RPCRDMA_MAX_SEGS) 246 if (len && n == RPCRDMA_MAX_SEGS)
227 goto out_overflow; 247 goto out_overflow;
228 248
229 /* When encoding the read list, the tail is always sent inline */ 249 /* When encoding a Read chunk, the tail iovec contains an
230 if (type == rpcrdma_readch) 250 * XDR pad and may be omitted.
251 */
252 if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
231 return n; 253 return n;
232 254
233 /* When encoding the Write list, some servers need to see an extra 255 /* When encoding a Write chunk, some servers need to see an
234 * segment for odd-length Write chunks. The upper layer provides 256 * extra segment for non-XDR-aligned Write chunks. The upper
235 * space in the tail iovec for this purpose. 257 * layer provides space in the tail iovec that may be used
258 * for this purpose.
236 */ 259 */
237 if (type == rpcrdma_writech && reminv_expected) 260 if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
238 return n; 261 return n;
239 262
240 if (xdrbuf->tail[0].iov_len) { 263 if (xdrbuf->tail[0].iov_len) {
241 /* the rpcrdma protocol allows us to omit any trailing
242 * xdr pad bytes, saving the server an RDMA operation. */
243 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
244 return n;
245 n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n); 264 n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n);
246 if (n == RPCRDMA_MAX_SEGS) 265 if (n == RPCRDMA_MAX_SEGS)
247 goto out_overflow; 266 goto out_overflow;
@@ -293,7 +312,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
293 if (rtype == rpcrdma_areadch) 312 if (rtype == rpcrdma_areadch)
294 pos = 0; 313 pos = 0;
295 seg = req->rl_segments; 314 seg = req->rl_segments;
296 nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); 315 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
316 rtype, seg);
297 if (nsegs < 0) 317 if (nsegs < 0)
298 return ERR_PTR(nsegs); 318 return ERR_PTR(nsegs);
299 319
@@ -302,7 +322,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
302 false, &mw); 322 false, &mw);
303 if (n < 0) 323 if (n < 0)
304 return ERR_PTR(n); 324 return ERR_PTR(n);
305 list_add(&mw->mw_list, &req->rl_registered); 325 rpcrdma_push_mw(mw, &req->rl_registered);
306 326
307 *iptr++ = xdr_one; /* item present */ 327 *iptr++ = xdr_one; /* item present */
308 328
@@ -355,10 +375,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
355 } 375 }
356 376
357 seg = req->rl_segments; 377 seg = req->rl_segments;
358 nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 378 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
359 rqst->rq_rcv_buf.head[0].iov_len, 379 rqst->rq_rcv_buf.head[0].iov_len,
360 wtype, seg, 380 wtype, seg);
361 r_xprt->rx_ia.ri_reminv_expected);
362 if (nsegs < 0) 381 if (nsegs < 0)
363 return ERR_PTR(nsegs); 382 return ERR_PTR(nsegs);
364 383
@@ -371,7 +390,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
371 true, &mw); 390 true, &mw);
372 if (n < 0) 391 if (n < 0)
373 return ERR_PTR(n); 392 return ERR_PTR(n);
374 list_add(&mw->mw_list, &req->rl_registered); 393 rpcrdma_push_mw(mw, &req->rl_registered);
375 394
376 iptr = xdr_encode_rdma_segment(iptr, mw); 395 iptr = xdr_encode_rdma_segment(iptr, mw);
377 396
@@ -423,8 +442,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
423 } 442 }
424 443
425 seg = req->rl_segments; 444 seg = req->rl_segments;
426 nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, 445 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
427 r_xprt->rx_ia.ri_reminv_expected);
428 if (nsegs < 0) 446 if (nsegs < 0)
429 return ERR_PTR(nsegs); 447 return ERR_PTR(nsegs);
430 448
@@ -437,7 +455,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
437 true, &mw); 455 true, &mw);
438 if (n < 0) 456 if (n < 0)
439 return ERR_PTR(n); 457 return ERR_PTR(n);
440 list_add(&mw->mw_list, &req->rl_registered); 458 rpcrdma_push_mw(mw, &req->rl_registered);
441 459
442 iptr = xdr_encode_rdma_segment(iptr, mw); 460 iptr = xdr_encode_rdma_segment(iptr, mw);
443 461
@@ -741,13 +759,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
741 iptr = headerp->rm_body.rm_chunks; 759 iptr = headerp->rm_body.rm_chunks;
742 iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); 760 iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype);
743 if (IS_ERR(iptr)) 761 if (IS_ERR(iptr))
744 goto out_unmap; 762 goto out_err;
745 iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype); 763 iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype);
746 if (IS_ERR(iptr)) 764 if (IS_ERR(iptr))
747 goto out_unmap; 765 goto out_err;
748 iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype); 766 iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype);
749 if (IS_ERR(iptr)) 767 if (IS_ERR(iptr))
750 goto out_unmap; 768 goto out_err;
751 hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; 769 hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
752 770
753 dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", 771 dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n",
@@ -758,12 +776,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
758 if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, 776 if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
759 &rqst->rq_snd_buf, rtype)) { 777 &rqst->rq_snd_buf, rtype)) {
760 iptr = ERR_PTR(-EIO); 778 iptr = ERR_PTR(-EIO);
761 goto out_unmap; 779 goto out_err;
762 } 780 }
763 return 0; 781 return 0;
764 782
765out_unmap: 783out_err:
766 r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); 784 pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
785 PTR_ERR(iptr));
786 r_xprt->rx_stats.failed_marshal_count++;
767 return PTR_ERR(iptr); 787 return PTR_ERR(iptr);
768} 788}
769 789
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index cb1e48e54eb1..ff1df40f0d26 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -201,19 +201,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
201{ 201{
202 struct rpc_xprt *xprt = rqst->rq_xprt; 202 struct rpc_xprt *xprt = rqst->rq_xprt;
203 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 203 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
204 struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer; 204 __be32 *p;
205 int rc; 205 int rc;
206 206
207 /* Space in the send buffer for an RPC/RDMA header is reserved 207 /* Space in the send buffer for an RPC/RDMA header is reserved
208 * via xprt->tsh_size. 208 * via xprt->tsh_size.
209 */ 209 */
210 headerp->rm_xid = rqst->rq_xid; 210 p = rqst->rq_buffer;
211 headerp->rm_vers = rpcrdma_version; 211 *p++ = rqst->rq_xid;
212 headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); 212 *p++ = rpcrdma_version;
213 headerp->rm_type = rdma_msg; 213 *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
214 headerp->rm_body.rm_chunks[0] = xdr_zero; 214 *p++ = rdma_msg;
215 headerp->rm_body.rm_chunks[1] = xdr_zero; 215 *p++ = xdr_zero;
216 headerp->rm_body.rm_chunks[2] = xdr_zero; 216 *p++ = xdr_zero;
217 *p = xdr_zero;
217 218
218#ifdef SVCRDMA_BACKCHANNEL_DEBUG 219#ifdef SVCRDMA_BACKCHANNEL_DEBUG
219 pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); 220 pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 0ba9887f3e22..1c4aabf0f657 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016 Oracle. All rights reserved.
2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -47,102 +48,43 @@
47 48
48#define RPCDBG_FACILITY RPCDBG_SVCXPRT 49#define RPCDBG_FACILITY RPCDBG_SVCXPRT
49 50
50/* 51static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
51 * Decodes a read chunk list. The expected format is as follows:
52 * descrim : xdr_one
53 * position : __be32 offset into XDR stream
54 * handle : __be32 RKEY
55 * . . .
56 * end-of-list: xdr_zero
57 */
58static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
59{ 52{
60 struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; 53 __be32 *next;
61 54
62 while (ch->rc_discrim != xdr_zero) { 55 while (*p++ != xdr_zero) {
63 if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > 56 next = p + rpcrdma_readchunk_maxsz - 1;
64 (unsigned long)vaend) { 57 if (next > end)
65 dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
66 return NULL; 58 return NULL;
67 } 59 p = next;
68 ch++;
69 } 60 }
70 return &ch->rc_position; 61 return p;
71} 62}
72 63
73/* 64static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
74 * Decodes a write chunk list. The expected format is as follows:
75 * descrim : xdr_one
76 * nchunks : <count>
77 * handle : __be32 RKEY ---+
78 * length : __be32 <len of segment> |
79 * offset : remove va + <count>
80 * . . . |
81 * ---+
82 */
83static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
84{ 65{
85 unsigned long start, end; 66 __be32 *next;
86 int nchunks;
87
88 struct rpcrdma_write_array *ary =
89 (struct rpcrdma_write_array *)va;
90 67
91 /* Check for not write-array */ 68 while (*p++ != xdr_zero) {
92 if (ary->wc_discrim == xdr_zero) 69 next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
93 return &ary->wc_nchunks; 70 if (next > end)
94 71 return NULL;
95 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 72 p = next;
96 (unsigned long)vaend) {
97 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
98 return NULL;
99 }
100 nchunks = be32_to_cpu(ary->wc_nchunks);
101
102 start = (unsigned long)&ary->wc_array[0];
103 end = (unsigned long)vaend;
104 if (nchunks < 0 ||
105 nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
106 (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
107 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
108 ary, nchunks, vaend);
109 return NULL;
110 } 73 }
111 /* 74 return p;
112 * rs_length is the 2nd 4B field in wc_target and taking its
113 * address skips the list terminator
114 */
115 return &ary->wc_array[nchunks].wc_target.rs_length;
116} 75}
117 76
118static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) 77static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
119{ 78{
120 unsigned long start, end; 79 __be32 *next;
121 int nchunks; 80
122 struct rpcrdma_write_array *ary = 81 if (*p++ != xdr_zero) {
123 (struct rpcrdma_write_array *)va; 82 next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
124 83 if (next > end)
125 /* Check for no reply-array */ 84 return NULL;
126 if (ary->wc_discrim == xdr_zero) 85 p = next;
127 return &ary->wc_nchunks;
128
129 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
130 (unsigned long)vaend) {
131 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
132 return NULL;
133 }
134 nchunks = be32_to_cpu(ary->wc_nchunks);
135
136 start = (unsigned long)&ary->wc_array[0];
137 end = (unsigned long)vaend;
138 if (nchunks < 0 ||
139 nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
140 (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
141 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
142 ary, nchunks, vaend);
143 return NULL;
144 } 86 }
145 return (__be32 *)&ary->wc_array[nchunks]; 87 return p;
146} 88}
147 89
148/** 90/**
@@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
158 */ 100 */
159int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) 101int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
160{ 102{
161 struct rpcrdma_msg *rmsgp; 103 __be32 *p, *end, *rdma_argp;
162 __be32 *va, *vaend; 104 unsigned int hdr_len;
163 unsigned int len;
164 u32 hdr_len;
165 105
166 /* Verify that there's enough bytes for header + something */ 106 /* Verify that there's enough bytes for header + something */
167 if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) { 107 if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
168 dprintk("svcrdma: header too short = %d\n", 108 goto out_short;
169 rq_arg->len);
170 return -EINVAL;
171 }
172 109
173 rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base; 110 rdma_argp = rq_arg->head[0].iov_base;
174 if (rmsgp->rm_vers != rpcrdma_version) { 111 if (*(rdma_argp + 1) != rpcrdma_version)
175 dprintk("%s: bad version %u\n", __func__, 112 goto out_version;
176 be32_to_cpu(rmsgp->rm_vers));
177 return -EPROTONOSUPPORT;
178 }
179 113
180 switch (be32_to_cpu(rmsgp->rm_type)) { 114 switch (*(rdma_argp + 3)) {
181 case RDMA_MSG: 115 case rdma_msg:
182 case RDMA_NOMSG: 116 case rdma_nomsg:
183 break; 117 break;
184 118
185 case RDMA_DONE: 119 case rdma_done:
186 /* Just drop it */ 120 goto out_drop;
187 dprintk("svcrdma: dropping RDMA_DONE message\n");
188 return 0;
189
190 case RDMA_ERROR:
191 /* Possible if this is a backchannel reply.
192 * XXX: We should cancel this XID, though.
193 */
194 dprintk("svcrdma: dropping RDMA_ERROR message\n");
195 return 0;
196
197 case RDMA_MSGP:
198 /* Pull in the extra for the padded case, bump our pointer */
199 rmsgp->rm_body.rm_padded.rm_align =
200 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
201 rmsgp->rm_body.rm_padded.rm_thresh =
202 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
203
204 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
205 rq_arg->head[0].iov_base = va;
206 len = (u32)((unsigned long)va - (unsigned long)rmsgp);
207 rq_arg->head[0].iov_len -= len;
208 if (len > rq_arg->len)
209 return -EINVAL;
210 return len;
211 default:
212 dprintk("svcrdma: bad rdma procedure (%u)\n",
213 be32_to_cpu(rmsgp->rm_type));
214 return -EINVAL;
215 }
216 121
217 /* The chunk list may contain either a read chunk list or a write 122 case rdma_error:
218 * chunk list and a reply chunk list. 123 goto out_drop;
219 */ 124
220 va = &rmsgp->rm_body.rm_chunks[0]; 125 default:
221 vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len); 126 goto out_proc;
222 va = decode_read_list(va, vaend);
223 if (!va) {
224 dprintk("svcrdma: failed to decode read list\n");
225 return -EINVAL;
226 }
227 va = decode_write_list(va, vaend);
228 if (!va) {
229 dprintk("svcrdma: failed to decode write list\n");
230 return -EINVAL;
231 }
232 va = decode_reply_array(va, vaend);
233 if (!va) {
234 dprintk("svcrdma: failed to decode reply chunk\n");
235 return -EINVAL;
236 } 127 }
237 128
238 rq_arg->head[0].iov_base = va; 129 end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
239 hdr_len = (unsigned long)va - (unsigned long)rmsgp; 130 p = xdr_check_read_list(rdma_argp + 4, end);
131 if (!p)
132 goto out_inval;
133 p = xdr_check_write_list(p, end);
134 if (!p)
135 goto out_inval;
136 p = xdr_check_reply_chunk(p, end);
137 if (!p)
138 goto out_inval;
139 if (p > end)
140 goto out_inval;
141
142 rq_arg->head[0].iov_base = p;
143 hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
240 rq_arg->head[0].iov_len -= hdr_len; 144 rq_arg->head[0].iov_len -= hdr_len;
241 return hdr_len; 145 return hdr_len;
146
147out_short:
148 dprintk("svcrdma: header too short = %d\n", rq_arg->len);
149 return -EINVAL;
150
151out_version:
152 dprintk("svcrdma: bad xprt version: %u\n",
153 be32_to_cpup(rdma_argp + 1));
154 return -EPROTONOSUPPORT;
155
156out_drop:
157 dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
158 return 0;
159
160out_proc:
161 dprintk("svcrdma: bad rdma procedure (%u)\n",
162 be32_to_cpup(rdma_argp + 3));
163 return -EINVAL;
164
165out_inval:
166 dprintk("svcrdma: failed to parse transport header\n");
167 return -EINVAL;
242} 168}
243 169
244int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, 170int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
@@ -249,7 +175,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
249 175
250 *va++ = rmsgp->rm_xid; 176 *va++ = rmsgp->rm_xid;
251 *va++ = rmsgp->rm_vers; 177 *va++ = rmsgp->rm_vers;
252 *va++ = cpu_to_be32(xprt->sc_max_requests); 178 *va++ = xprt->sc_fc_credits;
253 *va++ = rdma_error; 179 *va++ = rdma_error;
254 *va++ = cpu_to_be32(err); 180 *va++ = cpu_to_be32(err);
255 if (err == ERR_VERS) { 181 if (err == ERR_VERS) {
@@ -260,32 +186,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
260 return (int)((unsigned long)va - (unsigned long)startp); 186 return (int)((unsigned long)va - (unsigned long)startp);
261} 187}
262 188
263int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) 189/**
190 * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header
191 * @rdma_resp: buffer containing Reply transport header
192 *
193 * Returns length of transport header, in bytes.
194 */
195unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
264{ 196{
265 struct rpcrdma_write_array *wr_ary; 197 unsigned int nsegs;
198 __be32 *p;
266 199
267 /* There is no read-list in a reply */ 200 p = rdma_resp;
268 201
269 /* skip write list */ 202 /* RPC-over-RDMA V1 replies never have a Read list. */
270 wr_ary = (struct rpcrdma_write_array *) 203 p += rpcrdma_fixed_maxsz + 1;
271 &rmsgp->rm_body.rm_chunks[1]; 204
272 if (wr_ary->wc_discrim) 205 /* Skip Write list. */
273 wr_ary = (struct rpcrdma_write_array *) 206 while (*p++ != xdr_zero) {
274 &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]. 207 nsegs = be32_to_cpup(p++);
275 wc_target.rs_length; 208 p += nsegs * rpcrdma_segment_maxsz;
276 else 209 }
277 wr_ary = (struct rpcrdma_write_array *) 210
278 &wr_ary->wc_nchunks; 211 /* Skip Reply chunk. */
279 212 if (*p++ != xdr_zero) {
280 /* skip reply array */ 213 nsegs = be32_to_cpup(p++);
281 if (wr_ary->wc_discrim) 214 p += nsegs * rpcrdma_segment_maxsz;
282 wr_ary = (struct rpcrdma_write_array *) 215 }
283 &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]; 216
284 else 217 return (unsigned long)p - (unsigned long)rdma_resp;
285 wr_ary = (struct rpcrdma_write_array *)
286 &wr_ary->wc_nchunks;
287
288 return (unsigned long) wr_ary - (unsigned long) rmsgp;
289} 218}
290 219
291void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) 220void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
@@ -326,19 +255,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
326 seg->rs_offset = rs_offset; 255 seg->rs_offset = rs_offset;
327 seg->rs_length = cpu_to_be32(write_len); 256 seg->rs_length = cpu_to_be32(write_len);
328} 257}
329
330void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
331 struct rpcrdma_msg *rdma_argp,
332 struct rpcrdma_msg *rdma_resp,
333 enum rpcrdma_proc rdma_type)
334{
335 rdma_resp->rm_xid = rdma_argp->rm_xid;
336 rdma_resp->rm_vers = rdma_argp->rm_vers;
337 rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
338 rdma_resp->rm_type = cpu_to_be32(rdma_type);
339
340 /* Encode <nul> chunks lists */
341 rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
342 rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
343 rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
344}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 172b537f8cfc..f7b2daf72a86 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -606,26 +606,24 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
606 606
607 dprintk("svcrdma: rqstp=%p\n", rqstp); 607 dprintk("svcrdma: rqstp=%p\n", rqstp);
608 608
609 spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); 609 spin_lock(&rdma_xprt->sc_rq_dto_lock);
610 if (!list_empty(&rdma_xprt->sc_read_complete_q)) { 610 if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
611 ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, 611 ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
612 struct svc_rdma_op_ctxt, 612 struct svc_rdma_op_ctxt, list);
613 dto_q); 613 list_del(&ctxt->list);
614 list_del_init(&ctxt->dto_q); 614 spin_unlock(&rdma_xprt->sc_rq_dto_lock);
615 spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
616 rdma_read_complete(rqstp, ctxt); 615 rdma_read_complete(rqstp, ctxt);
617 goto complete; 616 goto complete;
618 } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { 617 } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
619 ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, 618 ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q,
620 struct svc_rdma_op_ctxt, 619 struct svc_rdma_op_ctxt, list);
621 dto_q); 620 list_del(&ctxt->list);
622 list_del_init(&ctxt->dto_q);
623 } else { 621 } else {
624 atomic_inc(&rdma_stat_rq_starve); 622 atomic_inc(&rdma_stat_rq_starve);
625 clear_bit(XPT_DATA, &xprt->xpt_flags); 623 clear_bit(XPT_DATA, &xprt->xpt_flags);
626 ctxt = NULL; 624 ctxt = NULL;
627 } 625 }
628 spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); 626 spin_unlock(&rdma_xprt->sc_rq_dto_lock);
629 if (!ctxt) { 627 if (!ctxt) {
630 /* This is the EAGAIN path. The svc_recv routine will 628 /* This is the EAGAIN path. The svc_recv routine will
631 * return -EAGAIN, the nfsd thread will go to call into 629 * return -EAGAIN, the nfsd thread will go to call into
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index ad4d286a83c5..515221b16d09 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
476 476
477 /* Prepare the SGE for the RPCRDMA Header */ 477 /* Prepare the SGE for the RPCRDMA Header */
478 ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; 478 ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
479 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 479 ctxt->sge[0].length =
480 svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
480 ctxt->sge[0].addr = 481 ctxt->sge[0].addr =
481 ib_dma_map_page(rdma->sc_cm_id->device, page, 0, 482 ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
482 ctxt->sge[0].length, DMA_TO_DEVICE); 483 ctxt->sge[0].length, DMA_TO_DEVICE);
@@ -559,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
559 struct rpcrdma_msg *rdma_argp; 560 struct rpcrdma_msg *rdma_argp;
560 struct rpcrdma_msg *rdma_resp; 561 struct rpcrdma_msg *rdma_resp;
561 struct rpcrdma_write_array *wr_ary, *rp_ary; 562 struct rpcrdma_write_array *wr_ary, *rp_ary;
562 enum rpcrdma_proc reply_type;
563 int ret; 563 int ret;
564 int inline_bytes; 564 int inline_bytes;
565 struct page *res_page; 565 struct page *res_page;
566 struct svc_rdma_req_map *vec; 566 struct svc_rdma_req_map *vec;
567 u32 inv_rkey; 567 u32 inv_rkey;
568 __be32 *p;
568 569
569 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); 570 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
570 571
@@ -596,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
596 if (!res_page) 597 if (!res_page)
597 goto err0; 598 goto err0;
598 rdma_resp = page_address(res_page); 599 rdma_resp = page_address(res_page);
599 if (rp_ary) 600
600 reply_type = RDMA_NOMSG; 601 p = &rdma_resp->rm_xid;
601 else 602 *p++ = rdma_argp->rm_xid;
602 reply_type = RDMA_MSG; 603 *p++ = rdma_argp->rm_vers;
603 svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, 604 *p++ = rdma->sc_fc_credits;
604 rdma_resp, reply_type); 605 *p++ = rp_ary ? rdma_nomsg : rdma_msg;
606
607 /* Start with empty chunks */
608 *p++ = xdr_zero;
609 *p++ = xdr_zero;
610 *p = xdr_zero;
605 611
606 /* Send any write-chunk data and build resp write-list */ 612 /* Send any write-chunk data and build resp write-list */
607 if (wr_ary) { 613 if (wr_ary) {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 39652d390a9c..c13a5c35ce14 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
157 ctxt = kmalloc(sizeof(*ctxt), flags); 157 ctxt = kmalloc(sizeof(*ctxt), flags);
158 if (ctxt) { 158 if (ctxt) {
159 ctxt->xprt = xprt; 159 ctxt->xprt = xprt;
160 INIT_LIST_HEAD(&ctxt->free); 160 INIT_LIST_HEAD(&ctxt->list);
161 INIT_LIST_HEAD(&ctxt->dto_q);
162 } 161 }
163 return ctxt; 162 return ctxt;
164} 163}
@@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
180 dprintk("svcrdma: No memory for RDMA ctxt\n"); 179 dprintk("svcrdma: No memory for RDMA ctxt\n");
181 return false; 180 return false;
182 } 181 }
183 list_add(&ctxt->free, &xprt->sc_ctxts); 182 list_add(&ctxt->list, &xprt->sc_ctxts);
184 } 183 }
185 return true; 184 return true;
186} 185}
@@ -189,15 +188,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
189{ 188{
190 struct svc_rdma_op_ctxt *ctxt = NULL; 189 struct svc_rdma_op_ctxt *ctxt = NULL;
191 190
192 spin_lock_bh(&xprt->sc_ctxt_lock); 191 spin_lock(&xprt->sc_ctxt_lock);
193 xprt->sc_ctxt_used++; 192 xprt->sc_ctxt_used++;
194 if (list_empty(&xprt->sc_ctxts)) 193 if (list_empty(&xprt->sc_ctxts))
195 goto out_empty; 194 goto out_empty;
196 195
197 ctxt = list_first_entry(&xprt->sc_ctxts, 196 ctxt = list_first_entry(&xprt->sc_ctxts,
198 struct svc_rdma_op_ctxt, free); 197 struct svc_rdma_op_ctxt, list);
199 list_del_init(&ctxt->free); 198 list_del(&ctxt->list);
200 spin_unlock_bh(&xprt->sc_ctxt_lock); 199 spin_unlock(&xprt->sc_ctxt_lock);
201 200
202out: 201out:
203 ctxt->count = 0; 202 ctxt->count = 0;
@@ -209,15 +208,15 @@ out_empty:
209 /* Either pre-allocation missed the mark, or send 208 /* Either pre-allocation missed the mark, or send
210 * queue accounting is broken. 209 * queue accounting is broken.
211 */ 210 */
212 spin_unlock_bh(&xprt->sc_ctxt_lock); 211 spin_unlock(&xprt->sc_ctxt_lock);
213 212
214 ctxt = alloc_ctxt(xprt, GFP_NOIO); 213 ctxt = alloc_ctxt(xprt, GFP_NOIO);
215 if (ctxt) 214 if (ctxt)
216 goto out; 215 goto out;
217 216
218 spin_lock_bh(&xprt->sc_ctxt_lock); 217 spin_lock(&xprt->sc_ctxt_lock);
219 xprt->sc_ctxt_used--; 218 xprt->sc_ctxt_used--;
220 spin_unlock_bh(&xprt->sc_ctxt_lock); 219 spin_unlock(&xprt->sc_ctxt_lock);
221 WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n"); 220 WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
222 return NULL; 221 return NULL;
223} 222}
@@ -254,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
254 for (i = 0; i < ctxt->count; i++) 253 for (i = 0; i < ctxt->count; i++)
255 put_page(ctxt->pages[i]); 254 put_page(ctxt->pages[i]);
256 255
257 spin_lock_bh(&xprt->sc_ctxt_lock); 256 spin_lock(&xprt->sc_ctxt_lock);
258 xprt->sc_ctxt_used--; 257 xprt->sc_ctxt_used--;
259 list_add(&ctxt->free, &xprt->sc_ctxts); 258 list_add(&ctxt->list, &xprt->sc_ctxts);
260 spin_unlock_bh(&xprt->sc_ctxt_lock); 259 spin_unlock(&xprt->sc_ctxt_lock);
261} 260}
262 261
263static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) 262static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
@@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
266 struct svc_rdma_op_ctxt *ctxt; 265 struct svc_rdma_op_ctxt *ctxt;
267 266
268 ctxt = list_first_entry(&xprt->sc_ctxts, 267 ctxt = list_first_entry(&xprt->sc_ctxts,
269 struct svc_rdma_op_ctxt, free); 268 struct svc_rdma_op_ctxt, list);
270 list_del(&ctxt->free); 269 list_del(&ctxt->list);
271 kfree(ctxt); 270 kfree(ctxt);
272 } 271 }
273} 272}
@@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
404 /* All wc fields are now known to be valid */ 403 /* All wc fields are now known to be valid */
405 ctxt->byte_len = wc->byte_len; 404 ctxt->byte_len = wc->byte_len;
406 spin_lock(&xprt->sc_rq_dto_lock); 405 spin_lock(&xprt->sc_rq_dto_lock);
407 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); 406 list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
408 spin_unlock(&xprt->sc_rq_dto_lock); 407 spin_unlock(&xprt->sc_rq_dto_lock);
409 408
410 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 409 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
@@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
525 524
526 read_hdr = ctxt->read_hdr; 525 read_hdr = ctxt->read_hdr;
527 spin_lock(&xprt->sc_rq_dto_lock); 526 spin_lock(&xprt->sc_rq_dto_lock);
528 list_add_tail(&read_hdr->dto_q, 527 list_add_tail(&read_hdr->list,
529 &xprt->sc_read_complete_q); 528 &xprt->sc_read_complete_q);
530 spin_unlock(&xprt->sc_rq_dto_lock); 529 spin_unlock(&xprt->sc_rq_dto_lock);
531 530
@@ -557,7 +556,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
557 return NULL; 556 return NULL;
558 svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); 557 svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
559 INIT_LIST_HEAD(&cma_xprt->sc_accept_q); 558 INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
560 INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
561 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 559 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
562 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); 560 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
563 INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); 561 INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
@@ -571,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
571 spin_lock_init(&cma_xprt->sc_ctxt_lock); 569 spin_lock_init(&cma_xprt->sc_ctxt_lock);
572 spin_lock_init(&cma_xprt->sc_map_lock); 570 spin_lock_init(&cma_xprt->sc_map_lock);
573 571
572 /*
573 * Note that this implies that the underlying transport support
574 * has some form of congestion control (see RFC 7530 section 3.1
575 * paragraph 2). For now, we assume that all supported RDMA
576 * transports are suitable here.
577 */
578 set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
579
574 if (listener) 580 if (listener)
575 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); 581 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
576 582
@@ -923,14 +929,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
923{ 929{
924 struct svc_rdma_fastreg_mr *frmr = NULL; 930 struct svc_rdma_fastreg_mr *frmr = NULL;
925 931
926 spin_lock_bh(&rdma->sc_frmr_q_lock); 932 spin_lock(&rdma->sc_frmr_q_lock);
927 if (!list_empty(&rdma->sc_frmr_q)) { 933 if (!list_empty(&rdma->sc_frmr_q)) {
928 frmr = list_entry(rdma->sc_frmr_q.next, 934 frmr = list_entry(rdma->sc_frmr_q.next,
929 struct svc_rdma_fastreg_mr, frmr_list); 935 struct svc_rdma_fastreg_mr, frmr_list);
930 list_del_init(&frmr->frmr_list); 936 list_del_init(&frmr->frmr_list);
931 frmr->sg_nents = 0; 937 frmr->sg_nents = 0;
932 } 938 }
933 spin_unlock_bh(&rdma->sc_frmr_q_lock); 939 spin_unlock(&rdma->sc_frmr_q_lock);
934 if (frmr) 940 if (frmr)
935 return frmr; 941 return frmr;
936 942
@@ -943,10 +949,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
943 if (frmr) { 949 if (frmr) {
944 ib_dma_unmap_sg(rdma->sc_cm_id->device, 950 ib_dma_unmap_sg(rdma->sc_cm_id->device,
945 frmr->sg, frmr->sg_nents, frmr->direction); 951 frmr->sg, frmr->sg_nents, frmr->direction);
946 spin_lock_bh(&rdma->sc_frmr_q_lock); 952 spin_lock(&rdma->sc_frmr_q_lock);
947 WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); 953 WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
948 list_add(&frmr->frmr_list, &rdma->sc_frmr_q); 954 list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
949 spin_unlock_bh(&rdma->sc_frmr_q_lock); 955 spin_unlock(&rdma->sc_frmr_q_lock);
950 } 956 }
951} 957}
952 958
@@ -1002,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1002 newxprt->sc_max_req_size = svcrdma_max_req_size; 1008 newxprt->sc_max_req_size = svcrdma_max_req_size;
1003 newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, 1009 newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
1004 svcrdma_max_requests); 1010 svcrdma_max_requests);
1011 newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
1005 newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, 1012 newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
1006 svcrdma_max_bc_requests); 1013 svcrdma_max_bc_requests);
1007 newxprt->sc_rq_depth = newxprt->sc_max_requests + 1014 newxprt->sc_rq_depth = newxprt->sc_max_requests +
@@ -1027,13 +1034,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1027 goto errout; 1034 goto errout;
1028 } 1035 }
1029 newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, 1036 newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
1030 0, IB_POLL_SOFTIRQ); 1037 0, IB_POLL_WORKQUEUE);
1031 if (IS_ERR(newxprt->sc_sq_cq)) { 1038 if (IS_ERR(newxprt->sc_sq_cq)) {
1032 dprintk("svcrdma: error creating SQ CQ for connect request\n"); 1039 dprintk("svcrdma: error creating SQ CQ for connect request\n");
1033 goto errout; 1040 goto errout;
1034 } 1041 }
1035 newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth, 1042 newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
1036 0, IB_POLL_SOFTIRQ); 1043 0, IB_POLL_WORKQUEUE);
1037 if (IS_ERR(newxprt->sc_rq_cq)) { 1044 if (IS_ERR(newxprt->sc_rq_cq)) {
1038 dprintk("svcrdma: error creating RQ CQ for connect request\n"); 1045 dprintk("svcrdma: error creating RQ CQ for connect request\n");
1039 goto errout; 1046 goto errout;
@@ -1213,20 +1220,18 @@ static void __svc_rdma_free(struct work_struct *work)
1213 */ 1220 */
1214 while (!list_empty(&rdma->sc_read_complete_q)) { 1221 while (!list_empty(&rdma->sc_read_complete_q)) {
1215 struct svc_rdma_op_ctxt *ctxt; 1222 struct svc_rdma_op_ctxt *ctxt;
1216 ctxt = list_entry(rdma->sc_read_complete_q.next, 1223 ctxt = list_first_entry(&rdma->sc_read_complete_q,
1217 struct svc_rdma_op_ctxt, 1224 struct svc_rdma_op_ctxt, list);
1218 dto_q); 1225 list_del(&ctxt->list);
1219 list_del_init(&ctxt->dto_q);
1220 svc_rdma_put_context(ctxt, 1); 1226 svc_rdma_put_context(ctxt, 1);
1221 } 1227 }
1222 1228
1223 /* Destroy queued, but not processed recv completions */ 1229 /* Destroy queued, but not processed recv completions */
1224 while (!list_empty(&rdma->sc_rq_dto_q)) { 1230 while (!list_empty(&rdma->sc_rq_dto_q)) {
1225 struct svc_rdma_op_ctxt *ctxt; 1231 struct svc_rdma_op_ctxt *ctxt;
1226 ctxt = list_entry(rdma->sc_rq_dto_q.next, 1232 ctxt = list_first_entry(&rdma->sc_rq_dto_q,
1227 struct svc_rdma_op_ctxt, 1233 struct svc_rdma_op_ctxt, list);
1228 dto_q); 1234 list_del(&ctxt->list);
1229 list_del_init(&ctxt->dto_q);
1230 svc_rdma_put_context(ctxt, 1); 1235 svc_rdma_put_context(ctxt, 1);
1231 } 1236 }
1232 1237
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 534c178d2a7e..c717f5410776 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
67static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 67static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
68static unsigned int xprt_rdma_inline_write_padding; 68static unsigned int xprt_rdma_inline_write_padding;
69static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; 69static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
70 int xprt_rdma_pad_optimize = 1; 70 int xprt_rdma_pad_optimize = 0;
71 71
72#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 72#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
73 73
@@ -709,10 +709,6 @@ xprt_rdma_send_request(struct rpc_task *task)
709 return 0; 709 return 0;
710 710
711failed_marshal: 711failed_marshal:
712 dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
713 __func__, rc);
714 if (rc == -EIO)
715 r_xprt->rx_stats.failed_marshal_count++;
716 if (rc != -ENOTCONN) 712 if (rc != -ENOTCONN)
717 return rc; 713 return rc;
718drop_connection: 714drop_connection:
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11d07748f699..81cd31acf690 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -54,6 +54,7 @@
54#include <linux/sunrpc/svc_rdma.h> 54#include <linux/sunrpc/svc_rdma.h>
55#include <asm/bitops.h> 55#include <asm/bitops.h>
56#include <linux/module.h> /* try_module_get()/module_put() */ 56#include <linux/module.h> /* try_module_get()/module_put() */
57#include <rdma/ib_cm.h>
57 58
58#include "xprt_rdma.h" 59#include "xprt_rdma.h"
59 60
@@ -208,6 +209,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
208 209
209 /* Default settings for RPC-over-RDMA Version One */ 210 /* Default settings for RPC-over-RDMA Version One */
210 r_xprt->rx_ia.ri_reminv_expected = false; 211 r_xprt->rx_ia.ri_reminv_expected = false;
212 r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
211 rsize = RPCRDMA_V1_DEF_INLINE_SIZE; 213 rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
212 wsize = RPCRDMA_V1_DEF_INLINE_SIZE; 214 wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
213 215
@@ -215,6 +217,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
215 pmsg->cp_magic == rpcrdma_cmp_magic && 217 pmsg->cp_magic == rpcrdma_cmp_magic &&
216 pmsg->cp_version == RPCRDMA_CMP_VERSION) { 218 pmsg->cp_version == RPCRDMA_CMP_VERSION) {
217 r_xprt->rx_ia.ri_reminv_expected = true; 219 r_xprt->rx_ia.ri_reminv_expected = true;
220 r_xprt->rx_ia.ri_implicit_roundup = true;
218 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); 221 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
219 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 222 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
220 } 223 }
@@ -277,7 +280,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
277 connstate = -ENETDOWN; 280 connstate = -ENETDOWN;
278 goto connected; 281 goto connected;
279 case RDMA_CM_EVENT_REJECTED: 282 case RDMA_CM_EVENT_REJECTED:
283#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
284 pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
285 sap, rpc_get_port(sap), ia->ri_device->name,
286 rdma_reject_msg(id, event->status));
287#endif
280 connstate = -ECONNREFUSED; 288 connstate = -ECONNREFUSED;
289 if (event->status == IB_CM_REJ_STALE_CONN)
290 connstate = -EAGAIN;
281 goto connected; 291 goto connected;
282 case RDMA_CM_EVENT_DISCONNECTED: 292 case RDMA_CM_EVENT_DISCONNECTED:
283 connstate = -ECONNABORTED; 293 connstate = -ECONNABORTED;
@@ -486,18 +496,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
486 */ 496 */
487int 497int
488rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, 498rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
489 struct rpcrdma_create_data_internal *cdata) 499 struct rpcrdma_create_data_internal *cdata)
490{ 500{
491 struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; 501 struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
502 unsigned int max_qp_wr, max_sge;
492 struct ib_cq *sendcq, *recvcq; 503 struct ib_cq *sendcq, *recvcq;
493 unsigned int max_qp_wr;
494 int rc; 504 int rc;
495 505
496 if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { 506 max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
497 dprintk("RPC: %s: insufficient sge's available\n", 507 if (max_sge < RPCRDMA_MIN_SEND_SGES) {
498 __func__); 508 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
499 return -ENOMEM; 509 return -ENOMEM;
500 } 510 }
511 ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;
501 512
502 if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { 513 if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
503 dprintk("RPC: %s: insufficient wqe's available\n", 514 dprintk("RPC: %s: insufficient wqe's available\n",
@@ -522,7 +533,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
522 ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 533 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
523 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; 534 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
524 ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ 535 ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
525 ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; 536 ep->rep_attr.cap.max_send_sge = max_sge;
526 ep->rep_attr.cap.max_recv_sge = 1; 537 ep->rep_attr.cap.max_recv_sge = 1;
527 ep->rep_attr.cap.max_inline_data = 0; 538 ep->rep_attr.cap.max_inline_data = 0;
528 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 539 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -640,20 +651,21 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
640int 651int
641rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 652rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
642{ 653{
654 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
655 rx_ia);
643 struct rdma_cm_id *id, *old; 656 struct rdma_cm_id *id, *old;
657 struct sockaddr *sap;
658 unsigned int extras;
644 int rc = 0; 659 int rc = 0;
645 int retry_count = 0;
646 660
647 if (ep->rep_connected != 0) { 661 if (ep->rep_connected != 0) {
648 struct rpcrdma_xprt *xprt;
649retry: 662retry:
650 dprintk("RPC: %s: reconnecting...\n", __func__); 663 dprintk("RPC: %s: reconnecting...\n", __func__);
651 664
652 rpcrdma_ep_disconnect(ep, ia); 665 rpcrdma_ep_disconnect(ep, ia);
653 666
654 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); 667 sap = (struct sockaddr *)&r_xprt->rx_data.addr;
655 id = rpcrdma_create_id(xprt, ia, 668 id = rpcrdma_create_id(r_xprt, ia, sap);
656 (struct sockaddr *)&xprt->rx_data.addr);
657 if (IS_ERR(id)) { 669 if (IS_ERR(id)) {
658 rc = -EHOSTUNREACH; 670 rc = -EHOSTUNREACH;
659 goto out; 671 goto out;
@@ -708,51 +720,18 @@ retry:
708 } 720 }
709 721
710 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); 722 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
711
712 /*
713 * Check state. A non-peer reject indicates no listener
714 * (ECONNREFUSED), which may be a transient state. All
715 * others indicate a transport condition which has already
716 * undergone a best-effort.
717 */
718 if (ep->rep_connected == -ECONNREFUSED &&
719 ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
720 dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
721 goto retry;
722 }
723 if (ep->rep_connected <= 0) { 723 if (ep->rep_connected <= 0) {
724 /* Sometimes, the only way to reliably connect to remote 724 if (ep->rep_connected == -EAGAIN)
725 * CMs is to use same nonzero values for ORD and IRD. */
726 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
727 (ep->rep_remote_cma.responder_resources == 0 ||
728 ep->rep_remote_cma.initiator_depth !=
729 ep->rep_remote_cma.responder_resources)) {
730 if (ep->rep_remote_cma.responder_resources == 0)
731 ep->rep_remote_cma.responder_resources = 1;
732 ep->rep_remote_cma.initiator_depth =
733 ep->rep_remote_cma.responder_resources;
734 goto retry; 725 goto retry;
735 }
736 rc = ep->rep_connected; 726 rc = ep->rep_connected;
737 } else { 727 goto out;
738 struct rpcrdma_xprt *r_xprt;
739 unsigned int extras;
740
741 dprintk("RPC: %s: connected\n", __func__);
742
743 r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
744 extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
745
746 if (extras) {
747 rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
748 if (rc) {
749 pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
750 __func__, rc);
751 rc = 0;
752 }
753 }
754 } 728 }
755 729
730 dprintk("RPC: %s: connected\n", __func__);
731 extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
732 if (extras)
733 rpcrdma_ep_post_extra_recv(r_xprt, extras);
734
756out: 735out:
757 if (rc) 736 if (rc)
758 ep->rep_connected = rc; 737 ep->rep_connected = rc;
@@ -797,9 +776,7 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
797 776
798 spin_lock(&buf->rb_recovery_lock); 777 spin_lock(&buf->rb_recovery_lock);
799 while (!list_empty(&buf->rb_stale_mrs)) { 778 while (!list_empty(&buf->rb_stale_mrs)) {
800 mw = list_first_entry(&buf->rb_stale_mrs, 779 mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
801 struct rpcrdma_mw, mw_list);
802 list_del_init(&mw->mw_list);
803 spin_unlock(&buf->rb_recovery_lock); 780 spin_unlock(&buf->rb_recovery_lock);
804 781
805 dprintk("RPC: %s: recovering MR %p\n", __func__, mw); 782 dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
@@ -817,7 +794,7 @@ rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
817 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 794 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
818 795
819 spin_lock(&buf->rb_recovery_lock); 796 spin_lock(&buf->rb_recovery_lock);
820 list_add(&mw->mw_list, &buf->rb_stale_mrs); 797 rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
821 spin_unlock(&buf->rb_recovery_lock); 798 spin_unlock(&buf->rb_recovery_lock);
822 799
823 schedule_delayed_work(&buf->rb_recovery_worker, 0); 800 schedule_delayed_work(&buf->rb_recovery_worker, 0);
@@ -1093,11 +1070,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
1093 struct rpcrdma_mw *mw = NULL; 1070 struct rpcrdma_mw *mw = NULL;
1094 1071
1095 spin_lock(&buf->rb_mwlock); 1072 spin_lock(&buf->rb_mwlock);
1096 if (!list_empty(&buf->rb_mws)) { 1073 if (!list_empty(&buf->rb_mws))
1097 mw = list_first_entry(&buf->rb_mws, 1074 mw = rpcrdma_pop_mw(&buf->rb_mws);
1098 struct rpcrdma_mw, mw_list);
1099 list_del_init(&mw->mw_list);
1100 }
1101 spin_unlock(&buf->rb_mwlock); 1075 spin_unlock(&buf->rb_mwlock);
1102 1076
1103 if (!mw) 1077 if (!mw)
@@ -1120,7 +1094,7 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
1120 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1094 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1121 1095
1122 spin_lock(&buf->rb_mwlock); 1096 spin_lock(&buf->rb_mwlock);
1123 list_add_tail(&mw->mw_list, &buf->rb_mws); 1097 rpcrdma_push_mw(mw, &buf->rb_mws);
1124 spin_unlock(&buf->rb_mwlock); 1098 spin_unlock(&buf->rb_mwlock);
1125} 1099}
1126 1100
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e35efd4ac1e4..171a35116de9 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -74,7 +74,9 @@ struct rpcrdma_ia {
74 unsigned int ri_max_frmr_depth; 74 unsigned int ri_max_frmr_depth;
75 unsigned int ri_max_inline_write; 75 unsigned int ri_max_inline_write;
76 unsigned int ri_max_inline_read; 76 unsigned int ri_max_inline_read;
77 unsigned int ri_max_send_sges;
77 bool ri_reminv_expected; 78 bool ri_reminv_expected;
79 bool ri_implicit_roundup;
78 enum ib_mr_type ri_mrtype; 80 enum ib_mr_type ri_mrtype;
79 struct ib_qp_attr ri_qp_attr; 81 struct ib_qp_attr ri_qp_attr;
80 struct ib_qp_init_attr ri_qp_init_attr; 82 struct ib_qp_init_attr ri_qp_init_attr;
@@ -303,15 +305,19 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
303 char *mr_offset; /* kva if no page, else offset */ 305 char *mr_offset; /* kva if no page, else offset */
304}; 306};
305 307
306/* Reserve enough Send SGEs to send a maximum size inline request: 308/* The Send SGE array is provisioned to send a maximum size
309 * inline request:
307 * - RPC-over-RDMA header 310 * - RPC-over-RDMA header
308 * - xdr_buf head iovec 311 * - xdr_buf head iovec
309 * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages 312 * - RPCRDMA_MAX_INLINE bytes, in pages
310 * - xdr_buf tail iovec 313 * - xdr_buf tail iovec
314 *
315 * The actual number of array elements consumed by each RPC
316 * depends on the device's max_sge limit.
311 */ 317 */
312enum { 318enum {
313 RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, 319 RPCRDMA_MIN_SEND_SGES = 3,
314 RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, 320 RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
315 RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, 321 RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
316}; 322};
317 323
@@ -348,6 +354,22 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
348 return rqst->rq_xprtdata; 354 return rqst->rq_xprtdata;
349} 355}
350 356
357static inline void
358rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
359{
360 list_add_tail(&mw->mw_list, list);
361}
362
363static inline struct rpcrdma_mw *
364rpcrdma_pop_mw(struct list_head *list)
365{
366 struct rpcrdma_mw *mw;
367
368 mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
369 list_del(&mw->mw_list);
370 return mw;
371}
372
351/* 373/*
352 * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for 374 * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
353 * inline requests/replies, and client/server credits. 375 * inline requests/replies, and client/server credits.
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 956c7bce80d1..16aff8ddc16f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
52#include "sunrpc.h" 52#include "sunrpc.h"
53 53
54static void xs_close(struct rpc_xprt *xprt); 54static void xs_close(struct rpc_xprt *xprt);
55static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
56 struct socket *sock);
55 57
56/* 58/*
57 * xprtsock tunables 59 * xprtsock tunables
@@ -666,6 +668,9 @@ static int xs_tcp_send_request(struct rpc_task *task)
666 if (task->tk_flags & RPC_TASK_SENT) 668 if (task->tk_flags & RPC_TASK_SENT)
667 zerocopy = false; 669 zerocopy = false;
668 670
671 if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
672 xs_tcp_set_socket_timeouts(xprt, transport->sock);
673
669 /* Continue transmitting the packet/record. We must be careful 674 /* Continue transmitting the packet/record. We must be careful
670 * to cope with writespace callbacks arriving _after_ we have 675 * to cope with writespace callbacks arriving _after_ we have
671 * called sendmsg(). */ 676 * called sendmsg(). */
@@ -1734,7 +1739,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
1734 */ 1739 */
1735static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) 1740static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
1736{ 1741{
1742 spin_lock_bh(&xprt->transport_lock);
1737 xprt_adjust_cwnd(xprt, task, -ETIMEDOUT); 1743 xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
1744 spin_unlock_bh(&xprt->transport_lock);
1738} 1745}
1739 1746
1740static unsigned short xs_get_random_port(void) 1747static unsigned short xs_get_random_port(void)
@@ -2235,6 +2242,66 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
2235 xs_reset_transport(transport); 2242 xs_reset_transport(transport);
2236} 2243}
2237 2244
2245static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
2246 struct socket *sock)
2247{
2248 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2249 unsigned int keepidle;
2250 unsigned int keepcnt;
2251 unsigned int opt_on = 1;
2252 unsigned int timeo;
2253
2254 spin_lock_bh(&xprt->transport_lock);
2255 keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
2256 keepcnt = xprt->timeout->to_retries + 1;
2257 timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
2258 (xprt->timeout->to_retries + 1);
2259 clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
2260 spin_unlock_bh(&xprt->transport_lock);
2261
2262 /* TCP Keepalive options */
2263 kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
2264 (char *)&opt_on, sizeof(opt_on));
2265 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
2266 (char *)&keepidle, sizeof(keepidle));
2267 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
2268 (char *)&keepidle, sizeof(keepidle));
2269 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
2270 (char *)&keepcnt, sizeof(keepcnt));
2271
2272 /* TCP user timeout (see RFC5482) */
2273 kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
2274 (char *)&timeo, sizeof(timeo));
2275}
2276
2277static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
2278 unsigned long connect_timeout,
2279 unsigned long reconnect_timeout)
2280{
2281 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2282 struct rpc_timeout to;
2283 unsigned long initval;
2284
2285 spin_lock_bh(&xprt->transport_lock);
2286 if (reconnect_timeout < xprt->max_reconnect_timeout)
2287 xprt->max_reconnect_timeout = reconnect_timeout;
2288 if (connect_timeout < xprt->connect_timeout) {
2289 memcpy(&to, xprt->timeout, sizeof(to));
2290 initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
2291 /* Arbitrary lower limit */
2292 if (initval < XS_TCP_INIT_REEST_TO << 1)
2293 initval = XS_TCP_INIT_REEST_TO << 1;
2294 to.to_initval = initval;
2295 to.to_maxval = initval;
2296 memcpy(&transport->tcp_timeout, &to,
2297 sizeof(transport->tcp_timeout));
2298 xprt->timeout = &transport->tcp_timeout;
2299 xprt->connect_timeout = connect_timeout;
2300 }
2301 set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
2302 spin_unlock_bh(&xprt->transport_lock);
2303}
2304
2238static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 2305static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2239{ 2306{
2240 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2307 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2242,22 +2309,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2242 2309
2243 if (!transport->inet) { 2310 if (!transport->inet) {
2244 struct sock *sk = sock->sk; 2311 struct sock *sk = sock->sk;
2245 unsigned int keepidle = xprt->timeout->to_initval / HZ;
2246 unsigned int keepcnt = xprt->timeout->to_retries + 1;
2247 unsigned int opt_on = 1;
2248 unsigned int timeo;
2249 unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC; 2312 unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
2250 2313
2251 /* TCP Keepalive options */
2252 kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
2253 (char *)&opt_on, sizeof(opt_on));
2254 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
2255 (char *)&keepidle, sizeof(keepidle));
2256 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
2257 (char *)&keepidle, sizeof(keepidle));
2258 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
2259 (char *)&keepcnt, sizeof(keepcnt));
2260
2261 /* Avoid temporary address, they are bad for long-lived 2314 /* Avoid temporary address, they are bad for long-lived
2262 * connections such as NFS mounts. 2315 * connections such as NFS mounts.
2263 * RFC4941, section 3.6 suggests that: 2316 * RFC4941, section 3.6 suggests that:
@@ -2268,11 +2321,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2268 kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES, 2321 kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
2269 (char *)&addr_pref, sizeof(addr_pref)); 2322 (char *)&addr_pref, sizeof(addr_pref));
2270 2323
2271 /* TCP user timeout (see RFC5482) */ 2324 xs_tcp_set_socket_timeouts(xprt, sock);
2272 timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
2273 (xprt->timeout->to_retries + 1);
2274 kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
2275 (char *)&timeo, sizeof(timeo));
2276 2325
2277 write_lock_bh(&sk->sk_callback_lock); 2326 write_lock_bh(&sk->sk_callback_lock);
2278 2327
@@ -2721,6 +2770,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
2721 .set_retrans_timeout = xprt_set_retrans_timeout_def, 2770 .set_retrans_timeout = xprt_set_retrans_timeout_def,
2722 .close = xs_tcp_shutdown, 2771 .close = xs_tcp_shutdown,
2723 .destroy = xs_destroy, 2772 .destroy = xs_destroy,
2773 .set_connect_timeout = xs_tcp_set_connect_timeout,
2724 .print_stats = xs_tcp_print_stats, 2774 .print_stats = xs_tcp_print_stats,
2725 .enable_swap = xs_enable_swap, 2775 .enable_swap = xs_enable_swap,
2726 .disable_swap = xs_disable_swap, 2776 .disable_swap = xs_disable_swap,
@@ -3007,6 +3057,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
3007 xprt->timeout = &xs_tcp_default_timeout; 3057 xprt->timeout = &xs_tcp_default_timeout;
3008 3058
3009 xprt->max_reconnect_timeout = xprt->timeout->to_maxval; 3059 xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
3060 xprt->connect_timeout = xprt->timeout->to_initval *
3061 (xprt->timeout->to_retries + 1);
3010 3062
3011 INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn); 3063 INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
3012 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); 3064 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
@@ -3209,7 +3261,9 @@ static int param_set_uint_minmax(const char *val,
3209 if (!val) 3261 if (!val)
3210 return -EINVAL; 3262 return -EINVAL;
3211 ret = kstrtouint(val, 0, &num); 3263 ret = kstrtouint(val, 0, &num);
3212 if (ret == -EINVAL || num < min || num > max) 3264 if (ret)
3265 return ret;
3266 if (num < min || num > max)
3213 return -EINVAL; 3267 return -EINVAL;
3214 *((unsigned int *)kp->arg) = num; 3268 *((unsigned int *)kp->arg) = num;
3215 return 0; 3269 return 0;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 6b09a778cc71..43e4045e72bc 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -35,6 +35,8 @@
35 */ 35 */
36 36
37#include <linux/rhashtable.h> 37#include <linux/rhashtable.h>
38#include <linux/sched/signal.h>
39
38#include "core.h" 40#include "core.h"
39#include "name_table.h" 41#include "name_table.h"
40#include "node.h" 42#include "node.h"
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e2d18b9f910f..ee37b390260a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -85,7 +85,7 @@
85#include <linux/module.h> 85#include <linux/module.h>
86#include <linux/kernel.h> 86#include <linux/kernel.h>
87#include <linux/signal.h> 87#include <linux/signal.h>
88#include <linux/sched.h> 88#include <linux/sched/signal.h>
89#include <linux/errno.h> 89#include <linux/errno.h>
90#include <linux/string.h> 90#include <linux/string.h>
91#include <linux/stat.h> 91#include <linux/stat.h>
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 8a398b3fb532..9192ead66751 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -90,6 +90,7 @@
90#include <linux/init.h> 90#include <linux/init.h>
91#include <linux/io.h> 91#include <linux/io.h>
92#include <linux/kernel.h> 92#include <linux/kernel.h>
93#include <linux/sched/signal.h>
93#include <linux/kmod.h> 94#include <linux/kmod.h>
94#include <linux/list.h> 95#include <linux/list.h>
95#include <linux/miscdevice.h> 96#include <linux/miscdevice.h>
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 6788264acc63..9d24c0e958b1 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -532,7 +532,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
532 vsock->vdev = vdev; 532 vsock->vdev = vdev;
533 533
534 ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, 534 ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
535 vsock->vqs, callbacks, names); 535 vsock->vqs, callbacks, names,
536 NULL);
536 if (ret < 0) 537 if (ret < 0)
537 goto out; 538 goto out;
538 539
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 849c4ad0411e..8d592a45b597 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -9,6 +9,7 @@
9 */ 9 */
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/sched/signal.h>
12#include <linux/ctype.h> 13#include <linux/ctype.h>
13#include <linux/list.h> 14#include <linux/list.h>
14#include <linux/virtio.h> 15#include <linux/virtio.h>
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 079c883aa96e..fd28a49dbe8f 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -41,7 +41,7 @@
41#include <linux/capability.h> 41#include <linux/capability.h>
42#include <linux/errno.h> 42#include <linux/errno.h>
43#include <linux/kernel.h> 43#include <linux/kernel.h>
44#include <linux/sched.h> 44#include <linux/sched/signal.h>
45#include <linux/timer.h> 45#include <linux/timer.h>
46#include <linux/string.h> 46#include <linux/string.h>
47#include <linux/net.h> 47#include <linux/net.h>