author     Jiri Kosina <jkosina@suse.cz>  2018-08-20 12:05:17 -0400
committer  Jiri Kosina <jkosina@suse.cz>  2018-08-20 12:05:17 -0400
commit     415d2b3392d7a80903e0f97f051201aa02bf20e9 (patch)
tree       47492d2386a0e7f00ef645313cb44ae4960b7e7e /net
parent     4f65245f2d178b9cba48350620d76faa4a098841 (diff)
parent     b8e759b8f6dab1c473c30ac12709095d0b81078e (diff)
Merge branch 'for-4.19/cougar' into for-linus
New device support for hid-cougar
Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan.c | 2
-rw-r--r--  net/9p/protocol.c | 11
-rw-r--r--  net/9p/trans_virtio.c | 3
-rw-r--r--  net/Makefile | 4
-rw-r--r--  net/appletalk/ddp.c | 2
-rw-r--r--  net/atm/br2684.c | 3
-rw-r--r--  net/atm/clip.c | 3
-rw-r--r--  net/atm/common.c | 14
-rw-r--r--  net/atm/common.h | 2
-rw-r--r--  net/atm/lec.c | 3
-rw-r--r--  net/atm/mpc.c | 5
-rw-r--r--  net/atm/pppoatm.c | 3
-rw-r--r--  net/atm/pvc.c | 2
-rw-r--r--  net/atm/raw.c | 4
-rw-r--r--  net/atm/svc.c | 2
-rw-r--r--  net/ax25/af_ax25.c | 2
-rw-r--r--  net/bluetooth/af_bluetooth.c | 7
-rw-r--r--  net/bluetooth/hci_core.c | 2
-rw-r--r--  net/bluetooth/hci_sock.c | 2
-rw-r--r--  net/bluetooth/l2cap_core.c | 2
-rw-r--r--  net/bluetooth/l2cap_sock.c | 2
-rw-r--r--  net/bluetooth/rfcomm/sock.c | 2
-rw-r--r--  net/bluetooth/sco.c | 2
-rw-r--r--  net/bpfilter/.gitignore | 1
-rw-r--r--  net/bpfilter/Kconfig | 2
-rw-r--r--  net/bpfilter/Makefile | 15
-rw-r--r--  net/bpfilter/bpfilter_kern.c | 21
-rw-r--r--  net/bpfilter/bpfilter_umh_blob.S | 7
-rw-r--r--  net/bridge/br_fdb.c | 4
-rw-r--r--  net/bridge/br_multicast.c | 2
-rw-r--r--  net/bridge/netfilter/ebtables.c | 36
-rw-r--r--  net/bridge/netfilter/nft_reject_bridge.c | 2
-rw-r--r--  net/caif/caif_socket.c | 12
-rw-r--r--  net/can/bcm.c | 15
-rw-r--r--  net/can/raw.c | 2
-rw-r--r--  net/ceph/messenger.c | 31
-rw-r--r--  net/ceph/osd_client.c | 216
-rw-r--r--  net/ceph/osdmap.c | 24
-rw-r--r--  net/ceph/pagevec.c | 4
-rw-r--r--  net/core/datagram.c | 13
-rw-r--r--  net/core/dev.c | 6
-rw-r--r--  net/core/dev_ioctl.c | 11
-rw-r--r--  net/core/ethtool.c | 12
-rw-r--r--  net/core/fib_rules.c | 80
-rw-r--r--  net/core/filter.c | 107
-rw-r--r--  net/core/neighbour.c | 10
-rw-r--r--  net/core/pktgen.c | 3
-rw-r--r--  net/core/skbuff.c | 3
-rw-r--r--  net/core/sock.c | 22
-rw-r--r--  net/dcb/dcbnl.c | 3
-rw-r--r--  net/dccp/ccids/ccid2.c | 3
-rw-r--r--  net/dccp/ccids/ccid3.c | 16
-rw-r--r--  net/dccp/dccp.h | 3
-rw-r--r--  net/dccp/ipv4.c | 2
-rw-r--r--  net/dccp/ipv6.c | 2
-rw-r--r--  net/dccp/proto.c | 13
-rw-r--r--  net/decnet/af_decnet.c | 6
-rw-r--r--  net/dsa/tag_trailer.c | 3
-rw-r--r--  net/ieee802154/nl-phy.c | 2
-rw-r--r--  net/ieee802154/socket.c | 4
-rw-r--r--  net/ipv4/af_inet.c | 8
-rw-r--r--  net/ipv4/fib_frontend.c | 2
-rw-r--r--  net/ipv4/fou.c | 4
-rw-r--r--  net/ipv4/gre_offload.c | 2
-rw-r--r--  net/ipv4/inet_hashtables.c | 4
-rw-r--r--  net/ipv4/ip_output.c | 3
-rw-r--r--  net/ipv4/ip_tunnel.c | 4
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 1
-rw-r--r--  net/ipv4/route.c | 5
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 18
-rw-r--r--  net/ipv4/tcp.c | 35
-rw-r--r--  net/ipv4/tcp_input.c | 13
-rw-r--r--  net/ipv4/tcp_ipv4.c | 4
-rw-r--r--  net/ipv4/tcp_offload.c | 2
-rw-r--r--  net/ipv4/udp.c | 12
-rw-r--r--  net/ipv4/udp_diag.c | 2
-rw-r--r--  net/ipv4/udp_offload.c | 2
-rw-r--r--  net/ipv6/addrconf.c | 11
-rw-r--r--  net/ipv6/af_inet6.c | 4
-rw-r--r--  net/ipv6/datagram.c | 6
-rw-r--r--  net/ipv6/icmp.c | 2
-rw-r--r--  net/ipv6/ila/ila_xlat.c | 3
-rw-r--r--  net/ipv6/inet6_hashtables.c | 4
-rw-r--r--  net/ipv6/ip6_fib.c | 10
-rw-r--r--  net/ipv6/ip6_output.c | 3
-rw-r--r--  net/ipv6/mcast.c | 9
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 1
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 6
-rw-r--r--  net/ipv6/raw.c | 4
-rw-r--r--  net/ipv6/route.c | 3
-rw-r--r--  net/ipv6/seg6_hmac.c | 2
-rw-r--r--  net/ipv6/tcp_ipv6.c | 4
-rw-r--r--  net/ipv6/udp.c | 3
-rw-r--r--  net/iucv/af_iucv.c | 7
-rw-r--r--  net/kcm/kcmsock.c | 10
-rw-r--r--  net/key/af_key.c | 2
-rw-r--r--  net/l2tp/l2tp_ip.c | 2
-rw-r--r--  net/l2tp/l2tp_ip6.c | 2
-rw-r--r--  net/l2tp/l2tp_netlink.c | 6
-rw-r--r--  net/l2tp/l2tp_ppp.c | 30
-rw-r--r--  net/llc/af_llc.c | 2
-rw-r--r--  net/mac80211/chan.c | 2
-rw-r--r--  net/mac80211/main.c | 14
-rw-r--r--  net/mac80211/rc80211_minstrel.c | 4
-rw-r--r--  net/mac80211/rc80211_minstrel_ht.c | 4
-rw-r--r--  net/mac80211/scan.c | 2
-rw-r--r--  net/mac80211/tx.c | 2
-rw-r--r--  net/mac80211/util.c | 5
-rw-r--r--  net/ncsi/ncsi-aen.c | 10
-rw-r--r--  net/ncsi/ncsi-manage.c | 49
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h | 5
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 3
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 4
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c | 2
-rw-r--r--  net/netfilter/nf_conncount.c | 63
-rw-r--r--  net/netfilter/nf_conntrack_helper.c | 5
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 3
-rw-r--r--  net/netfilter/nf_conntrack_proto.c | 3
-rw-r--r--  net/netfilter/nf_log.c | 13
-rw-r--r--  net/netfilter/nf_nat_core.c | 5
-rw-r--r--  net/netfilter/nf_tables_api.c | 42
-rw-r--r--  net/netfilter/nf_tables_core.c | 3
-rw-r--r--  net/netfilter/nfnetlink.c | 10
-rw-r--r--  net/netfilter/nfnetlink_cthelper.c | 5
-rw-r--r--  net/netfilter/nfnetlink_queue.c | 3
-rw-r--r--  net/netfilter/nft_chain_filter.c | 5
-rw-r--r--  net/netfilter/nft_connlimit.c | 2
-rw-r--r--  net/netfilter/nft_dynset.c | 4
-rw-r--r--  net/netfilter/nft_log.c | 10
-rw-r--r--  net/netfilter/nft_set_rbtree.c | 2
-rw-r--r--  net/netfilter/nft_socket.c | 1
-rw-r--r--  net/netfilter/x_tables.c | 2
-rw-r--r--  net/netfilter/xt_CT.c | 10
-rw-r--r--  net/netfilter/xt_connmark.c | 2
-rw-r--r--  net/netfilter/xt_set.c | 10
-rw-r--r--  net/netlink/af_netlink.c | 2
-rw-r--r--  net/netlink/genetlink.c | 10
-rw-r--r--  net/netrom/af_netrom.c | 4
-rw-r--r--  net/nfc/llcp_sock.c | 9
-rw-r--r--  net/nfc/rawsock.c | 4
-rw-r--r--  net/openvswitch/datapath.c | 5
-rw-r--r--  net/openvswitch/vport.c | 2
-rw-r--r--  net/packet/af_packet.c | 29
-rw-r--r--  net/phonet/socket.c | 9
-rw-r--r--  net/qrtr/qrtr.c | 2
-rw-r--r--  net/rds/connection.c | 11
-rw-r--r--  net/rds/ib.c | 3
-rw-r--r--  net/rds/ib_cm.c | 6
-rw-r--r--  net/rds/info.c | 2
-rw-r--r--  net/rds/loop.c | 57
-rw-r--r--  net/rds/loop.h | 2
-rw-r--r--  net/rds/rds.h | 5
-rw-r--r--  net/rds/recv.c | 5
-rw-r--r--  net/rose/af_rose.c | 5
-rw-r--r--  net/rxrpc/af_rxrpc.c | 10
-rw-r--r--  net/rxrpc/rxkad.c | 2
-rw-r--r--  net/sched/act_ife.c | 12
-rw-r--r--  net/sched/act_simple.c | 15
-rw-r--r--  net/sched/cls_flower.c | 21
-rw-r--r--  net/sched/sch_blackhole.c | 2
-rw-r--r--  net/sched/sch_fq_codel.c | 7
-rw-r--r--  net/sched/sch_hfsc.c | 4
-rw-r--r--  net/sched/sch_hhf.c | 9
-rw-r--r--  net/sctp/auth.c | 5
-rw-r--r--  net/sctp/chunk.c | 4
-rw-r--r--  net/sctp/ipv6.c | 2
-rw-r--r--  net/sctp/output.c | 28
-rw-r--r--  net/sctp/protocol.c | 4
-rw-r--r--  net/sctp/socket.c | 4
-rw-r--r--  net/smc/af_smc.c | 97
-rw-r--r--  net/smc/smc.h | 8
-rw-r--r--  net/smc/smc_wr.c | 6
-rw-r--r--  net/socket.c | 68
-rw-r--r--  net/strparser/strparser.c | 22
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 3
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_upcall.c | 6
-rw-r--r--  net/sunrpc/cache.c | 2
-rw-r--r--  net/sunrpc/clnt.c | 1
-rw-r--r--  net/sunrpc/xprt.c | 23
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c | 107
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c | 26
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c | 33
-rw-r--r--  net/sunrpc/xprtrdma/module.c | 5
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 73
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma.c | 3
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 55
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 439
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_rw.c | 133
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c | 510
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 481
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 68
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 292
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h | 28
-rw-r--r--  net/sunrpc/xprtsock.c | 4
-rw-r--r--  net/tipc/netlink_compat.c | 5
-rw-r--r--  net/tipc/socket.c | 14
-rw-r--r--  net/tls/tls_sw.c | 32
-rw-r--r--  net/unix/af_unix.c | 30
-rw-r--r--  net/vmw_vsock/af_vsock.c | 19
-rw-r--r--  net/vmw_vsock/virtio_transport.c | 2
-rw-r--r--  net/wireless/core.c | 1
-rw-r--r--  net/wireless/nl80211.c | 39
-rw-r--r--  net/wireless/util.c | 2
-rw-r--r--  net/x25/af_x25.c | 2
-rw-r--r--  net/xdp/xdp_umem.c | 9
-rw-r--r--  net/xdp/xsk.c | 12
206 files changed, 2394 insertions, 1935 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 73a65789271b..8ccee3d01822 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -693,7 +693,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
693out_unlock: 693out_unlock:
694 rcu_read_unlock(); 694 rcu_read_unlock();
695out: 695out:
696 NAPI_GRO_CB(skb)->flush |= flush; 696 skb_gro_flush_final(skb, pp, flush);
697 697
698 return pp; 698 return pp;
699} 699}
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 16e10680518c..931ea00c4fed 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -242,8 +242,9 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
242 "w", nwname); 242 "w", nwname);
243 if (!errcode) { 243 if (!errcode) {
244 *wnames = 244 *wnames =
245 kmalloc(sizeof(char *) * *nwname, 245 kmalloc_array(*nwname,
246 GFP_NOFS); 246 sizeof(char *),
247 GFP_NOFS);
247 if (!*wnames) 248 if (!*wnames)
248 errcode = -ENOMEM; 249 errcode = -ENOMEM;
249 } 250 }
@@ -285,9 +286,9 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
285 p9pdu_readf(pdu, proto_version, "w", nwqid); 286 p9pdu_readf(pdu, proto_version, "w", nwqid);
286 if (!errcode) { 287 if (!errcode) {
287 *wqids = 288 *wqids =
288 kmalloc(*nwqid * 289 kmalloc_array(*nwqid,
289 sizeof(struct p9_qid), 290 sizeof(struct p9_qid),
290 GFP_NOFS); 291 GFP_NOFS);
291 if (*wqids == NULL) 292 if (*wqids == NULL)
292 errcode = -ENOMEM; 293 errcode = -ENOMEM;
293 } 294 }
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 4d0372263e5d..05006cbb3361 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -360,7 +360,8 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
360 nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) - 360 nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) -
361 (unsigned long)p / PAGE_SIZE; 361 (unsigned long)p / PAGE_SIZE;
362 362
363 *pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 363 *pages = kmalloc_array(nr_pages, sizeof(struct page *),
364 GFP_NOFS);
364 if (!*pages) 365 if (!*pages)
365 return -ENOMEM; 366 return -ENOMEM;
366 367
diff --git a/net/Makefile b/net/Makefile
index 13ec0d5415c7..bdaf53925acd 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -20,11 +20,7 @@ obj-$(CONFIG_TLS) += tls/
20obj-$(CONFIG_XFRM) += xfrm/ 20obj-$(CONFIG_XFRM) += xfrm/
21obj-$(CONFIG_UNIX) += unix/ 21obj-$(CONFIG_UNIX) += unix/
22obj-$(CONFIG_NET) += ipv6/ 22obj-$(CONFIG_NET) += ipv6/
23ifneq ($(CC_CAN_LINK),y)
24$(warning CC cannot link executables. Skipping bpfilter.)
25else
26obj-$(CONFIG_BPFILTER) += bpfilter/ 23obj-$(CONFIG_BPFILTER) += bpfilter/
27endif
28obj-$(CONFIG_PACKET) += packet/ 24obj-$(CONFIG_PACKET) += packet/
29obj-$(CONFIG_NET_KEY) += key/ 25obj-$(CONFIG_NET_KEY) += key/
30obj-$(CONFIG_BRIDGE) += bridge/ 26obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 55fdba05d7d9..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1869,7 +1869,7 @@ static const struct proto_ops atalk_dgram_ops = {
1869 .socketpair = sock_no_socketpair, 1869 .socketpair = sock_no_socketpair,
1870 .accept = sock_no_accept, 1870 .accept = sock_no_accept,
1871 .getname = atalk_getname, 1871 .getname = atalk_getname,
1872 .poll_mask = datagram_poll_mask, 1872 .poll = datagram_poll,
1873 .ioctl = atalk_ioctl, 1873 .ioctl = atalk_ioctl,
1874#ifdef CONFIG_COMPAT 1874#ifdef CONFIG_COMPAT
1875 .compat_ioctl = atalk_compat_ioctl, 1875 .compat_ioctl = atalk_compat_ioctl,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 36b3adacc0dd..10462de734ea 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -252,8 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
252 252
253 ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; 253 ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
254 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); 254 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
255 refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); 255 atm_account_tx(atmvcc, skb);
256 ATM_SKB(skb)->atm_options = atmvcc->atm_options;
257 dev->stats.tx_packets++; 256 dev->stats.tx_packets++;
258 dev->stats.tx_bytes += skb->len; 257 dev->stats.tx_bytes += skb->len;
259 258
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 66caa48a27c2..d795b9c5aea4 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -381,8 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
381 memcpy(here, llc_oui, sizeof(llc_oui)); 381 memcpy(here, llc_oui, sizeof(llc_oui));
382 ((__be16 *) here)[3] = skb->protocol; 382 ((__be16 *) here)[3] = skb->protocol;
383 } 383 }
384 refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); 384 atm_account_tx(vcc, skb);
385 ATM_SKB(skb)->atm_options = vcc->atm_options;
386 entry->vccs->last_use = jiffies; 385 entry->vccs->last_use = jiffies;
387 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); 386 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
388 old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */ 387 old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */
diff --git a/net/atm/common.c b/net/atm/common.c
index 1f2af59935db..a7a68e509628 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -630,10 +630,9 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
630 goto out; 630 goto out;
631 } 631 }
632 pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); 632 pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
633 refcount_add(skb->truesize, &sk->sk_wmem_alloc); 633 atm_account_tx(vcc, skb);
634 634
635 skb->dev = NULL; /* for paths shared with net_device interfaces */ 635 skb->dev = NULL; /* for paths shared with net_device interfaces */
636 ATM_SKB(skb)->atm_options = vcc->atm_options;
637 if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { 636 if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
638 kfree_skb(skb); 637 kfree_skb(skb);
639 error = -EFAULT; 638 error = -EFAULT;
@@ -648,11 +647,16 @@ out:
648 return error; 647 return error;
649} 648}
650 649
651__poll_t vcc_poll_mask(struct socket *sock, __poll_t events) 650__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
652{ 651{
653 struct sock *sk = sock->sk; 652 struct sock *sk = sock->sk;
654 struct atm_vcc *vcc = ATM_SD(sock); 653 struct atm_vcc *vcc;
655 __poll_t mask = 0; 654 __poll_t mask;
655
656 sock_poll_wait(file, sk_sleep(sk), wait);
657 mask = 0;
658
659 vcc = ATM_SD(sock);
656 660
657 /* exceptional events */ 661 /* exceptional events */
658 if (sk->sk_err) 662 if (sk->sk_err)
diff --git a/net/atm/common.h b/net/atm/common.h
index 526796ad230f..5850649068bb 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
17int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 17int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
18 int flags); 18 int flags);
19int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); 19int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
20__poll_t vcc_poll_mask(struct socket *sock, __poll_t events); 20__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
21int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 21int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
22int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 22int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
23int vcc_setsockopt(struct socket *sock, int level, int optname, 23int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5a95fcf6f9b6..d7f5cf5b7594 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -182,9 +182,8 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
182 struct net_device *dev = skb->dev; 182 struct net_device *dev = skb->dev;
183 183
184 ATM_SKB(skb)->vcc = vcc; 184 ATM_SKB(skb)->vcc = vcc;
185 ATM_SKB(skb)->atm_options = vcc->atm_options; 185 atm_account_tx(vcc, skb);
186 186
187 refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
188 if (vcc->send(vcc, skb) < 0) { 187 if (vcc->send(vcc, skb) < 0) {
189 dev->stats.tx_dropped++; 188 dev->stats.tx_dropped++;
190 return; 189 return;
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 31e0dcb970f8..24b53c4c39c6 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -472,7 +472,7 @@ static const uint8_t *copy_macs(struct mpoa_client *mpc,
472 if (mpc->number_of_mps_macs != 0) 472 if (mpc->number_of_mps_macs != 0)
473 kfree(mpc->mps_macs); 473 kfree(mpc->mps_macs);
474 mpc->number_of_mps_macs = 0; 474 mpc->number_of_mps_macs = 0;
475 mpc->mps_macs = kmalloc(num_macs * ETH_ALEN, GFP_KERNEL); 475 mpc->mps_macs = kmalloc_array(ETH_ALEN, num_macs, GFP_KERNEL);
476 if (mpc->mps_macs == NULL) { 476 if (mpc->mps_macs == NULL) {
477 pr_info("(%s) out of mem\n", mpc->dev->name); 477 pr_info("(%s) out of mem\n", mpc->dev->name);
478 return NULL; 478 return NULL;
@@ -555,8 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
555 sizeof(struct llc_snap_hdr)); 555 sizeof(struct llc_snap_hdr));
556 } 556 }
557 557
558 refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); 558 atm_account_tx(entry->shortcut, skb);
559 ATM_SKB(skb)->atm_options = entry->shortcut->atm_options;
560 entry->shortcut->send(entry->shortcut, skb); 559 entry->shortcut->send(entry->shortcut, skb);
561 entry->packets_fwded++; 560 entry->packets_fwded++;
562 mpc->in_ops->put(entry); 561 mpc->in_ops->put(entry);
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 21d9d341a619..af8c4b38b746 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -350,8 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
350 return 1; 350 return 1;
351 } 351 }
352 352
353 refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); 353 atm_account_tx(vcc, skb);
354 ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
355 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", 354 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
356 skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); 355 skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
357 ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) 356 ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 9f75092fe778..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -113,7 +113,7 @@ static const struct proto_ops pvc_proto_ops = {
113 .socketpair = sock_no_socketpair, 113 .socketpair = sock_no_socketpair,
114 .accept = sock_no_accept, 114 .accept = sock_no_accept,
115 .getname = pvc_getname, 115 .getname = pvc_getname,
116 .poll_mask = vcc_poll_mask, 116 .poll = vcc_poll,
117 .ioctl = vcc_ioctl, 117 .ioctl = vcc_ioctl,
118#ifdef CONFIG_COMPAT 118#ifdef CONFIG_COMPAT
119 .compat_ioctl = vcc_compat_ioctl, 119 .compat_ioctl = vcc_compat_ioctl,
diff --git a/net/atm/raw.c b/net/atm/raw.c
index ee10e8d46185..b3ba44aab0ee 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -35,8 +35,8 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
35 struct sock *sk = sk_atm(vcc); 35 struct sock *sk = sk_atm(vcc);
36 36
37 pr_debug("(%d) %d -= %d\n", 37 pr_debug("(%d) %d -= %d\n",
38 vcc->vci, sk_wmem_alloc_get(sk), skb->truesize); 38 vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize);
39 WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); 39 WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc));
40 dev_kfree_skb_any(skb); 40 dev_kfree_skb_any(skb);
41 sk->sk_write_space(sk); 41 sk->sk_write_space(sk);
42} 42}
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 53f4ad7087b1..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -636,7 +636,7 @@ static const struct proto_ops svc_proto_ops = {
636 .socketpair = sock_no_socketpair, 636 .socketpair = sock_no_socketpair,
637 .accept = svc_accept, 637 .accept = svc_accept,
638 .getname = svc_getname, 638 .getname = svc_getname,
639 .poll_mask = vcc_poll_mask, 639 .poll = vcc_poll,
640 .ioctl = svc_ioctl, 640 .ioctl = svc_ioctl,
641#ifdef CONFIG_COMPAT 641#ifdef CONFIG_COMPAT
642 .compat_ioctl = svc_compat_ioctl, 642 .compat_ioctl = svc_compat_ioctl,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index d1d2442ce573..c603d33d5410 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1941,7 +1941,7 @@ static const struct proto_ops ax25_proto_ops = {
1941 .socketpair = sock_no_socketpair, 1941 .socketpair = sock_no_socketpair,
1942 .accept = ax25_accept, 1942 .accept = ax25_accept,
1943 .getname = ax25_getname, 1943 .getname = ax25_getname,
1944 .poll_mask = datagram_poll_mask, 1944 .poll = datagram_poll,
1945 .ioctl = ax25_ioctl, 1945 .ioctl = ax25_ioctl,
1946 .listen = ax25_listen, 1946 .listen = ax25_listen,
1947 .shutdown = ax25_shutdown, 1947 .shutdown = ax25_shutdown,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 510ab4f55df5..3264e1873219 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -437,13 +437,16 @@ static inline __poll_t bt_accept_poll(struct sock *parent)
437 return 0; 437 return 0;
438} 438}
439 439
440__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events) 440__poll_t bt_sock_poll(struct file *file, struct socket *sock,
441 poll_table *wait)
441{ 442{
442 struct sock *sk = sock->sk; 443 struct sock *sk = sock->sk;
443 __poll_t mask = 0; 444 __poll_t mask = 0;
444 445
445 BT_DBG("sock %p, sk %p", sock, sk); 446 BT_DBG("sock %p, sk %p", sock, sk);
446 447
448 poll_wait(file, sk_sleep(sk), wait);
449
447 if (sk->sk_state == BT_LISTEN) 450 if (sk->sk_state == BT_LISTEN)
448 return bt_accept_poll(sk); 451 return bt_accept_poll(sk);
449 452
@@ -475,7 +478,7 @@ __poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events)
475 478
476 return mask; 479 return mask;
477} 480}
478EXPORT_SYMBOL(bt_sock_poll_mask); 481EXPORT_SYMBOL(bt_sock_poll);
479 482
480int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 483int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
481{ 484{
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1dec33790198..ee8ef1228263 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1281,7 +1281,7 @@ int hci_inquiry(void __user *arg)
1281 /* cache_dump can't sleep. Therefore we allocate temp buffer and then 1281 /* cache_dump can't sleep. Therefore we allocate temp buffer and then
1282 * copy it to the user space. 1282 * copy it to the user space.
1283 */ 1283 */
1284 buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL); 1284 buf = kmalloc_array(max_rsp, sizeof(struct inquiry_info), GFP_KERNEL);
1285 if (!buf) { 1285 if (!buf) {
1286 err = -ENOMEM; 1286 err = -ENOMEM;
1287 goto done; 1287 goto done;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index d6c099861538..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1975,7 +1975,7 @@ static const struct proto_ops hci_sock_ops = {
1975 .sendmsg = hci_sock_sendmsg, 1975 .sendmsg = hci_sock_sendmsg,
1976 .recvmsg = hci_sock_recvmsg, 1976 .recvmsg = hci_sock_recvmsg,
1977 .ioctl = hci_sock_ioctl, 1977 .ioctl = hci_sock_ioctl,
1978 .poll_mask = datagram_poll_mask, 1978 .poll = datagram_poll,
1979 .listen = sock_no_listen, 1979 .listen = sock_no_listen,
1980 .shutdown = sock_no_shutdown, 1980 .shutdown = sock_no_shutdown,
1981 .setsockopt = hci_sock_setsockopt, 1981 .setsockopt = hci_sock_setsockopt,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9b7907ebfa01..d17a4736e47c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -331,7 +331,7 @@ static int l2cap_seq_list_init(struct l2cap_seq_list *seq_list, u16 size)
331 */ 331 */
332 alloc_size = roundup_pow_of_two(size); 332 alloc_size = roundup_pow_of_two(size);
333 333
334 seq_list->list = kmalloc(sizeof(u16) * alloc_size, GFP_KERNEL); 334 seq_list->list = kmalloc_array(alloc_size, sizeof(u16), GFP_KERNEL);
335 if (!seq_list->list) 335 if (!seq_list->list)
336 return -ENOMEM; 336 return -ENOMEM;
337 337
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 742a190034e6..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1653,7 +1653,7 @@ static const struct proto_ops l2cap_sock_ops = {
1653 .getname = l2cap_sock_getname, 1653 .getname = l2cap_sock_getname,
1654 .sendmsg = l2cap_sock_sendmsg, 1654 .sendmsg = l2cap_sock_sendmsg,
1655 .recvmsg = l2cap_sock_recvmsg, 1655 .recvmsg = l2cap_sock_recvmsg,
1656 .poll_mask = bt_sock_poll_mask, 1656 .poll = bt_sock_poll,
1657 .ioctl = bt_sock_ioctl, 1657 .ioctl = bt_sock_ioctl,
1658 .mmap = sock_no_mmap, 1658 .mmap = sock_no_mmap,
1659 .socketpair = sock_no_socketpair, 1659 .socketpair = sock_no_socketpair,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1cf57622473a..d606e9212291 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1049,7 +1049,7 @@ static const struct proto_ops rfcomm_sock_ops = {
1049 .setsockopt = rfcomm_sock_setsockopt, 1049 .setsockopt = rfcomm_sock_setsockopt,
1050 .getsockopt = rfcomm_sock_getsockopt, 1050 .getsockopt = rfcomm_sock_getsockopt,
1051 .ioctl = rfcomm_sock_ioctl, 1051 .ioctl = rfcomm_sock_ioctl,
1052 .poll_mask = bt_sock_poll_mask, 1052 .poll = bt_sock_poll,
1053 .socketpair = sock_no_socketpair, 1053 .socketpair = sock_no_socketpair,
1054 .mmap = sock_no_mmap 1054 .mmap = sock_no_mmap
1055}; 1055};
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index d60dbc61d170..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1197,7 +1197,7 @@ static const struct proto_ops sco_sock_ops = {
1197 .getname = sco_sock_getname, 1197 .getname = sco_sock_getname,
1198 .sendmsg = sco_sock_sendmsg, 1198 .sendmsg = sco_sock_sendmsg,
1199 .recvmsg = sco_sock_recvmsg, 1199 .recvmsg = sco_sock_recvmsg,
1200 .poll_mask = bt_sock_poll_mask, 1200 .poll = bt_sock_poll,
1201 .ioctl = bt_sock_ioctl, 1201 .ioctl = bt_sock_ioctl,
1202 .mmap = sock_no_mmap, 1202 .mmap = sock_no_mmap,
1203 .socketpair = sock_no_socketpair, 1203 .socketpair = sock_no_socketpair,
diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore
new file mode 100644
index 000000000000..e97084e3eea2
--- /dev/null
+++ b/net/bpfilter/.gitignore
@@ -0,0 +1 @@
bpfilter_umh
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index a948b072c28f..76deb6615883 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -1,6 +1,5 @@
1menuconfig BPFILTER 1menuconfig BPFILTER
2 bool "BPF based packet filtering framework (BPFILTER)" 2 bool "BPF based packet filtering framework (BPFILTER)"
3 default n
4 depends on NET && BPF && INET 3 depends on NET && BPF && INET
5 help 4 help
6 This builds experimental bpfilter framework that is aiming to 5 This builds experimental bpfilter framework that is aiming to
@@ -9,6 +8,7 @@ menuconfig BPFILTER
9if BPFILTER 8if BPFILTER
10config BPFILTER_UMH 9config BPFILTER_UMH
11 tristate "bpfilter kernel module with user mode helper" 10 tristate "bpfilter kernel module with user mode helper"
11 depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC))
12 default m 12 default m
13 help 13 help
14 This builds bpfilter kernel module with embedded user mode helper 14 This builds bpfilter kernel module with embedded user mode helper
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index aafa72001fcd..39c6980b5d99 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -15,18 +15,7 @@ ifeq ($(CONFIG_BPFILTER_UMH), y)
15HOSTLDFLAGS += -static 15HOSTLDFLAGS += -static
16endif 16endif
17 17
18# a bit of elf magic to convert bpfilter_umh binary into a binary blob 18$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
19# inside bpfilter_umh.o elf file referenced by
20# _binary_net_bpfilter_bpfilter_umh_start symbol
21# which bpfilter_kern.c passes further into umh blob loader at run-time
22quiet_cmd_copy_umh = GEN $@
23 cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
24 $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
25 -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
26 --rename-section .data=.init.rodata $< $@
27
28$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
29 $(call cmd,copy_umh)
30 19
31obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o 20obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
32bpfilter-objs += bpfilter_kern.o bpfilter_umh.o 21bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index b13d058f8c34..f0fc182d3db7 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -10,11 +10,8 @@
10#include <linux/file.h> 10#include <linux/file.h>
11#include "msgfmt.h" 11#include "msgfmt.h"
12 12
13#define UMH_start _binary_net_bpfilter_bpfilter_umh_start 13extern char bpfilter_umh_start;
14#define UMH_end _binary_net_bpfilter_bpfilter_umh_end 14extern char bpfilter_umh_end;
15
16extern char UMH_start;
17extern char UMH_end;
18 15
19static struct umh_info info; 16static struct umh_info info;
20/* since ip_getsockopt() can run in parallel, serialize access to umh */ 17/* since ip_getsockopt() can run in parallel, serialize access to umh */
@@ -24,17 +21,19 @@ static void shutdown_umh(struct umh_info *info)
24{ 21{
25 struct task_struct *tsk; 22 struct task_struct *tsk;
26 23
24 if (!info->pid)
25 return;
27 tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID); 26 tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID);
28 if (tsk) 27 if (tsk)
29 force_sig(SIGKILL, tsk); 28 force_sig(SIGKILL, tsk);
30 fput(info->pipe_to_umh); 29 fput(info->pipe_to_umh);
31 fput(info->pipe_from_umh); 30 fput(info->pipe_from_umh);
31 info->pid = 0;
32} 32}
33 33
34static void __stop_umh(void) 34static void __stop_umh(void)
35{ 35{
36 if (IS_ENABLED(CONFIG_INET) && 36 if (IS_ENABLED(CONFIG_INET)) {
37 bpfilter_process_sockopt) {
38 bpfilter_process_sockopt = NULL; 37 bpfilter_process_sockopt = NULL;
39 shutdown_umh(&info); 38 shutdown_umh(&info);
40 } 39 }
@@ -55,7 +54,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
55 struct mbox_reply reply; 54 struct mbox_reply reply;
56 loff_t pos; 55 loff_t pos;
57 ssize_t n; 56 ssize_t n;
58 int ret; 57 int ret = -EFAULT;
59 58
60 req.is_set = is_set; 59 req.is_set = is_set;
61 req.pid = current->pid; 60 req.pid = current->pid;
@@ -63,6 +62,8 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
63 req.addr = (long)optval; 62 req.addr = (long)optval;
64 req.len = optlen; 63 req.len = optlen;
65 mutex_lock(&bpfilter_lock); 64 mutex_lock(&bpfilter_lock);
65 if (!info.pid)
66 goto out;
66 n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos); 67 n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos);
67 if (n != sizeof(req)) { 68 if (n != sizeof(req)) {
68 pr_err("write fail %zd\n", n); 69 pr_err("write fail %zd\n", n);
@@ -89,7 +90,9 @@ static int __init load_umh(void)
89 int err; 90 int err;
90 91
91 /* fork usermode process */ 92 /* fork usermode process */
92 err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info); 93 err = fork_usermode_blob(&bpfilter_umh_start,
94 &bpfilter_umh_end - &bpfilter_umh_start,
95 &info);
93 if (err) 96 if (err)
94 return err; 97 return err;
95 pr_info("Loaded bpfilter_umh pid %d\n", info.pid); 98 pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
new file mode 100644
index 000000000000..40311d10d2f2
--- /dev/null
+++ b/net/bpfilter/bpfilter_umh_blob.S
@@ -0,0 +1,7 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2 .section .init.rodata, "a"
3 .global bpfilter_umh_start
4bpfilter_umh_start:
5 .incbin "net/bpfilter/bpfilter_umh"
6 .global bpfilter_umh_end
7bpfilter_umh_end:
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b19e3104afd6..502f66349530 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -135,9 +135,11 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
135 return NULL; 135 return NULL;
136 136
137 br = netdev_priv(br_dev); 137 br = netdev_priv(br_dev);
138 f = br_fdb_find(br, addr, vid); 138 rcu_read_lock();
139 f = br_fdb_find_rcu(br, addr, vid);
139 if (f && f->dst) 140 if (f && f->dst)
140 dev = f->dst->dev; 141 dev = f->dst->dev;
142 rcu_read_unlock();
141 143
142 return dev; 144 return dev;
143} 145}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index cb4729539b82..920665dd92db 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -333,7 +333,7 @@ static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
333 mdb->max = max; 333 mdb->max = max;
334 mdb->old = old; 334 mdb->old = old;
335 335
336 mdb->mhash = kzalloc(max * sizeof(*mdb->mhash), GFP_ATOMIC); 336 mdb->mhash = kcalloc(max, sizeof(*mdb->mhash), GFP_ATOMIC);
337 if (!mdb->mhash) { 337 if (!mdb->mhash) {
338 kfree(mdb); 338 kfree(mdb);
339 return -ENOMEM; 339 return -ENOMEM;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 28f68a2ec911..491828713e0b 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -411,6 +411,12 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par,
411 watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0); 411 watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0);
412 if (IS_ERR(watcher)) 412 if (IS_ERR(watcher))
413 return PTR_ERR(watcher); 413 return PTR_ERR(watcher);
414
415 if (watcher->family != NFPROTO_BRIDGE) {
416 module_put(watcher->me);
417 return -ENOENT;
418 }
419
414 w->u.watcher = watcher; 420 w->u.watcher = watcher;
415 421
416 par->target = watcher; 422 par->target = watcher;
@@ -709,6 +715,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
709 } 715 }
710 i = 0; 716 i = 0;
711 717
718 memset(&mtpar, 0, sizeof(mtpar));
719 memset(&tgpar, 0, sizeof(tgpar));
712 mtpar.net = tgpar.net = net; 720 mtpar.net = tgpar.net = net;
713 mtpar.table = tgpar.table = name; 721 mtpar.table = tgpar.table = name;
714 mtpar.entryinfo = tgpar.entryinfo = e; 722 mtpar.entryinfo = tgpar.entryinfo = e;
@@ -730,6 +738,13 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
730 goto cleanup_watchers; 738 goto cleanup_watchers;
731 } 739 }
732 740
741 /* Reject UNSPEC, xtables verdicts/return values are incompatible */
742 if (target->family != NFPROTO_BRIDGE) {
743 module_put(target->me);
744 ret = -ENOENT;
745 goto cleanup_watchers;
746 }
747
733 t->u.target = target; 748 t->u.target = target;
734 if (t->u.target == &ebt_standard_target) { 749 if (t->u.target == &ebt_standard_target) {
735 if (gap < sizeof(struct ebt_standard_target)) { 750 if (gap < sizeof(struct ebt_standard_target)) {
@@ -903,12 +918,13 @@ static int translate_table(struct net *net, const char *name,
903 * if an error occurs 918 * if an error occurs
904 */ 919 */
905 newinfo->chainstack = 920 newinfo->chainstack =
906 vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack))); 921 vmalloc(array_size(nr_cpu_ids,
922 sizeof(*(newinfo->chainstack))));
907 if (!newinfo->chainstack) 923 if (!newinfo->chainstack)
908 return -ENOMEM; 924 return -ENOMEM;
909 for_each_possible_cpu(i) { 925 for_each_possible_cpu(i) {
910 newinfo->chainstack[i] = 926 newinfo->chainstack[i] =
911 vmalloc(udc_cnt * sizeof(*(newinfo->chainstack[0]))); 927 vmalloc(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0]))));
912 if (!newinfo->chainstack[i]) { 928 if (!newinfo->chainstack[i]) {
913 while (i) 929 while (i)
914 vfree(newinfo->chainstack[--i]); 930 vfree(newinfo->chainstack[--i]);
@@ -918,7 +934,7 @@ static int translate_table(struct net *net, const char *name,
918 } 934 }
919 } 935 }
920 936
921 cl_s = vmalloc(udc_cnt * sizeof(*cl_s)); 937 cl_s = vmalloc(array_size(udc_cnt, sizeof(*cl_s)));
922 if (!cl_s) 938 if (!cl_s)
923 return -ENOMEM; 939 return -ENOMEM;
924 i = 0; /* the i'th udc */ 940 i = 0; /* the i'th udc */
@@ -1293,7 +1309,7 @@ static int do_update_counters(struct net *net, const char *name,
1293 if (num_counters == 0) 1309 if (num_counters == 0)
1294 return -EINVAL; 1310 return -EINVAL;
1295 1311
1296 tmp = vmalloc(num_counters * sizeof(*tmp)); 1312 tmp = vmalloc(array_size(num_counters, sizeof(*tmp)));
1297 if (!tmp) 1313 if (!tmp)
1298 return -ENOMEM; 1314 return -ENOMEM;
1299 1315
@@ -1434,7 +1450,7 @@ static int copy_counters_to_user(struct ebt_table *t,
1434 return -EINVAL; 1450 return -EINVAL;
1435 } 1451 }
1436 1452
1437 counterstmp = vmalloc(nentries * sizeof(*counterstmp)); 1453 counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp)));
1438 if (!counterstmp) 1454 if (!counterstmp)
1439 return -ENOMEM; 1455 return -ENOMEM;
1440 1456
@@ -1605,16 +1621,16 @@ struct compat_ebt_entry_mwt {
1605 compat_uptr_t ptr; 1621 compat_uptr_t ptr;
1606 } u; 1622 } u;
1607 compat_uint_t match_size; 1623 compat_uint_t match_size;
1608 compat_uint_t data[0]; 1624 compat_uint_t data[0] __attribute__ ((aligned (__alignof__(struct compat_ebt_replace))));
1609}; 1625};
1610 1626
1611/* account for possible padding between match_size and ->data */ 1627/* account for possible padding between match_size and ->data */
1612static int ebt_compat_entry_padsize(void) 1628static int ebt_compat_entry_padsize(void)
1613{ 1629{
1614 BUILD_BUG_ON(XT_ALIGN(sizeof(struct ebt_entry_match)) < 1630 BUILD_BUG_ON(sizeof(struct ebt_entry_match) <
1615 COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt))); 1631 sizeof(struct compat_ebt_entry_mwt));
1616 return (int) XT_ALIGN(sizeof(struct ebt_entry_match)) - 1632 return (int) sizeof(struct ebt_entry_match) -
1617 COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt)); 1633 sizeof(struct compat_ebt_entry_mwt);
1618} 1634}
1619 1635
1620static int ebt_compat_match_offset(const struct xt_match *match, 1636static int ebt_compat_match_offset(const struct xt_match *match,
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index eaf05de37f75..6de981270566 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -261,7 +261,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
261 if (!reject6_br_csum_ok(oldskb, hook)) 261 if (!reject6_br_csum_ok(oldskb, hook))
262 return; 262 return;
263 263
264 nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + 264 nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) +
265 LL_MAX_HEADER + len, GFP_ATOMIC); 265 LL_MAX_HEADER + len, GFP_ATOMIC);
266 if (!nskb) 266 if (!nskb)
267 return; 267 return;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index c7991867d622..a6fb1b3bcad9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,11 +934,15 @@ static int caif_release(struct socket *sock)
934} 934}
935 935
936/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ 936/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
937static __poll_t caif_poll_mask(struct socket *sock, __poll_t events) 937static __poll_t caif_poll(struct file *file,
938 struct socket *sock, poll_table *wait)
938{ 939{
939 struct sock *sk = sock->sk; 940 struct sock *sk = sock->sk;
941 __poll_t mask;
940 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 942 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
941 __poll_t mask = 0; 943
944 sock_poll_wait(file, sk_sleep(sk), wait);
945 mask = 0;
942 946
943 /* exceptional events? */ 947 /* exceptional events? */
944 if (sk->sk_err) 948 if (sk->sk_err)
@@ -972,7 +976,7 @@ static const struct proto_ops caif_seqpacket_ops = {
972 .socketpair = sock_no_socketpair, 976 .socketpair = sock_no_socketpair,
973 .accept = sock_no_accept, 977 .accept = sock_no_accept,
974 .getname = sock_no_getname, 978 .getname = sock_no_getname,
975 .poll_mask = caif_poll_mask, 979 .poll = caif_poll,
976 .ioctl = sock_no_ioctl, 980 .ioctl = sock_no_ioctl,
977 .listen = sock_no_listen, 981 .listen = sock_no_listen,
978 .shutdown = sock_no_shutdown, 982 .shutdown = sock_no_shutdown,
@@ -993,7 +997,7 @@ static const struct proto_ops caif_stream_ops = {
993 .socketpair = sock_no_socketpair, 997 .socketpair = sock_no_socketpair,
994 .accept = sock_no_accept, 998 .accept = sock_no_accept,
995 .getname = sock_no_getname, 999 .getname = sock_no_getname,
996 .poll_mask = caif_poll_mask, 1000 .poll = caif_poll,
997 .ioctl = sock_no_ioctl, 1001 .ioctl = sock_no_ioctl,
998 .listen = sock_no_listen, 1002 .listen = sock_no_listen,
999 .shutdown = sock_no_shutdown, 1003 .shutdown = sock_no_shutdown,
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 97fedff3f0c4..0af8f0db892a 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -923,8 +923,9 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
923 923
924 /* create array for CAN frames and copy the data */ 924 /* create array for CAN frames and copy the data */
925 if (msg_head->nframes > 1) { 925 if (msg_head->nframes > 1) {
926 op->frames = kmalloc(msg_head->nframes * op->cfsiz, 926 op->frames = kmalloc_array(msg_head->nframes,
927 GFP_KERNEL); 927 op->cfsiz,
928 GFP_KERNEL);
928 if (!op->frames) { 929 if (!op->frames) {
929 kfree(op); 930 kfree(op);
930 return -ENOMEM; 931 return -ENOMEM;
@@ -1095,15 +1096,17 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
1095 1096
1096 if (msg_head->nframes > 1) { 1097 if (msg_head->nframes > 1) {
1097 /* create array for CAN frames and copy the data */ 1098 /* create array for CAN frames and copy the data */
1098 op->frames = kmalloc(msg_head->nframes * op->cfsiz, 1099 op->frames = kmalloc_array(msg_head->nframes,
1099 GFP_KERNEL); 1100 op->cfsiz,
1101 GFP_KERNEL);
1100 if (!op->frames) { 1102 if (!op->frames) {
1101 kfree(op); 1103 kfree(op);
1102 return -ENOMEM; 1104 return -ENOMEM;
1103 } 1105 }
1104 1106
1105 /* create and init array for received CAN frames */ 1107 /* create and init array for received CAN frames */
1106 op->last_frames = kzalloc(msg_head->nframes * op->cfsiz, 1108 op->last_frames = kcalloc(msg_head->nframes,
1109 op->cfsiz,
1107 GFP_KERNEL); 1110 GFP_KERNEL);
1108 if (!op->last_frames) { 1111 if (!op->last_frames) {
1109 kfree(op->frames); 1112 kfree(op->frames);
@@ -1657,7 +1660,7 @@ static const struct proto_ops bcm_ops = {
1657 .socketpair = sock_no_socketpair, 1660 .socketpair = sock_no_socketpair,
1658 .accept = sock_no_accept, 1661 .accept = sock_no_accept,
1659 .getname = sock_no_getname, 1662 .getname = sock_no_getname,
1660 .poll_mask = datagram_poll_mask, 1663 .poll = datagram_poll,
1661 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ 1664 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
1662 .listen = sock_no_listen, 1665 .listen = sock_no_listen,
1663 .shutdown = sock_no_shutdown, 1666 .shutdown = sock_no_shutdown,
diff --git a/net/can/raw.c b/net/can/raw.c
index fd7e2f49ea6a..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -843,7 +843,7 @@ static const struct proto_ops raw_ops = {
843 .socketpair = sock_no_socketpair, 843 .socketpair = sock_no_socketpair,
844 .accept = sock_no_accept, 844 .accept = sock_no_accept,
845 .getname = raw_getname, 845 .getname = raw_getname,
846 .poll_mask = datagram_poll_mask, 846 .poll = datagram_poll,
847 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ 847 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
848 .listen = sock_no_listen, 848 .listen = sock_no_listen,
849 .shutdown = sock_no_shutdown, 849 .shutdown = sock_no_shutdown,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3b3d33ea9ed8..c6413c360771 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -168,12 +168,6 @@ static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
168static struct lock_class_key socket_class; 168static struct lock_class_key socket_class;
169#endif 169#endif
170 170
171/*
172 * When skipping (ignoring) a block of input we read it into a "skip
173 * buffer," which is this many bytes in size.
174 */
175#define SKIP_BUF_SIZE 1024
176
177static void queue_con(struct ceph_connection *con); 171static void queue_con(struct ceph_connection *con);
178static void cancel_con(struct ceph_connection *con); 172static void cancel_con(struct ceph_connection *con);
179static void ceph_con_workfn(struct work_struct *); 173static void ceph_con_workfn(struct work_struct *);
@@ -520,12 +514,18 @@ static int ceph_tcp_connect(struct ceph_connection *con)
520 return 0; 514 return 0;
521} 515}
522 516
517/*
518 * If @buf is NULL, discard up to @len bytes.
519 */
523static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) 520static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
524{ 521{
525 struct kvec iov = {buf, len}; 522 struct kvec iov = {buf, len};
526 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 523 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
527 int r; 524 int r;
528 525
526 if (!buf)
527 msg.msg_flags |= MSG_TRUNC;
528
529 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); 529 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
530 r = sock_recvmsg(sock, &msg, msg.msg_flags); 530 r = sock_recvmsg(sock, &msg, msg.msg_flags);
531 if (r == -EAGAIN) 531 if (r == -EAGAIN)
@@ -2575,9 +2575,6 @@ static int try_write(struct ceph_connection *con)
2575 con->state != CON_STATE_OPEN) 2575 con->state != CON_STATE_OPEN)
2576 return 0; 2576 return 0;
2577 2577
2578more:
2579 dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
2580
2581 /* open the socket first? */ 2578 /* open the socket first? */
2582 if (con->state == CON_STATE_PREOPEN) { 2579 if (con->state == CON_STATE_PREOPEN) {
2583 BUG_ON(con->sock); 2580 BUG_ON(con->sock);
@@ -2598,7 +2595,8 @@ more:
2598 } 2595 }
2599 } 2596 }
2600 2597
2601more_kvec: 2598more:
2599 dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
2602 BUG_ON(!con->sock); 2600 BUG_ON(!con->sock);
2603 2601
2604 /* kvec data queued? */ 2602 /* kvec data queued? */
@@ -2623,7 +2621,7 @@ more_kvec:
2623 2621
2624 ret = write_partial_message_data(con); 2622 ret = write_partial_message_data(con);
2625 if (ret == 1) 2623 if (ret == 1)
2626 goto more_kvec; /* we need to send the footer, too! */ 2624 goto more; /* we need to send the footer, too! */
2627 if (ret == 0) 2625 if (ret == 0)
2628 goto out; 2626 goto out;
2629 if (ret < 0) { 2627 if (ret < 0) {
@@ -2659,8 +2657,6 @@ out:
2659 return ret; 2657 return ret;
2660} 2658}
2661 2659
2662
2663
2664/* 2660/*
2665 * Read what we can from the socket. 2661 * Read what we can from the socket.
2666 */ 2662 */
@@ -2721,16 +2717,11 @@ more:
2721 if (con->in_base_pos < 0) { 2717 if (con->in_base_pos < 0) {
2722 /* 2718 /*
2723 * skipping + discarding content. 2719 * skipping + discarding content.
2724 *
2725 * FIXME: there must be a better way to do this!
2726 */ 2720 */
2727 static char buf[SKIP_BUF_SIZE]; 2721 ret = ceph_tcp_recvmsg(con->sock, NULL, -con->in_base_pos);
2728 int skip = min((int) sizeof (buf), -con->in_base_pos);
2729
2730 dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
2731 ret = ceph_tcp_recvmsg(con->sock, buf, skip);
2732 if (ret <= 0) 2722 if (ret <= 0)
2733 goto out; 2723 goto out;
2724 dout("skipped %d / %d bytes\n", ret, -con->in_base_pos);
2734 con->in_base_pos += ret; 2725 con->in_base_pos += ret;
2735 if (con->in_base_pos) 2726 if (con->in_base_pos)
2736 goto more; 2727 goto more;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 69a2581ddbba..a00c74f1154e 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -766,7 +766,7 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
766} 766}
767EXPORT_SYMBOL(osd_req_op_extent_dup_last); 767EXPORT_SYMBOL(osd_req_op_extent_dup_last);
768 768
769void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 769int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
770 u16 opcode, const char *class, const char *method) 770 u16 opcode, const char *class, const char *method)
771{ 771{
772 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, 772 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
@@ -778,7 +778,9 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
778 BUG_ON(opcode != CEPH_OSD_OP_CALL); 778 BUG_ON(opcode != CEPH_OSD_OP_CALL);
779 779
780 pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); 780 pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
781 BUG_ON(!pagelist); 781 if (!pagelist)
782 return -ENOMEM;
783
782 ceph_pagelist_init(pagelist); 784 ceph_pagelist_init(pagelist);
783 785
784 op->cls.class_name = class; 786 op->cls.class_name = class;
@@ -798,6 +800,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
798 osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist); 800 osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
799 801
800 op->indata_len = payload_len; 802 op->indata_len = payload_len;
803 return 0;
801} 804}
802EXPORT_SYMBOL(osd_req_op_cls_init); 805EXPORT_SYMBOL(osd_req_op_cls_init);
803 806
@@ -1026,7 +1029,6 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
1026 truncate_size, truncate_seq); 1029 truncate_size, truncate_seq);
1027 } 1030 }
1028 1031
1029 req->r_abort_on_full = true;
1030 req->r_flags = flags; 1032 req->r_flags = flags;
1031 req->r_base_oloc.pool = layout->pool_id; 1033 req->r_base_oloc.pool = layout->pool_id;
1032 req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); 1034 req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns);
@@ -1054,6 +1056,38 @@ EXPORT_SYMBOL(ceph_osdc_new_request);
1054DEFINE_RB_FUNCS(request, struct ceph_osd_request, r_tid, r_node) 1056DEFINE_RB_FUNCS(request, struct ceph_osd_request, r_tid, r_node)
1055DEFINE_RB_FUNCS(request_mc, struct ceph_osd_request, r_tid, r_mc_node) 1057DEFINE_RB_FUNCS(request_mc, struct ceph_osd_request, r_tid, r_mc_node)
1056 1058
1059/*
1060 * Call @fn on each OSD request as long as @fn returns 0.
1061 */
1062static void for_each_request(struct ceph_osd_client *osdc,
1063 int (*fn)(struct ceph_osd_request *req, void *arg),
1064 void *arg)
1065{
1066 struct rb_node *n, *p;
1067
1068 for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
1069 struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
1070
1071 for (p = rb_first(&osd->o_requests); p; ) {
1072 struct ceph_osd_request *req =
1073 rb_entry(p, struct ceph_osd_request, r_node);
1074
1075 p = rb_next(p);
1076 if (fn(req, arg))
1077 return;
1078 }
1079 }
1080
1081 for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
1082 struct ceph_osd_request *req =
1083 rb_entry(p, struct ceph_osd_request, r_node);
1084
1085 p = rb_next(p);
1086 if (fn(req, arg))
1087 return;
1088 }
1089}
1090
1057static bool osd_homeless(struct ceph_osd *osd) 1091static bool osd_homeless(struct ceph_osd *osd)
1058{ 1092{
1059 return osd->o_osd == CEPH_HOMELESS_OSD; 1093 return osd->o_osd == CEPH_HOMELESS_OSD;
@@ -1395,7 +1429,6 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
1395 bool recovery_deletes = ceph_osdmap_flag(osdc, 1429 bool recovery_deletes = ceph_osdmap_flag(osdc,
1396 CEPH_OSDMAP_RECOVERY_DELETES); 1430 CEPH_OSDMAP_RECOVERY_DELETES);
1397 enum calc_target_result ct_res; 1431 enum calc_target_result ct_res;
1398 int ret;
1399 1432
1400 t->epoch = osdc->osdmap->epoch; 1433 t->epoch = osdc->osdmap->epoch;
1401 pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool); 1434 pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
@@ -1431,14 +1464,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
1431 } 1464 }
1432 } 1465 }
1433 1466
1434 ret = __ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc, 1467 __ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc, &pgid);
1435 &pgid);
1436 if (ret) {
1437 WARN_ON(ret != -ENOENT);
1438 t->osd = CEPH_HOMELESS_OSD;
1439 ct_res = CALC_TARGET_POOL_DNE;
1440 goto out;
1441 }
1442 last_pgid.pool = pgid.pool; 1468 last_pgid.pool = pgid.pool;
1443 last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask); 1469 last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask);
1444 1470
@@ -2161,9 +2187,9 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
2161 struct ceph_osd_client *osdc = req->r_osdc; 2187 struct ceph_osd_client *osdc = req->r_osdc;
2162 struct ceph_osd *osd; 2188 struct ceph_osd *osd;
2163 enum calc_target_result ct_res; 2189 enum calc_target_result ct_res;
2190 int err = 0;
2164 bool need_send = false; 2191 bool need_send = false;
2165 bool promoted = false; 2192 bool promoted = false;
2166 bool need_abort = false;
2167 2193
2168 WARN_ON(req->r_tid); 2194 WARN_ON(req->r_tid);
2169 dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); 2195 dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
@@ -2179,7 +2205,10 @@ again:
2179 goto promote; 2205 goto promote;
2180 } 2206 }
2181 2207
2182 if (osdc->osdmap->epoch < osdc->epoch_barrier) { 2208 if (osdc->abort_err) {
2209 dout("req %p abort_err %d\n", req, osdc->abort_err);
2210 err = osdc->abort_err;
2211 } else if (osdc->osdmap->epoch < osdc->epoch_barrier) {
2183 dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch, 2212 dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch,
2184 osdc->epoch_barrier); 2213 osdc->epoch_barrier);
2185 req->r_t.paused = true; 2214 req->r_t.paused = true;
@@ -2200,11 +2229,13 @@ again:
2200 (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || 2229 (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
2201 pool_full(osdc, req->r_t.base_oloc.pool))) { 2230 pool_full(osdc, req->r_t.base_oloc.pool))) {
2202 dout("req %p full/pool_full\n", req); 2231 dout("req %p full/pool_full\n", req);
2203 pr_warn_ratelimited("FULL or reached pool quota\n"); 2232 if (osdc->abort_on_full) {
2204 req->r_t.paused = true; 2233 err = -ENOSPC;
2205 maybe_request_map(osdc); 2234 } else {
2206 if (req->r_abort_on_full) 2235 pr_warn_ratelimited("FULL or reached pool quota\n");
2207 need_abort = true; 2236 req->r_t.paused = true;
2237 maybe_request_map(osdc);
2238 }
2208 } else if (!osd_homeless(osd)) { 2239 } else if (!osd_homeless(osd)) {
2209 need_send = true; 2240 need_send = true;
2210 } else { 2241 } else {
@@ -2221,11 +2252,11 @@ again:
2221 link_request(osd, req); 2252 link_request(osd, req);
2222 if (need_send) 2253 if (need_send)
2223 send_request(req); 2254 send_request(req);
2224 else if (need_abort) 2255 else if (err)
2225 complete_request(req, -ENOSPC); 2256 complete_request(req, err);
2226 mutex_unlock(&osd->lock); 2257 mutex_unlock(&osd->lock);
2227 2258
2228 if (ct_res == CALC_TARGET_POOL_DNE) 2259 if (!err && ct_res == CALC_TARGET_POOL_DNE)
2229 send_map_check(req); 2260 send_map_check(req);
2230 2261
2231 if (promoted) 2262 if (promoted)
@@ -2281,11 +2312,21 @@ static void finish_request(struct ceph_osd_request *req)
2281 2312
2282static void __complete_request(struct ceph_osd_request *req) 2313static void __complete_request(struct ceph_osd_request *req)
2283{ 2314{
2284 if (req->r_callback) { 2315 dout("%s req %p tid %llu cb %pf result %d\n", __func__, req,
2285 dout("%s req %p tid %llu cb %pf result %d\n", __func__, req, 2316 req->r_tid, req->r_callback, req->r_result);
2286 req->r_tid, req->r_callback, req->r_result); 2317
2318 if (req->r_callback)
2287 req->r_callback(req); 2319 req->r_callback(req);
2288 } 2320 complete_all(&req->r_completion);
2321 ceph_osdc_put_request(req);
2322}
2323
2324static void complete_request_workfn(struct work_struct *work)
2325{
2326 struct ceph_osd_request *req =
2327 container_of(work, struct ceph_osd_request, r_complete_work);
2328
2329 __complete_request(req);
2289} 2330}
2290 2331
2291/* 2332/*
@@ -2297,9 +2338,9 @@ static void complete_request(struct ceph_osd_request *req, int err)
2297 2338
2298 req->r_result = err; 2339 req->r_result = err;
2299 finish_request(req); 2340 finish_request(req);
2300 __complete_request(req); 2341
2301 complete_all(&req->r_completion); 2342 INIT_WORK(&req->r_complete_work, complete_request_workfn);
2302 ceph_osdc_put_request(req); 2343 queue_work(req->r_osdc->completion_wq, &req->r_complete_work);
2303} 2344}
2304 2345
2305static void cancel_map_check(struct ceph_osd_request *req) 2346static void cancel_map_check(struct ceph_osd_request *req)
@@ -2336,6 +2377,28 @@ static void abort_request(struct ceph_osd_request *req, int err)
2336 complete_request(req, err); 2377 complete_request(req, err);
2337} 2378}
2338 2379
2380static int abort_fn(struct ceph_osd_request *req, void *arg)
2381{
2382 int err = *(int *)arg;
2383
2384 abort_request(req, err);
2385 return 0; /* continue iteration */
2386}
2387
2388/*
2389 * Abort all in-flight requests with @err and arrange for all future
2390 * requests to be failed immediately.
2391 */
2392void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err)
2393{
2394 dout("%s osdc %p err %d\n", __func__, osdc, err);
2395 down_write(&osdc->lock);
2396 for_each_request(osdc, abort_fn, &err);
2397 osdc->abort_err = err;
2398 up_write(&osdc->lock);
2399}
2400EXPORT_SYMBOL(ceph_osdc_abort_requests);
2401
2339static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) 2402static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
2340{ 2403{
2341 if (likely(eb > osdc->epoch_barrier)) { 2404 if (likely(eb > osdc->epoch_barrier)) {
@@ -2363,6 +2426,30 @@ void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
2363EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier); 2426EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier);
2364 2427
2365/* 2428/*
2429 * We can end up releasing caps as a result of abort_request().
2430 * In that case, we probably want to ensure that the cap release message
2431 * has an updated epoch barrier in it, so set the epoch barrier prior to
2432 * aborting the first request.
2433 */
2434static int abort_on_full_fn(struct ceph_osd_request *req, void *arg)
2435{
2436 struct ceph_osd_client *osdc = req->r_osdc;
2437 bool *victims = arg;
2438
2439 if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
2440 (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
2441 pool_full(osdc, req->r_t.base_oloc.pool))) {
2442 if (!*victims) {
2443 update_epoch_barrier(osdc, osdc->osdmap->epoch);
2444 *victims = true;
2445 }
2446 abort_request(req, -ENOSPC);
2447 }
2448
2449 return 0; /* continue iteration */
2450}
2451
2452/*
2366 * Drop all pending requests that are stalled waiting on a full condition to 2453 * Drop all pending requests that are stalled waiting on a full condition to
2367 * clear, and complete them with ENOSPC as the return code. Set the 2454 * clear, and complete them with ENOSPC as the return code. Set the
2368 * osdc->epoch_barrier to the latest map epoch that we've seen if any were 2455 * osdc->epoch_barrier to the latest map epoch that we've seen if any were
@@ -2370,61 +2457,11 @@ EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier);
2370 */ 2457 */
2371static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc) 2458static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
2372{ 2459{
2373 struct rb_node *n;
2374 bool victims = false; 2460 bool victims = false;
2375 2461
2376 dout("enter abort_on_full\n"); 2462 if (osdc->abort_on_full &&
2377 2463 (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc)))
2378 if (!ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !have_pool_full(osdc)) 2464 for_each_request(osdc, abort_on_full_fn, &victims);
2379 goto out;
2380
2381 /* Scan list and see if there is anything to abort */
2382 for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
2383 struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
2384 struct rb_node *m;
2385
2386 m = rb_first(&osd->o_requests);
2387 while (m) {
2388 struct ceph_osd_request *req = rb_entry(m,
2389 struct ceph_osd_request, r_node);
2390 m = rb_next(m);
2391
2392 if (req->r_abort_on_full) {
2393 victims = true;
2394 break;
2395 }
2396 }
2397 if (victims)
2398 break;
2399 }
2400
2401 if (!victims)
2402 goto out;
2403
2404 /*
2405 * Update the barrier to current epoch if it's behind that point,
2406 * since we know we have some calls to be aborted in the tree.
2407 */
2408 update_epoch_barrier(osdc, osdc->osdmap->epoch);
2409
2410 for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
2411 struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
2412 struct rb_node *m;
2413
2414 m = rb_first(&osd->o_requests);
2415 while (m) {
2416 struct ceph_osd_request *req = rb_entry(m,
2417 struct ceph_osd_request, r_node);
2418 m = rb_next(m);
2419
2420 if (req->r_abort_on_full &&
2421 (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
2422 pool_full(osdc, req->r_t.target_oloc.pool)))
2423 abort_request(req, -ENOSPC);
2424 }
2425 }
2426out:
2427 dout("return abort_on_full barrier=%u\n", osdc->epoch_barrier);
2428} 2465}
2429 2466
2430static void check_pool_dne(struct ceph_osd_request *req) 2467static void check_pool_dne(struct ceph_osd_request *req)
@@ -3541,8 +3578,6 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
3541 up_read(&osdc->lock); 3578 up_read(&osdc->lock);
3542 3579
3543 __complete_request(req); 3580 __complete_request(req);
3544 complete_all(&req->r_completion);
3545 ceph_osdc_put_request(req);
3546 return; 3581 return;
3547 3582
3548fail_request: 3583fail_request:
@@ -4927,7 +4962,10 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
4927 if (ret) 4962 if (ret)
4928 goto out_put_req; 4963 goto out_put_req;
4929 4964
4930 osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method); 4965 ret = osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
4966 if (ret)
4967 goto out_put_req;
4968
4931 if (req_page) 4969 if (req_page)
4932 osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len, 4970 osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len,
4933 0, false, false); 4971 0, false, false);
@@ -4996,6 +5034,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
4996 if (!osdc->notify_wq) 5034 if (!osdc->notify_wq)
4997 goto out_msgpool_reply; 5035 goto out_msgpool_reply;
4998 5036
5037 osdc->completion_wq = create_singlethread_workqueue("ceph-completion");
5038 if (!osdc->completion_wq)
5039 goto out_notify_wq;
5040
4999 schedule_delayed_work(&osdc->timeout_work, 5041 schedule_delayed_work(&osdc->timeout_work,
5000 osdc->client->options->osd_keepalive_timeout); 5042 osdc->client->options->osd_keepalive_timeout);
5001 schedule_delayed_work(&osdc->osds_timeout_work, 5043 schedule_delayed_work(&osdc->osds_timeout_work,
@@ -5003,6 +5045,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
5003 5045
5004 return 0; 5046 return 0;
5005 5047
5048out_notify_wq:
5049 destroy_workqueue(osdc->notify_wq);
5006out_msgpool_reply: 5050out_msgpool_reply:
5007 ceph_msgpool_destroy(&osdc->msgpool_op_reply); 5051 ceph_msgpool_destroy(&osdc->msgpool_op_reply);
5008out_msgpool: 5052out_msgpool:
@@ -5017,7 +5061,7 @@ out:
5017 5061
5018void ceph_osdc_stop(struct ceph_osd_client *osdc) 5062void ceph_osdc_stop(struct ceph_osd_client *osdc)
5019{ 5063{
5020 flush_workqueue(osdc->notify_wq); 5064 destroy_workqueue(osdc->completion_wq);
5021 destroy_workqueue(osdc->notify_wq); 5065 destroy_workqueue(osdc->notify_wq);
5022 cancel_delayed_work_sync(&osdc->timeout_work); 5066 cancel_delayed_work_sync(&osdc->timeout_work);
5023 cancel_delayed_work_sync(&osdc->osds_timeout_work); 5067 cancel_delayed_work_sync(&osdc->osds_timeout_work);
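
The osd_client.c hunks above do three related things: completion of aborted or failed requests moves onto a new "ceph-completion" workqueue (created in ceph_osdc_init() and destroyed first in ceph_osdc_stop()), full/pool-full handling gains an osdc->abort_on_full fast-fail path that completes the request with -ENOSPC instead of pausing it, and the two open-coded scans of the per-OSD request trees are folded into a for_each_request() callback iterator used by the new ceph_osdc_abort_requests() and the slimmed-down ceph_osdc_abort_on_full(). A minimal sketch of what such a callback iterator typically looks like; the function name, the locking and the treatment of homeless requests are assumptions here, not the actual libceph helper:

/*
 * Sketch only: walk every in-flight request and hand it to a callback
 * that may abort (and therefore unlink) it, which is why the next node
 * is fetched before the callback runs.
 */
static void walk_requests(struct ceph_osd_client *osdc,
                          int (*fn)(struct ceph_osd_request *req, void *arg),
                          void *arg)
{
        struct rb_node *n;

        for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
                struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
                struct rb_node *m = rb_first(&osd->o_requests);

                while (m) {
                        struct ceph_osd_request *req =
                                rb_entry(m, struct ceph_osd_request, r_node);

                        m = rb_next(m);         /* fn() may unlink req */
                        if (fn(req, arg))
                                return;         /* non-zero stops the walk */
                }
        }
}

Both abort_fn() and abort_on_full_fn() in the hunks above return 0, so the walk always visits every request.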
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 9645ffd6acfb..98c0ff3d6441 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1299,8 +1299,9 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
1299 if (!map->osd_primary_affinity) { 1299 if (!map->osd_primary_affinity) {
1300 int i; 1300 int i;
1301 1301
1302 map->osd_primary_affinity = kmalloc(map->max_osd*sizeof(u32), 1302 map->osd_primary_affinity = kmalloc_array(map->max_osd,
1303 GFP_NOFS); 1303 sizeof(u32),
1304 GFP_NOFS);
1304 if (!map->osd_primary_affinity) 1305 if (!map->osd_primary_affinity)
1305 return -ENOMEM; 1306 return -ENOMEM;
1306 1307
@@ -2145,10 +2146,10 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
2145 * Should only be called with target_oid and target_oloc (as opposed to 2146 * Should only be called with target_oid and target_oloc (as opposed to
2146 * base_oid and base_oloc), since tiering isn't taken into account. 2147 * base_oid and base_oloc), since tiering isn't taken into account.
2147 */ 2148 */
2148int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, 2149void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
2149 const struct ceph_object_id *oid, 2150 const struct ceph_object_id *oid,
2150 const struct ceph_object_locator *oloc, 2151 const struct ceph_object_locator *oloc,
2151 struct ceph_pg *raw_pgid) 2152 struct ceph_pg *raw_pgid)
2152{ 2153{
2153 WARN_ON(pi->id != oloc->pool); 2154 WARN_ON(pi->id != oloc->pool);
2154 2155
@@ -2164,11 +2165,8 @@ int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
2164 int nsl = oloc->pool_ns->len; 2165 int nsl = oloc->pool_ns->len;
2165 size_t total = nsl + 1 + oid->name_len; 2166 size_t total = nsl + 1 + oid->name_len;
2166 2167
2167 if (total > sizeof(stack_buf)) { 2168 if (total > sizeof(stack_buf))
2168 buf = kmalloc(total, GFP_NOIO); 2169 buf = kmalloc(total, GFP_NOIO | __GFP_NOFAIL);
2169 if (!buf)
2170 return -ENOMEM;
2171 }
2172 memcpy(buf, oloc->pool_ns->str, nsl); 2170 memcpy(buf, oloc->pool_ns->str, nsl);
2173 buf[nsl] = '\037'; 2171 buf[nsl] = '\037';
2174 memcpy(buf + nsl + 1, oid->name, oid->name_len); 2172 memcpy(buf + nsl + 1, oid->name, oid->name_len);
@@ -2180,7 +2178,6 @@ int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
2180 oid->name, nsl, oloc->pool_ns->str, 2178 oid->name, nsl, oloc->pool_ns->str,
2181 raw_pgid->pool, raw_pgid->seed); 2179 raw_pgid->pool, raw_pgid->seed);
2182 } 2180 }
2183 return 0;
2184} 2181}
2185 2182
2186int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, 2183int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
@@ -2194,7 +2191,8 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
2194 if (!pi) 2191 if (!pi)
2195 return -ENOENT; 2192 return -ENOENT;
2196 2193
2197 return __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid); 2194 __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
2195 return 0;
2198} 2196}
2199EXPORT_SYMBOL(ceph_object_locator_to_pg); 2197EXPORT_SYMBOL(ceph_object_locator_to_pg);
2200 2198
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index a3d0adc828e6..e560d3975f41 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -20,7 +20,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
20 int got = 0; 20 int got = 0;
21 int rc = 0; 21 int rc = 0;
22 22
23 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); 23 pages = kmalloc_array(num_pages, sizeof(*pages), GFP_NOFS);
24 if (!pages) 24 if (!pages)
25 return ERR_PTR(-ENOMEM); 25 return ERR_PTR(-ENOMEM);
26 26
@@ -74,7 +74,7 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
74 struct page **pages; 74 struct page **pages;
75 int i; 75 int i;
76 76
77 pages = kmalloc(sizeof(*pages) * num_pages, flags); 77 pages = kmalloc_array(num_pages, sizeof(*pages), flags);
78 if (!pages) 78 if (!pages)
79 return ERR_PTR(-ENOMEM); 79 return ERR_PTR(-ENOMEM);
80 for (i = 0; i < num_pages; i++) { 80 for (i = 0; i < num_pages; i++) {
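
This pagevec.c hunk, like the osdmap.c change above it and the dev.c, ethtool.c, pktgen.c, dcbnl.c, ccid2.c, nl-phy.c, fib_frontend.c and route.c hunks below, converts open-coded kmalloc(n * size) / kzalloc(n * size) allocations to kmalloc_array()/kcalloc(), and vzalloc(n * size) to vzalloc(array_size(n, size)). The point is multiplication-overflow safety: if n * size wraps, the helpers make the allocation fail instead of quietly handing back a buffer smaller than the caller believes. Roughly, simplified from include/linux/slab.h and include/linux/overflow.h:

/* simplified sketch of the overflow-checked helpers used in these hunks */
static inline void *kmalloc_array_sketch(size_t n, size_t size, gfp_t flags)
{
        size_t bytes;

        if (unlikely(check_mul_overflow(n, size, &bytes)))
                return NULL;            /* would wrap: refuse outright */
        return kmalloc(bytes, flags);
}

static inline size_t array_size_sketch(size_t a, size_t b)
{
        size_t bytes;

        if (check_mul_overflow(a, b, &bytes))
                return SIZE_MAX;        /* saturate so vzalloc() and friends fail */
        return bytes;
}

kcalloc() is the same idea with __GFP_ZERO added, which is why the kzalloc() call sites map to it.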
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f19bf3dc2bd6..9938952c5c78 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -819,8 +819,9 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
819 819
820/** 820/**
821 * datagram_poll - generic datagram poll 821 * datagram_poll - generic datagram poll
822 * @file: file struct
822 * @sock: socket 823 * @sock: socket
823 * @events to wait for 824 * @wait: poll table
824 * 825 *
825 * Datagram poll: Again totally generic. This also handles 826 * Datagram poll: Again totally generic. This also handles
826 * sequenced packet sockets providing the socket receive queue 827 * sequenced packet sockets providing the socket receive queue
@@ -830,10 +831,14 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
830 * and you use a different write policy from sock_writeable() 831 * and you use a different write policy from sock_writeable()
831 * then please supply your own write_space callback. 832 * then please supply your own write_space callback.
832 */ 833 */
833__poll_t datagram_poll_mask(struct socket *sock, __poll_t events) 834__poll_t datagram_poll(struct file *file, struct socket *sock,
835 poll_table *wait)
834{ 836{
835 struct sock *sk = sock->sk; 837 struct sock *sk = sock->sk;
836 __poll_t mask = 0; 838 __poll_t mask;
839
840 sock_poll_wait(file, sk_sleep(sk), wait);
841 mask = 0;
837 842
838 /* exceptional events? */ 843 /* exceptional events? */
839 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 844 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -866,4 +871,4 @@ __poll_t datagram_poll_mask(struct socket *sock, __poll_t events)
866 871
867 return mask; 872 return mask;
868} 873}
869EXPORT_SYMBOL(datagram_poll_mask); 874EXPORT_SYMBOL(datagram_poll);
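
This datagram_poll hunk, together with the dccp, decnet, ieee802154, af_inet and tcp hunks further down, moves the socket layer back from the short-lived ->poll_mask interface to the classic proto_ops->poll() signature, in which the handler itself registers the caller on the socket's wait queue via sock_poll_wait() before computing the event mask. The general shape of such a handler, as a sketch (the particular queue checks are illustrative, not any one protocol's policy):

static __poll_t example_poll(struct file *file, struct socket *sock,
                             poll_table *wait)
{
        struct sock *sk = sock->sk;
        __poll_t mask = 0;

        /* hook the caller's poll_table into this socket's wait queue */
        sock_poll_wait(file, sk_sleep(sk), wait);

        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
                mask |= EPOLLERR;
        if (!skb_queue_empty(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
        if (sock_writeable(sk))
                mask |= EPOLLOUT | EPOLLWRNORM;

        return mask;
}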
diff --git a/net/core/dev.c b/net/core/dev.c
index 6e18242a1cae..a5aa1c7444e6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8643,7 +8643,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
8643 /* We get here if we can't use the current device name */ 8643 /* We get here if we can't use the current device name */
8644 if (!pat) 8644 if (!pat)
8645 goto out; 8645 goto out;
8646 if (dev_get_valid_name(net, dev, pat) < 0) 8646 err = dev_get_valid_name(net, dev, pat);
8647 if (err < 0)
8647 goto out; 8648 goto out;
8648 } 8649 }
8649 8650
@@ -8655,7 +8656,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
8655 dev_close(dev); 8656 dev_close(dev);
8656 8657
8657 /* And unlink it from device chain */ 8658 /* And unlink it from device chain */
8658 err = -ENODEV;
8659 unlist_netdevice(dev); 8659 unlist_netdevice(dev);
8660 8660
8661 synchronize_net(); 8661 synchronize_net();
@@ -8823,7 +8823,7 @@ static struct hlist_head * __net_init netdev_create_hash(void)
8823 int i; 8823 int i;
8824 struct hlist_head *hash; 8824 struct hlist_head *hash;
8825 8825
8826 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); 8826 hash = kmalloc_array(NETDEV_HASHENTRIES, sizeof(*hash), GFP_KERNEL);
8827 if (hash != NULL) 8827 if (hash != NULL)
8828 for (i = 0; i < NETDEV_HASHENTRIES; i++) 8828 for (i = 0; i < NETDEV_HASHENTRIES; i++)
8829 INIT_HLIST_HEAD(&hash[i]); 8829 INIT_HLIST_HEAD(&hash[i]);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index a04e1e88bf3a..50537ff961a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -285,16 +285,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
285 if (ifr->ifr_qlen < 0) 285 if (ifr->ifr_qlen < 0)
286 return -EINVAL; 286 return -EINVAL;
287 if (dev->tx_queue_len ^ ifr->ifr_qlen) { 287 if (dev->tx_queue_len ^ ifr->ifr_qlen) {
288 unsigned int orig_len = dev->tx_queue_len; 288 err = dev_change_tx_queue_len(dev, ifr->ifr_qlen);
289 289 if (err)
290 dev->tx_queue_len = ifr->ifr_qlen;
291 err = call_netdevice_notifiers(
292 NETDEV_CHANGE_TX_QUEUE_LEN, dev);
293 err = notifier_to_errno(err);
294 if (err) {
295 dev->tx_queue_len = orig_len;
296 return err; 290 return err;
297 }
298 } 291 }
299 return 0; 292 return 0;
300 293
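
The dev_ifsioc() hunk replaces an open-coded set/notify/roll-back sequence with the dev_change_tx_queue_len() helper so that logic lives in one place. What the removed lines did, and roughly what the helper now does on their behalf (simplified sketch, leaving out the helper's additional validation; new_len stands for the requested value):

/* sketch of the change-notify-rollback pattern now inside
 * dev_change_tx_queue_len() */
unsigned int orig_len = dev->tx_queue_len;
int err;

dev->tx_queue_len = new_len;
err = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
err = notifier_to_errno(err);
if (err)
        dev->tx_queue_len = orig_len;   /* a notifier vetoed the change */
return err;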
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index c15075dc7572..e677a20180cf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -911,7 +911,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
911 memset(&info, 0, sizeof(info)); 911 memset(&info, 0, sizeof(info));
912 info.cmd = ETHTOOL_GSSET_INFO; 912 info.cmd = ETHTOOL_GSSET_INFO;
913 913
914 info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER); 914 info_buf = kcalloc(n_bits, sizeof(u32), GFP_USER);
915 if (!info_buf) 915 if (!info_buf)
916 return -ENOMEM; 916 return -ENOMEM;
917 917
@@ -1017,7 +1017,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
1017 if (info.cmd == ETHTOOL_GRXCLSRLALL) { 1017 if (info.cmd == ETHTOOL_GRXCLSRLALL) {
1018 if (info.rule_cnt > 0) { 1018 if (info.rule_cnt > 0) {
1019 if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) 1019 if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
1020 rule_buf = kzalloc(info.rule_cnt * sizeof(u32), 1020 rule_buf = kcalloc(info.rule_cnt, sizeof(u32),
1021 GFP_USER); 1021 GFP_USER);
1022 if (!rule_buf) 1022 if (!rule_buf)
1023 return -ENOMEM; 1023 return -ENOMEM;
@@ -1816,7 +1816,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
1816 return -EFAULT; 1816 return -EFAULT;
1817 1817
1818 test.len = test_len; 1818 test.len = test_len;
1819 data = kmalloc(test_len * sizeof(u64), GFP_USER); 1819 data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
1820 if (!data) 1820 if (!data)
1821 return -ENOMEM; 1821 return -ENOMEM;
1822 1822
@@ -1852,7 +1852,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
1852 WARN_ON_ONCE(!ret); 1852 WARN_ON_ONCE(!ret);
1853 1853
1854 gstrings.len = ret; 1854 gstrings.len = ret;
1855 data = vzalloc(gstrings.len * ETH_GSTRING_LEN); 1855 data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
1856 if (gstrings.len && !data) 1856 if (gstrings.len && !data)
1857 return -ENOMEM; 1857 return -ENOMEM;
1858 1858
@@ -1952,7 +1952,7 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
1952 return -EFAULT; 1952 return -EFAULT;
1953 1953
1954 stats.n_stats = n_stats; 1954 stats.n_stats = n_stats;
1955 data = vzalloc(n_stats * sizeof(u64)); 1955 data = vzalloc(array_size(n_stats, sizeof(u64)));
1956 if (n_stats && !data) 1956 if (n_stats && !data)
1957 return -ENOMEM; 1957 return -ENOMEM;
1958 1958
@@ -1996,7 +1996,7 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
1996 return -EFAULT; 1996 return -EFAULT;
1997 1997
1998 stats.n_stats = n_stats; 1998 stats.n_stats = n_stats;
1999 data = vzalloc(n_stats * sizeof(u64)); 1999 data = vzalloc(array_size(n_stats, sizeof(u64)));
2000 if (n_stats && !data) 2000 if (n_stats && !data)
2001 return -ENOMEM; 2001 return -ENOMEM;
2002 2002
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 126ffc5bc630..f64aa13811ea 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -416,6 +416,14 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
416 if (rule->mark && r->mark != rule->mark) 416 if (rule->mark && r->mark != rule->mark)
417 continue; 417 continue;
418 418
419 if (rule->suppress_ifgroup != -1 &&
420 r->suppress_ifgroup != rule->suppress_ifgroup)
421 continue;
422
423 if (rule->suppress_prefixlen != -1 &&
424 r->suppress_prefixlen != rule->suppress_prefixlen)
425 continue;
426
419 if (rule->mark_mask && r->mark_mask != rule->mark_mask) 427 if (rule->mark_mask && r->mark_mask != rule->mark_mask)
420 continue; 428 continue;
421 429
@@ -436,6 +444,9 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
436 if (rule->ip_proto && r->ip_proto != rule->ip_proto) 444 if (rule->ip_proto && r->ip_proto != rule->ip_proto)
437 continue; 445 continue;
438 446
447 if (rule->proto && r->proto != rule->proto)
448 continue;
449
439 if (fib_rule_port_range_set(&rule->sport_range) && 450 if (fib_rule_port_range_set(&rule->sport_range) &&
440 !fib_rule_port_range_compare(&r->sport_range, 451 !fib_rule_port_range_compare(&r->sport_range,
441 &rule->sport_range)) 452 &rule->sport_range))
@@ -645,6 +656,73 @@ errout:
645 return err; 656 return err;
646} 657}
647 658
659static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
660 struct nlattr **tb, struct fib_rule *rule)
661{
662 struct fib_rule *r;
663
664 list_for_each_entry(r, &ops->rules_list, list) {
665 if (r->action != rule->action)
666 continue;
667
668 if (r->table != rule->table)
669 continue;
670
671 if (r->pref != rule->pref)
672 continue;
673
674 if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
675 continue;
676
677 if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
678 continue;
679
680 if (r->mark != rule->mark)
681 continue;
682
683 if (r->suppress_ifgroup != rule->suppress_ifgroup)
684 continue;
685
686 if (r->suppress_prefixlen != rule->suppress_prefixlen)
687 continue;
688
689 if (r->mark_mask != rule->mark_mask)
690 continue;
691
692 if (r->tun_id != rule->tun_id)
693 continue;
694
695 if (r->fr_net != rule->fr_net)
696 continue;
697
698 if (r->l3mdev != rule->l3mdev)
699 continue;
700
701 if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
702 !uid_eq(r->uid_range.end, rule->uid_range.end))
703 continue;
704
705 if (r->ip_proto != rule->ip_proto)
706 continue;
707
708 if (r->proto != rule->proto)
709 continue;
710
711 if (!fib_rule_port_range_compare(&r->sport_range,
712 &rule->sport_range))
713 continue;
714
715 if (!fib_rule_port_range_compare(&r->dport_range,
716 &rule->dport_range))
717 continue;
718
719 if (!ops->compare(r, frh, tb))
720 continue;
721 return 1;
722 }
723 return 0;
724}
725
648int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, 726int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
649 struct netlink_ext_ack *extack) 727 struct netlink_ext_ack *extack)
650{ 728{
@@ -679,7 +757,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
679 goto errout; 757 goto errout;
680 758
681 if ((nlh->nlmsg_flags & NLM_F_EXCL) && 759 if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
682 rule_find(ops, frh, tb, rule, user_priority)) { 760 rule_exists(ops, frh, tb, rule)) {
683 err = -EEXIST; 761 err = -EEXIST;
684 goto errout_free; 762 goto errout_free;
685 } 763 }
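
The fib_rules.c hunks tighten duplicate detection for NLM_F_EXCL. rule_find() keeps its wildcard semantics, where an attribute left unset in the request matches anything (what delete lookups want), and additionally learns to honour suppress_ifgroup, suppress_prefixlen and proto when they are set. The NLM_F_EXCL check, however, now goes through the new rule_exists(), which compares every field exactly, so adding a rule that merely shares some attributes with an existing one is no longer rejected with -EEXIST. The difference in matching style, boiled down to a single field:

/* rule_find()-style wildcard match: only compare when the request set it */
if (rule->mark && r->mark != rule->mark)
        continue;

/* rule_exists()-style exact match: always compare */
if (r->mark != rule->mark)
        continue;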
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d9ba7e5965a..0ca6907d7efe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3214,20 +3214,6 @@ err:
3214} 3214}
3215EXPORT_SYMBOL_GPL(xdp_do_redirect); 3215EXPORT_SYMBOL_GPL(xdp_do_redirect);
3216 3216
3217static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
3218{
3219 unsigned int len;
3220
3221 if (unlikely(!(fwd->flags & IFF_UP)))
3222 return -ENETDOWN;
3223
3224 len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
3225 if (skb->len > len)
3226 return -EMSGSIZE;
3227
3228 return 0;
3229}
3230
3231static int xdp_do_generic_redirect_map(struct net_device *dev, 3217static int xdp_do_generic_redirect_map(struct net_device *dev,
3232 struct sk_buff *skb, 3218 struct sk_buff *skb,
3233 struct xdp_buff *xdp, 3219 struct xdp_buff *xdp,
@@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
3256 } 3242 }
3257 3243
3258 if (map->map_type == BPF_MAP_TYPE_DEVMAP) { 3244 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
3259 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) 3245 struct bpf_dtab_netdev *dst = fwd;
3246
3247 err = dev_map_generic_redirect(dst, skb, xdp_prog);
3248 if (unlikely(err))
3260 goto err; 3249 goto err;
3261 skb->dev = fwd;
3262 generic_xdp_tx(skb, xdp_prog);
3263 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { 3250 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
3264 struct xdp_sock *xs = fwd; 3251 struct xdp_sock *xs = fwd;
3265 3252
@@ -4086,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
4086 memcpy(params->smac, dev->dev_addr, ETH_ALEN); 4073 memcpy(params->smac, dev->dev_addr, ETH_ALEN);
4087 params->h_vlan_TCI = 0; 4074 params->h_vlan_TCI = 0;
4088 params->h_vlan_proto = 0; 4075 params->h_vlan_proto = 0;
4076 params->ifindex = dev->ifindex;
4089 4077
4090 return dev->ifindex; 4078 return 0;
4091} 4079}
4092#endif 4080#endif
4093 4081
@@ -4111,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4111 /* verify forwarding is enabled on this interface */ 4099 /* verify forwarding is enabled on this interface */
4112 in_dev = __in_dev_get_rcu(dev); 4100 in_dev = __in_dev_get_rcu(dev);
4113 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) 4101 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
4114 return 0; 4102 return BPF_FIB_LKUP_RET_FWD_DISABLED;
4115 4103
4116 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 4104 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4117 fl4.flowi4_iif = 1; 4105 fl4.flowi4_iif = 1;
@@ -4136,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4136 4124
4137 tb = fib_get_table(net, tbid); 4125 tb = fib_get_table(net, tbid);
4138 if (unlikely(!tb)) 4126 if (unlikely(!tb))
4139 return 0; 4127 return BPF_FIB_LKUP_RET_NOT_FWDED;
4140 4128
4141 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); 4129 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
4142 } else { 4130 } else {
@@ -4148,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4148 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); 4136 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
4149 } 4137 }
4150 4138
4151 if (err || res.type != RTN_UNICAST) 4139 if (err) {
4152 return 0; 4140 /* map fib lookup errors to RTN_ type */
4141 if (err == -EINVAL)
4142 return BPF_FIB_LKUP_RET_BLACKHOLE;
4143 if (err == -EHOSTUNREACH)
4144 return BPF_FIB_LKUP_RET_UNREACHABLE;
4145 if (err == -EACCES)
4146 return BPF_FIB_LKUP_RET_PROHIBIT;
4147
4148 return BPF_FIB_LKUP_RET_NOT_FWDED;
4149 }
4150
4151 if (res.type != RTN_UNICAST)
4152 return BPF_FIB_LKUP_RET_NOT_FWDED;
4153 4153
4154 if (res.fi->fib_nhs > 1) 4154 if (res.fi->fib_nhs > 1)
4155 fib_select_path(net, &res, &fl4, NULL); 4155 fib_select_path(net, &res, &fl4, NULL);
@@ -4157,19 +4157,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4157 if (check_mtu) { 4157 if (check_mtu) {
4158 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); 4158 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
4159 if (params->tot_len > mtu) 4159 if (params->tot_len > mtu)
4160 return 0; 4160 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4161 } 4161 }
4162 4162
4163 nh = &res.fi->fib_nh[res.nh_sel]; 4163 nh = &res.fi->fib_nh[res.nh_sel];
4164 4164
4165 /* do not handle lwt encaps right now */ 4165 /* do not handle lwt encaps right now */
4166 if (nh->nh_lwtstate) 4166 if (nh->nh_lwtstate)
4167 return 0; 4167 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
4168 4168
4169 dev = nh->nh_dev; 4169 dev = nh->nh_dev;
4170 if (unlikely(!dev))
4171 return 0;
4172
4173 if (nh->nh_gw) 4170 if (nh->nh_gw)
4174 params->ipv4_dst = nh->nh_gw; 4171 params->ipv4_dst = nh->nh_gw;
4175 4172
@@ -4179,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4179 * rcu_read_lock_bh is not needed here 4176 * rcu_read_lock_bh is not needed here
4180 */ 4177 */
4181 neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); 4178 neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
4182 if (neigh) 4179 if (!neigh)
4183 return bpf_fib_set_fwd_params(params, neigh, dev); 4180 return BPF_FIB_LKUP_RET_NO_NEIGH;
4184 4181
4185 return 0; 4182 return bpf_fib_set_fwd_params(params, neigh, dev);
4186} 4183}
4187#endif 4184#endif
4188 4185
@@ -4203,7 +4200,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4203 4200
4204 /* link local addresses are never forwarded */ 4201 /* link local addresses are never forwarded */
4205 if (rt6_need_strict(dst) || rt6_need_strict(src)) 4202 if (rt6_need_strict(dst) || rt6_need_strict(src))
4206 return 0; 4203 return BPF_FIB_LKUP_RET_NOT_FWDED;
4207 4204
4208 dev = dev_get_by_index_rcu(net, params->ifindex); 4205 dev = dev_get_by_index_rcu(net, params->ifindex);
4209 if (unlikely(!dev)) 4206 if (unlikely(!dev))
@@ -4211,7 +4208,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4211 4208
4212 idev = __in6_dev_get_safely(dev); 4209 idev = __in6_dev_get_safely(dev);
4213 if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) 4210 if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
4214 return 0; 4211 return BPF_FIB_LKUP_RET_FWD_DISABLED;
4215 4212
4216 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 4213 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4217 fl6.flowi6_iif = 1; 4214 fl6.flowi6_iif = 1;
@@ -4238,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4238 4235
4239 tb = ipv6_stub->fib6_get_table(net, tbid); 4236 tb = ipv6_stub->fib6_get_table(net, tbid);
4240 if (unlikely(!tb)) 4237 if (unlikely(!tb))
4241 return 0; 4238 return BPF_FIB_LKUP_RET_NOT_FWDED;
4242 4239
4243 f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); 4240 f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
4244 } else { 4241 } else {
@@ -4251,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4251 } 4248 }
4252 4249
4253 if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) 4250 if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
4254 return 0; 4251 return BPF_FIB_LKUP_RET_NOT_FWDED;
4252
4253 if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
4254 switch (f6i->fib6_type) {
4255 case RTN_BLACKHOLE:
4256 return BPF_FIB_LKUP_RET_BLACKHOLE;
4257 case RTN_UNREACHABLE:
4258 return BPF_FIB_LKUP_RET_UNREACHABLE;
4259 case RTN_PROHIBIT:
4260 return BPF_FIB_LKUP_RET_PROHIBIT;
4261 default:
4262 return BPF_FIB_LKUP_RET_NOT_FWDED;
4263 }
4264 }
4255 4265
4256 if (unlikely(f6i->fib6_flags & RTF_REJECT || 4266 if (f6i->fib6_type != RTN_UNICAST)
4257 f6i->fib6_type != RTN_UNICAST)) 4267 return BPF_FIB_LKUP_RET_NOT_FWDED;
4258 return 0;
4259 4268
4260 if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) 4269 if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
4261 f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, 4270 f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@@ -4265,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4265 if (check_mtu) { 4274 if (check_mtu) {
4266 mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src); 4275 mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
4267 if (params->tot_len > mtu) 4276 if (params->tot_len > mtu)
4268 return 0; 4277 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4269 } 4278 }
4270 4279
4271 if (f6i->fib6_nh.nh_lwtstate) 4280 if (f6i->fib6_nh.nh_lwtstate)
4272 return 0; 4281 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
4273 4282
4274 if (f6i->fib6_flags & RTF_GATEWAY) 4283 if (f6i->fib6_flags & RTF_GATEWAY)
4275 *dst = f6i->fib6_nh.nh_gw; 4284 *dst = f6i->fib6_nh.nh_gw;
@@ -4283,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4283 */ 4292 */
4284 neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, 4293 neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
4285 ndisc_hashfn, dst, dev); 4294 ndisc_hashfn, dst, dev);
4286 if (neigh) 4295 if (!neigh)
4287 return bpf_fib_set_fwd_params(params, neigh, dev); 4296 return BPF_FIB_LKUP_RET_NO_NEIGH;
4288 4297
4289 return 0; 4298 return bpf_fib_set_fwd_params(params, neigh, dev);
4290} 4299}
4291#endif 4300#endif
4292 4301
@@ -4328,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
4328 struct bpf_fib_lookup *, params, int, plen, u32, flags) 4337 struct bpf_fib_lookup *, params, int, plen, u32, flags)
4329{ 4338{
4330 struct net *net = dev_net(skb->dev); 4339 struct net *net = dev_net(skb->dev);
4331 int index = -EAFNOSUPPORT; 4340 int rc = -EAFNOSUPPORT;
4332 4341
4333 if (plen < sizeof(*params)) 4342 if (plen < sizeof(*params))
4334 return -EINVAL; 4343 return -EINVAL;
@@ -4339,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
4339 switch (params->family) { 4348 switch (params->family) {
4340#if IS_ENABLED(CONFIG_INET) 4349#if IS_ENABLED(CONFIG_INET)
4341 case AF_INET: 4350 case AF_INET:
4342 index = bpf_ipv4_fib_lookup(net, params, flags, false); 4351 rc = bpf_ipv4_fib_lookup(net, params, flags, false);
4343 break; 4352 break;
4344#endif 4353#endif
4345#if IS_ENABLED(CONFIG_IPV6) 4354#if IS_ENABLED(CONFIG_IPV6)
4346 case AF_INET6: 4355 case AF_INET6:
4347 index = bpf_ipv6_fib_lookup(net, params, flags, false); 4356 rc = bpf_ipv6_fib_lookup(net, params, flags, false);
4348 break; 4357 break;
4349#endif 4358#endif
4350 } 4359 }
4351 4360
4352 if (index > 0) { 4361 if (!rc) {
4353 struct net_device *dev; 4362 struct net_device *dev;
4354 4363
4355 dev = dev_get_by_index_rcu(net, index); 4364 dev = dev_get_by_index_rcu(net, params->ifindex);
4356 if (!is_skb_forwardable(dev, skb)) 4365 if (!is_skb_forwardable(dev, skb))
4357 index = 0; 4366 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
4358 } 4367 }
4359 4368
4360 return index; 4369 return rc;
4361} 4370}
4362 4371
4363static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { 4372static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
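
With this filter.c change, bpf_fib_lookup() stops encoding the egress ifindex in its return value: bpf_fib_set_fwd_params() now writes it into params->ifindex, and the helper returns one of the BPF_FIB_LKUP_RET_* codes instead (0 for success, otherwise the reason the packet is not being forwarded: FWD_DISABLED, NOT_FWDED, BLACKHOLE, UNREACHABLE, PROHIBIT, FRAG_NEEDED, UNSUPP_LWT or NO_NEIGH). A hedged sketch of how an XDP program might consume the new contract; ctx, eth and the usual bounds checks are assumed context and not shown:

struct bpf_fib_lookup fib = {};
int rc;

/* family, addresses, tot_len and the ingress ifindex are assumed to
 * have been filled in from the parsed packet */
rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);

if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
        __builtin_memcpy(eth->h_dest, fib.dmac, ETH_ALEN);
        __builtin_memcpy(eth->h_source, fib.smac, ETH_ALEN);
        return bpf_redirect(fib.ifindex, 0);    /* device picked by the kernel */
}
if (rc == BPF_FIB_LKUP_RET_FRAG_NEEDED)
        return XDP_DROP;        /* larger than the route MTU */
return XDP_PASS;                /* fall back to the normal stack */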
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a7a9c3d738ba..8e3fda9e725c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -119,13 +119,14 @@ unsigned long neigh_rand_reach_time(unsigned long base)
119EXPORT_SYMBOL(neigh_rand_reach_time); 119EXPORT_SYMBOL(neigh_rand_reach_time);
120 120
121 121
122static bool neigh_del(struct neighbour *n, __u8 state, 122static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
123 struct neighbour __rcu **np, struct neigh_table *tbl) 123 struct neighbour __rcu **np, struct neigh_table *tbl)
124{ 124{
125 bool retval = false; 125 bool retval = false;
126 126
127 write_lock(&n->lock); 127 write_lock(&n->lock);
128 if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) { 128 if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
129 !(n->flags & flags)) {
129 struct neighbour *neigh; 130 struct neighbour *neigh;
130 131
131 neigh = rcu_dereference_protected(n->next, 132 neigh = rcu_dereference_protected(n->next,
@@ -157,7 +158,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
157 while ((n = rcu_dereference_protected(*np, 158 while ((n = rcu_dereference_protected(*np,
158 lockdep_is_held(&tbl->lock)))) { 159 lockdep_is_held(&tbl->lock)))) {
159 if (n == ndel) 160 if (n == ndel)
160 return neigh_del(n, 0, np, tbl); 161 return neigh_del(n, 0, 0, np, tbl);
161 np = &n->next; 162 np = &n->next;
162 } 163 }
163 return false; 164 return false;
@@ -185,7 +186,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
185 * - nobody refers to it. 186 * - nobody refers to it.
186 * - it is not permanent 187 * - it is not permanent
187 */ 188 */
188 if (neigh_del(n, NUD_PERMANENT, np, tbl)) { 189 if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
190 tbl)) {
189 shrunk = 1; 191 shrunk = 1;
190 continue; 192 continue;
191 } 193 }
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7e4ede34cc52..49368e21d228 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3603,7 +3603,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3603 return -ENOMEM; 3603 return -ENOMEM;
3604 3604
3605 strcpy(pkt_dev->odevname, ifname); 3605 strcpy(pkt_dev->odevname, ifname);
3606 pkt_dev->flows = vzalloc_node(MAX_CFLOWS * sizeof(struct flow_state), 3606 pkt_dev->flows = vzalloc_node(array_size(MAX_CFLOWS,
3607 sizeof(struct flow_state)),
3607 node); 3608 node);
3608 if (pkt_dev->flows == NULL) { 3609 if (pkt_dev->flows == NULL) {
3609 kfree(pkt_dev); 3610 kfree(pkt_dev);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c642304f178c..eba8dae22c25 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5276,8 +5276,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
5276 if (npages >= 1 << order) { 5276 if (npages >= 1 << order) {
5277 page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | 5277 page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
5278 __GFP_COMP | 5278 __GFP_COMP |
5279 __GFP_NOWARN | 5279 __GFP_NOWARN,
5280 __GFP_NORETRY,
5281 order); 5280 order);
5282 if (page) 5281 if (page)
5283 goto fill_page; 5282 goto fill_page;
diff --git a/net/core/sock.c b/net/core/sock.c
index f333d75ef1a9..9e8f65585b81 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -728,22 +728,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
728 sock_valbool_flag(sk, SOCK_DBG, valbool); 728 sock_valbool_flag(sk, SOCK_DBG, valbool);
729 break; 729 break;
730 case SO_REUSEADDR: 730 case SO_REUSEADDR:
731 val = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); 731 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
732 if ((sk->sk_family == PF_INET || sk->sk_family == PF_INET6) &&
733 inet_sk(sk)->inet_num &&
734 (sk->sk_reuse != val)) {
735 ret = (sk->sk_state == TCP_ESTABLISHED) ? -EISCONN : -EUCLEAN;
736 break;
737 }
738 sk->sk_reuse = val;
739 break; 732 break;
740 case SO_REUSEPORT: 733 case SO_REUSEPORT:
741 if ((sk->sk_family == PF_INET || sk->sk_family == PF_INET6) &&
742 inet_sk(sk)->inet_num &&
743 (sk->sk_reuseport != valbool)) {
744 ret = (sk->sk_state == TCP_ESTABLISHED) ? -EISCONN : -EUCLEAN;
745 break;
746 }
747 sk->sk_reuseport = valbool; 734 sk->sk_reuseport = valbool;
748 break; 735 break;
749 case SO_TYPE: 736 case SO_TYPE:
@@ -3256,7 +3243,8 @@ static int req_prot_init(const struct proto *prot)
3256 3243
3257 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, 3244 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3258 rsk_prot->obj_size, 0, 3245 rsk_prot->obj_size, 0,
3259 prot->slab_flags, NULL); 3246 SLAB_ACCOUNT | prot->slab_flags,
3247 NULL);
3260 3248
3261 if (!rsk_prot->slab) { 3249 if (!rsk_prot->slab) {
3262 pr_crit("%s: Can't create request sock SLAB cache!\n", 3250 pr_crit("%s: Can't create request sock SLAB cache!\n",
@@ -3271,7 +3259,8 @@ int proto_register(struct proto *prot, int alloc_slab)
3271 if (alloc_slab) { 3259 if (alloc_slab) {
3272 prot->slab = kmem_cache_create_usercopy(prot->name, 3260 prot->slab = kmem_cache_create_usercopy(prot->name,
3273 prot->obj_size, 0, 3261 prot->obj_size, 0,
3274 SLAB_HWCACHE_ALIGN | prot->slab_flags, 3262 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3263 prot->slab_flags,
3275 prot->useroffset, prot->usersize, 3264 prot->useroffset, prot->usersize,
3276 NULL); 3265 NULL);
3277 3266
@@ -3294,6 +3283,7 @@ int proto_register(struct proto *prot, int alloc_slab)
3294 kmem_cache_create(prot->twsk_prot->twsk_slab_name, 3283 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
3295 prot->twsk_prot->twsk_obj_size, 3284 prot->twsk_prot->twsk_obj_size,
3296 0, 3285 0,
3286 SLAB_ACCOUNT |
3297 prot->slab_flags, 3287 prot->slab_flags,
3298 NULL); 3288 NULL);
3299 if (prot->twsk_prot->twsk_slab == NULL) 3289 if (prot->twsk_prot->twsk_slab == NULL)
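
The three sock.c slab hunks add SLAB_ACCOUNT to the protocol object cache, the request-sock cache and the timewait-sock cache, so sockets and their companion objects are charged against the kernel-memory accounting of the cgroup that allocates them. Creating an accounted cache is otherwise unchanged; a generic sketch, with illustrative names:

struct kmem_cache *example_cachep;

example_cachep = kmem_cache_create("example_objects",
                                   sizeof(struct example_object), 0,
                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!example_cachep)
        return -ENOMEM;

/* allocations from this cache are now accounted to the caller's memcg */
obj = kmem_cache_zalloc(example_cachep, GFP_KERNEL);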
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d2f4e0c1faaf..2589a6b78aa1 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -984,7 +984,8 @@ static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb,
984 */ 984 */
985 err = ops->peer_getappinfo(netdev, &info, &app_count); 985 err = ops->peer_getappinfo(netdev, &info, &app_count);
986 if (!err && app_count) { 986 if (!err && app_count) {
987 table = kmalloc(sizeof(struct dcb_app) * app_count, GFP_KERNEL); 987 table = kmalloc_array(app_count, sizeof(struct dcb_app),
988 GFP_KERNEL);
988 if (!table) 989 if (!table)
989 return -ENOMEM; 990 return -ENOMEM;
990 991
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 385f153fe031..2b75df469220 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -46,7 +46,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
46 return -ENOMEM; 46 return -ENOMEM;
47 47
48 /* allocate buffer and initialize linked list */ 48 /* allocate buffer and initialize linked list */
49 seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any()); 49 seqp = kmalloc_array(CCID2_SEQBUF_LEN, sizeof(struct ccid2_seq),
50 gfp_any());
50 if (seqp == NULL) 51 if (seqp == NULL)
51 return -ENOMEM; 52 return -ENOMEM;
52 53
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 8b5ba6dffac7..12877a1514e7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -600,7 +600,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
600{ 600{
601 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 601 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
602 struct dccp_sock *dp = dccp_sk(sk); 602 struct dccp_sock *dp = dccp_sk(sk);
603 ktime_t now = ktime_get_real(); 603 ktime_t now = ktime_get();
604 s64 delta = 0; 604 s64 delta = 0;
605 605
606 switch (fbtype) { 606 switch (fbtype) {
@@ -625,15 +625,14 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
625 case CCID3_FBACK_PERIODIC: 625 case CCID3_FBACK_PERIODIC:
626 delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback); 626 delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback);
627 if (delta <= 0) 627 if (delta <= 0)
628 DCCP_BUG("delta (%ld) <= 0", (long)delta); 628 delta = 1;
629 else 629 hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
630 hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
631 break; 630 break;
632 default: 631 default:
633 return; 632 return;
634 } 633 }
635 634
636 ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, 635 ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta,
637 hc->rx_x_recv, hc->rx_pinv); 636 hc->rx_x_recv, hc->rx_pinv);
638 637
639 hc->rx_tstamp_last_feedback = now; 638 hc->rx_tstamp_last_feedback = now;
@@ -680,7 +679,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
680static u32 ccid3_first_li(struct sock *sk) 679static u32 ccid3_first_li(struct sock *sk)
681{ 680{
682 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 681 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
683 u32 x_recv, p, delta; 682 u32 x_recv, p;
683 s64 delta;
684 u64 fval; 684 u64 fval;
685 685
686 if (hc->rx_rtt == 0) { 686 if (hc->rx_rtt == 0) {
@@ -688,7 +688,9 @@ static u32 ccid3_first_li(struct sock *sk)
688 hc->rx_rtt = DCCP_FALLBACK_RTT; 688 hc->rx_rtt = DCCP_FALLBACK_RTT;
689 } 689 }
690 690
691 delta = ktime_to_us(net_timedelta(hc->rx_tstamp_last_feedback)); 691 delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback);
692 if (delta <= 0)
693 delta = 1;
692 x_recv = scaled_div32(hc->rx_bytes_recv, delta); 694 x_recv = scaled_div32(hc->rx_bytes_recv, delta);
693 if (x_recv == 0) { /* would also trigger divide-by-zero */ 695 if (x_recv == 0) { /* would also trigger divide-by-zero */
694 DCCP_WARN("X_recv==0\n"); 696 DCCP_WARN("X_recv==0\n");
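
The ccid3 receiver hunks switch feedback timestamping from wall-clock time (ktime_get_real() with net_timedelta()) to the monotonic clock (ktime_get() with ktime_us_delta()), so a settimeofday() or NTP step can no longer yield a negative or wildly wrong interval; a non-positive delta is clamped to 1 us rather than tripping DCCP_BUG() before the divide. The pattern, as a sketch:

/* sketch: measure an interval that cannot go backwards */
ktime_t last = ktime_get();             /* monotonic, not wall clock */

/* ... packets arrive, rx_bytes_recv accumulates ... */

s64 delta = ktime_us_delta(ktime_get(), last);
if (delta <= 0)
        delta = 1;                      /* guard the divide below */
x_recv = scaled_div32(rx_bytes_recv, delta);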
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0ea2ee56ac1b..f91e3816806b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,7 +316,8 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
316 int flags, int *addr_len); 316 int flags, int *addr_len);
317void dccp_shutdown(struct sock *sk, int how); 317void dccp_shutdown(struct sock *sk, int how);
318int inet_dccp_listen(struct socket *sock, int backlog); 318int inet_dccp_listen(struct socket *sock, int backlog);
319__poll_t dccp_poll_mask(struct socket *sock, __poll_t events); 319__poll_t dccp_poll(struct file *file, struct socket *sock,
320 poll_table *wait);
320int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); 321int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
321void dccp_req_err(struct sock *sk, u64 seq); 322void dccp_req_err(struct sock *sk, u64 seq);
322 323
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a9e478cd3787..b08feb219b44 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -984,7 +984,7 @@ static const struct proto_ops inet_dccp_ops = {
984 .accept = inet_accept, 984 .accept = inet_accept,
985 .getname = inet_getname, 985 .getname = inet_getname,
986 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ 986 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
987 .poll_mask = dccp_poll_mask, 987 .poll = dccp_poll,
988 .ioctl = inet_ioctl, 988 .ioctl = inet_ioctl,
989 /* FIXME: work on inet_listen to rename it to sock_common_listen */ 989 /* FIXME: work on inet_listen to rename it to sock_common_listen */
990 .listen = inet_dccp_listen, 990 .listen = inet_dccp_listen,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 17fc4e0166ba..6344f1b18a6a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1070,7 +1070,7 @@ static const struct proto_ops inet6_dccp_ops = {
1070 .socketpair = sock_no_socketpair, 1070 .socketpair = sock_no_socketpair,
1071 .accept = inet_accept, 1071 .accept = inet_accept,
1072 .getname = inet6_getname, 1072 .getname = inet6_getname,
1073 .poll_mask = dccp_poll_mask, 1073 .poll = dccp_poll,
1074 .ioctl = inet6_ioctl, 1074 .ioctl = inet6_ioctl,
1075 .listen = inet_dccp_listen, 1075 .listen = inet_dccp_listen,
1076 .shutdown = inet_shutdown, 1076 .shutdown = inet_shutdown,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ca21c1c76da0..0d56e36a6db7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -312,11 +312,20 @@ int dccp_disconnect(struct sock *sk, int flags)
312 312
313EXPORT_SYMBOL_GPL(dccp_disconnect); 313EXPORT_SYMBOL_GPL(dccp_disconnect);
314 314
315__poll_t dccp_poll_mask(struct socket *sock, __poll_t events) 315/*
316 * Wait for a DCCP event.
317 *
318 * Note that we don't need to lock the socket, as the upper poll layers
319 * take care of normal races (between the test and the event) and we don't
320 * go look at any of the socket buffers directly.
321 */
322__poll_t dccp_poll(struct file *file, struct socket *sock,
323 poll_table *wait)
316{ 324{
317 __poll_t mask; 325 __poll_t mask;
318 struct sock *sk = sock->sk; 326 struct sock *sk = sock->sk;
319 327
328 sock_poll_wait(file, sk_sleep(sk), wait);
320 if (sk->sk_state == DCCP_LISTEN) 329 if (sk->sk_state == DCCP_LISTEN)
321 return inet_csk_listen_poll(sk); 330 return inet_csk_listen_poll(sk);
322 331
@@ -358,7 +367,7 @@ __poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
358 return mask; 367 return mask;
359} 368}
360 369
361EXPORT_SYMBOL_GPL(dccp_poll_mask); 370EXPORT_SYMBOL_GPL(dccp_poll);
362 371
363int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) 372int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
364{ 373{
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 9a686d890bfa..7d6ff983ba2c 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1207,11 +1207,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
1207} 1207}
1208 1208
1209 1209
1210static __poll_t dn_poll_mask(struct socket *sock, __poll_t events) 1210static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
1211{ 1211{
1212 struct sock *sk = sock->sk; 1212 struct sock *sk = sock->sk;
1213 struct dn_scp *scp = DN_SK(sk); 1213 struct dn_scp *scp = DN_SK(sk);
1214 __poll_t mask = datagram_poll_mask(sock, events); 1214 __poll_t mask = datagram_poll(file, sock, wait);
1215 1215
1216 if (!skb_queue_empty(&scp->other_receive_queue)) 1216 if (!skb_queue_empty(&scp->other_receive_queue))
1217 mask |= EPOLLRDBAND; 1217 mask |= EPOLLRDBAND;
@@ -2331,7 +2331,7 @@ static const struct proto_ops dn_proto_ops = {
2331 .socketpair = sock_no_socketpair, 2331 .socketpair = sock_no_socketpair,
2332 .accept = dn_accept, 2332 .accept = dn_accept,
2333 .getname = dn_getname, 2333 .getname = dn_getname,
2334 .poll_mask = dn_poll_mask, 2334 .poll = dn_poll,
2335 .ioctl = dn_ioctl, 2335 .ioctl = dn_ioctl,
2336 .listen = dn_listen, 2336 .listen = dn_listen,
2337 .shutdown = dn_shutdown, 2337 .shutdown = dn_shutdown,
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 7d20e1f3de28..56197f0d9608 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -75,7 +75,8 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
75 if (!skb->dev) 75 if (!skb->dev)
76 return NULL; 76 return NULL;
77 77
78 pskb_trim_rcsum(skb, skb->len - 4); 78 if (pskb_trim_rcsum(skb, skb->len - 4))
79 return NULL;
79 80
80 return skb; 81 return skb;
81} 82}
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index dc2960be51e0..b231e40f006a 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -38,7 +38,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
38{ 38{
39 void *hdr; 39 void *hdr;
40 int i, pages = 0; 40 int i, pages = 0;
41 uint32_t *buf = kzalloc(32 * sizeof(uint32_t), GFP_KERNEL); 41 uint32_t *buf = kcalloc(32, sizeof(uint32_t), GFP_KERNEL);
42 42
43 pr_debug("%s\n", __func__); 43 pr_debug("%s\n", __func__);
44 44
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a0768d2759b8..a60658c85a9a 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -423,7 +423,7 @@ static const struct proto_ops ieee802154_raw_ops = {
423 .socketpair = sock_no_socketpair, 423 .socketpair = sock_no_socketpair,
424 .accept = sock_no_accept, 424 .accept = sock_no_accept,
425 .getname = sock_no_getname, 425 .getname = sock_no_getname,
426 .poll_mask = datagram_poll_mask, 426 .poll = datagram_poll,
427 .ioctl = ieee802154_sock_ioctl, 427 .ioctl = ieee802154_sock_ioctl,
428 .listen = sock_no_listen, 428 .listen = sock_no_listen,
429 .shutdown = sock_no_shutdown, 429 .shutdown = sock_no_shutdown,
@@ -969,7 +969,7 @@ static const struct proto_ops ieee802154_dgram_ops = {
969 .socketpair = sock_no_socketpair, 969 .socketpair = sock_no_socketpair,
970 .accept = sock_no_accept, 970 .accept = sock_no_accept,
971 .getname = sock_no_getname, 971 .getname = sock_no_getname,
972 .poll_mask = datagram_poll_mask, 972 .poll = datagram_poll,
973 .ioctl = ieee802154_sock_ioctl, 973 .ioctl = ieee802154_sock_ioctl,
974 .listen = sock_no_listen, 974 .listen = sock_no_listen,
975 .shutdown = sock_no_shutdown, 975 .shutdown = sock_no_shutdown,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 15e125558c76..b403499fdabe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = {
986 .socketpair = sock_no_socketpair, 986 .socketpair = sock_no_socketpair,
987 .accept = inet_accept, 987 .accept = inet_accept,
988 .getname = inet_getname, 988 .getname = inet_getname,
989 .poll_mask = tcp_poll_mask, 989 .poll = tcp_poll,
990 .ioctl = inet_ioctl, 990 .ioctl = inet_ioctl,
991 .listen = inet_listen, 991 .listen = inet_listen,
992 .shutdown = inet_shutdown, 992 .shutdown = inet_shutdown,
@@ -1021,7 +1021,7 @@ const struct proto_ops inet_dgram_ops = {
1021 .socketpair = sock_no_socketpair, 1021 .socketpair = sock_no_socketpair,
1022 .accept = sock_no_accept, 1022 .accept = sock_no_accept,
1023 .getname = inet_getname, 1023 .getname = inet_getname,
1024 .poll_mask = udp_poll_mask, 1024 .poll = udp_poll,
1025 .ioctl = inet_ioctl, 1025 .ioctl = inet_ioctl,
1026 .listen = sock_no_listen, 1026 .listen = sock_no_listen,
1027 .shutdown = inet_shutdown, 1027 .shutdown = inet_shutdown,
@@ -1042,7 +1042,7 @@ EXPORT_SYMBOL(inet_dgram_ops);
1042 1042
1043/* 1043/*
1044 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without 1044 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
1045 * udp_poll_mask 1045 * udp_poll
1046 */ 1046 */
1047static const struct proto_ops inet_sockraw_ops = { 1047static const struct proto_ops inet_sockraw_ops = {
1048 .family = PF_INET, 1048 .family = PF_INET,
@@ -1053,7 +1053,7 @@ static const struct proto_ops inet_sockraw_ops = {
1053 .socketpair = sock_no_socketpair, 1053 .socketpair = sock_no_socketpair,
1054 .accept = sock_no_accept, 1054 .accept = sock_no_accept,
1055 .getname = inet_getname, 1055 .getname = inet_getname,
1056 .poll_mask = datagram_poll_mask, 1056 .poll = datagram_poll,
1057 .ioctl = inet_ioctl, 1057 .ioctl = inet_ioctl,
1058 .listen = sock_no_listen, 1058 .listen = sock_no_listen,
1059 .shutdown = inet_shutdown, 1059 .shutdown = inet_shutdown,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 63aa39b3af03..b21833651394 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -567,7 +567,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
567 struct nlattr *mx; 567 struct nlattr *mx;
568 int len = 0; 568 int len = 0;
569 569
570 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 570 mx = kcalloc(3, nla_total_size(4), GFP_KERNEL);
571 if (!mx) 571 if (!mx)
572 return -ENOMEM; 572 return -ENOMEM;
573 573
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..c9ec1603666b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -448,9 +448,7 @@ next_proto:
448out_unlock: 448out_unlock:
449 rcu_read_unlock(); 449 rcu_read_unlock();
450out: 450out:
451 NAPI_GRO_CB(skb)->flush |= flush; 451 skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
452 skb_gro_remcsum_cleanup(skb, &grc);
453 skb->remcsum_offload = 0;
454 452
455 return pp; 453 return pp;
456} 454}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 1859c473b21a..6a7d980105f6 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
223out_unlock: 223out_unlock:
224 rcu_read_unlock(); 224 rcu_read_unlock();
225out: 225out:
226 NAPI_GRO_CB(skb)->flush |= flush; 226 skb_gro_flush_final(skb, pp, flush);
227 227
228 return pp; 228 return pp;
229} 229}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 31ff46daae97..3647167c8fa3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -243,9 +243,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
243 bool dev_match = (sk->sk_bound_dev_if == dif || 243 bool dev_match = (sk->sk_bound_dev_if == dif ||
244 sk->sk_bound_dev_if == sdif); 244 sk->sk_bound_dev_if == sdif);
245 245
246 if (exact_dif && !dev_match) 246 if (!dev_match)
247 return -1; 247 return -1;
248 if (sk->sk_bound_dev_if && dev_match) 248 if (sk->sk_bound_dev_if)
249 score += 4; 249 score += 4;
250 } 250 }
251 if (sk->sk_incoming_cpu == raw_smp_processor_id()) 251 if (sk->sk_incoming_cpu == raw_smp_processor_id())
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index af5a830ff6ad..b3308e9d9762 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1145,7 +1145,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1145 cork->fragsize = ip_sk_use_pmtu(sk) ? 1145 cork->fragsize = ip_sk_use_pmtu(sk) ?
1146 dst_mtu(&rt->dst) : rt->dst.dev->mtu; 1146 dst_mtu(&rt->dst) : rt->dst.dev->mtu;
1147 1147
1148 cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0; 1148 cork->gso_size = sk->sk_type == SOCK_DGRAM &&
1149 sk->sk_protocol == IPPROTO_UDP ? ipc->gso_size : 0;
1149 cork->dst = &rt->dst; 1150 cork->dst = &rt->dst;
1150 cork->length = 0; 1151 cork->length = 0;
1151 cork->ttl = ipc->ttl; 1152 cork->ttl = ipc->ttl;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 38d906baf1df..c4f5602308ed 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -261,8 +261,8 @@ static struct net_device *__ip_tunnel_create(struct net *net,
261 } else { 261 } else {
262 if (strlen(ops->kind) > (IFNAMSIZ - 3)) 262 if (strlen(ops->kind) > (IFNAMSIZ - 3))
263 goto failed; 263 goto failed;
264 strlcpy(name, ops->kind, IFNAMSIZ); 264 strcpy(name, ops->kind);
265 strncat(name, "%d", 2); 265 strcat(name, "%d");
266 } 266 }
267 267
268 ASSERT_RTNL(); 268 ASSERT_RTNL();
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 38ab97b0a2ec..ca0dad90803a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -531,6 +531,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
531 return -ENOMEM; 531 return -ENOMEM;
532 532
533 j = 0; 533 j = 0;
534 memset(&mtpar, 0, sizeof(mtpar));
534 mtpar.net = net; 535 mtpar.net = net;
535 mtpar.table = name; 536 mtpar.table = name;
536 mtpar.entryinfo = &e->ip; 537 mtpar.entryinfo = &e->ip;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bf4e4adc2d00..1df6e97106d7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -649,7 +649,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
649 649
650 hash = rcu_dereference(nh->nh_exceptions); 650 hash = rcu_dereference(nh->nh_exceptions);
651 if (!hash) { 651 if (!hash) {
652 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC); 652 hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
653 if (!hash) 653 if (!hash)
654 goto out_unlock; 654 goto out_unlock;
655 rcu_assign_pointer(nh->nh_exceptions, hash); 655 rcu_assign_pointer(nh->nh_exceptions, hash);
@@ -3146,7 +3146,8 @@ int __init ip_rt_init(void)
3146{ 3146{
3147 int cpu; 3147 int cpu;
3148 3148
3149 ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); 3149 ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
3150 GFP_KERNEL);
3150 if (!ip_idents) 3151 if (!ip_idents)
3151 panic("IP: failed to allocate ip_idents\n"); 3152 panic("IP: failed to allocate ip_idents\n");
3152 3153
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d06247ba08b2..af0a857d8352 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -265,8 +265,9 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
265 ipv4.sysctl_tcp_fastopen); 265 ipv4.sysctl_tcp_fastopen);
266 struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; 266 struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
267 struct tcp_fastopen_context *ctxt; 267 struct tcp_fastopen_context *ctxt;
268 int ret;
269 u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ 268 u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
269 __le32 key[4];
270 int ret, i;
270 271
271 tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); 272 tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
272 if (!tbl.data) 273 if (!tbl.data)
@@ -275,11 +276,14 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 	rcu_read_lock();
 	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
 	if (ctxt)
-		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
+		memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
 	else
-		memset(user_key, 0, sizeof(user_key));
+		memset(key, 0, sizeof(key));
 	rcu_read_unlock();
 
+	for (i = 0; i < ARRAY_SIZE(key); i++)
+		user_key[i] = le32_to_cpu(key[i]);
+
 	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
 		 user_key[0], user_key[1], user_key[2], user_key[3]);
 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
@@ -290,13 +294,17 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 			ret = -EINVAL;
 			goto bad_key;
 		}
-		tcp_fastopen_reset_cipher(net, NULL, user_key,
+
+		for (i = 0; i < ARRAY_SIZE(user_key); i++)
+			key[i] = cpu_to_le32(user_key[i]);
+
+		tcp_fastopen_reset_cipher(net, NULL, key,
 					  TCP_FASTOPEN_KEY_LENGTH);
 	}
 
 bad_key:
 	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
 		 user_key[0], user_key[1], user_key[2], user_key[3],
 		 (char *)tbl.data, ret);
 	kfree(tbl.data);
 	return ret;
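The sysctl change keeps the TCP fastopen key in an explicitly little-endian buffer and converts with le32_to_cpu()/cpu_to_le32() only when formatting or parsing the hex string, so the text shown in proc is the same on big- and little-endian hosts. A userspace round-trip sketch of the same conversion; it uses glibc's <endian.h> helpers, which may be named differently on other C libraries:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Key stored in a fixed (little-endian) representation. */
	uint32_t le_key[4] = { htole32(0x01020304), htole32(0x05060708),
			       htole32(0x090a0b0c), htole32(0x0d0e0f10) };
	uint32_t host[4];
	int i;

	/* Convert to host order only for display. */
	for (i = 0; i < 4; i++)
		host[i] = le32toh(le_key[i]);
	printf("%08x-%08x-%08x-%08x\n", host[0], host[1], host[2], host[3]);
	return 0;
}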
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2741953adaba..e7b53d2a971f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -494,21 +494,32 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
 }
 
 /*
- * Socket is not locked. We are protected from async events by poll logic and
- * correct handling of state changes made by other threads is impossible in
- * any case.
+ * Wait for a TCP event.
+ *
+ * Note that we don't need to lock the socket, as the upper poll layers
+ * take care of normal races (between the test and the event) and we don't
+ * go look at any of the socket buffers directly.
  */
-__poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
+__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
+	__poll_t mask;
 	struct sock *sk = sock->sk;
 	const struct tcp_sock *tp = tcp_sk(sk);
-	__poll_t mask = 0;
 	int state;
 
+	sock_poll_wait(file, sk_sleep(sk), wait);
+
 	state = inet_sk_state_load(sk);
 	if (state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
+	/* Socket is not locked. We are protected from async events
+	 * by poll logic and correct handling of state changes
+	 * made by other threads is impossible in any case.
+	 */
+
+	mask = 0;
+
 	/*
 	 * EPOLLHUP is certainly not done right. But poll() doesn't
 	 * have a notion of HUP in just one direction, and for a
@@ -589,7 +600,7 @@ __poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
 
 	return mask;
 }
-EXPORT_SYMBOL(tcp_poll_mask);
+EXPORT_SYMBOL(tcp_poll);
 
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
@@ -1694,6 +1705,13 @@ EXPORT_SYMBOL(tcp_peek_len);
 /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
 int tcp_set_rcvlowat(struct sock *sk, int val)
 {
+	int cap;
+
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+		cap = sk->sk_rcvbuf >> 1;
+	else
+		cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
+	val = min(val, cap);
 	sk->sk_rcvlowat = val ? : 1;
 
 	/* Check if we need to signal EPOLLIN right now */
@@ -1702,12 +1720,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
 		return 0;
 
-	/* val comes from user space and might be close to INT_MAX */
 	val <<= 1;
-	if (val < 0)
-		val = INT_MAX;
-
-	val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 	if (val > sk->sk_rcvbuf) {
 		sk->sk_rcvbuf = val;
 		tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
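The tcp_set_rcvlowat() change clamps the user-supplied value to half of the receive-buffer ceiling before it is doubled, which is why the old "close to INT_MAX" fixup can go away: after the clamp the shift can no longer overflow. A standalone sketch of the clamp-before-double idea; the function name and the explicit ceiling parameter are illustrative, not kernel API:

static int scaled_rcvbuf(int val, int rcvbuf_ceiling)
{
	int cap = rcvbuf_ceiling >> 1;

	if (val > cap)
		val = cap;
	if (val < 1)
		val = 1;
	/* At most rcvbuf_ceiling, and never negative, so no INT_MAX check. */
	return val << 1;
}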
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 355d3dffd021..8e5522c6833a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -265,7 +265,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode(sk, 1);
+			tcp_enter_quickack_mode(sk, 2);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn(sk))
@@ -273,7 +273,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode(sk, 1);
+			tcp_enter_quickack_mode(sk, 2);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;
@@ -3181,6 +3181,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 
 	if (tcp_is_reno(tp)) {
 		tcp_remove_reno_sacks(sk, pkts_acked);
+
+		/* If any of the cumulatively ACKed segments was
+		 * retransmitted, non-SACK case cannot confirm that
+		 * progress was due to original transmission due to
+		 * lack of TCPCB_SACKED_ACKED bits even if some of
+		 * the packets may have been never retransmitted.
+		 */
+		if (flag & FLAG_RETRANS_DATA_ACKED)
+			flag &= ~FLAG_ORIG_SACK_ACKED;
 	} else {
 		int delta;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fed3f1c66167..bea17f1e8302 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1730,6 +1730,10 @@ process:
 			reqsk_put(req);
 			goto discard_it;
 		}
+		if (tcp_checksum_complete(skb)) {
+			reqsk_put(req);
+			goto csum_error;
+		}
 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
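The added tcp_checksum_complete() call (mirrored in the IPv6 hunk further down) drops request-socket packets with bad checksums before any further processing, instead of acting on corrupted headers. For background only, a self-contained RFC 1071-style 16-bit one's-complement sum in plain C; this is not the kernel's implementation, which uses its own optimized csum helpers:

#include <stddef.h>
#include <stdint.h>

static uint16_t csum16(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)data[i] << 8 | data[i + 1];
	if (len & 1)				/* odd trailing byte */
		sum += (uint32_t)data[len - 1] << 8;
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}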
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4d58e2ce0b5b..8cc7c3487330 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -268,8 +268,6 @@ found:
268 goto out_check_final; 268 goto out_check_final;
269 } 269 }
270 270
271 p = *head;
272 th2 = tcp_hdr(p);
273 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); 271 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
274 272
275out_check_final: 273out_check_final:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3365362cac88..24e116ddae79 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2591,7 +2591,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
2591 * udp_poll - wait for a UDP event. 2591 * udp_poll - wait for a UDP event.
2592 * @file - file struct 2592 * @file - file struct
2593 * @sock - socket 2593 * @sock - socket
2594 * @events - events to wait for 2594 * @wait - poll table
2595 * 2595 *
2596 * This is same as datagram poll, except for the special case of 2596 * This is same as datagram poll, except for the special case of
2597 * blocking sockets. If application is using a blocking fd 2597 * blocking sockets. If application is using a blocking fd
@@ -2600,23 +2600,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
2600 * but then block when reading it. Add special case code 2600 * but then block when reading it. Add special case code
2601 * to work around these arguably broken applications. 2601 * to work around these arguably broken applications.
2602 */ 2602 */
2603__poll_t udp_poll_mask(struct socket *sock, __poll_t events) 2603__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
2604{ 2604{
2605 __poll_t mask = datagram_poll_mask(sock, events); 2605 __poll_t mask = datagram_poll(file, sock, wait);
2606 struct sock *sk = sock->sk; 2606 struct sock *sk = sock->sk;
2607 2607
2608 if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) 2608 if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
2609 mask |= EPOLLIN | EPOLLRDNORM; 2609 mask |= EPOLLIN | EPOLLRDNORM;
2610 2610
2611 /* Check for false positives due to checksum errors */ 2611 /* Check for false positives due to checksum errors */
2612 if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) && 2612 if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
2613 !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) 2613 !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
2614 mask &= ~(EPOLLIN | EPOLLRDNORM); 2614 mask &= ~(EPOLLIN | EPOLLRDNORM);
2615 2615
2616 return mask; 2616 return mask;
2617 2617
2618} 2618}
2619EXPORT_SYMBOL(udp_poll_mask); 2619EXPORT_SYMBOL(udp_poll);
2620 2620
2621int udp_abort(struct sock *sk, int err) 2621int udp_abort(struct sock *sk, int err)
2622{ 2622{
@@ -2772,7 +2772,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
2772 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", 2772 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",
2773 bucket, src, srcp, dest, destp, sp->sk_state, 2773 bucket, src, srcp, dest, destp, sp->sk_state,
2774 sk_wmem_alloc_get(sp), 2774 sk_wmem_alloc_get(sp),
2775 sk_rmem_alloc_get(sp), 2775 udp_rqueue_get(sp),
2776 0, 0L, 0, 2776 0, 0L, 0,
2777 from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 2777 from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
2778 0, sock_i_ino(sp), 2778 0, sock_i_ino(sp),
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index d0390d844ac8..d9ad986c7b2c 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -163,7 +163,7 @@ static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
163static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, 163static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
164 void *info) 164 void *info)
165{ 165{
166 r->idiag_rqueue = sk_rmem_alloc_get(sk); 166 r->idiag_rqueue = udp_rqueue_get(sk);
167 r->idiag_wqueue = sk_wmem_alloc_get(sk); 167 r->idiag_wqueue = sk_wmem_alloc_get(sk);
168} 168}
169 169
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 92dc9e5a7ff3..69c54540d5b4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -394,7 +394,7 @@ unflush:
394out_unlock: 394out_unlock:
395 rcu_read_unlock(); 395 rcu_read_unlock();
396out: 396out:
397 NAPI_GRO_CB(skb)->flush |= flush; 397 skb_gro_flush_final(skb, pp, flush);
398 return pp; 398 return pp;
399} 399}
400EXPORT_SYMBOL(udp_gro_receive); 400EXPORT_SYMBOL(udp_gro_receive);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 89019bf59f46..91580c62bb86 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1324,6 +1324,7 @@ retry:
1324 } 1324 }
1325 } 1325 }
1326 1326
1327 memset(&cfg, 0, sizeof(cfg));
1327 cfg.valid_lft = min_t(__u32, ifp->valid_lft, 1328 cfg.valid_lft = min_t(__u32, ifp->valid_lft,
1328 idev->cnf.temp_valid_lft + age); 1329 idev->cnf.temp_valid_lft + age);
1329 cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; 1330 cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
@@ -1357,7 +1358,6 @@ retry:
1357 1358
1358 cfg.pfx = &addr; 1359 cfg.pfx = &addr;
1359 cfg.scope = ipv6_addr_scope(cfg.pfx); 1360 cfg.scope = ipv6_addr_scope(cfg.pfx);
1360 cfg.rt_priority = 0;
1361 1361
1362 ift = ipv6_add_addr(idev, &cfg, block, NULL); 1362 ift = ipv6_add_addr(idev, &cfg, block, NULL);
1363 if (IS_ERR(ift)) { 1363 if (IS_ERR(ift)) {
@@ -4528,6 +4528,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
4528 unsigned long expires, u32 flags) 4528 unsigned long expires, u32 flags)
4529{ 4529{
4530 struct fib6_info *f6i; 4530 struct fib6_info *f6i;
4531 u32 prio;
4531 4532
4532 f6i = addrconf_get_prefix_route(&ifp->addr, 4533 f6i = addrconf_get_prefix_route(&ifp->addr,
4533 ifp->prefix_len, 4534 ifp->prefix_len,
@@ -4536,13 +4537,15 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
 	if (!f6i)
 		return -ENOENT;
 
-	if (f6i->fib6_metric != ifp->rt_priority) {
+	prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF;
+	if (f6i->fib6_metric != prio) {
+		/* delete old one */
+		ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+
 		/* add new one */
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
 				      ifp->rt_priority, ifp->idev->dev,
 				      expires, flags, GFP_KERNEL);
-		/* delete old one */
-		ip6_del_rt(dev_net(ifp->idev->dev), f6i);
 	} else {
 		if (!expires)
 			fib6_clean_expires(f6i);
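The "ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF" expression above uses the GNU C conditional with an omitted middle operand: "a ?: b" evaluates a once and yields it unless it is zero, in which case it yields b. A tiny illustration with a hypothetical helper, alongside the portable ISO C spelling:

static unsigned int pick_metric(unsigned int requested, unsigned int def)
{
	/* GNU C: use "requested" unless it is zero, evaluating it once. */
	return requested ?: def;
	/* ISO C equivalent: return requested ? requested : def; */
}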
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 74f2a261e8df..9ed0eae91758 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -570,7 +570,7 @@ const struct proto_ops inet6_stream_ops = {
570 .socketpair = sock_no_socketpair, /* a do nothing */ 570 .socketpair = sock_no_socketpair, /* a do nothing */
571 .accept = inet_accept, /* ok */ 571 .accept = inet_accept, /* ok */
572 .getname = inet6_getname, 572 .getname = inet6_getname,
573 .poll_mask = tcp_poll_mask, /* ok */ 573 .poll = tcp_poll, /* ok */
574 .ioctl = inet6_ioctl, /* must change */ 574 .ioctl = inet6_ioctl, /* must change */
575 .listen = inet_listen, /* ok */ 575 .listen = inet_listen, /* ok */
576 .shutdown = inet_shutdown, /* ok */ 576 .shutdown = inet_shutdown, /* ok */
@@ -603,7 +603,7 @@ const struct proto_ops inet6_dgram_ops = {
603 .socketpair = sock_no_socketpair, /* a do nothing */ 603 .socketpair = sock_no_socketpair, /* a do nothing */
604 .accept = sock_no_accept, /* a do nothing */ 604 .accept = sock_no_accept, /* a do nothing */
605 .getname = inet6_getname, 605 .getname = inet6_getname,
606 .poll_mask = udp_poll_mask, /* ok */ 606 .poll = udp_poll, /* ok */
607 .ioctl = inet6_ioctl, /* must change */ 607 .ioctl = inet6_ioctl, /* must change */
608 .listen = sock_no_listen, /* ok */ 608 .listen = sock_no_listen, /* ok */
609 .shutdown = inet_shutdown, /* ok */ 609 .shutdown = inet_shutdown, /* ok */
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a02ad100f0d7..2ee08b6a86a4 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -1019,8 +1019,8 @@ exit_f:
1019} 1019}
1020EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); 1020EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
1021 1021
1022void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, 1022void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
1023 __u16 srcp, __u16 destp, int bucket) 1023 __u16 srcp, __u16 destp, int rqueue, int bucket)
1024{ 1024{
1025 const struct in6_addr *dest, *src; 1025 const struct in6_addr *dest, *src;
1026 1026
@@ -1036,7 +1036,7 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
1036 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1036 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1037 sp->sk_state, 1037 sp->sk_state,
1038 sk_wmem_alloc_get(sp), 1038 sk_wmem_alloc_get(sp),
1039 sk_rmem_alloc_get(sp), 1039 rqueue,
1040 0, 0L, 0, 1040 0, 0L, 0,
1041 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 1041 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1042 0, 1042 0,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index d8c4b6374377..be491bf6ab6e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -956,7 +956,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
956 int err, i, j; 956 int err, i, j;
957 957
958 net->ipv6.icmp_sk = 958 net->ipv6.icmp_sk =
959 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); 959 kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
960 if (!net->ipv6.icmp_sk) 960 if (!net->ipv6.icmp_sk)
961 return -ENOMEM; 961 return -ENOMEM;
962 962
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 44c39c5f0638..10ae13560b40 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -42,7 +42,8 @@ static int alloc_ila_locks(struct ila_net *ilan)
42 size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); 42 size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
43 43
44 if (sizeof(spinlock_t) != 0) { 44 if (sizeof(spinlock_t) != 0) {
45 ilan->locks = kvmalloc(size * sizeof(spinlock_t), GFP_KERNEL); 45 ilan->locks = kvmalloc_array(size, sizeof(spinlock_t),
46 GFP_KERNEL);
46 if (!ilan->locks) 47 if (!ilan->locks)
47 return -ENOMEM; 48 return -ENOMEM;
48 for (i = 0; i < size; i++) 49 for (i = 0; i < size; i++)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 2febe26de6a1..595ad408dba0 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -113,9 +113,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
 		bool dev_match = (sk->sk_bound_dev_if == dif ||
 				  sk->sk_bound_dev_if == sdif);
 
-		if (exact_dif && !dev_match)
+		if (!dev_match)
 			return -1;
-		if (sk->sk_bound_dev_if && dev_match)
+		if (sk->sk_bound_dev_if)
 			score++;
 	}
 	if (sk->sk_incoming_cpu == raw_smp_processor_id())
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7aa4c41a3bd9..1fb2f3118d60 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -167,8 +167,9 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
167 return f6i; 167 return f6i;
168} 168}
169 169
170void fib6_info_destroy(struct fib6_info *f6i) 170void fib6_info_destroy_rcu(struct rcu_head *head)
171{ 171{
172 struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
172 struct rt6_exception_bucket *bucket; 173 struct rt6_exception_bucket *bucket;
173 struct dst_metrics *m; 174 struct dst_metrics *m;
174 175
@@ -206,7 +207,7 @@ void fib6_info_destroy(struct fib6_info *f6i)
206 207
207 kfree(f6i); 208 kfree(f6i);
208} 209}
209EXPORT_SYMBOL_GPL(fib6_info_destroy); 210EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
210 211
211static struct fib6_node *node_alloc(struct net *net) 212static struct fib6_node *node_alloc(struct net *net)
212{ 213{
@@ -934,6 +935,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
934{ 935{
935 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, 936 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
936 lockdep_is_held(&rt->fib6_table->tb6_lock)); 937 lockdep_is_held(&rt->fib6_table->tb6_lock));
938 enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
937 struct fib6_info *iter = NULL, *match = NULL; 939 struct fib6_info *iter = NULL, *match = NULL;
938 struct fib6_info __rcu **ins; 940 struct fib6_info __rcu **ins;
939 int replace = (info->nlh && 941 int replace = (info->nlh &&
@@ -1013,6 +1015,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
1013 "Can not append to a REJECT route"); 1015 "Can not append to a REJECT route");
1014 return -EINVAL; 1016 return -EINVAL;
1015 } 1017 }
1018 event = FIB_EVENT_ENTRY_APPEND;
1016 rt->fib6_nsiblings = match->fib6_nsiblings; 1019 rt->fib6_nsiblings = match->fib6_nsiblings;
1017 list_add_tail(&rt->fib6_siblings, &match->fib6_siblings); 1020 list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
1018 match->fib6_nsiblings++; 1021 match->fib6_nsiblings++;
@@ -1034,15 +1037,12 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 	 * insert node
 	 */
 	if (!replace) {
-		enum fib_event_type event;
-
 		if (!add)
 			pr_warn("NLM_F_CREATE should be set when creating new route\n");
 
 add:
 		nlflags |= NLM_F_CREATE;
 
-		event = append ? FIB_EVENT_ENTRY_APPEND : FIB_EVENT_ENTRY_ADD;
 		err = call_fib6_entry_notifiers(info->nl_net, event, rt,
 						extack);
 		if (err)
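The rename to fib6_info_destroy_rcu() means the destructor is now invoked as an RCU callback: it receives the embedded struct rcu_head and recovers the enclosing fib6_info with container_of(). A minimal userspace sketch of that pattern; the container_of macro is re-implemented locally here and "struct item"/item_free() are purely illustrative:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct callback_head { void (*func)(struct callback_head *); };

struct item {
	int value;
	struct callback_head cb;	/* handed to the deferred-free machinery */
};

static void item_free(struct callback_head *head)
{
	struct item *it = container_of(head, struct item, cb);

	printf("freeing item with value %d\n", it->value);
}

int main(void)
{
	struct item it = { .value = 42 };

	item_free(&it.cb);		/* the callback sees only &it.cb */
	return 0;
}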
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 021e5aef6ba3..a14fb4fcdf18 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1219,7 +1219,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1219 if (mtu < IPV6_MIN_MTU) 1219 if (mtu < IPV6_MIN_MTU)
1220 return -EINVAL; 1220 return -EINVAL;
1221 cork->base.fragsize = mtu; 1221 cork->base.fragsize = mtu;
1222 cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0; 1222 cork->base.gso_size = sk->sk_type == SOCK_DGRAM &&
1223 sk->sk_protocol == IPPROTO_UDP ? ipc6->gso_size : 0;
1223 1224
1224 if (dst_allfrag(xfrm_dst_path(&rt->dst))) 1225 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1225 cork->base.flags |= IPCORK_ALLFRAG; 1226 cork->base.flags |= IPCORK_ALLFRAG;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 975021df7c1c..c0c74088f2af 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2082,7 +2082,8 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev)
2082 mld_send_initial_cr(idev); 2082 mld_send_initial_cr(idev);
2083 idev->mc_dad_count--; 2083 idev->mc_dad_count--;
2084 if (idev->mc_dad_count) 2084 if (idev->mc_dad_count)
2085 mld_dad_start_timer(idev, idev->mc_maxdelay); 2085 mld_dad_start_timer(idev,
2086 unsolicited_report_interval(idev));
2086 } 2087 }
2087} 2088}
2088 2089
@@ -2094,7 +2095,8 @@ static void mld_dad_timer_expire(struct timer_list *t)
2094 if (idev->mc_dad_count) { 2095 if (idev->mc_dad_count) {
2095 idev->mc_dad_count--; 2096 idev->mc_dad_count--;
2096 if (idev->mc_dad_count) 2097 if (idev->mc_dad_count)
2097 mld_dad_start_timer(idev, idev->mc_maxdelay); 2098 mld_dad_start_timer(idev,
2099 unsolicited_report_interval(idev));
2098 } 2100 }
2099 in6_dev_put(idev); 2101 in6_dev_put(idev);
2100} 2102}
@@ -2452,7 +2454,8 @@ static void mld_ifc_timer_expire(struct timer_list *t)
2452 if (idev->mc_ifc_count) { 2454 if (idev->mc_ifc_count) {
2453 idev->mc_ifc_count--; 2455 idev->mc_ifc_count--;
2454 if (idev->mc_ifc_count) 2456 if (idev->mc_ifc_count)
2455 mld_ifc_start_timer(idev, idev->mc_maxdelay); 2457 mld_ifc_start_timer(idev,
2458 unsolicited_report_interval(idev));
2456 } 2459 }
2457 in6_dev_put(idev); 2460 in6_dev_put(idev);
2458} 2461}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 0758b5bcfb29..7eab959734bc 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -550,6 +550,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
550 return -ENOMEM; 550 return -ENOMEM;
551 551
552 j = 0; 552 j = 0;
553 memset(&mtpar, 0, sizeof(mtpar));
553 mtpar.net = net; 554 mtpar.net = net;
554 mtpar.table = name; 555 mtpar.table = name;
555 mtpar.entryinfo = &e->ipv6; 556 mtpar.entryinfo = &e->ipv6;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5e0332014c17..a452d99c9f52 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -107,7 +107,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
107 if (hdr == NULL) 107 if (hdr == NULL)
108 goto err_reg; 108 goto err_reg;
109 109
110 net->nf_frag.sysctl.frags_hdr = hdr; 110 net->nf_frag_frags_hdr = hdr;
111 return 0; 111 return 0;
112 112
113err_reg: 113err_reg:
@@ -121,8 +121,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
121{ 121{
122 struct ctl_table *table; 122 struct ctl_table *table;
123 123
124 table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; 124 table = net->nf_frag_frags_hdr->ctl_table_arg;
125 unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); 125 unregister_net_sysctl_table(net->nf_frag_frags_hdr);
126 if (!net_eq(net, &init_net)) 126 if (!net_eq(net, &init_net))
127 kfree(table); 127 kfree(table);
128} 128}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ce6f0d15b5dd..afc307c89d1a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1334,7 +1334,7 @@ void raw6_proc_exit(void)
1334} 1334}
1335#endif /* CONFIG_PROC_FS */ 1335#endif /* CONFIG_PROC_FS */
1336 1336
1337/* Same as inet6_dgram_ops, sans udp_poll_mask. */ 1337/* Same as inet6_dgram_ops, sans udp_poll. */
1338const struct proto_ops inet6_sockraw_ops = { 1338const struct proto_ops inet6_sockraw_ops = {
1339 .family = PF_INET6, 1339 .family = PF_INET6,
1340 .owner = THIS_MODULE, 1340 .owner = THIS_MODULE,
@@ -1344,7 +1344,7 @@ const struct proto_ops inet6_sockraw_ops = {
1344 .socketpair = sock_no_socketpair, /* a do nothing */ 1344 .socketpair = sock_no_socketpair, /* a do nothing */
1345 .accept = sock_no_accept, /* a do nothing */ 1345 .accept = sock_no_accept, /* a do nothing */
1346 .getname = inet6_getname, 1346 .getname = inet6_getname,
1347 .poll_mask = datagram_poll_mask, /* ok */ 1347 .poll = datagram_poll, /* ok */
1348 .ioctl = inet6_ioctl, /* must change */ 1348 .ioctl = inet6_ioctl, /* must change */
1349 .listen = sock_no_listen, /* ok */ 1349 .listen = sock_no_listen, /* ok */
1350 .shutdown = inet_shutdown, /* ok */ 1350 .shutdown = inet_shutdown, /* ok */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb956989adaf..86a0e4333d42 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2307,9 +2307,6 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 	const struct in6_addr *daddr, *saddr;
 	struct rt6_info *rt6 = (struct rt6_info *)dst;
 
-	if (rt6->rt6i_flags & RTF_LOCAL)
-		return;
-
 	if (dst_metric_locked(dst, RTAX_MTU))
 		return;
 
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 33fb35cbfac1..558fe8cc6d43 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -373,7 +373,7 @@ static int seg6_hmac_init_algo(void)
373 return -ENOMEM; 373 return -ENOMEM;
374 374
375 for_each_possible_cpu(cpu) { 375 for_each_possible_cpu(cpu) {
376 tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); 376 tfm = crypto_alloc_shash(algo->name, 0, 0);
377 if (IS_ERR(tfm)) 377 if (IS_ERR(tfm))
378 return PTR_ERR(tfm); 378 return PTR_ERR(tfm);
379 p_tfm = per_cpu_ptr(algo->tfms, cpu); 379 p_tfm = per_cpu_ptr(algo->tfms, cpu);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b620d9b72e59..7efa9fd7e109 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1479,6 +1479,10 @@ process:
1479 reqsk_put(req); 1479 reqsk_put(req);
1480 goto discard_it; 1480 goto discard_it;
1481 } 1481 }
1482 if (tcp_checksum_complete(skb)) {
1483 reqsk_put(req);
1484 goto csum_error;
1485 }
1482 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1486 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1483 inet_csk_reqsk_queue_drop_and_put(sk, req); 1487 inet_csk_reqsk_queue_drop_and_put(sk, req);
1484 goto lookup; 1488 goto lookup;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 164afd31aebf..e6645cae403e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1523,7 +1523,8 @@ int udp6_seq_show(struct seq_file *seq, void *v)
1523 struct inet_sock *inet = inet_sk(v); 1523 struct inet_sock *inet = inet_sk(v);
1524 __u16 srcp = ntohs(inet->inet_sport); 1524 __u16 srcp = ntohs(inet->inet_sport);
1525 __u16 destp = ntohs(inet->inet_dport); 1525 __u16 destp = ntohs(inet->inet_dport);
1526 ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); 1526 __ip6_dgram_sock_seq_show(seq, v, srcp, destp,
1527 udp_rqueue_get(v), bucket);
1527 } 1528 }
1528 return 0; 1529 return 0;
1529} 1530}
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 68e86257a549..893a022f9620 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1488,11 +1488,14 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
1488 return 0; 1488 return 0;
1489} 1489}
1490 1490
1491static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events) 1491__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
1492 poll_table *wait)
1492{ 1493{
1493 struct sock *sk = sock->sk; 1494 struct sock *sk = sock->sk;
1494 __poll_t mask = 0; 1495 __poll_t mask = 0;
1495 1496
1497 sock_poll_wait(file, sk_sleep(sk), wait);
1498
1496 if (sk->sk_state == IUCV_LISTEN) 1499 if (sk->sk_state == IUCV_LISTEN)
1497 return iucv_accept_poll(sk); 1500 return iucv_accept_poll(sk);
1498 1501
@@ -2385,7 +2388,7 @@ static const struct proto_ops iucv_sock_ops = {
2385 .getname = iucv_sock_getname, 2388 .getname = iucv_sock_getname,
2386 .sendmsg = iucv_sock_sendmsg, 2389 .sendmsg = iucv_sock_sendmsg,
2387 .recvmsg = iucv_sock_recvmsg, 2390 .recvmsg = iucv_sock_recvmsg,
2388 .poll_mask = iucv_sock_poll_mask, 2391 .poll = iucv_sock_poll,
2389 .ioctl = sock_no_ioctl, 2392 .ioctl = sock_no_ioctl,
2390 .mmap = sock_no_mmap, 2393 .mmap = sock_no_mmap,
2391 .socketpair = sock_no_socketpair, 2394 .socketpair = sock_no_socketpair,
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 84b7d5c6fec8..d3601d421571 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1336,9 +1336,9 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
1336 struct list_head *head; 1336 struct list_head *head;
1337 int index = 0; 1337 int index = 0;
1338 1338
1339 /* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state, 1339 /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
1340 * so we set sk_state, otherwise epoll_wait always returns right away 1340 * we set sk_state, otherwise epoll_wait always returns right away with
1341 * with EPOLLHUP 1341 * EPOLLHUP
1342 */ 1342 */
1343 kcm->sk.sk_state = TCP_ESTABLISHED; 1343 kcm->sk.sk_state = TCP_ESTABLISHED;
1344 1344
@@ -1903,7 +1903,7 @@ static const struct proto_ops kcm_dgram_ops = {
1903 .socketpair = sock_no_socketpair, 1903 .socketpair = sock_no_socketpair,
1904 .accept = sock_no_accept, 1904 .accept = sock_no_accept,
1905 .getname = sock_no_getname, 1905 .getname = sock_no_getname,
1906 .poll_mask = datagram_poll_mask, 1906 .poll = datagram_poll,
1907 .ioctl = kcm_ioctl, 1907 .ioctl = kcm_ioctl,
1908 .listen = sock_no_listen, 1908 .listen = sock_no_listen,
1909 .shutdown = sock_no_shutdown, 1909 .shutdown = sock_no_shutdown,
@@ -1924,7 +1924,7 @@ static const struct proto_ops kcm_seqpacket_ops = {
1924 .socketpair = sock_no_socketpair, 1924 .socketpair = sock_no_socketpair,
1925 .accept = sock_no_accept, 1925 .accept = sock_no_accept,
1926 .getname = sock_no_getname, 1926 .getname = sock_no_getname,
1927 .poll_mask = datagram_poll_mask, 1927 .poll = datagram_poll,
1928 .ioctl = kcm_ioctl, 1928 .ioctl = kcm_ioctl,
1929 .listen = sock_no_listen, 1929 .listen = sock_no_listen,
1930 .shutdown = sock_no_shutdown, 1930 .shutdown = sock_no_shutdown,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 8bdc1cbe490a..5e1d2946ffbf 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3751,7 +3751,7 @@ static const struct proto_ops pfkey_ops = {
3751 3751
3752 /* Now the operations that really occur. */ 3752 /* Now the operations that really occur. */
3753 .release = pfkey_release, 3753 .release = pfkey_release,
3754 .poll_mask = datagram_poll_mask, 3754 .poll = datagram_poll,
3755 .sendmsg = pfkey_sendmsg, 3755 .sendmsg = pfkey_sendmsg,
3756 .recvmsg = pfkey_recvmsg, 3756 .recvmsg = pfkey_recvmsg,
3757}; 3757};
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 181073bf6925..a9c05b2bc1b0 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -613,7 +613,7 @@ static const struct proto_ops l2tp_ip_ops = {
613 .socketpair = sock_no_socketpair, 613 .socketpair = sock_no_socketpair,
614 .accept = sock_no_accept, 614 .accept = sock_no_accept,
615 .getname = l2tp_ip_getname, 615 .getname = l2tp_ip_getname,
616 .poll_mask = datagram_poll_mask, 616 .poll = datagram_poll,
617 .ioctl = inet_ioctl, 617 .ioctl = inet_ioctl,
618 .listen = sock_no_listen, 618 .listen = sock_no_listen,
619 .shutdown = inet_shutdown, 619 .shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 336e4c00abbc..957369192ca1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -754,7 +754,7 @@ static const struct proto_ops l2tp_ip6_ops = {
754 .socketpair = sock_no_socketpair, 754 .socketpair = sock_no_socketpair,
755 .accept = sock_no_accept, 755 .accept = sock_no_accept,
756 .getname = l2tp_ip6_getname, 756 .getname = l2tp_ip6_getname,
757 .poll_mask = datagram_poll_mask, 757 .poll = datagram_poll,
758 .ioctl = inet6_ioctl, 758 .ioctl = inet6_ioctl,
759 .listen = sock_no_listen, 759 .listen = sock_no_listen,
760 .shutdown = inet_shutdown, 760 .shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 6616c9fd292f..5b9900889e31 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -553,6 +553,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
553 goto out_tunnel; 553 goto out_tunnel;
554 } 554 }
555 555
556 /* L2TPv2 only accepts PPP pseudo-wires */
557 if (tunnel->version == 2 && cfg.pw_type != L2TP_PWTYPE_PPP) {
558 ret = -EPROTONOSUPPORT;
559 goto out_tunnel;
560 }
561
556 if (tunnel->version > 2) { 562 if (tunnel->version > 2) {
557 if (info->attrs[L2TP_ATTR_DATA_SEQ]) 563 if (info->attrs[L2TP_ATTR_DATA_SEQ])
558 cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]); 564 cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index b56cb1df4fc0..e398797878a9 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -612,6 +612,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
612 u32 session_id, peer_session_id; 612 u32 session_id, peer_session_id;
613 bool drop_refcnt = false; 613 bool drop_refcnt = false;
614 bool drop_tunnel = false; 614 bool drop_tunnel = false;
615 bool new_session = false;
616 bool new_tunnel = false;
615 int ver = 2; 617 int ver = 2;
616 int fd; 618 int fd;
617 619
@@ -701,6 +703,15 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
701 .encap = L2TP_ENCAPTYPE_UDP, 703 .encap = L2TP_ENCAPTYPE_UDP,
702 .debug = 0, 704 .debug = 0,
703 }; 705 };
706
707 /* Prevent l2tp_tunnel_register() from trying to set up
708 * a kernel socket.
709 */
710 if (fd < 0) {
711 error = -EBADF;
712 goto end;
713 }
714
704 error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel); 715 error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
705 if (error < 0) 716 if (error < 0)
706 goto end; 717 goto end;
@@ -713,6 +724,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
713 goto end; 724 goto end;
714 } 725 }
715 drop_tunnel = true; 726 drop_tunnel = true;
727 new_tunnel = true;
716 } 728 }
717 } else { 729 } else {
718 /* Error if we can't find the tunnel */ 730 /* Error if we can't find the tunnel */
@@ -734,6 +746,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
734 session = l2tp_session_get(sock_net(sk), tunnel, session_id); 746 session = l2tp_session_get(sock_net(sk), tunnel, session_id);
735 if (session) { 747 if (session) {
736 drop_refcnt = true; 748 drop_refcnt = true;
749
750 if (session->pwtype != L2TP_PWTYPE_PPP) {
751 error = -EPROTOTYPE;
752 goto end;
753 }
754
737 ps = l2tp_session_priv(session); 755 ps = l2tp_session_priv(session);
738 756
739 /* Using a pre-existing session is fine as long as it hasn't 757 /* Using a pre-existing session is fine as long as it hasn't
@@ -751,6 +769,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
751 /* Default MTU must allow space for UDP/L2TP/PPP headers */ 769 /* Default MTU must allow space for UDP/L2TP/PPP headers */
752 cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; 770 cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
753 cfg.mru = cfg.mtu; 771 cfg.mru = cfg.mtu;
772 cfg.pw_type = L2TP_PWTYPE_PPP;
754 773
755 session = l2tp_session_create(sizeof(struct pppol2tp_session), 774 session = l2tp_session_create(sizeof(struct pppol2tp_session),
756 tunnel, session_id, 775 tunnel, session_id,
@@ -772,6 +791,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
772 goto end; 791 goto end;
773 } 792 }
774 drop_refcnt = true; 793 drop_refcnt = true;
794 new_session = true;
775 } 795 }
776 796
777 /* Special case: if source & dest session_id == 0x0000, this 797 /* Special case: if source & dest session_id == 0x0000, this
@@ -818,6 +838,12 @@ out_no_ppp:
 		  session->name);
 
 end:
+	if (error) {
+		if (new_session)
+			l2tp_session_delete(session);
+		if (new_tunnel)
+			l2tp_tunnel_delete(tunnel);
+	}
 	if (drop_refcnt)
 		l2tp_session_dec_refcount(session);
 	if (drop_tunnel)
@@ -1175,7 +1201,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
1175 l2tp_session_get(sock_net(sk), tunnel, 1201 l2tp_session_get(sock_net(sk), tunnel,
1176 stats.session_id); 1202 stats.session_id);
1177 1203
1178 if (session) { 1204 if (session && session->pwtype == L2TP_PWTYPE_PPP) {
1179 err = pppol2tp_session_ioctl(session, cmd, 1205 err = pppol2tp_session_ioctl(session, cmd,
1180 arg); 1206 arg);
1181 l2tp_session_dec_refcount(session); 1207 l2tp_session_dec_refcount(session);
@@ -1792,7 +1818,7 @@ static const struct proto_ops pppol2tp_ops = {
1792 .socketpair = sock_no_socketpair, 1818 .socketpair = sock_no_socketpair,
1793 .accept = sock_no_accept, 1819 .accept = sock_no_accept,
1794 .getname = pppol2tp_getname, 1820 .getname = pppol2tp_getname,
1795 .poll_mask = datagram_poll_mask, 1821 .poll = datagram_poll,
1796 .listen = sock_no_listen, 1822 .listen = sock_no_listen,
1797 .shutdown = sock_no_shutdown, 1823 .shutdown = sock_no_shutdown,
1798 .setsockopt = pppol2tp_setsockopt, 1824 .setsockopt = pppol2tp_setsockopt,
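The new_session/new_tunnel flags added to pppol2tp_connect() implement a common unwind pattern: remember which objects this call itself created, and on failure tear down only those, leaving anything that already existed untouched. A self-contained sketch of the same pattern, with two malloc'd buffers standing in for the tunnel and session objects (names and structure are illustrative only):

#include <stdlib.h>

static int setup(char **tunnel, char **session, int fail_late)
{
	int new_tunnel = 0, new_session = 0;
	int err = 0;

	if (!*tunnel) {				/* create only if not reusing one */
		*tunnel = malloc(64);
		if (!*tunnel)
			return -1;
		new_tunnel = 1;
	}

	*session = malloc(64);
	if (!*session) {
		err = -1;
		goto out;
	}
	new_session = 1;

	if (fail_late)				/* a later step failed */
		err = -1;
out:
	if (err) {
		if (new_session)
			free(*session);
		if (new_tunnel) {
			free(*tunnel);
			*tunnel = NULL;
		}
	}
	return err;
}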
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 804de8490186..1beeea9549fa 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1192,7 +1192,7 @@ static const struct proto_ops llc_ui_ops = {
1192 .socketpair = sock_no_socketpair, 1192 .socketpair = sock_no_socketpair,
1193 .accept = llc_ui_accept, 1193 .accept = llc_ui_accept,
1194 .getname = llc_ui_getname, 1194 .getname = llc_ui_getname,
1195 .poll_mask = datagram_poll_mask, 1195 .poll = datagram_poll,
1196 .ioctl = llc_ui_ioctl, 1196 .ioctl = llc_ui_ioctl,
1197 .listen = llc_ui_listen, 1197 .listen = llc_ui_listen,
1198 .shutdown = llc_ui_shutdown, 1198 .shutdown = llc_ui_shutdown,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 89178b46b32f..d9558ffb8acf 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1186,7 +1186,7 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
1186 lockdep_assert_held(&local->mtx); 1186 lockdep_assert_held(&local->mtx);
1187 lockdep_assert_held(&local->chanctx_mtx); 1187 lockdep_assert_held(&local->chanctx_mtx);
1188 1188
1189 vif_chsw = kzalloc(sizeof(vif_chsw[0]) * n_vifs, GFP_KERNEL); 1189 vif_chsw = kcalloc(n_vifs, sizeof(vif_chsw[0]), GFP_KERNEL);
1190 if (!vif_chsw) 1190 if (!vif_chsw)
1191 return -ENOMEM; 1191 return -ENOMEM;
1192 1192
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4d2e797e3f16..fb73451ed85e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -772,7 +772,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
772 if (have_mfp) 772 if (have_mfp)
773 n_suites += 4; 773 n_suites += 4;
774 774
775 suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL); 775 suites = kmalloc_array(n_suites, sizeof(u32), GFP_KERNEL);
776 if (!suites) 776 if (!suites)
777 return -ENOMEM; 777 return -ENOMEM;
778 778
@@ -1098,6 +1098,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
1098 1098
1099 ieee80211_led_init(local); 1099 ieee80211_led_init(local);
1100 1100
1101 result = ieee80211_txq_setup_flows(local);
1102 if (result)
1103 goto fail_flows;
1104
1101 rtnl_lock(); 1105 rtnl_lock();
1102 1106
1103 result = ieee80211_init_rate_ctrl_alg(local, 1107 result = ieee80211_init_rate_ctrl_alg(local,
@@ -1120,10 +1124,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
1120 1124
1121 rtnl_unlock(); 1125 rtnl_unlock();
1122 1126
1123 result = ieee80211_txq_setup_flows(local);
1124 if (result)
1125 goto fail_flows;
1126
1127#ifdef CONFIG_INET 1127#ifdef CONFIG_INET
1128 local->ifa_notifier.notifier_call = ieee80211_ifa_changed; 1128 local->ifa_notifier.notifier_call = ieee80211_ifa_changed;
1129 result = register_inetaddr_notifier(&local->ifa_notifier); 1129 result = register_inetaddr_notifier(&local->ifa_notifier);
@@ -1149,8 +1149,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
1149#if defined(CONFIG_INET) || defined(CONFIG_IPV6) 1149#if defined(CONFIG_INET) || defined(CONFIG_IPV6)
1150 fail_ifa: 1150 fail_ifa:
1151#endif 1151#endif
1152 ieee80211_txq_teardown_flows(local);
1153 fail_flows:
1154 rtnl_lock(); 1152 rtnl_lock();
1155 rate_control_deinitialize(local); 1153 rate_control_deinitialize(local);
1156 ieee80211_remove_interfaces(local); 1154 ieee80211_remove_interfaces(local);
@@ -1158,6 +1156,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
1158 rtnl_unlock(); 1156 rtnl_unlock();
1159 ieee80211_led_exit(local); 1157 ieee80211_led_exit(local);
1160 ieee80211_wep_free(local); 1158 ieee80211_wep_free(local);
1159 ieee80211_txq_teardown_flows(local);
1160 fail_flows:
1161 destroy_workqueue(local->workqueue); 1161 destroy_workqueue(local->workqueue);
1162 fail_workqueue: 1162 fail_workqueue:
1163 wiphy_unregister(local->hw.wiphy); 1163 wiphy_unregister(local->hw.wiphy);
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 8221bc5582ab..76048b53c5b2 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -592,11 +592,11 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
592 max_rates = sband->n_bitrates; 592 max_rates = sband->n_bitrates;
593 } 593 }
594 594
595 mi->r = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp); 595 mi->r = kcalloc(max_rates, sizeof(struct minstrel_rate), gfp);
596 if (!mi->r) 596 if (!mi->r)
597 goto error; 597 goto error;
598 598
599 mi->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp); 599 mi->sample_table = kmalloc_array(max_rates, SAMPLE_COLUMNS, gfp);
600 if (!mi->sample_table) 600 if (!mi->sample_table)
601 goto error1; 601 goto error1;
602 602
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index fb586b6e5d49..67ebdeaffbbc 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1313,11 +1313,11 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
1313 if (!msp) 1313 if (!msp)
1314 return NULL; 1314 return NULL;
1315 1315
1316 msp->ratelist = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp); 1316 msp->ratelist = kcalloc(max_rates, sizeof(struct minstrel_rate), gfp);
1317 if (!msp->ratelist) 1317 if (!msp->ratelist)
1318 goto error; 1318 goto error;
1319 1319
1320 msp->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp); 1320 msp->sample_table = kmalloc_array(max_rates, SAMPLE_COLUMNS, gfp);
1321 if (!msp->sample_table) 1321 if (!msp->sample_table)
1322 goto error1; 1322 goto error1;
1323 1323
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index a3b1bcc2b461..2e917a6d239d 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -1157,7 +1157,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
1157 } 1157 }
1158 } 1158 }
1159 1159
1160 ie = kzalloc(num_bands * iebufsz, GFP_KERNEL); 1160 ie = kcalloc(iebufsz, num_bands, GFP_KERNEL);
1161 if (!ie) { 1161 if (!ie) {
1162 ret = -ENOMEM; 1162 ret = -ENOMEM;
1163 goto out; 1163 goto out;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 44b5dfe8727d..fa1f1e63a264 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4845,7 +4845,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
4845 skb_reset_network_header(skb); 4845 skb_reset_network_header(skb);
4846 skb_reset_mac_header(skb); 4846 skb_reset_mac_header(skb);
4847 4847
4848 local_bh_disable();
4848 __ieee80211_subif_start_xmit(skb, skb->dev, flags); 4849 __ieee80211_subif_start_xmit(skb, skb->dev, flags);
4850 local_bh_enable();
4849 4851
4850 return 0; 4852 return 0;
4851} 4853}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 2d82c88efd0b..5e2e511c4a6f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1803,8 +1803,9 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata)
1803 if (WARN_ON(res)) 1803 if (WARN_ON(res))
1804 return res; 1804 return res;
1805 1805
1806 funcs = kzalloc((sdata->local->hw.max_nan_de_entries + 1) * 1806 funcs = kcalloc(sdata->local->hw.max_nan_de_entries + 1,
1807 sizeof(*funcs), GFP_KERNEL); 1807 sizeof(*funcs),
1808 GFP_KERNEL);
1808 if (!funcs) 1809 if (!funcs)
1809 return -ENOMEM; 1810 return -ENOMEM;
1810 1811
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index e7b05de1e6d1..25e483e8278b 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -73,8 +73,8 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
73 ncm->data[2] = data; 73 ncm->data[2] = data;
74 ncm->data[4] = ntohl(lsc->oem_status); 74 ncm->data[4] = ntohl(lsc->oem_status);
75 75
76 netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n", 76 netdev_dbg(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
77 nc->id, data & 0x1 ? "up" : "down"); 77 nc->id, data & 0x1 ? "up" : "down");
78 78
79 chained = !list_empty(&nc->link); 79 chained = !list_empty(&nc->link);
80 state = nc->state; 80 state = nc->state;
@@ -148,9 +148,9 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
148 hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; 148 hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
149 ncm->data[3] = ntohl(hncdsc->status); 149 ncm->data[3] = ntohl(hncdsc->status);
150 spin_unlock_irqrestore(&nc->lock, flags); 150 spin_unlock_irqrestore(&nc->lock, flags);
151 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 151 netdev_dbg(ndp->ndev.dev,
152 "NCSI: host driver %srunning on channel %u\n", 152 "NCSI: host driver %srunning on channel %u\n",
153 ncm->data[3] & 0x1 ? "" : "not ", nc->id); 153 ncm->data[3] & 0x1 ? "" : "not ", nc->id);
154 154
155 return 0; 155 return 0;
156} 156}
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 5561e221b71f..091284760d21 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -788,8 +788,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
788 } 788 }
789 break; 789 break;
790 case ncsi_dev_state_config_done: 790 case ncsi_dev_state_config_done:
791 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 791 netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n",
792 "NCSI: channel %u config done\n", nc->id); 792 nc->id);
793 spin_lock_irqsave(&nc->lock, flags); 793 spin_lock_irqsave(&nc->lock, flags);
794 if (nc->reconfigure_needed) { 794 if (nc->reconfigure_needed) {
795 /* This channel's configuration has been updated 795 /* This channel's configuration has been updated
@@ -804,8 +804,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
804 list_add_tail_rcu(&nc->link, &ndp->channel_queue); 804 list_add_tail_rcu(&nc->link, &ndp->channel_queue);
805 spin_unlock_irqrestore(&ndp->lock, flags); 805 spin_unlock_irqrestore(&ndp->lock, flags);
806 806
807 netdev_printk(KERN_DEBUG, dev, 807 netdev_dbg(dev, "Dirty NCSI channel state reset\n");
808 "Dirty NCSI channel state reset\n");
809 ncsi_process_next_channel(ndp); 808 ncsi_process_next_channel(ndp);
810 break; 809 break;
811 } 810 }
@@ -816,9 +815,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
816 } else { 815 } else {
817 hot_nc = NULL; 816 hot_nc = NULL;
818 nc->state = NCSI_CHANNEL_INACTIVE; 817 nc->state = NCSI_CHANNEL_INACTIVE;
819 netdev_warn(ndp->ndev.dev, 818 netdev_dbg(ndp->ndev.dev,
820 "NCSI: channel %u link down after config\n", 819 "NCSI: channel %u link down after config\n",
821 nc->id); 820 nc->id);
822 } 821 }
823 spin_unlock_irqrestore(&nc->lock, flags); 822 spin_unlock_irqrestore(&nc->lock, flags);
824 823
@@ -908,9 +907,9 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
908 } 907 }
909 908
910 ncm = &found->modes[NCSI_MODE_LINK]; 909 ncm = &found->modes[NCSI_MODE_LINK];
911 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 910 netdev_dbg(ndp->ndev.dev,
912 "NCSI: Channel %u added to queue (link %s)\n", 911 "NCSI: Channel %u added to queue (link %s)\n",
913 found->id, ncm->data[2] & 0x1 ? "up" : "down"); 912 found->id, ncm->data[2] & 0x1 ? "up" : "down");
914 913
915out: 914out:
916 spin_lock_irqsave(&ndp->lock, flags); 915 spin_lock_irqsave(&ndp->lock, flags);
@@ -1199,14 +1198,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
1199 switch (old_state) { 1198 switch (old_state) {
1200 case NCSI_CHANNEL_INACTIVE: 1199 case NCSI_CHANNEL_INACTIVE:
1201 ndp->ndev.state = ncsi_dev_state_config; 1200 ndp->ndev.state = ncsi_dev_state_config;
1202 netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n", 1201 netdev_dbg(ndp->ndev.dev, "NCSI: configuring channel %u\n",
1203 nc->id); 1202 nc->id);
1204 ncsi_configure_channel(ndp); 1203 ncsi_configure_channel(ndp);
1205 break; 1204 break;
1206 case NCSI_CHANNEL_ACTIVE: 1205 case NCSI_CHANNEL_ACTIVE:
1207 ndp->ndev.state = ncsi_dev_state_suspend; 1206 ndp->ndev.state = ncsi_dev_state_suspend;
1208 netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n", 1207 netdev_dbg(ndp->ndev.dev, "NCSI: suspending channel %u\n",
1209 nc->id); 1208 nc->id);
1210 ncsi_suspend_channel(ndp); 1209 ncsi_suspend_channel(ndp);
1211 break; 1210 break;
1212 default: 1211 default:
@@ -1226,8 +1225,6 @@ out:
1226 return ncsi_choose_active_channel(ndp); 1225 return ncsi_choose_active_channel(ndp);
1227 } 1226 }
1228 1227
1229 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
1230 "NCSI: No more channels to process\n");
1231 ncsi_report_link(ndp, false); 1228 ncsi_report_link(ndp, false);
1232 return -ENODEV; 1229 return -ENODEV;
1233} 1230}
@@ -1318,9 +1315,9 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
1318 if ((ndp->ndev.state & 0xff00) == 1315 if ((ndp->ndev.state & 0xff00) ==
1319 ncsi_dev_state_config || 1316 ncsi_dev_state_config ||
1320 !list_empty(&nc->link)) { 1317 !list_empty(&nc->link)) {
1321 netdev_printk(KERN_DEBUG, nd->dev, 1318 netdev_dbg(nd->dev,
1322 "NCSI: channel %p marked dirty\n", 1319 "NCSI: channel %p marked dirty\n",
1323 nc); 1320 nc);
1324 nc->reconfigure_needed = true; 1321 nc->reconfigure_needed = true;
1325 } 1322 }
1326 spin_unlock_irqrestore(&nc->lock, flags); 1323 spin_unlock_irqrestore(&nc->lock, flags);
@@ -1338,8 +1335,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
1338 list_add_tail_rcu(&nc->link, &ndp->channel_queue); 1335 list_add_tail_rcu(&nc->link, &ndp->channel_queue);
1339 spin_unlock_irqrestore(&ndp->lock, flags); 1336 spin_unlock_irqrestore(&ndp->lock, flags);
1340 1337
1341 netdev_printk(KERN_DEBUG, nd->dev, 1338 netdev_dbg(nd->dev, "NCSI: kicked channel %p\n", nc);
1342 "NCSI: kicked channel %p\n", nc);
1343 n++; 1339 n++;
1344 } 1340 }
1345 } 1341 }
@@ -1370,8 +1366,8 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
1370 list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { 1366 list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
1371 n_vids++; 1367 n_vids++;
1372 if (vlan->vid == vid) { 1368 if (vlan->vid == vid) {
1373 netdev_printk(KERN_DEBUG, dev, 1369 netdev_dbg(dev, "NCSI: vid %u already registered\n",
1374 "NCSI: vid %u already registered\n", vid); 1370 vid);
1375 return 0; 1371 return 0;
1376 } 1372 }
1377 } 1373 }
@@ -1390,7 +1386,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
1390 vlan->vid = vid; 1386 vlan->vid = vid;
1391 list_add_rcu(&vlan->list, &ndp->vlan_vids); 1387 list_add_rcu(&vlan->list, &ndp->vlan_vids);
1392 1388
1393 netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid); 1389 netdev_dbg(dev, "NCSI: Added new vid %u\n", vid);
1394 1390
1395 found = ncsi_kick_channels(ndp) != 0; 1391 found = ncsi_kick_channels(ndp) != 0;
1396 1392
@@ -1419,8 +1415,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
1419 /* Remove the VLAN id from our internal list */ 1415 /* Remove the VLAN id from our internal list */
1420 list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) 1416 list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
1421 if (vlan->vid == vid) { 1417 if (vlan->vid == vid) {
1422 netdev_printk(KERN_DEBUG, dev, 1418 netdev_dbg(dev, "NCSI: vid %u found, removing\n", vid);
1423 "NCSI: vid %u found, removing\n", vid);
1424 list_del_rcu(&vlan->list); 1419 list_del_rcu(&vlan->list);
1425 found = true; 1420 found = true;
1426 kfree(vlan); 1421 kfree(vlan);
@@ -1547,7 +1542,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
1547 } 1542 }
1548 } 1543 }
1549 1544
1550 netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n"); 1545 netdev_dbg(ndp->ndev.dev, "NCSI: Stopping device\n");
1551 ncsi_report_link(ndp, true); 1546 ncsi_report_link(ndp, true);
1552} 1547}
1553EXPORT_SYMBOL_GPL(ncsi_stop_dev); 1548EXPORT_SYMBOL_GPL(ncsi_stop_dev);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index bbad940c0137..8a33dac4e805 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1234,7 +1234,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1234 pr_debug("Create set %s with family %s\n", 1234 pr_debug("Create set %s with family %s\n",
1235 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); 1235 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
1236 1236
1237#ifndef IP_SET_PROTO_UNDEF 1237#ifdef IP_SET_PROTO_UNDEF
1238 if (set->family != NFPROTO_UNSPEC)
1239 return -IPSET_ERR_INVALID_FAMILY;
1240#else
1238 if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) 1241 if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
1239 return -IPSET_ERR_INVALID_FAMILY; 1242 return -IPSET_ERR_INVALID_FAMILY;
1240#endif 1243#endif
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 61c3a389da89..99e0aa350dc5 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1380,7 +1380,8 @@ int __init ip_vs_conn_init(void)
1380 /* 1380 /*
1381 * Allocate the connection hash table and initialize its list heads 1381 * Allocate the connection hash table and initialize its list heads
1382 */ 1382 */
1383 ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * sizeof(*ip_vs_conn_tab)); 1383 ip_vs_conn_tab = vmalloc(array_size(ip_vs_conn_tab_size,
1384 sizeof(*ip_vs_conn_tab)));
1384 if (!ip_vs_conn_tab) 1385 if (!ip_vs_conn_tab)
1385 return -ENOMEM; 1386 return -ENOMEM;
1386 1387
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 0c03c0e16a96..dd21782e2f12 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -839,6 +839,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
839 * For now only for NAT! 839 * For now only for NAT!
840 */ 840 */
841 ip_vs_rs_hash(ipvs, dest); 841 ip_vs_rs_hash(ipvs, dest);
842 /* FTP-NAT requires conntrack for mangling */
843 if (svc->port == FTPPORT)
844 ip_vs_register_conntrack(svc);
842 } 845 }
843 atomic_set(&dest->conn_flags, conn_flags); 846 atomic_set(&dest->conn_flags, conn_flags);
844 847
@@ -1462,6 +1465,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1462 */ 1465 */
1463static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) 1466static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
1464{ 1467{
1468 ip_vs_unregister_conntrack(svc);
1465 /* Hold svc to avoid double release from dest_trash */ 1469 /* Hold svc to avoid double release from dest_trash */
1466 atomic_inc(&svc->refcnt); 1470 atomic_inc(&svc->refcnt);
1467 /* 1471 /*
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index ba0a0fd045c8..473cce2a5231 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -168,7 +168,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
168 bool new_rt_is_local) 168 bool new_rt_is_local)
169{ 169{
170 bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); 170 bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
171 bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); 171 bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_NON_LOCAL);
172 bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR); 172 bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR);
173 bool source_is_loopback; 173 bool source_is_loopback;
174 bool old_rt_is_local; 174 bool old_rt_is_local;
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 3b5059a8dcdd..510039862aa9 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -46,6 +46,9 @@
46struct nf_conncount_tuple { 46struct nf_conncount_tuple {
47 struct hlist_node node; 47 struct hlist_node node;
48 struct nf_conntrack_tuple tuple; 48 struct nf_conntrack_tuple tuple;
49 struct nf_conntrack_zone zone;
50 int cpu;
51 u32 jiffies32;
49}; 52};
50 53
51struct nf_conncount_rb { 54struct nf_conncount_rb {
@@ -80,7 +83,8 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
80} 83}
81 84
82bool nf_conncount_add(struct hlist_head *head, 85bool nf_conncount_add(struct hlist_head *head,
83 const struct nf_conntrack_tuple *tuple) 86 const struct nf_conntrack_tuple *tuple,
87 const struct nf_conntrack_zone *zone)
84{ 88{
85 struct nf_conncount_tuple *conn; 89 struct nf_conncount_tuple *conn;
86 90
@@ -88,11 +92,43 @@ bool nf_conncount_add(struct hlist_head *head,
88 if (conn == NULL) 92 if (conn == NULL)
89 return false; 93 return false;
90 conn->tuple = *tuple; 94 conn->tuple = *tuple;
95 conn->zone = *zone;
96 conn->cpu = raw_smp_processor_id();
97 conn->jiffies32 = (u32)jiffies;
91 hlist_add_head(&conn->node, head); 98 hlist_add_head(&conn->node, head);
92 return true; 99 return true;
93} 100}
94EXPORT_SYMBOL_GPL(nf_conncount_add); 101EXPORT_SYMBOL_GPL(nf_conncount_add);
95 102
103static const struct nf_conntrack_tuple_hash *
104find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
105{
106 const struct nf_conntrack_tuple_hash *found;
107 unsigned long a, b;
108 int cpu = raw_smp_processor_id();
109 __s32 age;
110
111 found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
112 if (found)
113 return found;
114 b = conn->jiffies32;
115 a = (u32)jiffies;
116
117 /* conn might have been added just before by another cpu and
118 * might still be unconfirmed. In this case, nf_conntrack_find()
119 * returns no result. Thus only evict if this cpu added the
120 * stale entry or if the entry is older than two jiffies.
121 */
122 age = a - b;
123 if (conn->cpu == cpu || age >= 2) {
124 hlist_del(&conn->node);
125 kmem_cache_free(conncount_conn_cachep, conn);
126 return ERR_PTR(-ENOENT);
127 }
128
129 return ERR_PTR(-EAGAIN);
130}
131
96unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, 132unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
97 const struct nf_conntrack_tuple *tuple, 133 const struct nf_conntrack_tuple *tuple,
98 const struct nf_conntrack_zone *zone, 134 const struct nf_conntrack_zone *zone,
@@ -100,24 +136,34 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
100{ 136{
101 const struct nf_conntrack_tuple_hash *found; 137 const struct nf_conntrack_tuple_hash *found;
102 struct nf_conncount_tuple *conn; 138 struct nf_conncount_tuple *conn;
103 struct hlist_node *n;
104 struct nf_conn *found_ct; 139 struct nf_conn *found_ct;
140 struct hlist_node *n;
105 unsigned int length = 0; 141 unsigned int length = 0;
106 142
107 *addit = tuple ? true : false; 143 *addit = tuple ? true : false;
108 144
109 /* check the saved connections */ 145 /* check the saved connections */
110 hlist_for_each_entry_safe(conn, n, head, node) { 146 hlist_for_each_entry_safe(conn, n, head, node) {
111 found = nf_conntrack_find_get(net, zone, &conn->tuple); 147 found = find_or_evict(net, conn);
112 if (found == NULL) { 148 if (IS_ERR(found)) {
113 hlist_del(&conn->node); 149 /* Not found, but might be about to be confirmed */
114 kmem_cache_free(conncount_conn_cachep, conn); 150 if (PTR_ERR(found) == -EAGAIN) {
151 length++;
152 if (!tuple)
153 continue;
154
155 if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
156 nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
157 nf_ct_zone_id(zone, zone->dir))
158 *addit = false;
159 }
115 continue; 160 continue;
116 } 161 }
117 162
118 found_ct = nf_ct_tuplehash_to_ctrack(found); 163 found_ct = nf_ct_tuplehash_to_ctrack(found);
119 164
120 if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple)) { 165 if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
166 nf_ct_zone_equal(found_ct, zone, zone->dir)) {
121 /* 167 /*
122 * Just to be sure we have it only once in the list. 168 * Just to be sure we have it only once in the list.
123 * We should not see tuples twice unless someone hooks 169 * We should not see tuples twice unless someone hooks
@@ -196,7 +242,7 @@ count_tree(struct net *net, struct rb_root *root,
196 if (!addit) 242 if (!addit)
197 return count; 243 return count;
198 244
199 if (!nf_conncount_add(&rbconn->hhead, tuple)) 245 if (!nf_conncount_add(&rbconn->hhead, tuple, zone))
200 return 0; /* hotdrop */ 246 return 0; /* hotdrop */
201 247
202 return count + 1; 248 return count + 1;
@@ -238,6 +284,7 @@ count_tree(struct net *net, struct rb_root *root,
238 } 284 }
239 285
240 conn->tuple = *tuple; 286 conn->tuple = *tuple;
287 conn->zone = *zone;
241 memcpy(rbconn->key, key, sizeof(u32) * keylen); 288 memcpy(rbconn->key, key, sizeof(u32) * keylen);
242 289
243 INIT_HLIST_HEAD(&rbconn->hhead); 290 INIT_HLIST_HEAD(&rbconn->hhead);
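The nf_conncount changes above key each saved tuple to its conntrack zone and record the adding CPU plus a 32-bit jiffies stamp, so a failed nf_conntrack_find_get() can distinguish an entry another CPU has not confirmed yet (still counted, -EAGAIN) from a stale one that should be evicted. The staleness test takes the unsigned difference of the two stamps and interprets it as signed, which stays correct across jiffies wraparound; a standalone illustration of just that comparison (plain C, tick values made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the hunk's "evict if at least two ticks old" test. */
    static int is_stale(uint32_t now, uint32_t stamp)
    {
            int32_t age = (int32_t)(now - stamp);   /* wrap-safe difference */

            return age >= 2;
    }

    int main(void)
    {
            uint32_t stamp = 0xfffffffeu;   /* taken just before the counter wraps */

            printf("%d\n", is_stale(0xffffffffu, stamp));   /* 0: one tick old */
            printf("%d\n", is_stale(0x00000001u, stamp));   /* 1: three ticks old */
            return 0;
    }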
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 551a1eddf0fa..a75b11c39312 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -465,6 +465,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
465 465
466 nf_ct_expect_iterate_destroy(expect_iter_me, NULL); 466 nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
467 nf_ct_iterate_destroy(unhelp, me); 467 nf_ct_iterate_destroy(unhelp, me);
468
469 /* Maybe someone has gotten the helper already when unhelp above.
470 * So need to wait it.
471 */
472 synchronize_rcu();
468} 473}
469EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); 474EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
470 475
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 39327a42879f..20a2e37c76d1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1446,7 +1446,8 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
1446 } 1446 }
1447 nfnl_lock(NFNL_SUBSYS_CTNETLINK); 1447 nfnl_lock(NFNL_SUBSYS_CTNETLINK);
1448 rcu_read_lock(); 1448 rcu_read_lock();
1449 if (nat_hook->parse_nat_setup) 1449 nat_hook = rcu_dereference(nf_nat_hook);
1450 if (nat_hook)
1450 return -EAGAIN; 1451 return -EAGAIN;
1451#endif 1452#endif
1452 return -EOPNOTSUPP; 1453 return -EOPNOTSUPP;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index afdeca53e88b..d88841fbc560 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -402,7 +402,8 @@ int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
402 struct nf_conntrack_l4proto __rcu **proto_array; 402 struct nf_conntrack_l4proto __rcu **proto_array;
403 int i; 403 int i;
404 404
405 proto_array = kmalloc(MAX_NF_CT_PROTO * 405 proto_array =
406 kmalloc_array(MAX_NF_CT_PROTO,
406 sizeof(struct nf_conntrack_l4proto *), 407 sizeof(struct nf_conntrack_l4proto *),
407 GFP_KERNEL); 408 GFP_KERNEL);
408 if (proto_array == NULL) { 409 if (proto_array == NULL) {
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 426457047578..a61d6df6e5f6 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -424,6 +424,10 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
424 if (write) { 424 if (write) {
425 struct ctl_table tmp = *table; 425 struct ctl_table tmp = *table;
426 426
427 /* proc_dostring() can append to existing strings, so we need to
428 * initialize it as an empty string.
429 */
430 buf[0] = '\0';
427 tmp.data = buf; 431 tmp.data = buf;
428 r = proc_dostring(&tmp, write, buffer, lenp, ppos); 432 r = proc_dostring(&tmp, write, buffer, lenp, ppos);
429 if (r) 433 if (r)
@@ -442,14 +446,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
442 rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); 446 rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
443 mutex_unlock(&nf_log_mutex); 447 mutex_unlock(&nf_log_mutex);
444 } else { 448 } else {
449 struct ctl_table tmp = *table;
450
451 tmp.data = buf;
445 mutex_lock(&nf_log_mutex); 452 mutex_lock(&nf_log_mutex);
446 logger = nft_log_dereference(net->nf.nf_loggers[tindex]); 453 logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
447 if (!logger) 454 if (!logger)
448 table->data = "NONE"; 455 strlcpy(buf, "NONE", sizeof(buf));
449 else 456 else
450 table->data = logger->name; 457 strlcpy(buf, logger->name, sizeof(buf));
451 r = proc_dostring(table, write, buffer, lenp, ppos);
452 mutex_unlock(&nf_log_mutex); 458 mutex_unlock(&nf_log_mutex);
459 r = proc_dostring(&tmp, write, buffer, lenp, ppos);
453 } 460 }
454 461
455 return r; 462 return r;
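Two things change in the nf_log sysctl handler: the write path now clears its scratch buffer first because proc_dostring() appends to whatever the buffer already contains, and the read path snapshots the logger name into that same on-stack buffer while holding nf_log_mutex, then performs the userspace copy via proc_dostring() only after dropping the mutex, so a blocking access to the user buffer no longer pins the mutex for everyone else. A condensed sketch of the read side under those assumptions (generic lock, logger lookup elided, 64 standing in for the real name length):

    #include <linux/mutex.h>
    #include <linux/string.h>
    #include <linux/sysctl.h>

    static DEFINE_MUTEX(demo_mutex);
    static const char *demo_logger_name;    /* protected by demo_mutex */

    static int demo_dostring(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos)
    {
            struct ctl_table tmp = *table;  /* shallow copy so .data can be swapped */
            char buf[64];                   /* stand-in for the logger name length */

            tmp.data = buf;

            /* Snapshot the current value while holding the lock ... */
            mutex_lock(&demo_mutex);
            strlcpy(buf, demo_logger_name ? demo_logger_name : "NONE", sizeof(buf));
            mutex_unlock(&demo_mutex);

            /* ... and do the (possibly faulting) userspace copy without it. */
            return proc_dostring(&tmp, write, buffer, lenp, ppos);
    }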
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b7df32a56e7e..46f9df99d276 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -691,8 +691,9 @@ int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
691 691
692 mutex_lock(&nf_nat_proto_mutex); 692 mutex_lock(&nf_nat_proto_mutex);
693 if (nf_nat_l4protos[l3proto] == NULL) { 693 if (nf_nat_l4protos[l3proto] == NULL) {
694 l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *), 694 l4protos = kmalloc_array(IPPROTO_MAX,
695 GFP_KERNEL); 695 sizeof(struct nf_nat_l4proto *),
696 GFP_KERNEL);
696 if (l4protos == NULL) { 697 if (l4protos == NULL) {
697 ret = -ENOMEM; 698 ret = -ENOMEM;
698 goto out; 699 goto out;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ca4c4d994ddb..896d4a36081d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2890,12 +2890,13 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
2890 u32 id = ntohl(nla_get_be32(nla)); 2890 u32 id = ntohl(nla_get_be32(nla));
2891 2891
2892 list_for_each_entry(trans, &net->nft.commit_list, list) { 2892 list_for_each_entry(trans, &net->nft.commit_list, list) {
2893 struct nft_set *set = nft_trans_set(trans); 2893 if (trans->msg_type == NFT_MSG_NEWSET) {
2894 struct nft_set *set = nft_trans_set(trans);
2894 2895
2895 if (trans->msg_type == NFT_MSG_NEWSET && 2896 if (id == nft_trans_set_id(trans) &&
2896 id == nft_trans_set_id(trans) && 2897 nft_active_genmask(set, genmask))
2897 nft_active_genmask(set, genmask)) 2898 return set;
2898 return set; 2899 }
2899 } 2900 }
2900 return ERR_PTR(-ENOENT); 2901 return ERR_PTR(-ENOENT);
2901} 2902}
@@ -5303,7 +5304,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
5303 if (err < 0) 5304 if (err < 0)
5304 return err; 5305 return err;
5305 5306
5306 ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL); 5307 ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
5307 if (!ops) 5308 if (!ops)
5308 return -ENOMEM; 5309 return -ENOMEM;
5309 5310
@@ -5836,18 +5837,23 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
5836 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 5837 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5837 struct nft_flowtable *flowtable; 5838 struct nft_flowtable *flowtable;
5838 struct nft_table *table; 5839 struct nft_table *table;
5840 struct net *net;
5839 5841
5840 if (event != NETDEV_UNREGISTER) 5842 if (event != NETDEV_UNREGISTER)
5841 return 0; 5843 return 0;
5842 5844
5845 net = maybe_get_net(dev_net(dev));
5846 if (!net)
5847 return 0;
5848
5843 nfnl_lock(NFNL_SUBSYS_NFTABLES); 5849 nfnl_lock(NFNL_SUBSYS_NFTABLES);
5844 list_for_each_entry(table, &dev_net(dev)->nft.tables, list) { 5850 list_for_each_entry(table, &net->nft.tables, list) {
5845 list_for_each_entry(flowtable, &table->flowtables, list) { 5851 list_for_each_entry(flowtable, &table->flowtables, list) {
5846 nft_flowtable_event(event, dev, flowtable); 5852 nft_flowtable_event(event, dev, flowtable);
5847 } 5853 }
5848 } 5854 }
5849 nfnl_unlock(NFNL_SUBSYS_NFTABLES); 5855 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
5850 5856 put_net(net);
5851 return NOTIFY_DONE; 5857 return NOTIFY_DONE;
5852} 5858}
5853 5859
@@ -6438,7 +6444,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
6438 kfree(trans); 6444 kfree(trans);
6439} 6445}
6440 6446
6441static int nf_tables_abort(struct net *net, struct sk_buff *skb) 6447static int __nf_tables_abort(struct net *net)
6442{ 6448{
6443 struct nft_trans *trans, *next; 6449 struct nft_trans *trans, *next;
6444 struct nft_trans_elem *te; 6450 struct nft_trans_elem *te;
@@ -6554,6 +6560,11 @@ static void nf_tables_cleanup(struct net *net)
6554 nft_validate_state_update(net, NFT_VALIDATE_SKIP); 6560 nft_validate_state_update(net, NFT_VALIDATE_SKIP);
6555} 6561}
6556 6562
6563static int nf_tables_abort(struct net *net, struct sk_buff *skb)
6564{
6565 return __nf_tables_abort(net);
6566}
6567
6557static bool nf_tables_valid_genid(struct net *net, u32 genid) 6568static bool nf_tables_valid_genid(struct net *net, u32 genid)
6558{ 6569{
6559 return net->nft.base_seq == genid; 6570 return net->nft.base_seq == genid;
@@ -7148,9 +7159,12 @@ static int __net_init nf_tables_init_net(struct net *net)
7148 7159
7149static void __net_exit nf_tables_exit_net(struct net *net) 7160static void __net_exit nf_tables_exit_net(struct net *net)
7150{ 7161{
7162 nfnl_lock(NFNL_SUBSYS_NFTABLES);
7163 if (!list_empty(&net->nft.commit_list))
7164 __nf_tables_abort(net);
7151 __nft_release_tables(net); 7165 __nft_release_tables(net);
7166 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
7152 WARN_ON_ONCE(!list_empty(&net->nft.tables)); 7167 WARN_ON_ONCE(!list_empty(&net->nft.tables));
7153 WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
7154} 7168}
7155 7169
7156static struct pernet_operations nf_tables_net_ops = { 7170static struct pernet_operations nf_tables_net_ops = {
@@ -7164,8 +7178,8 @@ static int __init nf_tables_module_init(void)
7164 7178
7165 nft_chain_filter_init(); 7179 nft_chain_filter_init();
7166 7180
7167 info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS, 7181 info = kmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info),
7168 GFP_KERNEL); 7182 GFP_KERNEL);
7169 if (info == NULL) { 7183 if (info == NULL) {
7170 err = -ENOMEM; 7184 err = -ENOMEM;
7171 goto err1; 7185 goto err1;
@@ -7192,13 +7206,13 @@ err1:
7192 7206
7193static void __exit nf_tables_module_exit(void) 7207static void __exit nf_tables_module_exit(void)
7194{ 7208{
7195 unregister_pernet_subsys(&nf_tables_net_ops);
7196 nfnetlink_subsys_unregister(&nf_tables_subsys); 7209 nfnetlink_subsys_unregister(&nf_tables_subsys);
7197 unregister_netdevice_notifier(&nf_tables_flowtable_notifier); 7210 unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
7211 nft_chain_filter_fini();
7212 unregister_pernet_subsys(&nf_tables_net_ops);
7198 rcu_barrier(); 7213 rcu_barrier();
7199 nf_tables_core_module_exit(); 7214 nf_tables_core_module_exit();
7200 kfree(info); 7215 kfree(info);
7201 nft_chain_filter_fini();
7202} 7216}
7203 7217
7204module_init(nf_tables_module_init); 7218module_init(nf_tables_module_init);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index deff10adef9c..8de912ca53d3 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -183,7 +183,8 @@ next_rule:
183 183
184 switch (regs.verdict.code) { 184 switch (regs.verdict.code) {
185 case NFT_JUMP: 185 case NFT_JUMP:
186 BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); 186 if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
187 return NF_DROP;
187 jumpstack[stackptr].chain = chain; 188 jumpstack[stackptr].chain = chain;
188 jumpstack[stackptr].rules = rules + 1; 189 jumpstack[stackptr].rules = rules + 1;
189 stackptr++; 190 stackptr++;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 4d0da7042aff..e1b6be29848d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -429,7 +429,7 @@ replay:
429 */ 429 */
430 if (err == -EAGAIN) { 430 if (err == -EAGAIN) {
431 status |= NFNL_BATCH_REPLAY; 431 status |= NFNL_BATCH_REPLAY;
432 goto next; 432 goto done;
433 } 433 }
434 } 434 }
435ack: 435ack:
@@ -456,7 +456,7 @@ ack:
456 if (err) 456 if (err)
457 status |= NFNL_BATCH_FAILURE; 457 status |= NFNL_BATCH_FAILURE;
458 } 458 }
459next: 459
460 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 460 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
461 if (msglen > skb->len) 461 if (msglen > skb->len)
462 msglen = skb->len; 462 msglen = skb->len;
@@ -464,7 +464,11 @@ next:
464 } 464 }
465done: 465done:
466 if (status & NFNL_BATCH_REPLAY) { 466 if (status & NFNL_BATCH_REPLAY) {
467 ss->abort(net, oskb); 467 const struct nfnetlink_subsystem *ss2;
468
469 ss2 = nfnl_dereference_protected(subsys_id);
470 if (ss2 == ss)
471 ss->abort(net, oskb);
468 nfnl_err_reset(&err_list); 472 nfnl_err_reset(&err_list);
469 nfnl_unlock(subsys_id); 473 nfnl_unlock(subsys_id);
470 kfree_skb(skb); 474 kfree_skb(skb);
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index cb5b5f207777..e5d27b2e4eba 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -190,8 +190,9 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
190 if (class_max > NF_CT_MAX_EXPECT_CLASSES) 190 if (class_max > NF_CT_MAX_EXPECT_CLASSES)
191 return -EOVERFLOW; 191 return -EOVERFLOW;
192 192
193 expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * 193 expect_policy = kcalloc(class_max,
194 class_max, GFP_KERNEL); 194 sizeof(struct nf_conntrack_expect_policy),
195 GFP_KERNEL);
195 if (expect_policy == NULL) 196 if (expect_policy == NULL)
196 return -ENOMEM; 197 return -ENOMEM;
197 198
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 4ccd2988f9db..ea4ba551abb2 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1243,6 +1243,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
1243static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { 1243static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
1244 [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, 1244 [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) },
1245 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, 1245 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
1246 [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 },
1247 [NFQA_CFG_MASK] = { .type = NLA_U32 },
1248 [NFQA_CFG_FLAGS] = { .type = NLA_U32 },
1246}; 1249};
1247 1250
1248static const struct nf_queue_handler nfqh = { 1251static const struct nf_queue_handler nfqh = {
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 84c902477a91..d21834bed805 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -318,6 +318,10 @@ static int nf_tables_netdev_event(struct notifier_block *this,
318 event != NETDEV_CHANGENAME) 318 event != NETDEV_CHANGENAME)
319 return NOTIFY_DONE; 319 return NOTIFY_DONE;
320 320
321 ctx.net = maybe_get_net(ctx.net);
322 if (!ctx.net)
323 return NOTIFY_DONE;
324
321 nfnl_lock(NFNL_SUBSYS_NFTABLES); 325 nfnl_lock(NFNL_SUBSYS_NFTABLES);
322 list_for_each_entry(table, &ctx.net->nft.tables, list) { 326 list_for_each_entry(table, &ctx.net->nft.tables, list) {
323 if (table->family != NFPROTO_NETDEV) 327 if (table->family != NFPROTO_NETDEV)
@@ -334,6 +338,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
334 } 338 }
335 } 339 }
336 nfnl_unlock(NFNL_SUBSYS_NFTABLES); 340 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
341 put_net(ctx.net);
337 342
338 return NOTIFY_DONE; 343 return NOTIFY_DONE;
339} 344}
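Both this chain-filter notifier and the flowtable notifier earlier in nf_tables_api.c now pin the namespace with maybe_get_net() before walking its table list and bail out if the refcount has already dropped to zero, which covers the burst of NETDEV_UNREGISTER events generated while a network namespace is dismantled; the pernet exit path then remains the only code tearing that state down. A minimal sketch of the guard (handler body trimmed, names generic):

    #include <linux/netdevice.h>
    #include <linux/notifier.h>
    #include <net/net_namespace.h>

    static int demo_netdev_event(struct notifier_block *nb,
                                 unsigned long event, void *ptr)
    {
            struct net_device *dev = netdev_notifier_info_to_dev(ptr);
            struct net *net;

            if (event != NETDEV_UNREGISTER)
                    return NOTIFY_DONE;

            /* Fails once the namespace refcount is zero, i.e. the netns is
             * already going away and its pernet ->exit will clean up. */
            net = maybe_get_net(dev_net(dev));
            if (!net)
                    return NOTIFY_DONE;

            /* ... walk per-net state under the subsystem lock ... */

            put_net(net);
            return NOTIFY_DONE;
    }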
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 50c068d660e5..a832c59f0a9c 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -52,7 +52,7 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
52 if (!addit) 52 if (!addit)
53 goto out; 53 goto out;
54 54
55 if (!nf_conncount_add(&priv->hhead, tuple_ptr)) { 55 if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) {
56 regs->verdict.code = NF_DROP; 56 regs->verdict.code = NF_DROP;
57 spin_unlock_bh(&priv->lock); 57 spin_unlock_bh(&priv->lock);
58 return; 58 return;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 4d49529cff61..27d7e4598ab6 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -203,9 +203,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
203 goto err1; 203 goto err1;
204 set->ops->gc_init(set); 204 set->ops->gc_init(set);
205 } 205 }
206 206 }
207 } else if (set->flags & NFT_SET_EVAL)
208 return -EINVAL;
209 207
210 nft_set_ext_prepare(&priv->tmpl); 208 nft_set_ext_prepare(&priv->tmpl);
211 nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); 209 nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 7eef1cffbf1b..655187bed5d8 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -111,7 +111,7 @@ static void nft_log_eval(const struct nft_expr *expr,
111 const struct nft_log *priv = nft_expr_priv(expr); 111 const struct nft_log *priv = nft_expr_priv(expr);
112 112
113 if (priv->loginfo.type == NF_LOG_TYPE_LOG && 113 if (priv->loginfo.type == NF_LOG_TYPE_LOG &&
114 priv->loginfo.u.log.level == LOGLEVEL_AUDIT) { 114 priv->loginfo.u.log.level == NFT_LOGLEVEL_AUDIT) {
115 nft_log_eval_audit(pkt); 115 nft_log_eval_audit(pkt);
116 return; 116 return;
117 } 117 }
@@ -166,9 +166,9 @@ static int nft_log_init(const struct nft_ctx *ctx,
166 li->u.log.level = 166 li->u.log.level =
167 ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); 167 ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
168 } else { 168 } else {
169 li->u.log.level = LOGLEVEL_WARNING; 169 li->u.log.level = NFT_LOGLEVEL_WARNING;
170 } 170 }
171 if (li->u.log.level > LOGLEVEL_AUDIT) { 171 if (li->u.log.level > NFT_LOGLEVEL_AUDIT) {
172 err = -EINVAL; 172 err = -EINVAL;
173 goto err1; 173 goto err1;
174 } 174 }
@@ -196,7 +196,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
196 break; 196 break;
197 } 197 }
198 198
199 if (li->u.log.level == LOGLEVEL_AUDIT) 199 if (li->u.log.level == NFT_LOGLEVEL_AUDIT)
200 return 0; 200 return 0;
201 201
202 err = nf_logger_find_get(ctx->family, li->type); 202 err = nf_logger_find_get(ctx->family, li->type);
@@ -220,7 +220,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
220 if (priv->prefix != nft_log_null_prefix) 220 if (priv->prefix != nft_log_null_prefix)
221 kfree(priv->prefix); 221 kfree(priv->prefix);
222 222
223 if (li->u.log.level == LOGLEVEL_AUDIT) 223 if (li->u.log.level == NFT_LOGLEVEL_AUDIT)
224 return; 224 return;
225 225
226 nf_logger_put(ctx->family, li->type); 226 nf_logger_put(ctx->family, li->type);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d260ce2d6671..7f3a9a211034 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -66,7 +66,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
66 parent = rcu_dereference_raw(parent->rb_left); 66 parent = rcu_dereference_raw(parent->rb_left);
67 if (interval && 67 if (interval &&
68 nft_rbtree_equal(set, this, interval) && 68 nft_rbtree_equal(set, this, interval) &&
69 nft_rbtree_interval_end(this) && 69 nft_rbtree_interval_end(rbe) &&
70 !nft_rbtree_interval_end(interval)) 70 !nft_rbtree_interval_end(interval))
71 continue; 71 continue;
72 interval = rbe; 72 interval = rbe;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index f28a0b944087..74e1b3bd6954 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -142,3 +142,4 @@ module_exit(nft_socket_module_exit);
142MODULE_LICENSE("GPL"); 142MODULE_LICENSE("GPL");
143MODULE_AUTHOR("Máté Eckl"); 143MODULE_AUTHOR("Máté Eckl");
144MODULE_DESCRIPTION("nf_tables socket match module"); 144MODULE_DESCRIPTION("nf_tables socket match module");
145MODULE_ALIAS_NFT_EXPR("socket");
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index df9ab71b0ed9..d0d8397c9588 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1904,7 +1904,7 @@ static int __init xt_init(void)
1904 seqcount_init(&per_cpu(xt_recseq, i)); 1904 seqcount_init(&per_cpu(xt_recseq, i));
1905 } 1905 }
1906 1906
1907 xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); 1907 xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
1908 if (!xt) 1908 if (!xt)
1909 return -ENOMEM; 1909 return -ENOMEM;
1910 1910
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 8790190c6feb..03b9a50ec93b 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -245,12 +245,22 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
245 } 245 }
246 246
247 if (info->helper[0]) { 247 if (info->helper[0]) {
248 if (strnlen(info->helper, sizeof(info->helper)) == sizeof(info->helper)) {
249 ret = -ENAMETOOLONG;
250 goto err3;
251 }
252
248 ret = xt_ct_set_helper(ct, info->helper, par); 253 ret = xt_ct_set_helper(ct, info->helper, par);
249 if (ret < 0) 254 if (ret < 0)
250 goto err3; 255 goto err3;
251 } 256 }
252 257
253 if (info->timeout[0]) { 258 if (info->timeout[0]) {
259 if (strnlen(info->timeout, sizeof(info->timeout)) == sizeof(info->timeout)) {
260 ret = -ENAMETOOLONG;
261 goto err4;
262 }
263
254 ret = xt_ct_set_timeout(ct, par, info->timeout); 264 ret = xt_ct_set_timeout(ct, par, info->timeout);
255 if (ret < 0) 265 if (ret < 0)
256 goto err4; 266 goto err4;
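The two new checks above reject a helper or timeout name whose fixed-size field is completely filled with no terminating NUL, something userspace can hand in and which later lookups treat as an ordinary C string. The test is the usual "strnlen() equal to the field size means unterminated" idiom; a standalone illustration in plain C (field size arbitrary here):

    #define _POSIX_C_SOURCE 200809L
    #include <stdio.h>
    #include <string.h>

    #define NAME_LEN 16     /* arbitrary stand-in for the fixed-size field */

    /* Returns 0 for a properly terminated name, -1 otherwise. */
    static int check_name(const char name[NAME_LEN])
    {
            if (strnlen(name, NAME_LEN) == NAME_LEN)
                    return -1;      /* no NUL inside the field: reject */
            return 0;
    }

    int main(void)
    {
            char ok[NAME_LEN] = "ftp";
            char bad[NAME_LEN];

            memset(bad, 'A', sizeof(bad));  /* deliberately unterminated */
            printf("%d %d\n", check_name(ok), check_name(bad));     /* 0 -1 */
            return 0;
    }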
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 94df000abb92..29c38aa7f726 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -211,7 +211,7 @@ static int __init connmark_mt_init(void)
211static void __exit connmark_mt_exit(void) 211static void __exit connmark_mt_exit(void)
212{ 212{
213 xt_unregister_match(&connmark_mt_reg); 213 xt_unregister_match(&connmark_mt_reg);
214 xt_unregister_target(connmark_tg_reg); 214 xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg));
215} 215}
216 216
217module_init(connmark_mt_init); 217module_init(connmark_mt_init);
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 6f4c5217d835..bf2890b13212 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -372,8 +372,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
372 372
373 /* Normalize to fit into jiffies */ 373 /* Normalize to fit into jiffies */
374 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && 374 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
375 add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) 375 add_opt.ext.timeout > IPSET_MAX_TIMEOUT)
376 add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; 376 add_opt.ext.timeout = IPSET_MAX_TIMEOUT;
377 if (info->add_set.index != IPSET_INVALID_ID) 377 if (info->add_set.index != IPSET_INVALID_ID)
378 ip_set_add(info->add_set.index, skb, par, &add_opt); 378 ip_set_add(info->add_set.index, skb, par, &add_opt);
379 if (info->del_set.index != IPSET_INVALID_ID) 379 if (info->del_set.index != IPSET_INVALID_ID)
@@ -407,8 +407,8 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
407 407
408 /* Normalize to fit into jiffies */ 408 /* Normalize to fit into jiffies */
409 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && 409 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
410 add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) 410 add_opt.ext.timeout > IPSET_MAX_TIMEOUT)
411 add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; 411 add_opt.ext.timeout = IPSET_MAX_TIMEOUT;
412 if (info->add_set.index != IPSET_INVALID_ID) 412 if (info->add_set.index != IPSET_INVALID_ID)
413 ip_set_add(info->add_set.index, skb, par, &add_opt); 413 ip_set_add(info->add_set.index, skb, par, &add_opt);
414 if (info->del_set.index != IPSET_INVALID_ID) 414 if (info->del_set.index != IPSET_INVALID_ID)
@@ -470,7 +470,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
470 } 470 }
471 if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | 471 if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
472 (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && 472 (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
473 !(par->hook_mask & (1 << NF_INET_FORWARD | 473 (par->hook_mask & ~(1 << NF_INET_FORWARD |
474 1 << NF_INET_LOCAL_OUT | 474 1 << NF_INET_LOCAL_OUT |
475 1 << NF_INET_POST_ROUTING))) { 475 1 << NF_INET_POST_ROUTING))) {
476 pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); 476 pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1189b84413d5..393573a99a5a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2658,7 +2658,7 @@ static const struct proto_ops netlink_ops = {
2658 .socketpair = sock_no_socketpair, 2658 .socketpair = sock_no_socketpair,
2659 .accept = sock_no_accept, 2659 .accept = sock_no_accept,
2660 .getname = netlink_getname, 2660 .getname = netlink_getname,
2661 .poll_mask = datagram_poll_mask, 2661 .poll = datagram_poll,
2662 .ioctl = netlink_ioctl, 2662 .ioctl = netlink_ioctl,
2663 .listen = sock_no_listen, 2663 .listen = sock_no_listen,
2664 .shutdown = sock_no_shutdown, 2664 .shutdown = sock_no_shutdown,
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index b9ce82c9440f..25eeb6d2a75a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -352,8 +352,9 @@ int genl_register_family(struct genl_family *family)
352 } 352 }
353 353
354 if (family->maxattr && !family->parallel_ops) { 354 if (family->maxattr && !family->parallel_ops) {
355 family->attrbuf = kmalloc((family->maxattr+1) * 355 family->attrbuf = kmalloc_array(family->maxattr + 1,
356 sizeof(struct nlattr *), GFP_KERNEL); 356 sizeof(struct nlattr *),
357 GFP_KERNEL);
357 if (family->attrbuf == NULL) { 358 if (family->attrbuf == NULL) {
358 err = -ENOMEM; 359 err = -ENOMEM;
359 goto errout_locked; 360 goto errout_locked;
@@ -566,8 +567,9 @@ static int genl_family_rcv_msg(const struct genl_family *family,
566 return -EOPNOTSUPP; 567 return -EOPNOTSUPP;
567 568
568 if (family->maxattr && family->parallel_ops) { 569 if (family->maxattr && family->parallel_ops) {
569 attrbuf = kmalloc((family->maxattr+1) * 570 attrbuf = kmalloc_array(family->maxattr + 1,
570 sizeof(struct nlattr *), GFP_KERNEL); 571 sizeof(struct nlattr *),
572 GFP_KERNEL);
571 if (attrbuf == NULL) 573 if (attrbuf == NULL)
572 return -ENOMEM; 574 return -ENOMEM;
573 } else 575 } else
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index b97eb766a1d5..03f37c4e64fe 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1355,7 +1355,7 @@ static const struct proto_ops nr_proto_ops = {
1355 .socketpair = sock_no_socketpair, 1355 .socketpair = sock_no_socketpair,
1356 .accept = nr_accept, 1356 .accept = nr_accept,
1357 .getname = nr_getname, 1357 .getname = nr_getname,
1358 .poll_mask = datagram_poll_mask, 1358 .poll = datagram_poll,
1359 .ioctl = nr_ioctl, 1359 .ioctl = nr_ioctl,
1360 .listen = nr_listen, 1360 .listen = nr_listen,
1361 .shutdown = sock_no_shutdown, 1361 .shutdown = sock_no_shutdown,
@@ -1395,7 +1395,7 @@ static int __init nr_proto_init(void)
1395 return -1; 1395 return -1;
1396 } 1396 }
1397 1397
1398 dev_nr = kzalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL); 1398 dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL);
1399 if (dev_nr == NULL) { 1399 if (dev_nr == NULL) {
1400 printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); 1400 printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
1401 return -1; 1401 return -1;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ab5bb14b49af..ea0c0c6f1874 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -548,13 +548,16 @@ static inline __poll_t llcp_accept_poll(struct sock *parent)
548 return 0; 548 return 0;
549} 549}
550 550
551static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events) 551static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
552 poll_table *wait)
552{ 553{
553 struct sock *sk = sock->sk; 554 struct sock *sk = sock->sk;
554 __poll_t mask = 0; 555 __poll_t mask = 0;
555 556
556 pr_debug("%p\n", sk); 557 pr_debug("%p\n", sk);
557 558
559 sock_poll_wait(file, sk_sleep(sk), wait);
560
558 if (sk->sk_state == LLCP_LISTEN) 561 if (sk->sk_state == LLCP_LISTEN)
559 return llcp_accept_poll(sk); 562 return llcp_accept_poll(sk);
560 563
@@ -896,7 +899,7 @@ static const struct proto_ops llcp_sock_ops = {
896 .socketpair = sock_no_socketpair, 899 .socketpair = sock_no_socketpair,
897 .accept = llcp_sock_accept, 900 .accept = llcp_sock_accept,
898 .getname = llcp_sock_getname, 901 .getname = llcp_sock_getname,
899 .poll_mask = llcp_sock_poll_mask, 902 .poll = llcp_sock_poll,
900 .ioctl = sock_no_ioctl, 903 .ioctl = sock_no_ioctl,
901 .listen = llcp_sock_listen, 904 .listen = llcp_sock_listen,
902 .shutdown = sock_no_shutdown, 905 .shutdown = sock_no_shutdown,
@@ -916,7 +919,7 @@ static const struct proto_ops llcp_rawsock_ops = {
916 .socketpair = sock_no_socketpair, 919 .socketpair = sock_no_socketpair,
917 .accept = sock_no_accept, 920 .accept = sock_no_accept,
918 .getname = llcp_sock_getname, 921 .getname = llcp_sock_getname,
919 .poll_mask = llcp_sock_poll_mask, 922 .poll = llcp_sock_poll,
920 .ioctl = sock_no_ioctl, 923 .ioctl = sock_no_ioctl,
921 .listen = sock_no_listen, 924 .listen = sock_no_listen,
922 .shutdown = sock_no_shutdown, 925 .shutdown = sock_no_shutdown,
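This llcp conversion, like the netlink, netrom, packet, phonet, rose and rxrpc hunks around it, moves sockets off the short-lived ->poll_mask interface and back to a classic ->poll handler. The crucial detail is that the handler must register itself on the socket's wait queue with sock_poll_wait() before it inspects any state, otherwise a wakeup arriving between the check and the caller going to sleep is lost. A skeletal ->poll handler showing only that ordering (kernel context assumed, event handling reduced to the receive queue):

    #include <linux/fs.h>
    #include <linux/net.h>
    #include <linux/poll.h>
    #include <linux/skbuff.h>
    #include <net/sock.h>

    static __poll_t demo_sock_poll(struct file *file, struct socket *sock,
                                   poll_table *wait)
    {
            struct sock *sk = sock->sk;
            __poll_t mask = 0;

            /* Must come first: adds this waiter before any state is checked. */
            sock_poll_wait(file, sk_sleep(sk), wait);

            if (!skb_queue_empty(&sk->sk_receive_queue))
                    mask |= EPOLLIN | EPOLLRDNORM;
            if (sk->sk_err)
                    mask |= EPOLLERR;

            return mask;
    }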
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 60c322531c49..e2188deb08dc 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -284,7 +284,7 @@ static const struct proto_ops rawsock_ops = {
284 .socketpair = sock_no_socketpair, 284 .socketpair = sock_no_socketpair,
285 .accept = sock_no_accept, 285 .accept = sock_no_accept,
286 .getname = sock_no_getname, 286 .getname = sock_no_getname,
287 .poll_mask = datagram_poll_mask, 287 .poll = datagram_poll,
288 .ioctl = sock_no_ioctl, 288 .ioctl = sock_no_ioctl,
289 .listen = sock_no_listen, 289 .listen = sock_no_listen,
290 .shutdown = sock_no_shutdown, 290 .shutdown = sock_no_shutdown,
@@ -304,7 +304,7 @@ static const struct proto_ops rawsock_raw_ops = {
304 .socketpair = sock_no_socketpair, 304 .socketpair = sock_no_socketpair,
305 .accept = sock_no_accept, 305 .accept = sock_no_accept,
306 .getname = sock_no_getname, 306 .getname = sock_no_getname,
307 .poll_mask = datagram_poll_mask, 307 .poll = datagram_poll,
308 .ioctl = sock_no_ioctl, 308 .ioctl = sock_no_ioctl,
309 .listen = sock_no_listen, 309 .listen = sock_no_listen,
310 .shutdown = sock_no_shutdown, 310 .shutdown = sock_no_shutdown,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a61818e94396..0f5ce77460d4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1578,8 +1578,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1578 goto err_destroy_table; 1578 goto err_destroy_table;
1579 } 1579 }
1580 1580
1581 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), 1581 dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1582 GFP_KERNEL); 1582 sizeof(struct hlist_head),
1583 GFP_KERNEL);
1583 if (!dp->ports) { 1584 if (!dp->ports) {
1584 err = -ENOMEM; 1585 err = -ENOMEM;
1585 goto err_destroy_percpu; 1586 goto err_destroy_percpu;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index f81c1d0ddff4..19f6765566e7 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -47,7 +47,7 @@ static struct hlist_head *dev_table;
47 */ 47 */
48int ovs_vport_init(void) 48int ovs_vport_init(void)
49{ 49{
50 dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head), 50 dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head),
51 GFP_KERNEL); 51 GFP_KERNEL);
52 if (!dev_table) 52 if (!dev_table)
53 return -ENOMEM; 53 return -ENOMEM;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 54ce66f68482..57634bc3da74 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2005,7 +2005,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
2005 return -EINVAL; 2005 return -EINVAL;
2006 *len -= sizeof(vnet_hdr); 2006 *len -= sizeof(vnet_hdr);
2007 2007
2008 if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true)) 2008 if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
2009 return -EINVAL; 2009 return -EINVAL;
2010 2010
2011 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr)); 2011 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
@@ -2262,6 +2262,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2262 if (po->stats.stats1.tp_drops) 2262 if (po->stats.stats1.tp_drops)
2263 status |= TP_STATUS_LOSING; 2263 status |= TP_STATUS_LOSING;
2264 } 2264 }
2265
2266 if (do_vnet &&
2267 virtio_net_hdr_from_skb(skb, h.raw + macoff -
2268 sizeof(struct virtio_net_hdr),
2269 vio_le(), true, 0))
2270 goto drop_n_account;
2271
2265 po->stats.stats1.tp_packets++; 2272 po->stats.stats1.tp_packets++;
2266 if (copy_skb) { 2273 if (copy_skb) {
2267 status |= TP_STATUS_COPY; 2274 status |= TP_STATUS_COPY;
@@ -2269,15 +2276,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2269 } 2276 }
2270 spin_unlock(&sk->sk_receive_queue.lock); 2277 spin_unlock(&sk->sk_receive_queue.lock);
2271 2278
2272 if (do_vnet) {
2273 if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
2274 sizeof(struct virtio_net_hdr),
2275 vio_le(), true)) {
2276 spin_lock(&sk->sk_receive_queue.lock);
2277 goto drop_n_account;
2278 }
2279 }
2280
2281 skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 2279 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
2282 2280
2283 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) 2281 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
@@ -4078,11 +4076,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
4078 return 0; 4076 return 0;
4079} 4077}
4080 4078
4081static __poll_t packet_poll_mask(struct socket *sock, __poll_t events) 4079static __poll_t packet_poll(struct file *file, struct socket *sock,
4080 poll_table *wait)
4082{ 4081{
4083 struct sock *sk = sock->sk; 4082 struct sock *sk = sock->sk;
4084 struct packet_sock *po = pkt_sk(sk); 4083 struct packet_sock *po = pkt_sk(sk);
4085 __poll_t mask = datagram_poll_mask(sock, events); 4084 __poll_t mask = datagram_poll(file, sock, wait);
4086 4085
4087 spin_lock_bh(&sk->sk_receive_queue.lock); 4086 spin_lock_bh(&sk->sk_receive_queue.lock);
4088 if (po->rx_ring.pg_vec) { 4087 if (po->rx_ring.pg_vec) {
@@ -4161,7 +4160,7 @@ static char *alloc_one_pg_vec_page(unsigned long order)
4161 return buffer; 4160 return buffer;
4162 4161
4163 /* __get_free_pages failed, fall back to vmalloc */ 4162 /* __get_free_pages failed, fall back to vmalloc */
4164 buffer = vzalloc((1 << order) * PAGE_SIZE); 4163 buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
4165 if (buffer) 4164 if (buffer)
4166 return buffer; 4165 return buffer;
4167 4166
@@ -4424,7 +4423,7 @@ static const struct proto_ops packet_ops_spkt = {
4424 .socketpair = sock_no_socketpair, 4423 .socketpair = sock_no_socketpair,
4425 .accept = sock_no_accept, 4424 .accept = sock_no_accept,
4426 .getname = packet_getname_spkt, 4425 .getname = packet_getname_spkt,
4427 .poll_mask = datagram_poll_mask, 4426 .poll = datagram_poll,
4428 .ioctl = packet_ioctl, 4427 .ioctl = packet_ioctl,
4429 .listen = sock_no_listen, 4428 .listen = sock_no_listen,
4430 .shutdown = sock_no_shutdown, 4429 .shutdown = sock_no_shutdown,
@@ -4445,7 +4444,7 @@ static const struct proto_ops packet_ops = {
4445 .socketpair = sock_no_socketpair, 4444 .socketpair = sock_no_socketpair,
4446 .accept = sock_no_accept, 4445 .accept = sock_no_accept,
4447 .getname = packet_getname, 4446 .getname = packet_getname,
4448 .poll_mask = packet_poll_mask, 4447 .poll = packet_poll,
4449 .ioctl = packet_ioctl, 4448 .ioctl = packet_ioctl,
4450 .listen = sock_no_listen, 4449 .listen = sock_no_listen,
4451 .shutdown = sock_no_shutdown, 4450 .shutdown = sock_no_shutdown,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index c295c4e20f01..30187990257f 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -340,12 +340,15 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
340 return sizeof(struct sockaddr_pn); 340 return sizeof(struct sockaddr_pn);
341} 341}
342 342
343static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events) 343static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
344 poll_table *wait)
344{ 345{
345 struct sock *sk = sock->sk; 346 struct sock *sk = sock->sk;
346 struct pep_sock *pn = pep_sk(sk); 347 struct pep_sock *pn = pep_sk(sk);
347 __poll_t mask = 0; 348 __poll_t mask = 0;
348 349
350 poll_wait(file, sk_sleep(sk), wait);
351
349 if (sk->sk_state == TCP_CLOSE) 352 if (sk->sk_state == TCP_CLOSE)
350 return EPOLLERR; 353 return EPOLLERR;
351 if (!skb_queue_empty(&sk->sk_receive_queue)) 354 if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -445,7 +448,7 @@ const struct proto_ops phonet_dgram_ops = {
445 .socketpair = sock_no_socketpair, 448 .socketpair = sock_no_socketpair,
446 .accept = sock_no_accept, 449 .accept = sock_no_accept,
447 .getname = pn_socket_getname, 450 .getname = pn_socket_getname,
448 .poll_mask = datagram_poll_mask, 451 .poll = datagram_poll,
449 .ioctl = pn_socket_ioctl, 452 .ioctl = pn_socket_ioctl,
450 .listen = sock_no_listen, 453 .listen = sock_no_listen,
451 .shutdown = sock_no_shutdown, 454 .shutdown = sock_no_shutdown,
@@ -470,7 +473,7 @@ const struct proto_ops phonet_stream_ops = {
470 .socketpair = sock_no_socketpair, 473 .socketpair = sock_no_socketpair,
471 .accept = pn_socket_accept, 474 .accept = pn_socket_accept,
472 .getname = pn_socket_getname, 475 .getname = pn_socket_getname,
473 .poll_mask = pn_socket_poll_mask, 476 .poll = pn_socket_poll,
474 .ioctl = pn_socket_ioctl, 477 .ioctl = pn_socket_ioctl,
475 .listen = pn_socket_listen, 478 .listen = pn_socket_listen,
476 .shutdown = sock_no_shutdown, 479 .shutdown = sock_no_shutdown,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 1b5025ea5b04..2aa07b547b16 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1023,7 +1023,7 @@ static const struct proto_ops qrtr_proto_ops = {
1023 .recvmsg = qrtr_recvmsg, 1023 .recvmsg = qrtr_recvmsg,
1024 .getname = qrtr_getname, 1024 .getname = qrtr_getname,
1025 .ioctl = qrtr_ioctl, 1025 .ioctl = qrtr_ioctl,
1026 .poll_mask = datagram_poll_mask, 1026 .poll = datagram_poll,
1027 .shutdown = sock_no_shutdown, 1027 .shutdown = sock_no_shutdown,
1028 .setsockopt = sock_no_setsockopt, 1028 .setsockopt = sock_no_setsockopt,
1029 .getsockopt = sock_no_getsockopt, 1029 .getsockopt = sock_no_getsockopt,
diff --git a/net/rds/connection.c b/net/rds/connection.c
index abef75da89a7..cfb05953b0e5 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -659,11 +659,19 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
659 659
660int rds_conn_init(void) 660int rds_conn_init(void)
661{ 661{
662 int ret;
663
664 ret = rds_loop_net_init(); /* register pernet callback */
665 if (ret)
666 return ret;
667
662 rds_conn_slab = kmem_cache_create("rds_connection", 668 rds_conn_slab = kmem_cache_create("rds_connection",
663 sizeof(struct rds_connection), 669 sizeof(struct rds_connection),
664 0, 0, NULL); 670 0, 0, NULL);
665 if (!rds_conn_slab) 671 if (!rds_conn_slab) {
672 rds_loop_net_exit();
666 return -ENOMEM; 673 return -ENOMEM;
674 }
667 675
668 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 676 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
669 rds_info_register_func(RDS_INFO_SEND_MESSAGES, 677 rds_info_register_func(RDS_INFO_SEND_MESSAGES,
@@ -676,6 +684,7 @@ int rds_conn_init(void)
676 684
677void rds_conn_exit(void) 685void rds_conn_exit(void)
678{ 686{
687 rds_loop_net_exit(); /* unregister pernet callback */
679 rds_loop_exit(); 688 rds_loop_exit();
680 689
681 WARN_ON(!hlist_empty(rds_conn_hash)); 690 WARN_ON(!hlist_empty(rds_conn_hash));
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 02deee29e7f1..b6ad38e48f62 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -163,7 +163,8 @@ static void rds_ib_add_one(struct ib_device *device)
163 rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; 163 rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
164 rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; 164 rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
165 165
166 rds_ibdev->vector_load = kzalloc(sizeof(int) * device->num_comp_vectors, 166 rds_ibdev->vector_load = kcalloc(device->num_comp_vectors,
167 sizeof(int),
167 GFP_KERNEL); 168 GFP_KERNEL);
168 if (!rds_ibdev->vector_load) { 169 if (!rds_ibdev->vector_load) {
169 pr_err("RDS/IB: %s failed to allocate vector memory\n", 170 pr_err("RDS/IB: %s failed to allocate vector memory\n",
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 13b38ad0fa4a..f1684ae6abfd 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -526,7 +526,8 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
526 goto recv_hdrs_dma_out; 526 goto recv_hdrs_dma_out;
527 } 527 }
528 528
529 ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), 529 ic->i_sends = vzalloc_node(array_size(sizeof(struct rds_ib_send_work),
530 ic->i_send_ring.w_nr),
530 ibdev_to_node(dev)); 531 ibdev_to_node(dev));
531 if (!ic->i_sends) { 532 if (!ic->i_sends) {
532 ret = -ENOMEM; 533 ret = -ENOMEM;
@@ -534,7 +535,8 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
534 goto ack_dma_out; 535 goto ack_dma_out;
535 } 536 }
536 537
537 ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), 538 ic->i_recvs = vzalloc_node(array_size(sizeof(struct rds_ib_recv_work),
539 ic->i_recv_ring.w_nr),
538 ibdev_to_node(dev)); 540 ibdev_to_node(dev));
539 if (!ic->i_recvs) { 541 if (!ic->i_recvs) {
540 ret = -ENOMEM; 542 ret = -ENOMEM;
diff --git a/net/rds/info.c b/net/rds/info.c
index 140a44a5f7b7..e367a97a18c8 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -188,7 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
188 nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) 188 nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK))
189 >> PAGE_SHIFT; 189 >> PAGE_SHIFT;
190 190
191 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); 191 pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
192 if (!pages) { 192 if (!pages) {
193 ret = -ENOMEM; 193 ret = -ENOMEM;
194 goto out; 194 goto out;
diff --git a/net/rds/loop.c b/net/rds/loop.c
index f2bf78de5688..feea1f96ee2a 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -33,6 +33,8 @@
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/in.h> 35#include <linux/in.h>
36#include <net/net_namespace.h>
37#include <net/netns/generic.h>
36 38
37#include "rds_single_path.h" 39#include "rds_single_path.h"
38#include "rds.h" 40#include "rds.h"
@@ -40,6 +42,17 @@
40 42
41static DEFINE_SPINLOCK(loop_conns_lock); 43static DEFINE_SPINLOCK(loop_conns_lock);
42static LIST_HEAD(loop_conns); 44static LIST_HEAD(loop_conns);
45static atomic_t rds_loop_unloading = ATOMIC_INIT(0);
46
47static void rds_loop_set_unloading(void)
48{
49 atomic_set(&rds_loop_unloading, 1);
50}
51
52static bool rds_loop_is_unloading(struct rds_connection *conn)
53{
54 return atomic_read(&rds_loop_unloading) != 0;
55}
43 56
44/* 57/*
45 * This 'loopback' transport is a special case for flows that originate 58 * This 'loopback' transport is a special case for flows that originate
@@ -165,6 +178,8 @@ void rds_loop_exit(void)
165 struct rds_loop_connection *lc, *_lc; 178 struct rds_loop_connection *lc, *_lc;
166 LIST_HEAD(tmp_list); 179 LIST_HEAD(tmp_list);
167 180
181 rds_loop_set_unloading();
182 synchronize_rcu();
168 /* avoid calling conn_destroy with irqs off */ 183 /* avoid calling conn_destroy with irqs off */
169 spin_lock_irq(&loop_conns_lock); 184 spin_lock_irq(&loop_conns_lock);
170 list_splice(&loop_conns, &tmp_list); 185 list_splice(&loop_conns, &tmp_list);
@@ -177,6 +192,46 @@ void rds_loop_exit(void)
177 } 192 }
178} 193}
179 194
195static void rds_loop_kill_conns(struct net *net)
196{
197 struct rds_loop_connection *lc, *_lc;
198 LIST_HEAD(tmp_list);
199
200 spin_lock_irq(&loop_conns_lock);
201 list_for_each_entry_safe(lc, _lc, &loop_conns, loop_node) {
202 struct net *c_net = read_pnet(&lc->conn->c_net);
203
204 if (net != c_net)
205 continue;
206 list_move_tail(&lc->loop_node, &tmp_list);
207 }
208 spin_unlock_irq(&loop_conns_lock);
209
210 list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) {
211 WARN_ON(lc->conn->c_passive);
212 rds_conn_destroy(lc->conn);
213 }
214}
215
216static void __net_exit rds_loop_exit_net(struct net *net)
217{
218 rds_loop_kill_conns(net);
219}
220
221static struct pernet_operations rds_loop_net_ops = {
222 .exit = rds_loop_exit_net,
223};
224
225int rds_loop_net_init(void)
226{
227 return register_pernet_device(&rds_loop_net_ops);
228}
229
230void rds_loop_net_exit(void)
231{
232 unregister_pernet_device(&rds_loop_net_ops);
233}
234
180/* 235/*
181 * This is missing .xmit_* because loop doesn't go through generic 236 * This is missing .xmit_* because loop doesn't go through generic
182 * rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and 237 * rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and
@@ -193,4 +248,6 @@ struct rds_transport rds_loop_transport = {
193 .inc_copy_to_user = rds_message_inc_copy_to_user, 248 .inc_copy_to_user = rds_message_inc_copy_to_user,
194 .inc_free = rds_loop_inc_free, 249 .inc_free = rds_loop_inc_free,
195 .t_name = "loopback", 250 .t_name = "loopback",
251 .t_type = RDS_TRANS_LOOP,
252 .t_unloading = rds_loop_is_unloading,
196}; 253};
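Taken together, the rds/loop.c changes give the loopback transport a pernet exit hook, so connections owned by a dying network namespace are destroyed before the namespace disappears, and an unloading flag exposed through the new t_unloading callback so connection management can tell that the transport itself is being torn down. The namespace side is the standard register_pernet_device() pattern; a minimal sketch with the connection-kill body elided (names illustrative):

    #include <linux/init.h>
    #include <net/net_namespace.h>

    static void demo_kill_conns(struct net *net)
    {
            /* Walk the transport's global connection list and destroy the
             * entries whose owning namespace is 'net' (elided). */
    }

    static void __net_exit demo_exit_net(struct net *net)
    {
            demo_kill_conns(net);
    }

    static struct pernet_operations demo_net_ops = {
            .exit = demo_exit_net,
    };

    static int __init demo_init(void)
    {
            /* The ->exit hook runs for every namespace as it is torn down. */
            return register_pernet_device(&demo_net_ops);
    }

    static void __exit demo_cleanup(void)
    {
            unregister_pernet_device(&demo_net_ops);
    }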
diff --git a/net/rds/loop.h b/net/rds/loop.h
index 469fa4b2da4f..bbc8cdd030df 100644
--- a/net/rds/loop.h
+++ b/net/rds/loop.h
@@ -5,6 +5,8 @@
5/* loop.c */ 5/* loop.c */
6extern struct rds_transport rds_loop_transport; 6extern struct rds_transport rds_loop_transport;
7 7
8int rds_loop_net_init(void);
9void rds_loop_net_exit(void);
8void rds_loop_exit(void); 10void rds_loop_exit(void);
9 11
10#endif 12#endif
diff --git a/net/rds/rds.h b/net/rds/rds.h
index b04c333d9d1c..f2272fb8cd45 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -479,6 +479,11 @@ struct rds_notifier {
479 int n_status; 479 int n_status;
480}; 480};
481 481
482/* Available as part of RDS core, so doesn't need to participate
483 * in get_preferred transport etc
484 */
485#define RDS_TRANS_LOOP 3
486
482/** 487/**
483 * struct rds_transport - transport specific behavioural hooks 488 * struct rds_transport - transport specific behavioural hooks
484 * 489 *
diff --git a/net/rds/recv.c b/net/rds/recv.c
index dc67458b52f0..192ac6f78ded 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -103,6 +103,11 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
103 rds_stats_add(s_recv_bytes_added_to_socket, delta); 103 rds_stats_add(s_recv_bytes_added_to_socket, delta);
104 else 104 else
105 rds_stats_add(s_recv_bytes_removed_from_socket, -delta); 105 rds_stats_add(s_recv_bytes_removed_from_socket, -delta);
106
107 /* loop transport doesn't send/recv congestion updates */
108 if (rs->rs_transport->t_type == RDS_TRANS_LOOP)
109 return;
110
106 now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); 111 now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
107 112
108 rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d " 113 rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 5b73fea849df..d00a0ef39a56 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1470,7 +1470,7 @@ static const struct proto_ops rose_proto_ops = {
1470 .socketpair = sock_no_socketpair, 1470 .socketpair = sock_no_socketpair,
1471 .accept = rose_accept, 1471 .accept = rose_accept,
1472 .getname = rose_getname, 1472 .getname = rose_getname,
1473 .poll_mask = datagram_poll_mask, 1473 .poll = datagram_poll,
1474 .ioctl = rose_ioctl, 1474 .ioctl = rose_ioctl,
1475 .listen = rose_listen, 1475 .listen = rose_listen,
1476 .shutdown = sock_no_shutdown, 1476 .shutdown = sock_no_shutdown,
@@ -1514,7 +1514,8 @@ static int __init rose_proto_init(void)
1514 1514
1515 rose_callsign = null_ax25_address; 1515 rose_callsign = null_ax25_address;
1516 1516
1517 dev_rose = kzalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); 1517 dev_rose = kcalloc(rose_ndevs, sizeof(struct net_device *),
1518 GFP_KERNEL);
1518 if (dev_rose == NULL) { 1519 if (dev_rose == NULL) {
1519 printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); 1520 printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n");
1520 rc = -ENOMEM; 1521 rc = -ENOMEM;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 3b1ac93efee2..2b463047dd7b 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -734,11 +734,15 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
734/* 734/*
735 * permit an RxRPC socket to be polled 735 * permit an RxRPC socket to be polled
736 */ 736 */
737static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events) 737static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
738 poll_table *wait)
738{ 739{
739 struct sock *sk = sock->sk; 740 struct sock *sk = sock->sk;
740 struct rxrpc_sock *rx = rxrpc_sk(sk); 741 struct rxrpc_sock *rx = rxrpc_sk(sk);
741 __poll_t mask = 0; 742 __poll_t mask;
743
744 sock_poll_wait(file, sk_sleep(sk), wait);
745 mask = 0;
742 746
743 /* the socket is readable if there are any messages waiting on the Rx 747 /* the socket is readable if there are any messages waiting on the Rx
744 * queue */ 748 * queue */
@@ -945,7 +949,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
945 .socketpair = sock_no_socketpair, 949 .socketpair = sock_no_socketpair,
946 .accept = sock_no_accept, 950 .accept = sock_no_accept,
947 .getname = sock_no_getname, 951 .getname = sock_no_getname,
948 .poll_mask = rxrpc_poll_mask, 952 .poll = rxrpc_poll,
949 .ioctl = sock_no_ioctl, 953 .ioctl = sock_no_ioctl,
950 .listen = rxrpc_listen, 954 .listen = rxrpc_listen,
951 .shutdown = rxrpc_shutdown, 955 .shutdown = rxrpc_shutdown,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 6c0ae27fff84..278ac0807a60 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -432,7 +432,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
432 432
433 sg = _sg; 433 sg = _sg;
434 if (unlikely(nsg > 4)) { 434 if (unlikely(nsg > 4)) {
435 sg = kmalloc(sizeof(*sg) * nsg, GFP_NOIO); 435 sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
436 if (!sg) 436 if (!sg)
437 goto nomem; 437 goto nomem;
438 } 438 }
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 8527cfdc446d..20d7d36b2fc9 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -415,7 +415,8 @@ static void tcf_ife_cleanup(struct tc_action *a)
415 spin_unlock_bh(&ife->tcf_lock); 415 spin_unlock_bh(&ife->tcf_lock);
416 416
417 p = rcu_dereference_protected(ife->params, 1); 417 p = rcu_dereference_protected(ife->params, 1);
418 kfree_rcu(p, rcu); 418 if (p)
419 kfree_rcu(p, rcu);
419} 420}
420 421
421/* under ife->tcf_lock for existing action */ 422/* under ife->tcf_lock for existing action */
@@ -516,8 +517,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
516 saddr = nla_data(tb[TCA_IFE_SMAC]); 517 saddr = nla_data(tb[TCA_IFE_SMAC]);
517 } 518 }
518 519
519 ife->tcf_action = parm->action;
520
521 if (parm->flags & IFE_ENCODE) { 520 if (parm->flags & IFE_ENCODE) {
522 if (daddr) 521 if (daddr)
523 ether_addr_copy(p->eth_dst, daddr); 522 ether_addr_copy(p->eth_dst, daddr);
@@ -543,10 +542,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
543 NULL, NULL); 542 NULL, NULL);
544 if (err) { 543 if (err) {
545metadata_parse_err: 544metadata_parse_err:
546 if (exists)
547 tcf_idr_release(*a, bind);
548 if (ret == ACT_P_CREATED) 545 if (ret == ACT_P_CREATED)
549 _tcf_ife_cleanup(*a); 546 tcf_idr_release(*a, bind);
550 547
551 if (exists) 548 if (exists)
552 spin_unlock_bh(&ife->tcf_lock); 549 spin_unlock_bh(&ife->tcf_lock);
@@ -567,7 +564,7 @@ metadata_parse_err:
567 err = use_all_metadata(ife); 564 err = use_all_metadata(ife);
568 if (err) { 565 if (err) {
569 if (ret == ACT_P_CREATED) 566 if (ret == ACT_P_CREATED)
570 _tcf_ife_cleanup(*a); 567 tcf_idr_release(*a, bind);
571 568
572 if (exists) 569 if (exists)
573 spin_unlock_bh(&ife->tcf_lock); 570 spin_unlock_bh(&ife->tcf_lock);
@@ -576,6 +573,7 @@ metadata_parse_err:
576 } 573 }
577 } 574 }
578 575
576 ife->tcf_action = parm->action;
579 if (exists) 577 if (exists)
580 spin_unlock_bh(&ife->tcf_lock); 578 spin_unlock_bh(&ife->tcf_lock);
581 579
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 9618b4a83cee..98c4afe7c15b 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -53,22 +53,22 @@ static void tcf_simp_release(struct tc_action *a)
53 kfree(d->tcfd_defdata); 53 kfree(d->tcfd_defdata);
54} 54}
55 55
56static int alloc_defdata(struct tcf_defact *d, char *defdata) 56static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata)
57{ 57{
58 d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL); 58 d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL);
59 if (unlikely(!d->tcfd_defdata)) 59 if (unlikely(!d->tcfd_defdata))
60 return -ENOMEM; 60 return -ENOMEM;
61 strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); 61 nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
62 return 0; 62 return 0;
63} 63}
64 64
65static void reset_policy(struct tcf_defact *d, char *defdata, 65static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata,
66 struct tc_defact *p) 66 struct tc_defact *p)
67{ 67{
68 spin_lock_bh(&d->tcf_lock); 68 spin_lock_bh(&d->tcf_lock);
69 d->tcf_action = p->action; 69 d->tcf_action = p->action;
70 memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); 70 memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
71 strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); 71 nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
72 spin_unlock_bh(&d->tcf_lock); 72 spin_unlock_bh(&d->tcf_lock);
73} 73}
74 74
@@ -87,7 +87,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
87 struct tcf_defact *d; 87 struct tcf_defact *d;
88 bool exists = false; 88 bool exists = false;
89 int ret = 0, err; 89 int ret = 0, err;
90 char *defdata;
91 90
92 if (nla == NULL) 91 if (nla == NULL)
93 return -EINVAL; 92 return -EINVAL;
@@ -110,8 +109,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
110 return -EINVAL; 109 return -EINVAL;
111 } 110 }
112 111
113 defdata = nla_data(tb[TCA_DEF_DATA]);
114
115 if (!exists) { 112 if (!exists) {
116 ret = tcf_idr_create(tn, parm->index, est, a, 113 ret = tcf_idr_create(tn, parm->index, est, a,
117 &act_simp_ops, bind, false); 114 &act_simp_ops, bind, false);
@@ -119,7 +116,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
119 return ret; 116 return ret;
120 117
121 d = to_defact(*a); 118 d = to_defact(*a);
122 ret = alloc_defdata(d, defdata); 119 ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
123 if (ret < 0) { 120 if (ret < 0) {
124 tcf_idr_release(*a, bind); 121 tcf_idr_release(*a, bind);
125 return ret; 122 return ret;
@@ -133,7 +130,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
133 if (!ovr) 130 if (!ovr)
134 return -EEXIST; 131 return -EEXIST;
135 132
136 reset_policy(d, defdata, parm); 133 reset_policy(d, tb[TCA_DEF_DATA], parm);
137 } 134 }
138 135
139 if (ret == ACT_P_CREATED) 136 if (ret == ACT_P_CREATED)
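
The act_simple change passes the raw netlink attribute down and copies it with nla_strlcpy() rather than running strlcpy() over nla_data(), so the copy is bounded by the attribute's payload length as well as the destination size and is always NUL-terminated. A small sketch of the difference, reusing SIMP_MAX_DATA from the hunk (the wrapper itself is hypothetical):

/* Hypothetical wrapper illustrating nla_strlcpy(): unlike
 * strlcpy(dst, nla_data(attr), SIMP_MAX_DATA), it will not read past
 * the end of an attribute whose payload lacks a NUL terminator.
 */
#include <net/netlink.h>
#include <linux/tc_act/tc_defact.h>

static void copy_defdata(char *dst, const struct nlattr *attr)
{
	nla_strlcpy(dst, attr, SIMP_MAX_DATA);	/* bounded by payload and dst */
}
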
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2b5be42a9f1c..9e8b26a80fb3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -66,7 +66,7 @@ struct fl_flow_mask {
66 struct rhashtable_params filter_ht_params; 66 struct rhashtable_params filter_ht_params;
67 struct flow_dissector dissector; 67 struct flow_dissector dissector;
68 struct list_head filters; 68 struct list_head filters;
69 struct rcu_head rcu; 69 struct rcu_work rwork;
70 struct list_head list; 70 struct list_head list;
71}; 71};
72 72
@@ -203,6 +203,20 @@ static int fl_init(struct tcf_proto *tp)
203 return rhashtable_init(&head->ht, &mask_ht_params); 203 return rhashtable_init(&head->ht, &mask_ht_params);
204} 204}
205 205
206static void fl_mask_free(struct fl_flow_mask *mask)
207{
208 rhashtable_destroy(&mask->ht);
209 kfree(mask);
210}
211
212static void fl_mask_free_work(struct work_struct *work)
213{
214 struct fl_flow_mask *mask = container_of(to_rcu_work(work),
215 struct fl_flow_mask, rwork);
216
217 fl_mask_free(mask);
218}
219
206static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask, 220static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
207 bool async) 221 bool async)
208{ 222{
@@ -210,12 +224,11 @@ static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
210 return false; 224 return false;
211 225
212 rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params); 226 rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
213 rhashtable_destroy(&mask->ht);
214 list_del_rcu(&mask->list); 227 list_del_rcu(&mask->list);
215 if (async) 228 if (async)
216 kfree_rcu(mask, rcu); 229 tcf_queue_work(&mask->rwork, fl_mask_free_work);
217 else 230 else
218 kfree(mask); 231 fl_mask_free(mask);
219 232
220 return true; 233 return true;
221} 234}
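
cls_flower replaces kfree_rcu() on the mask with tcf_queue_work() on an embedded rcu_work, because the teardown now includes rhashtable_destroy(), which can sleep and so must not run from an RCU callback; the work item runs in process context only after a grace period has elapsed. A minimal sketch of the generic embed-and-queue pattern, using a hypothetical object in place of fl_flow_mask:

/* Minimal sketch of deferring a sleeping destructor past an RCU grace
 * period with rcu_work; struct foo and its teardown are hypothetical.
 */
#include <linux/workqueue.h>
#include <linux/slab.h>

struct foo {
	struct rcu_work rwork;
	/* ... state whose teardown may sleep ... */
};

static void foo_free_work(struct work_struct *work)
{
	struct foo *f = container_of(to_rcu_work(work), struct foo, rwork);

	/* process context: sleeping cleanup such as rhashtable_destroy() */
	kfree(f);
}

static void foo_put_deferred(struct foo *f)
{
	INIT_RCU_WORK(&f->rwork, foo_free_work);
	/* waits for a grace period, then runs foo_free_work() on system_wq */
	queue_rcu_work(system_wq, &f->rwork);
}
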
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index c98a61e980ba..9c4c2bb547d7 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -21,7 +21,7 @@ static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
21 struct sk_buff **to_free) 21 struct sk_buff **to_free)
22{ 22{
23 qdisc_drop(skb, sch, to_free); 23 qdisc_drop(skb, sch, to_free);
24 return NET_XMIT_SUCCESS; 24 return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
25} 25}
26 26
27static struct sk_buff *blackhole_dequeue(struct Qdisc *sch) 27static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 22fa13cf5d8b..cd2e0e342fb6 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -489,11 +489,12 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
489 return err; 489 return err;
490 490
491 if (!q->flows) { 491 if (!q->flows) {
492 q->flows = kvzalloc(q->flows_cnt * 492 q->flows = kvcalloc(q->flows_cnt,
493 sizeof(struct fq_codel_flow), GFP_KERNEL); 493 sizeof(struct fq_codel_flow),
494 GFP_KERNEL);
494 if (!q->flows) 495 if (!q->flows)
495 return -ENOMEM; 496 return -ENOMEM;
496 q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL); 497 q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL);
497 if (!q->backlogs) 498 if (!q->backlogs)
498 return -ENOMEM; 499 return -ENOMEM;
499 for (i = 0; i < q->flows_cnt; i++) { 500 for (i = 0; i < q->flows_cnt; i++) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ae9877ea205..3278a76f6861 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1385,8 +1385,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
1385 if (next_time == 0 || next_time > q->root.cl_cfmin) 1385 if (next_time == 0 || next_time > q->root.cl_cfmin)
1386 next_time = q->root.cl_cfmin; 1386 next_time = q->root.cl_cfmin;
1387 } 1387 }
1388 WARN_ON(next_time == 0); 1388 if (next_time)
1389 qdisc_watchdog_schedule(&q->watchdog, next_time); 1389 qdisc_watchdog_schedule(&q->watchdog, next_time);
1390} 1390}
1391 1391
1392static int 1392static int
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index bce2632212d3..c3a8388dcdf6 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -599,8 +599,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
599 599
600 if (!q->hh_flows) { 600 if (!q->hh_flows) {
601 /* Initialize heavy-hitter flow table. */ 601 /* Initialize heavy-hitter flow table. */
602 q->hh_flows = kvzalloc(HH_FLOWS_CNT * 602 q->hh_flows = kvcalloc(HH_FLOWS_CNT, sizeof(struct list_head),
603 sizeof(struct list_head), GFP_KERNEL); 603 GFP_KERNEL);
604 if (!q->hh_flows) 604 if (!q->hh_flows)
605 return -ENOMEM; 605 return -ENOMEM;
606 for (i = 0; i < HH_FLOWS_CNT; i++) 606 for (i = 0; i < HH_FLOWS_CNT; i++)
@@ -614,8 +614,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
614 614
615 /* Initialize heavy-hitter filter arrays. */ 615 /* Initialize heavy-hitter filter arrays. */
616 for (i = 0; i < HHF_ARRAYS_CNT; i++) { 616 for (i = 0; i < HHF_ARRAYS_CNT; i++) {
617 q->hhf_arrays[i] = kvzalloc(HHF_ARRAYS_LEN * 617 q->hhf_arrays[i] = kvcalloc(HHF_ARRAYS_LEN,
618 sizeof(u32), GFP_KERNEL); 618 sizeof(u32),
619 GFP_KERNEL);
619 if (!q->hhf_arrays[i]) { 620 if (!q->hhf_arrays[i]) {
620 /* Note: hhf_destroy() will be called 621 /* Note: hhf_destroy() will be called
621 * by our caller. 622 * by our caller.
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index e64630cd3331..5b537613946f 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -482,8 +482,9 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
482 return 0; 482 return 0;
483 483
484 /* Allocated the array of pointers to transorms */ 484 /* Allocated the array of pointers to transorms */
485 ep->auth_hmacs = kzalloc(sizeof(struct crypto_shash *) * 485 ep->auth_hmacs = kcalloc(SCTP_AUTH_NUM_HMACS,
486 SCTP_AUTH_NUM_HMACS, gfp); 486 sizeof(struct crypto_shash *),
487 gfp);
487 if (!ep->auth_hmacs) 488 if (!ep->auth_hmacs)
488 return -ENOMEM; 489 return -ENOMEM;
489 490
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 79daa98208c3..bfb9f812e2ef 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -237,7 +237,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
237 /* Account for a different sized first fragment */ 237 /* Account for a different sized first fragment */
238 if (msg_len >= first_len) { 238 if (msg_len >= first_len) {
239 msg->can_delay = 0; 239 msg->can_delay = 0;
240 SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); 240 if (msg_len > first_len)
241 SCTP_INC_STATS(sock_net(asoc->base.sk),
242 SCTP_MIB_FRAGUSRMSGS);
241 } else { 243 } else {
242 /* Which may be the only one... */ 244 /* Which may be the only one... */
243 first_len = msg_len; 245 first_len = msg_len;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 7339918a805d..0cd2e764f47f 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1010,7 +1010,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
1010 .socketpair = sock_no_socketpair, 1010 .socketpair = sock_no_socketpair,
1011 .accept = inet_accept, 1011 .accept = inet_accept,
1012 .getname = sctp_getname, 1012 .getname = sctp_getname,
1013 .poll_mask = sctp_poll_mask, 1013 .poll = sctp_poll,
1014 .ioctl = inet6_ioctl, 1014 .ioctl = inet6_ioctl,
1015 .listen = sctp_inet_listen, 1015 .listen = sctp_inet_listen,
1016 .shutdown = inet_shutdown, 1016 .shutdown = inet_shutdown,
diff --git a/net/sctp/output.c b/net/sctp/output.c
index e672dee302c7..7f849b01ec8e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -409,6 +409,21 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
409 refcount_inc(&sk->sk_wmem_alloc); 409 refcount_inc(&sk->sk_wmem_alloc);
410} 410}
411 411
412static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
413{
414 if (SCTP_OUTPUT_CB(head)->last == head)
415 skb_shinfo(head)->frag_list = skb;
416 else
417 SCTP_OUTPUT_CB(head)->last->next = skb;
418 SCTP_OUTPUT_CB(head)->last = skb;
419
420 head->truesize += skb->truesize;
421 head->data_len += skb->len;
422 head->len += skb->len;
423
424 __skb_header_release(skb);
425}
426
412static int sctp_packet_pack(struct sctp_packet *packet, 427static int sctp_packet_pack(struct sctp_packet *packet,
413 struct sk_buff *head, int gso, gfp_t gfp) 428 struct sk_buff *head, int gso, gfp_t gfp)
414{ 429{
@@ -422,7 +437,7 @@ static int sctp_packet_pack(struct sctp_packet *packet,
422 437
423 if (gso) { 438 if (gso) {
424 skb_shinfo(head)->gso_type = sk->sk_gso_type; 439 skb_shinfo(head)->gso_type = sk->sk_gso_type;
425 NAPI_GRO_CB(head)->last = head; 440 SCTP_OUTPUT_CB(head)->last = head;
426 } else { 441 } else {
427 nskb = head; 442 nskb = head;
428 pkt_size = packet->size; 443 pkt_size = packet->size;
@@ -503,15 +518,8 @@ merge:
503 &packet->chunk_list); 518 &packet->chunk_list);
504 } 519 }
505 520
506 if (gso) { 521 if (gso)
507 if (skb_gro_receive(&head, nskb)) { 522 sctp_packet_gso_append(head, nskb);
508 kfree_skb(nskb);
509 return 0;
510 }
511 if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
512 sk->sk_gso_max_segs))
513 return 0;
514 }
515 523
516 pkt_count++; 524 pkt_count++;
517 } while (!list_empty(&packet->chunk_list)); 525 } while (!list_empty(&packet->chunk_list));
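
sctp_packet_pack() stops reusing skb_gro_receive() to coalesce GSO fragments; the new sctp_packet_gso_append() links each segment onto the head's frag_list and updates len, data_len and truesize by hand, tracking the tail in SCTP_OUTPUT_CB(head)->last instead of the GRO control block. A rough sketch of the same frag_list bookkeeping with the tail cursor passed explicitly (the helper is hypothetical):

/* Rough sketch of frag_list chaining as done by sctp_packet_gso_append();
 * the explicit "last" cursor stands in for SCTP_OUTPUT_CB(head)->last.
 */
#include <linux/skbuff.h>

static void gso_append(struct sk_buff *head, struct sk_buff **last,
		       struct sk_buff *skb)
{
	if (*last == head)
		skb_shinfo(head)->frag_list = skb;	/* first fragment */
	else
		(*last)->next = skb;			/* chain after previous */
	*last = skb;

	/* keep the head's length accounting in step with the chained data */
	head->truesize += skb->truesize;
	head->data_len += skb->len;
	head->len += skb->len;

	__skb_header_release(skb);
}
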
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 11d93377ba5e..67f73d3a1356 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1016,7 +1016,7 @@ static const struct proto_ops inet_seqpacket_ops = {
1016 .socketpair = sock_no_socketpair, 1016 .socketpair = sock_no_socketpair,
1017 .accept = inet_accept, 1017 .accept = inet_accept,
1018 .getname = inet_getname, /* Semantics are different. */ 1018 .getname = inet_getname, /* Semantics are different. */
1019 .poll_mask = sctp_poll_mask, 1019 .poll = sctp_poll,
1020 .ioctl = inet_ioctl, 1020 .ioctl = inet_ioctl,
1021 .listen = sctp_inet_listen, 1021 .listen = sctp_inet_listen,
1022 .shutdown = inet_shutdown, /* Looks harmless. */ 1022 .shutdown = inet_shutdown, /* Looks harmless. */
@@ -1438,7 +1438,7 @@ static __init int sctp_init(void)
1438 /* Allocate and initialize the endpoint hash table. */ 1438 /* Allocate and initialize the endpoint hash table. */
1439 sctp_ep_hashsize = 64; 1439 sctp_ep_hashsize = 64;
1440 sctp_ep_hashtable = 1440 sctp_ep_hashtable =
1441 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); 1441 kmalloc_array(64, sizeof(struct sctp_hashbucket), GFP_KERNEL);
1442 if (!sctp_ep_hashtable) { 1442 if (!sctp_ep_hashtable) {
1443 pr_err("Failed endpoint_hash alloc\n"); 1443 pr_err("Failed endpoint_hash alloc\n");
1444 status = -ENOMEM; 1444 status = -ENOMEM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d20f7addee19..ce620e878538 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7717,12 +7717,14 @@ out:
7717 * here, again, by modeling the current TCP/UDP code. We don't have 7717 * here, again, by modeling the current TCP/UDP code. We don't have
7718 * a good way to test with it yet. 7718 * a good way to test with it yet.
7719 */ 7719 */
7720__poll_t sctp_poll_mask(struct socket *sock, __poll_t events) 7720__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
7721{ 7721{
7722 struct sock *sk = sock->sk; 7722 struct sock *sk = sock->sk;
7723 struct sctp_sock *sp = sctp_sk(sk); 7723 struct sctp_sock *sp = sctp_sk(sk);
7724 __poll_t mask; 7724 __poll_t mask;
7725 7725
7726 poll_wait(file, sk_sleep(sk), wait);
7727
7726 sock_rps_record_flow(sk); 7728 sock_rps_record_flow(sk);
7727 7729
7728 /* A TCP-style listening socket becomes readable when the accept queue 7730 /* A TCP-style listening socket becomes readable when the accept queue
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 973b4471b532..3c1405df936c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -45,6 +45,7 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
45 */ 45 */
46 46
47static void smc_tcp_listen_work(struct work_struct *); 47static void smc_tcp_listen_work(struct work_struct *);
48static void smc_connect_work(struct work_struct *);
48 49
49static void smc_set_keepalive(struct sock *sk, int val) 50static void smc_set_keepalive(struct sock *sk, int val)
50{ 51{
@@ -122,6 +123,12 @@ static int smc_release(struct socket *sock)
122 goto out; 123 goto out;
123 124
124 smc = smc_sk(sk); 125 smc = smc_sk(sk);
126
127 /* cleanup for a dangling non-blocking connect */
128 flush_work(&smc->connect_work);
129 kfree(smc->connect_info);
130 smc->connect_info = NULL;
131
125 if (sk->sk_state == SMC_LISTEN) 132 if (sk->sk_state == SMC_LISTEN)
126 /* smc_close_non_accepted() is called and acquires 133 /* smc_close_non_accepted() is called and acquires
127 * sock lock for child sockets again 134 * sock lock for child sockets again
@@ -186,6 +193,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
186 sk->sk_protocol = protocol; 193 sk->sk_protocol = protocol;
187 smc = smc_sk(sk); 194 smc = smc_sk(sk);
188 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 195 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
196 INIT_WORK(&smc->connect_work, smc_connect_work);
189 INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); 197 INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
190 INIT_LIST_HEAD(&smc->accept_q); 198 INIT_LIST_HEAD(&smc->accept_q);
191 spin_lock_init(&smc->accept_q_lock); 199 spin_lock_init(&smc->accept_q_lock);
@@ -576,6 +584,35 @@ static int __smc_connect(struct smc_sock *smc)
576 return 0; 584 return 0;
577} 585}
578 586
587static void smc_connect_work(struct work_struct *work)
588{
589 struct smc_sock *smc = container_of(work, struct smc_sock,
590 connect_work);
591 int rc;
592
593 lock_sock(&smc->sk);
594 rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
595 smc->connect_info->alen, smc->connect_info->flags);
596 if (smc->clcsock->sk->sk_err) {
597 smc->sk.sk_err = smc->clcsock->sk->sk_err;
598 goto out;
599 }
600 if (rc < 0) {
601 smc->sk.sk_err = -rc;
602 goto out;
603 }
604
605 rc = __smc_connect(smc);
606 if (rc < 0)
607 smc->sk.sk_err = -rc;
608
609out:
610 smc->sk.sk_state_change(&smc->sk);
611 kfree(smc->connect_info);
612 smc->connect_info = NULL;
613 release_sock(&smc->sk);
614}
615
579static int smc_connect(struct socket *sock, struct sockaddr *addr, 616static int smc_connect(struct socket *sock, struct sockaddr *addr,
580 int alen, int flags) 617 int alen, int flags)
581{ 618{
@@ -605,15 +642,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
605 642
606 smc_copy_sock_settings_to_clc(smc); 643 smc_copy_sock_settings_to_clc(smc);
607 tcp_sk(smc->clcsock->sk)->syn_smc = 1; 644 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
608 rc = kernel_connect(smc->clcsock, addr, alen, flags); 645 if (flags & O_NONBLOCK) {
609 if (rc) 646 if (smc->connect_info) {
610 goto out; 647 rc = -EALREADY;
648 goto out;
649 }
650 smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
651 if (!smc->connect_info) {
652 rc = -ENOMEM;
653 goto out;
654 }
655 smc->connect_info->alen = alen;
656 smc->connect_info->flags = flags ^ O_NONBLOCK;
657 memcpy(&smc->connect_info->addr, addr, alen);
658 schedule_work(&smc->connect_work);
659 rc = -EINPROGRESS;
660 } else {
661 rc = kernel_connect(smc->clcsock, addr, alen, flags);
662 if (rc)
663 goto out;
611 664
612 rc = __smc_connect(smc); 665 rc = __smc_connect(smc);
613 if (rc < 0) 666 if (rc < 0)
614 goto out; 667 goto out;
615 else 668 else
616 rc = 0; /* success cases including fallback */ 669 rc = 0; /* success cases including fallback */
670 }
617 671
618out: 672out:
619 release_sock(sk); 673 release_sock(sk);
@@ -1279,40 +1333,20 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
1279 struct sock *sk = sock->sk; 1333 struct sock *sk = sock->sk;
1280 __poll_t mask = 0; 1334 __poll_t mask = 0;
1281 struct smc_sock *smc; 1335 struct smc_sock *smc;
1282 int rc;
1283 1336
1284 if (!sk) 1337 if (!sk)
1285 return EPOLLNVAL; 1338 return EPOLLNVAL;
1286 1339
1287 smc = smc_sk(sock->sk); 1340 smc = smc_sk(sock->sk);
1288 sock_hold(sk);
1289 lock_sock(sk);
1290 if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { 1341 if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
1291 /* delegate to CLC child sock */ 1342 /* delegate to CLC child sock */
1292 release_sock(sk);
1293 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); 1343 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
1294 lock_sock(sk);
1295 sk->sk_err = smc->clcsock->sk->sk_err; 1344 sk->sk_err = smc->clcsock->sk->sk_err;
1296 if (sk->sk_err) { 1345 if (sk->sk_err)
1297 mask |= EPOLLERR; 1346 mask |= EPOLLERR;
1298 } else {
1299 /* if non-blocking connect finished ... */
1300 if (sk->sk_state == SMC_INIT &&
1301 mask & EPOLLOUT &&
1302 smc->clcsock->sk->sk_state != TCP_CLOSE) {
1303 rc = __smc_connect(smc);
1304 if (rc < 0)
1305 mask |= EPOLLERR;
1306 /* success cases including fallback */
1307 mask |= EPOLLOUT | EPOLLWRNORM;
1308 }
1309 }
1310 } else { 1347 } else {
1311 if (sk->sk_state != SMC_CLOSED) { 1348 if (sk->sk_state != SMC_CLOSED)
1312 release_sock(sk);
1313 sock_poll_wait(file, sk_sleep(sk), wait); 1349 sock_poll_wait(file, sk_sleep(sk), wait);
1314 lock_sock(sk);
1315 }
1316 if (sk->sk_err) 1350 if (sk->sk_err)
1317 mask |= EPOLLERR; 1351 mask |= EPOLLERR;
1318 if ((sk->sk_shutdown == SHUTDOWN_MASK) || 1352 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1338,10 +1372,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
1338 } 1372 }
1339 if (smc->conn.urg_state == SMC_URG_VALID) 1373 if (smc->conn.urg_state == SMC_URG_VALID)
1340 mask |= EPOLLPRI; 1374 mask |= EPOLLPRI;
1341
1342 } 1375 }
1343 release_sock(sk);
1344 sock_put(sk);
1345 1376
1346 return mask; 1377 return mask;
1347} 1378}
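
The af_smc.c changes move completion of a non-blocking connect out of smc_poll(): smc_connect() now saves the destination in smc->connect_info, queues connect_work and returns -EINPROGRESS, and the work item performs the blocking kernel_connect() plus __smc_connect() before signalling the socket through sk_state_change(). A hedged sketch of that queue-and-return-EINPROGRESS shape with hypothetical names (the general pattern, not the SMC code itself):

/* Hedged sketch of deferring a non-blocking connect to a work item;
 * struct conn_req and example_connect() are hypothetical.
 */
#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>

struct conn_req {
	struct work_struct work;
	struct socket *sock;
	int alen;
	struct sockaddr addr;		/* storage extended by the kzalloc below */
};

static void conn_work(struct work_struct *work)
{
	struct conn_req *req = container_of(work, struct conn_req, work);
	int rc;

	/* the blocking connect runs here, off the caller's context */
	rc = kernel_connect(req->sock, &req->addr, req->alen, 0);
	(void)rc;	/* real code would propagate rc/sk_err and wake the socket */
	kfree(req);
}

static int example_connect(struct socket *sock, struct sockaddr *addr, int alen)
{
	struct conn_req *req = kzalloc(sizeof(*req) + alen, GFP_KERNEL);

	if (!req)
		return -ENOMEM;
	req->sock = sock;
	req->alen = alen;
	memcpy(&req->addr, addr, alen);
	INIT_WORK(&req->work, conn_work);
	schedule_work(&req->work);
	return -EINPROGRESS;		/* caller learns the outcome via poll() */
}
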
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 51ae1f10d81a..d7ca26570482 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -187,11 +187,19 @@ struct smc_connection {
187 struct work_struct close_work; /* peer sent some closing */ 187 struct work_struct close_work; /* peer sent some closing */
188}; 188};
189 189
190struct smc_connect_info {
191 int flags;
192 int alen;
193 struct sockaddr addr;
194};
195
190struct smc_sock { /* smc sock container */ 196struct smc_sock { /* smc sock container */
191 struct sock sk; 197 struct sock sk;
192 struct socket *clcsock; /* internal tcp socket */ 198 struct socket *clcsock; /* internal tcp socket */
193 struct smc_connection conn; /* smc connection */ 199 struct smc_connection conn; /* smc connection */
194 struct smc_sock *listen_smc; /* listen parent */ 200 struct smc_sock *listen_smc; /* listen parent */
201 struct smc_connect_info *connect_info; /* connect address & flags */
202 struct work_struct connect_work; /* handle non-blocking connect*/
195 struct work_struct tcp_listen_work;/* handle tcp socket accepts */ 203 struct work_struct tcp_listen_work;/* handle tcp socket accepts */
196 struct work_struct smc_listen_work;/* prepare new accept socket */ 204 struct work_struct smc_listen_work;/* prepare new accept socket */
197 struct list_head accept_q; /* sockets to be accepted */ 205 struct list_head accept_q; /* sockets to be accepted */
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index cc7c1bb60fe8..dbd2605d1962 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -584,9 +584,9 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
584 GFP_KERNEL); 584 GFP_KERNEL);
585 if (!link->wr_rx_sges) 585 if (!link->wr_rx_sges)
586 goto no_mem_wr_tx_sges; 586 goto no_mem_wr_tx_sges;
587 link->wr_tx_mask = kzalloc( 587 link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT),
588 BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*link->wr_tx_mask), 588 sizeof(*link->wr_tx_mask),
589 GFP_KERNEL); 589 GFP_KERNEL);
590 if (!link->wr_tx_mask) 590 if (!link->wr_tx_mask)
591 goto no_mem_wr_rx_sges; 591 goto no_mem_wr_rx_sges;
592 link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT, 592 link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
diff --git a/net/socket.c b/net/socket.c
index af57d85bcb48..85633622c94d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -117,10 +117,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
117static int sock_mmap(struct file *file, struct vm_area_struct *vma); 117static int sock_mmap(struct file *file, struct vm_area_struct *vma);
118 118
119static int sock_close(struct inode *inode, struct file *file); 119static int sock_close(struct inode *inode, struct file *file);
120static struct wait_queue_head *sock_get_poll_head(struct file *file, 120static __poll_t sock_poll(struct file *file,
121 __poll_t events); 121 struct poll_table_struct *wait);
122static __poll_t sock_poll_mask(struct file *file, __poll_t);
123static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait);
124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 122static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
125#ifdef CONFIG_COMPAT 123#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file, 124static long compat_sock_ioctl(struct file *file,
@@ -143,8 +141,6 @@ static const struct file_operations socket_file_ops = {
143 .llseek = no_llseek, 141 .llseek = no_llseek,
144 .read_iter = sock_read_iter, 142 .read_iter = sock_read_iter,
145 .write_iter = sock_write_iter, 143 .write_iter = sock_write_iter,
146 .get_poll_head = sock_get_poll_head,
147 .poll_mask = sock_poll_mask,
148 .poll = sock_poll, 144 .poll = sock_poll,
149 .unlocked_ioctl = sock_ioctl, 145 .unlocked_ioctl = sock_ioctl,
150#ifdef CONFIG_COMPAT 146#ifdef CONFIG_COMPAT
@@ -541,7 +537,10 @@ static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
541 if (!err && (iattr->ia_valid & ATTR_UID)) { 537 if (!err && (iattr->ia_valid & ATTR_UID)) {
542 struct socket *sock = SOCKET_I(d_inode(dentry)); 538 struct socket *sock = SOCKET_I(d_inode(dentry));
543 539
544 sock->sk->sk_uid = iattr->ia_uid; 540 if (sock->sk)
541 sock->sk->sk_uid = iattr->ia_uid;
542 else
543 err = -ENOENT;
545 } 544 }
546 545
547 return err; 546 return err;
@@ -590,12 +589,16 @@ EXPORT_SYMBOL(sock_alloc);
590 * an inode not a file. 589 * an inode not a file.
591 */ 590 */
592 591
593void sock_release(struct socket *sock) 592static void __sock_release(struct socket *sock, struct inode *inode)
594{ 593{
595 if (sock->ops) { 594 if (sock->ops) {
596 struct module *owner = sock->ops->owner; 595 struct module *owner = sock->ops->owner;
597 596
597 if (inode)
598 inode_lock(inode);
598 sock->ops->release(sock); 599 sock->ops->release(sock);
600 if (inode)
601 inode_unlock(inode);
599 sock->ops = NULL; 602 sock->ops = NULL;
600 module_put(owner); 603 module_put(owner);
601 } 604 }
@@ -609,6 +612,11 @@ void sock_release(struct socket *sock)
609 } 612 }
610 sock->file = NULL; 613 sock->file = NULL;
611} 614}
615
616void sock_release(struct socket *sock)
617{
618 __sock_release(sock, NULL);
619}
612EXPORT_SYMBOL(sock_release); 620EXPORT_SYMBOL(sock_release);
613 621
614void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) 622void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
@@ -1118,48 +1126,16 @@ out_release:
1118} 1126}
1119EXPORT_SYMBOL(sock_create_lite); 1127EXPORT_SYMBOL(sock_create_lite);
1120 1128
1121static struct wait_queue_head *sock_get_poll_head(struct file *file,
1122 __poll_t events)
1123{
1124 struct socket *sock = file->private_data;
1125
1126 if (!sock->ops->poll_mask)
1127 return NULL;
1128 sock_poll_busy_loop(sock, events);
1129 return sk_sleep(sock->sk);
1130}
1131
1132static __poll_t sock_poll_mask(struct file *file, __poll_t events)
1133{
1134 struct socket *sock = file->private_data;
1135
1136 /*
1137 * We need to be sure we are in sync with the socket flags modification.
1138 *
1139 * This memory barrier is paired in the wq_has_sleeper.
1140 */
1141 smp_mb();
1142
1143 /* this socket can poll_ll so tell the system call */
1144 return sock->ops->poll_mask(sock, events) |
1145 (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0);
1146}
1147
1148/* No kernel lock held - perfect */ 1129/* No kernel lock held - perfect */
1149static __poll_t sock_poll(struct file *file, poll_table *wait) 1130static __poll_t sock_poll(struct file *file, poll_table *wait)
1150{ 1131{
1151 struct socket *sock = file->private_data; 1132 struct socket *sock = file->private_data;
1152 __poll_t events = poll_requested_events(wait), mask = 0; 1133 __poll_t events = poll_requested_events(wait);
1153
1154 if (sock->ops->poll) {
1155 sock_poll_busy_loop(sock, events);
1156 mask = sock->ops->poll(file, sock, wait);
1157 } else if (sock->ops->poll_mask) {
1158 sock_poll_wait(file, sock_get_poll_head(file, events), wait);
1159 mask = sock->ops->poll_mask(sock, events);
1160 }
1161 1134
1162 return mask | sock_poll_busy_flag(sock); 1135 sock_poll_busy_loop(sock, events);
1136 if (!sock->ops->poll)
1137 return 0;
1138 return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock);
1163} 1139}
1164 1140
1165static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1141static int sock_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1171,7 +1147,7 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1171 1147
1172static int sock_close(struct inode *inode, struct file *filp) 1148static int sock_close(struct inode *inode, struct file *filp)
1173{ 1149{
1174 sock_release(SOCKET_I(inode)); 1150 __sock_release(SOCKET_I(inode), inode);
1175 return 0; 1151 return 0;
1176} 1152}
1177 1153
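
net/socket.c now routes both sock_release() and sock_close() through __sock_release(), which holds the socket inode's lock across ops->release() whenever an inode is available; together with the new sock->sk check in sockfs_setattr(), this closes the window in which a chown on the socket file could dereference sk while the protocol was tearing it down. A small sketch of the lock-only-if-present shape (the function is hypothetical and omits the module reference and file back-pointer cleanup the real one does):

/* Small sketch of the conditional inode locking done by __sock_release();
 * example_release() is hypothetical and omits the module/file cleanup.
 */
#include <linux/fs.h>
#include <linux/net.h>

static void example_release(struct socket *sock, struct inode *inode)
{
	if (!sock->ops)
		return;

	if (inode)
		inode_lock(inode);	/* serialize against sockfs_setattr() */
	sock->ops->release(sock);
	if (inode)
		inode_unlock(inode);

	sock->ops = NULL;
}
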
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 1a9695183599..625acb27efcc 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -35,7 +35,6 @@ struct _strp_msg {
35 */ 35 */
36 struct strp_msg strp; 36 struct strp_msg strp;
37 int accum_len; 37 int accum_len;
38 int early_eaten;
39}; 38};
40 39
41static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) 40static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
@@ -115,20 +114,6 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
115 head = strp->skb_head; 114 head = strp->skb_head;
116 if (head) { 115 if (head) {
117 /* Message already in progress */ 116 /* Message already in progress */
118
119 stm = _strp_msg(head);
120 if (unlikely(stm->early_eaten)) {
121 /* Already some number of bytes on the receive sock
122 * data saved in skb_head, just indicate they
123 * are consumed.
124 */
125 eaten = orig_len <= stm->early_eaten ?
126 orig_len : stm->early_eaten;
127 stm->early_eaten -= eaten;
128
129 return eaten;
130 }
131
132 if (unlikely(orig_offset)) { 117 if (unlikely(orig_offset)) {
133 /* Getting data with a non-zero offset when a message is 118 /* Getting data with a non-zero offset when a message is
134 * in progress is not expected. If it does happen, we 119 * in progress is not expected. If it does happen, we
@@ -297,9 +282,9 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
297 } 282 }
298 283
299 stm->accum_len += cand_len; 284 stm->accum_len += cand_len;
285 eaten += cand_len;
300 strp->need_bytes = stm->strp.full_len - 286 strp->need_bytes = stm->strp.full_len -
301 stm->accum_len; 287 stm->accum_len;
302 stm->early_eaten = cand_len;
303 STRP_STATS_ADD(strp->stats.bytes, cand_len); 288 STRP_STATS_ADD(strp->stats.bytes, cand_len);
304 desc->count = 0; /* Stop reading socket */ 289 desc->count = 0; /* Stop reading socket */
305 break; 290 break;
@@ -392,7 +377,7 @@ static int strp_read_sock(struct strparser *strp)
392/* Lower sock lock held */ 377/* Lower sock lock held */
393void strp_data_ready(struct strparser *strp) 378void strp_data_ready(struct strparser *strp)
394{ 379{
395 if (unlikely(strp->stopped)) 380 if (unlikely(strp->stopped) || strp->paused)
396 return; 381 return;
397 382
398 /* This check is needed to synchronize with do_strp_work. 383 /* This check is needed to synchronize with do_strp_work.
@@ -407,9 +392,6 @@ void strp_data_ready(struct strparser *strp)
407 return; 392 return;
408 } 393 }
409 394
410 if (strp->paused)
411 return;
412
413 if (strp->need_bytes) { 395 if (strp->need_bytes) {
414 if (strp_peek_len(strp) < strp->need_bytes) 396 if (strp_peek_len(strp) < strp->need_bytes)
415 return; 397 return;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 9463af4b32e8..be8f103d22fd 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1753,7 +1753,8 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
1753 last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT; 1753 last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT;
1754 rqstp->rq_enc_pages_num = last - first + 1 + 1; 1754 rqstp->rq_enc_pages_num = last - first + 1 + 1;
1755 rqstp->rq_enc_pages 1755 rqstp->rq_enc_pages
1756 = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), 1756 = kmalloc_array(rqstp->rq_enc_pages_num,
1757 sizeof(struct page *),
1757 GFP_NOFS); 1758 GFP_NOFS);
1758 if (!rqstp->rq_enc_pages) 1759 if (!rqstp->rq_enc_pages)
1759 goto out; 1760 goto out;
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 46b295e4f2b8..1c7c49dbf8ba 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -224,7 +224,7 @@ static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
224static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) 224static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
225{ 225{
226 arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE); 226 arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
227 arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL); 227 arg->pages = kcalloc(arg->npages, sizeof(struct page *), GFP_KERNEL);
228 /* 228 /*
229 * XXX: actual pages are allocated by xdr layer in 229 * XXX: actual pages are allocated by xdr layer in
230 * xdr_partial_copy_from_skb. 230 * xdr_partial_copy_from_skb.
@@ -298,9 +298,11 @@ int gssp_accept_sec_context_upcall(struct net *net,
298 if (res.context_handle) { 298 if (res.context_handle) {
299 data->out_handle = rctxh.exported_context_token; 299 data->out_handle = rctxh.exported_context_token;
300 data->mech_oid.len = rctxh.mech.len; 300 data->mech_oid.len = rctxh.mech.len;
301 if (rctxh.mech.data) 301 if (rctxh.mech.data) {
302 memcpy(data->mech_oid.data, rctxh.mech.data, 302 memcpy(data->mech_oid.data, rctxh.mech.data,
303 data->mech_oid.len); 303 data->mech_oid.len);
304 kfree(rctxh.mech.data);
305 }
304 client_name = rctxh.src_name.display_name; 306 client_name = rctxh.src_name.display_name;
305 } 307 }
306 308
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index cdda4744c9b1..109fbe591e7b 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1683,7 +1683,7 @@ struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct ne
1683 if (cd == NULL) 1683 if (cd == NULL)
1684 return ERR_PTR(-ENOMEM); 1684 return ERR_PTR(-ENOMEM);
1685 1685
1686 cd->hash_table = kzalloc(cd->hash_size * sizeof(struct hlist_head), 1686 cd->hash_table = kcalloc(cd->hash_size, sizeof(struct hlist_head),
1687 GFP_KERNEL); 1687 GFP_KERNEL);
1688 if (cd->hash_table == NULL) { 1688 if (cd->hash_table == NULL) {
1689 kfree(cd); 1689 kfree(cd);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c2266f387213..d839c33ae7d9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1546,6 +1546,7 @@ call_reserveresult(struct rpc_task *task)
1546 task->tk_status = 0; 1546 task->tk_status = 0;
1547 if (status >= 0) { 1547 if (status >= 0) {
1548 if (task->tk_rqstp) { 1548 if (task->tk_rqstp) {
1549 xprt_request_init(task);
1549 task->tk_action = call_refresh; 1550 task->tk_action = call_refresh;
1550 return; 1551 return;
1551 } 1552 }
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 70f005044f06..3fabf9f6a0f9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -66,7 +66,7 @@
66 * Local functions 66 * Local functions
67 */ 67 */
68static void xprt_init(struct rpc_xprt *xprt, struct net *net); 68static void xprt_init(struct rpc_xprt *xprt, struct net *net);
69static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 69static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
70static void xprt_connect_status(struct rpc_task *task); 70static void xprt_connect_status(struct rpc_task *task);
71static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 71static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
72static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); 72static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
@@ -1163,10 +1163,10 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
1163out_init_req: 1163out_init_req:
1164 xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots, 1164 xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
1165 xprt->num_reqs); 1165 xprt->num_reqs);
1166 spin_unlock(&xprt->reserve_lock);
1167
1166 task->tk_status = 0; 1168 task->tk_status = 0;
1167 task->tk_rqstp = req; 1169 task->tk_rqstp = req;
1168 xprt_request_init(task, xprt);
1169 spin_unlock(&xprt->reserve_lock);
1170} 1170}
1171EXPORT_SYMBOL_GPL(xprt_alloc_slot); 1171EXPORT_SYMBOL_GPL(xprt_alloc_slot);
1172 1172
@@ -1184,7 +1184,7 @@ void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
1184} 1184}
1185EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); 1185EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
1186 1186
1187static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 1187void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
1188{ 1188{
1189 spin_lock(&xprt->reserve_lock); 1189 spin_lock(&xprt->reserve_lock);
1190 if (!xprt_dynamic_free_slot(xprt, req)) { 1190 if (!xprt_dynamic_free_slot(xprt, req)) {
@@ -1194,6 +1194,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
1194 xprt_wake_up_backlog(xprt); 1194 xprt_wake_up_backlog(xprt);
1195 spin_unlock(&xprt->reserve_lock); 1195 spin_unlock(&xprt->reserve_lock);
1196} 1196}
1197EXPORT_SYMBOL_GPL(xprt_free_slot);
1197 1198
1198static void xprt_free_all_slots(struct rpc_xprt *xprt) 1199static void xprt_free_all_slots(struct rpc_xprt *xprt)
1199{ 1200{
@@ -1295,7 +1296,12 @@ void xprt_retry_reserve(struct rpc_task *task)
1295 1296
1296static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) 1297static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
1297{ 1298{
1298 return (__force __be32)xprt->xid++; 1299 __be32 xid;
1300
1301 spin_lock(&xprt->reserve_lock);
1302 xid = (__force __be32)xprt->xid++;
1303 spin_unlock(&xprt->reserve_lock);
1304 return xid;
1299} 1305}
1300 1306
1301static inline void xprt_init_xid(struct rpc_xprt *xprt) 1307static inline void xprt_init_xid(struct rpc_xprt *xprt)
@@ -1303,8 +1309,9 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt)
1303 xprt->xid = prandom_u32(); 1309 xprt->xid = prandom_u32();
1304} 1310}
1305 1311
1306static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) 1312void xprt_request_init(struct rpc_task *task)
1307{ 1313{
1314 struct rpc_xprt *xprt = task->tk_xprt;
1308 struct rpc_rqst *req = task->tk_rqstp; 1315 struct rpc_rqst *req = task->tk_rqstp;
1309 1316
1310 INIT_LIST_HEAD(&req->rq_list); 1317 INIT_LIST_HEAD(&req->rq_list);
@@ -1312,7 +1319,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
1312 req->rq_task = task; 1319 req->rq_task = task;
1313 req->rq_xprt = xprt; 1320 req->rq_xprt = xprt;
1314 req->rq_buffer = NULL; 1321 req->rq_buffer = NULL;
1315 req->rq_xid = xprt_alloc_xid(xprt); 1322 req->rq_xid = xprt_alloc_xid(xprt);
1316 req->rq_connect_cookie = xprt->connect_cookie - 1; 1323 req->rq_connect_cookie = xprt->connect_cookie - 1;
1317 req->rq_bytes_sent = 0; 1324 req->rq_bytes_sent = 0;
1318 req->rq_snd_buf.len = 0; 1325 req->rq_snd_buf.len = 0;
@@ -1373,7 +1380,7 @@ void xprt_release(struct rpc_task *task)
1373 1380
1374 dprintk("RPC: %5u release request %p\n", task->tk_pid, req); 1381 dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
1375 if (likely(!bc_prealloc(req))) 1382 if (likely(!bc_prealloc(req)))
1376 xprt_free_slot(xprt, req); 1383 xprt->ops->free_slot(xprt, req);
1377 else 1384 else
1378 xprt_free_bc_request(req); 1385 xprt_free_bc_request(req);
1379} 1386}
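
With slot setup moved into xprt_request_init(), now called later from call_reserveresult() rather than under xprt_alloc_slot()'s reserve_lock, the XID counter bump is no longer implicitly serialized, so xprt_alloc_xid() takes reserve_lock around xprt->xid++ itself. A minimal sketch of the locked counter, with a hypothetical pool structure standing in for rpc_xprt:

/* Minimal sketch of a spinlock-protected XID counter, as in the new
 * xprt_alloc_xid(); struct xid_pool is hypothetical.
 */
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/compiler.h>

struct xid_pool {
	spinlock_t lock;
	u32 next;
};

static __be32 xid_alloc(struct xid_pool *pool)
{
	__be32 xid;

	spin_lock(&pool->lock);
	xid = (__force __be32)pool->next++;	/* increment cannot be lost */
	spin_unlock(&pool->lock);
	return xid;
}
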
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 47ebac949769..90adeff4c06b 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -9,8 +9,10 @@
9#include <linux/sunrpc/xprt.h> 9#include <linux/sunrpc/xprt.h>
10#include <linux/sunrpc/svc.h> 10#include <linux/sunrpc/svc.h>
11#include <linux/sunrpc/svc_xprt.h> 11#include <linux/sunrpc/svc_xprt.h>
12#include <linux/sunrpc/svc_rdma.h>
12 13
13#include "xprt_rdma.h" 14#include "xprt_rdma.h"
15#include <trace/events/rpcrdma.h>
14 16
15#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 17#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
16# define RPCDBG_FACILITY RPCDBG_TRANS 18# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -29,29 +31,41 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
29 spin_unlock(&buf->rb_reqslock); 31 spin_unlock(&buf->rb_reqslock);
30 32
31 rpcrdma_destroy_req(req); 33 rpcrdma_destroy_req(req);
32
33 kfree(rqst);
34} 34}
35 35
36static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, 36static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
37 struct rpc_rqst *rqst) 37 unsigned int count)
38{ 38{
39 struct rpcrdma_regbuf *rb; 39 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
40 struct rpcrdma_req *req; 40 struct rpc_rqst *rqst;
41 size_t size; 41 unsigned int i;
42
43 for (i = 0; i < (count << 1); i++) {
44 struct rpcrdma_regbuf *rb;
45 struct rpcrdma_req *req;
46 size_t size;
47
48 req = rpcrdma_create_req(r_xprt);
49 if (IS_ERR(req))
50 return PTR_ERR(req);
51 rqst = &req->rl_slot;
52
53 rqst->rq_xprt = xprt;
54 INIT_LIST_HEAD(&rqst->rq_list);
55 INIT_LIST_HEAD(&rqst->rq_bc_list);
56 __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
57 spin_lock_bh(&xprt->bc_pa_lock);
58 list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
59 spin_unlock_bh(&xprt->bc_pa_lock);
42 60
43 req = rpcrdma_create_req(r_xprt); 61 size = r_xprt->rx_data.inline_rsize;
44 if (IS_ERR(req)) 62 rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
45 return PTR_ERR(req); 63 if (IS_ERR(rb))
46 64 goto out_fail;
47 size = r_xprt->rx_data.inline_rsize; 65 req->rl_sendbuf = rb;
48 rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); 66 xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
49 if (IS_ERR(rb)) 67 min_t(size_t, size, PAGE_SIZE));
50 goto out_fail; 68 }
51 req->rl_sendbuf = rb;
52 xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
53 min_t(size_t, size, PAGE_SIZE));
54 rpcrdma_set_xprtdata(rqst, req);
55 return 0; 69 return 0;
56 70
57out_fail: 71out_fail:
@@ -59,23 +73,6 @@ out_fail:
59 return -ENOMEM; 73 return -ENOMEM;
60} 74}
61 75
62/* Allocate and add receive buffers to the rpcrdma_buffer's
63 * existing list of rep's. These are released when the
64 * transport is destroyed.
65 */
66static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
67 unsigned int count)
68{
69 int rc = 0;
70
71 while (count--) {
72 rc = rpcrdma_create_rep(r_xprt);
73 if (rc)
74 break;
75 }
76 return rc;
77}
78
79/** 76/**
80 * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests 77 * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
81 * @xprt: transport associated with these backchannel resources 78 * @xprt: transport associated with these backchannel resources
@@ -86,9 +83,6 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
86int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) 83int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
87{ 84{
88 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 85 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
89 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
90 struct rpc_rqst *rqst;
91 unsigned int i;
92 int rc; 86 int rc;
93 87
94 /* The backchannel reply path returns each rpc_rqst to the 88 /* The backchannel reply path returns each rpc_rqst to the
@@ -103,35 +97,11 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
103 if (reqs > RPCRDMA_BACKWARD_WRS >> 1) 97 if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
104 goto out_err; 98 goto out_err;
105 99
106 for (i = 0; i < (reqs << 1); i++) { 100 rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
107 rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
108 if (!rqst)
109 goto out_free;
110
111 dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
112
113 rqst->rq_xprt = &r_xprt->rx_xprt;
114 INIT_LIST_HEAD(&rqst->rq_list);
115 INIT_LIST_HEAD(&rqst->rq_bc_list);
116 __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
117
118 if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
119 goto out_free;
120
121 spin_lock_bh(&xprt->bc_pa_lock);
122 list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
123 spin_unlock_bh(&xprt->bc_pa_lock);
124 }
125
126 rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
127 if (rc) 101 if (rc)
128 goto out_free; 102 goto out_free;
129 103
130 rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); 104 r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
131 if (rc)
132 goto out_free;
133
134 buffer->rb_bc_srv_max_requests = reqs;
135 request_module("svcrdma"); 105 request_module("svcrdma");
136 trace_xprtrdma_cb_setup(r_xprt, reqs); 106 trace_xprtrdma_cb_setup(r_xprt, reqs);
137 return 0; 107 return 0;
@@ -235,6 +205,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
235 if (rc < 0) 205 if (rc < 0)
236 goto failed_marshal; 206 goto failed_marshal;
237 207
208 rpcrdma_post_recvs(r_xprt, true);
238 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 209 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
239 goto drop_connection; 210 goto drop_connection;
240 return 0; 211 return 0;
@@ -275,10 +246,14 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
275 */ 246 */
276void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) 247void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
277{ 248{
249 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
278 struct rpc_xprt *xprt = rqst->rq_xprt; 250 struct rpc_xprt *xprt = rqst->rq_xprt;
279 251
280 dprintk("RPC: %s: freeing rqst %p (req %p)\n", 252 dprintk("RPC: %s: freeing rqst %p (req %p)\n",
281 __func__, rqst, rpcr_to_rdmar(rqst)); 253 __func__, rqst, req);
254
255 rpcrdma_recv_buffer_put(req->rl_reply);
256 req->rl_reply = NULL;
282 257
283 spin_lock_bh(&xprt->bc_pa_lock); 258 spin_lock_bh(&xprt->bc_pa_lock);
284 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); 259 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f2f63959fddd..17fb1e025654 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -20,7 +20,10 @@
20 * verb (fmr_op_unmap). 20 * verb (fmr_op_unmap).
21 */ 21 */
22 22
23#include <linux/sunrpc/svc_rdma.h>
24
23#include "xprt_rdma.h" 25#include "xprt_rdma.h"
26#include <trace/events/rpcrdma.h>
24 27
25#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 28#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
26# define RPCDBG_FACILITY RPCDBG_TRANS 29# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -156,10 +159,32 @@ out_release:
156 fmr_op_release_mr(mr); 159 fmr_op_release_mr(mr);
157} 160}
158 161
162/* On success, sets:
163 * ep->rep_attr.cap.max_send_wr
164 * ep->rep_attr.cap.max_recv_wr
165 * cdata->max_requests
166 * ia->ri_max_segs
167 */
159static int 168static int
160fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 169fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
161 struct rpcrdma_create_data_internal *cdata) 170 struct rpcrdma_create_data_internal *cdata)
162{ 171{
172 int max_qp_wr;
173
174 max_qp_wr = ia->ri_device->attrs.max_qp_wr;
175 max_qp_wr -= RPCRDMA_BACKWARD_WRS;
176 max_qp_wr -= 1;
177 if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
178 return -ENOMEM;
179 if (cdata->max_requests > max_qp_wr)
180 cdata->max_requests = max_qp_wr;
181 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
182 ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
183 ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
184 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
185 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
186 ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
187
163 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 188 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
164 RPCRDMA_MAX_FMR_SGES); 189 RPCRDMA_MAX_FMR_SGES);
165 return 0; 190 return 0;
@@ -219,6 +244,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
219 mr->mr_sg, i, mr->mr_dir); 244 mr->mr_sg, i, mr->mr_dir);
220 if (!mr->mr_nents) 245 if (!mr->mr_nents)
221 goto out_dmamap_err; 246 goto out_dmamap_err;
247 trace_xprtrdma_dma_map(mr);
222 248
223 for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) 249 for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
224 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); 250 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c59c5c788db0..c040de196e13 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -71,8 +71,10 @@
71 */ 71 */
72 72
73#include <linux/sunrpc/rpc_rdma.h> 73#include <linux/sunrpc/rpc_rdma.h>
74#include <linux/sunrpc/svc_rdma.h>
74 75
75#include "xprt_rdma.h" 76#include "xprt_rdma.h"
77#include <trace/events/rpcrdma.h>
76 78
77#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 79#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
78# define RPCDBG_FACILITY RPCDBG_TRANS 80# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -202,12 +204,22 @@ out_release:
202 frwr_op_release_mr(mr); 204 frwr_op_release_mr(mr);
203} 205}
204 206
207/* On success, sets:
208 * ep->rep_attr.cap.max_send_wr
209 * ep->rep_attr.cap.max_recv_wr
210 * cdata->max_requests
211 * ia->ri_max_segs
212 *
213 * And these FRWR-related fields:
214 * ia->ri_max_frwr_depth
215 * ia->ri_mrtype
216 */
205static int 217static int
206frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 218frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
207 struct rpcrdma_create_data_internal *cdata) 219 struct rpcrdma_create_data_internal *cdata)
208{ 220{
209 struct ib_device_attr *attrs = &ia->ri_device->attrs; 221 struct ib_device_attr *attrs = &ia->ri_device->attrs;
210 int depth, delta; 222 int max_qp_wr, depth, delta;
211 223
212 ia->ri_mrtype = IB_MR_TYPE_MEM_REG; 224 ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
213 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) 225 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
@@ -241,14 +253,26 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
241 } while (delta > 0); 253 } while (delta > 0);
242 } 254 }
243 255
244 ep->rep_attr.cap.max_send_wr *= depth; 256 max_qp_wr = ia->ri_device->attrs.max_qp_wr;
245 if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) { 257 max_qp_wr -= RPCRDMA_BACKWARD_WRS;
246 cdata->max_requests = attrs->max_qp_wr / depth; 258 max_qp_wr -= 1;
259 if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
260 return -ENOMEM;
261 if (cdata->max_requests > max_qp_wr)
262 cdata->max_requests = max_qp_wr;
263 ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
264 if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
265 cdata->max_requests = max_qp_wr / depth;
247 if (!cdata->max_requests) 266 if (!cdata->max_requests)
248 return -EINVAL; 267 return -EINVAL;
249 ep->rep_attr.cap.max_send_wr = cdata->max_requests * 268 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
250 depth; 269 depth;
251 } 270 }
271 ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
272 ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
273 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
274 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
275 ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
252 276
253 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 277 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
254 ia->ri_max_frwr_depth); 278 ia->ri_max_frwr_depth);
@@ -393,6 +417,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
393 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); 417 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
394 if (!mr->mr_nents) 418 if (!mr->mr_nents)
395 goto out_dmamap_err; 419 goto out_dmamap_err;
420 trace_xprtrdma_dma_map(mr);
396 421
397 ibmr = frwr->fr_mr; 422 ibmr = frwr->fr_mr;
398 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); 423 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
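
fmr_op_open() and frwr_op_open() now size the queue pair themselves: start from the device's max_qp_wr, reserve RPCRDMA_BACKWARD_WRS plus one work request for ib_drain_sq()/ib_drain_rq(), clamp cdata->max_requests to what remains (FRWR additionally multiplies the send side by its registration depth), then add the reserved entries back onto the caps. A worked sketch of the FMR-style arithmetic with stand-in constants (the real values come from the RPC-over-RDMA headers):

/* Worked sketch of the WR budgeting in fmr_op_open(); the two constants
 * below are illustrative stand-ins, not the real RPCRDMA_* values.
 */
#include <linux/errno.h>

#define EX_BACKWARD_WRS		32	/* stand-in for RPCRDMA_BACKWARD_WRS   */
#define EX_MIN_SLOT_TABLE	2	/* stand-in for RPCRDMA_MIN_SLOT_TABLE */

static int budget_wrs(unsigned int device_max_qp_wr, unsigned int *max_requests,
		      unsigned int *max_send_wr, unsigned int *max_recv_wr)
{
	unsigned int avail;

	if (device_max_qp_wr < EX_BACKWARD_WRS + 1 + EX_MIN_SLOT_TABLE)
		return -ENOMEM;			/* device queues are too shallow */

	avail = device_max_qp_wr - EX_BACKWARD_WRS - 1;
	if (*max_requests > avail)
		*max_requests = avail;		/* clamp the credit limit */

	/* one send and one recv WR per request, plus the reserved entries */
	*max_send_wr = *max_requests + EX_BACKWARD_WRS + 1;	/* +1 for ib_drain_sq */
	*max_recv_wr = *max_requests + EX_BACKWARD_WRS + 1;	/* +1 for ib_drain_rq */
	return 0;
}
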
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index a762d192372b..620327c01302 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
1/* 2/*
2 * Copyright (c) 2015, 2017 Oracle. All rights reserved. 3 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
3 */ 4 */
@@ -13,9 +14,11 @@
13 14
14#include <asm/swab.h> 15#include <asm/swab.h>
15 16
16#define CREATE_TRACE_POINTS
17#include "xprt_rdma.h" 17#include "xprt_rdma.h"
18 18
19#define CREATE_TRACE_POINTS
20#include <trace/events/rpcrdma.h>
21
19MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); 22MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
20MODULE_DESCRIPTION("RPC/RDMA Transport"); 23MODULE_DESCRIPTION("RPC/RDMA Transport");
21MODULE_LICENSE("Dual BSD/GPL"); 24MODULE_LICENSE("Dual BSD/GPL");
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e8adad33d0bb..c8ae983c6cc0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
1/* 2/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved. 3 * Copyright (c) 2014-2017 Oracle. All rights reserved.
3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -46,22 +47,17 @@
46 * to the Linux RPC framework lives. 47 * to the Linux RPC framework lives.
47 */ 48 */
48 49
49#include "xprt_rdma.h"
50
51#include <linux/highmem.h> 50#include <linux/highmem.h>
52 51
52#include <linux/sunrpc/svc_rdma.h>
53
54#include "xprt_rdma.h"
55#include <trace/events/rpcrdma.h>
56
53#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 57#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
54# define RPCDBG_FACILITY RPCDBG_TRANS 58# define RPCDBG_FACILITY RPCDBG_TRANS
55#endif 59#endif
56 60
57static const char transfertypes[][12] = {
58 "inline", /* no chunks */
59 "read list", /* some argument via rdma read */
60 "*read list", /* entire request via rdma read */
61 "write list", /* some result via rdma write */
62 "reply chunk" /* entire reply via rdma write */
63};
64
65/* Returns size of largest RPC-over-RDMA header in a Call message 61/* Returns size of largest RPC-over-RDMA header in a Call message
66 * 62 *
67 * The largest Call header contains a full-size Read list and a 63 * The largest Call header contains a full-size Read list and a
@@ -230,7 +226,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
230 */ 226 */
231 *ppages = alloc_page(GFP_ATOMIC); 227 *ppages = alloc_page(GFP_ATOMIC);
232 if (!*ppages) 228 if (!*ppages)
233 return -EAGAIN; 229 return -ENOBUFS;
234 } 230 }
235 seg->mr_page = *ppages; 231 seg->mr_page = *ppages;
236 seg->mr_offset = (char *)page_base; 232 seg->mr_offset = (char *)page_base;
@@ -365,7 +361,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
365 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 361 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
366 false, &mr); 362 false, &mr);
367 if (IS_ERR(seg)) 363 if (IS_ERR(seg))
368 goto out_maperr; 364 return PTR_ERR(seg);
369 rpcrdma_mr_push(mr, &req->rl_registered); 365 rpcrdma_mr_push(mr, &req->rl_registered);
370 366
371 if (encode_read_segment(xdr, mr, pos) < 0) 367 if (encode_read_segment(xdr, mr, pos) < 0)
@@ -377,11 +373,6 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
377 } while (nsegs); 373 } while (nsegs);
378 374
379 return 0; 375 return 0;
380
381out_maperr:
382 if (PTR_ERR(seg) == -EAGAIN)
383 xprt_wait_for_buffer_space(rqst->rq_task, NULL);
384 return PTR_ERR(seg);
385} 376}
386 377
387/* Register and XDR encode the Write list. Supports encoding a list 378/* Register and XDR encode the Write list. Supports encoding a list
@@ -428,7 +419,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
428 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 419 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
429 true, &mr); 420 true, &mr);
430 if (IS_ERR(seg)) 421 if (IS_ERR(seg))
431 goto out_maperr; 422 return PTR_ERR(seg);
432 rpcrdma_mr_push(mr, &req->rl_registered); 423 rpcrdma_mr_push(mr, &req->rl_registered);
433 424
434 if (encode_rdma_segment(xdr, mr) < 0) 425 if (encode_rdma_segment(xdr, mr) < 0)
@@ -445,11 +436,6 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
445 *segcount = cpu_to_be32(nchunks); 436 *segcount = cpu_to_be32(nchunks);
446 437
447 return 0; 438 return 0;
448
449out_maperr:
450 if (PTR_ERR(seg) == -EAGAIN)
451 xprt_wait_for_buffer_space(rqst->rq_task, NULL);
452 return PTR_ERR(seg);
453} 439}
454 440
455/* Register and XDR encode the Reply chunk. Supports encoding an array 441/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -491,7 +477,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
491 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 477 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
492 true, &mr); 478 true, &mr);
493 if (IS_ERR(seg)) 479 if (IS_ERR(seg))
494 goto out_maperr; 480 return PTR_ERR(seg);
495 rpcrdma_mr_push(mr, &req->rl_registered); 481 rpcrdma_mr_push(mr, &req->rl_registered);
496 482
497 if (encode_rdma_segment(xdr, mr) < 0) 483 if (encode_rdma_segment(xdr, mr) < 0)
@@ -508,11 +494,6 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
508 *segcount = cpu_to_be32(nchunks); 494 *segcount = cpu_to_be32(nchunks);
509 495
510 return 0; 496 return 0;
511
512out_maperr:
513 if (PTR_ERR(seg) == -EAGAIN)
514 xprt_wait_for_buffer_space(rqst->rq_task, NULL);
515 return PTR_ERR(seg);
516} 497}
517 498
518/** 499/**
@@ -709,7 +690,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
709{ 690{
710 req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf); 691 req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
711 if (!req->rl_sendctx) 692 if (!req->rl_sendctx)
712 return -ENOBUFS; 693 return -EAGAIN;
713 req->rl_sendctx->sc_wr.num_sge = 0; 694 req->rl_sendctx->sc_wr.num_sge = 0;
714 req->rl_sendctx->sc_unmap_count = 0; 695 req->rl_sendctx->sc_unmap_count = 0;
715 req->rl_sendctx->sc_req = req; 696 req->rl_sendctx->sc_req = req;
@@ -883,7 +864,15 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
883 return 0; 864 return 0;
884 865
885out_err: 866out_err:
886 r_xprt->rx_stats.failed_marshal_count++; 867 switch (ret) {
868 case -EAGAIN:
869 xprt_wait_for_buffer_space(rqst->rq_task, NULL);
870 break;
871 case -ENOBUFS:
872 break;
873 default:
874 r_xprt->rx_stats.failed_marshal_count++;
875 }
887 return ret; 876 return ret;
888} 877}
889 878
@@ -1026,8 +1015,6 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
1026 1015
1027out_short: 1016out_short:
1028 pr_warn("RPC/RDMA short backward direction call\n"); 1017 pr_warn("RPC/RDMA short backward direction call\n");
1029 if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
1030 xprt_disconnect_done(&r_xprt->rx_xprt);
1031 return true; 1018 return true;
1032} 1019}
1033#else /* CONFIG_SUNRPC_BACKCHANNEL */ 1020#else /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -1333,13 +1320,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1333 u32 credits; 1320 u32 credits;
1334 __be32 *p; 1321 __be32 *p;
1335 1322
1323 --buf->rb_posted_receives;
1324
1336 if (rep->rr_hdrbuf.head[0].iov_len == 0) 1325 if (rep->rr_hdrbuf.head[0].iov_len == 0)
1337 goto out_badstatus; 1326 goto out_badstatus;
1338 1327
1328 /* Fixed transport header fields */
1339 xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, 1329 xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
1340 rep->rr_hdrbuf.head[0].iov_base); 1330 rep->rr_hdrbuf.head[0].iov_base);
1341
1342 /* Fixed transport header fields */
1343 p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p)); 1331 p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
1344 if (unlikely(!p)) 1332 if (unlikely(!p))
1345 goto out_shortreply; 1333 goto out_shortreply;
@@ -1378,17 +1366,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1378 1366
1379 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); 1367 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
1380 1368
1369 rpcrdma_post_recvs(r_xprt, false);
1381 queue_work(rpcrdma_receive_wq, &rep->rr_work); 1370 queue_work(rpcrdma_receive_wq, &rep->rr_work);
1382 return; 1371 return;
1383 1372
1384out_badstatus:
1385 rpcrdma_recv_buffer_put(rep);
1386 if (r_xprt->rx_ep.rep_connected == 1) {
1387 r_xprt->rx_ep.rep_connected = -EIO;
1388 rpcrdma_conn_func(&r_xprt->rx_ep);
1389 }
1390 return;
1391
1392out_badversion: 1373out_badversion:
1393 trace_xprtrdma_reply_vers(rep); 1374 trace_xprtrdma_reply_vers(rep);
1394 goto repost; 1375 goto repost;
@@ -1408,7 +1389,7 @@ out_shortreply:
1408 * receive buffer before returning. 1389 * receive buffer before returning.
1409 */ 1390 */
1410repost: 1391repost:
1411 r_xprt->rx_stats.bad_reply_count++; 1392 rpcrdma_post_recvs(r_xprt, false);
1412 if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) 1393out_badstatus:
1413 rpcrdma_recv_buffer_put(rep); 1394 rpcrdma_recv_buffer_put(rep);
1414} 1395}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index dd8a431dc2ae..357ba90c382d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -1,4 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
1/* 2/*
3 * Copyright (c) 2015-2018 Oracle. All rights reserved.
2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3 * 5 *
4 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -46,7 +48,6 @@
46#include <linux/sunrpc/clnt.h> 48#include <linux/sunrpc/clnt.h>
47#include <linux/sunrpc/sched.h> 49#include <linux/sunrpc/sched.h>
48#include <linux/sunrpc/svc_rdma.h> 50#include <linux/sunrpc/svc_rdma.h>
49#include "xprt_rdma.h"
50 51
51#define RPCDBG_FACILITY RPCDBG_SVCXPRT 52#define RPCDBG_FACILITY RPCDBG_SVCXPRT
52 53
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a73632ca9048..a68180090554 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -1,13 +1,16 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015-2018 Oracle. All rights reserved.
4 * 4 *
5 * Support for backward direction RPCs on RPC/RDMA (server-side). 5 * Support for backward direction RPCs on RPC/RDMA (server-side).
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9
9#include <linux/sunrpc/svc_rdma.h> 10#include <linux/sunrpc/svc_rdma.h>
11
10#include "xprt_rdma.h" 12#include "xprt_rdma.h"
13#include <trace/events/rpcrdma.h>
11 14
12#define RPCDBG_FACILITY RPCDBG_SVCXPRT 15#define RPCDBG_FACILITY RPCDBG_SVCXPRT
13 16
@@ -112,39 +115,21 @@ out_notfound:
112 * the adapter has a small maximum SQ depth. 115 * the adapter has a small maximum SQ depth.
113 */ 116 */
114static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, 117static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
115 struct rpc_rqst *rqst) 118 struct rpc_rqst *rqst,
119 struct svc_rdma_send_ctxt *ctxt)
116{ 120{
117 struct svc_rdma_op_ctxt *ctxt;
118 int ret; 121 int ret;
119 122
120 ctxt = svc_rdma_get_context(rdma); 123 ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL);
121
122 /* rpcrdma_bc_send_request builds the transport header and
123 * the backchannel RPC message in the same buffer. Thus only
124 * one SGE is needed to send both.
125 */
126 ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer,
127 rqst->rq_snd_buf.len);
128 if (ret < 0) 124 if (ret < 0)
129 goto out_err; 125 return -EIO;
130 126
131 /* Bump page refcnt so Send completion doesn't release 127 /* Bump page refcnt so Send completion doesn't release
132 * the rq_buffer before all retransmits are complete. 128 * the rq_buffer before all retransmits are complete.
133 */ 129 */
134 get_page(virt_to_page(rqst->rq_buffer)); 130 get_page(virt_to_page(rqst->rq_buffer));
135 ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); 131 ctxt->sc_send_wr.opcode = IB_WR_SEND;
136 if (ret) 132 return svc_rdma_send(rdma, &ctxt->sc_send_wr);
137 goto out_unmap;
138
139out_err:
140 dprintk("svcrdma: %s returns %d\n", __func__, ret);
141 return ret;
142
143out_unmap:
144 svc_rdma_unmap_dma(ctxt);
145 svc_rdma_put_context(ctxt, 1);
146 ret = -EIO;
147 goto out_err;
148} 133}
149 134
150/* Server-side transport endpoint wants a whole page for its send 135/* Server-side transport endpoint wants a whole page for its send
@@ -191,13 +176,15 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
191{ 176{
192 struct rpc_xprt *xprt = rqst->rq_xprt; 177 struct rpc_xprt *xprt = rqst->rq_xprt;
193 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 178 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
179 struct svc_rdma_send_ctxt *ctxt;
194 __be32 *p; 180 __be32 *p;
195 int rc; 181 int rc;
196 182
197 /* Space in the send buffer for an RPC/RDMA header is reserved 183 ctxt = svc_rdma_send_ctxt_get(rdma);
198 * via xprt->tsh_size. 184 if (!ctxt)
199 */ 185 goto drop_connection;
200 p = rqst->rq_buffer; 186
187 p = ctxt->sc_xprt_buf;
201 *p++ = rqst->rq_xid; 188 *p++ = rqst->rq_xid;
202 *p++ = rpcrdma_version; 189 *p++ = rpcrdma_version;
203 *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); 190 *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
@@ -205,14 +192,17 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
205 *p++ = xdr_zero; 192 *p++ = xdr_zero;
206 *p++ = xdr_zero; 193 *p++ = xdr_zero;
207 *p = xdr_zero; 194 *p = xdr_zero;
195 svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN);
208 196
209#ifdef SVCRDMA_BACKCHANNEL_DEBUG 197#ifdef SVCRDMA_BACKCHANNEL_DEBUG
210 pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); 198 pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
211#endif 199#endif
212 200
213 rc = svc_rdma_bc_sendto(rdma, rqst); 201 rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
214 if (rc) 202 if (rc) {
203 svc_rdma_send_ctxt_put(rdma, ctxt);
215 goto drop_connection; 204 goto drop_connection;
205 }
216 return rc; 206 return rc;
217 207
218drop_connection: 208drop_connection:
@@ -273,6 +263,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
273 .reserve_xprt = xprt_reserve_xprt_cong, 263 .reserve_xprt = xprt_reserve_xprt_cong,
274 .release_xprt = xprt_release_xprt_cong, 264 .release_xprt = xprt_release_xprt_cong,
275 .alloc_slot = xprt_alloc_slot, 265 .alloc_slot = xprt_alloc_slot,
266 .free_slot = xprt_free_slot,
276 .release_request = xprt_release_rqst_cong, 267 .release_request = xprt_release_rqst_cong,
277 .buf_alloc = xprt_rdma_bc_allocate, 268 .buf_alloc = xprt_rdma_bc_allocate,
278 .buf_free = xprt_rdma_bc_free, 269 .buf_free = xprt_rdma_bc_free,
@@ -320,7 +311,7 @@ xprt_setup_rdma_bc(struct xprt_create *args)
320 xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; 311 xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
321 312
322 xprt->prot = XPRT_TRANSPORT_BC_RDMA; 313 xprt->prot = XPRT_TRANSPORT_BC_RDMA;
323 xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32); 314 xprt->tsh_size = 0;
324 xprt->ops = &xprt_rdma_bc_procs; 315 xprt->ops = &xprt_rdma_bc_procs;
325 316
326 memcpy(&xprt->addr, args->dstaddr, args->addrlen); 317 memcpy(&xprt->addr, args->dstaddr, args->addrlen);
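
The backchannel hunks above switch rpcrdma_bc_send_request() from svc_rdma_op_ctxt to the new svc_rdma_send_ctxt: the transport header is built directly in the ctxt's pre-mapped sc_xprt_buf, and the ctxt is returned to the free list only if the send fails, since on success the Send completion handler releases it. A condensed sketch under that assumption, using names from the hunks above:

	ctxt = svc_rdma_send_ctxt_get(rdma);
	if (!ctxt)
		goto drop_connection;

	p = ctxt->sc_xprt_buf;	/* header built in the pre-mapped buffer */
	/* ... fill in xid, version, credits, rdma_msg, empty chunk lists ... */
	svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN);

	rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
	if (rc)
		svc_rdma_send_ctxt_put(rdma, ctxt);	/* no Send completion will do it */
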
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 3d45015dca97..841fca143804 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,5 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
1/* 2/*
2 * Copyright (c) 2016, 2017 Oracle. All rights reserved. 3 * Copyright (c) 2016-2018 Oracle. All rights reserved.
3 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
4 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 5 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
5 * 6 *
@@ -60,7 +61,7 @@
60 * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's 61 * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's
61 * data payload from the client. svc_rdma_recvfrom sets up the 62 * data payload from the client. svc_rdma_recvfrom sets up the
62 * RDMA Reads using pages in svc_rqst::rq_pages, which are 63 * RDMA Reads using pages in svc_rqst::rq_pages, which are
63 * transferred to an svc_rdma_op_ctxt for the duration of the 64 * transferred to an svc_rdma_recv_ctxt for the duration of the
64 * I/O. svc_rdma_recvfrom then returns zero, since the RPC message 65 * I/O. svc_rdma_recvfrom then returns zero, since the RPC message
65 * is still not yet ready. 66 * is still not yet ready.
66 * 67 *
@@ -69,18 +70,18 @@
69 * svc_rdma_recvfrom again. This second call may use a different 70 * svc_rdma_recvfrom again. This second call may use a different
70 * svc_rqst than the first one, thus any information that needs 71 * svc_rqst than the first one, thus any information that needs
71 * to be preserved across these two calls is kept in an 72 * to be preserved across these two calls is kept in an
72 * svc_rdma_op_ctxt. 73 * svc_rdma_recv_ctxt.
73 * 74 *
74 * The second call to svc_rdma_recvfrom performs final assembly 75 * The second call to svc_rdma_recvfrom performs final assembly
75 * of the RPC Call message, using the RDMA Read sink pages kept in 76 * of the RPC Call message, using the RDMA Read sink pages kept in
76 * the svc_rdma_op_ctxt. The xdr_buf is copied from the 77 * the svc_rdma_recv_ctxt. The xdr_buf is copied from the
77 * svc_rdma_op_ctxt to the second svc_rqst. The second call returns 78 * svc_rdma_recv_ctxt to the second svc_rqst. The second call returns
78 * the length of the completed RPC Call message. 79 * the length of the completed RPC Call message.
79 * 80 *
80 * Page Management 81 * Page Management
81 * 82 *
82 * Pages under I/O must be transferred from the first svc_rqst to an 83 * Pages under I/O must be transferred from the first svc_rqst to an
83 * svc_rdma_op_ctxt before the first svc_rdma_recvfrom call returns. 84 * svc_rdma_recv_ctxt before the first svc_rdma_recvfrom call returns.
84 * 85 *
85 * The first svc_rqst supplies pages for RDMA Reads. These are moved 86 * The first svc_rqst supplies pages for RDMA Reads. These are moved
86 * from rqstp::rq_pages into ctxt::pages. The consumed elements of 87 * from rqstp::rq_pages into ctxt::pages. The consumed elements of
@@ -88,78 +89,286 @@
88 * svc_rdma_recvfrom call returns. 89 * svc_rdma_recvfrom call returns.
89 * 90 *
90 * During the second svc_rdma_recvfrom call, RDMA Read sink pages 91 * During the second svc_rdma_recvfrom call, RDMA Read sink pages
91 * are transferred from the svc_rdma_op_ctxt to the second svc_rqst 92 * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst
92 * (see rdma_read_complete() below). 93 * (see rdma_read_complete() below).
93 */ 94 */
94 95
96#include <linux/spinlock.h>
95#include <asm/unaligned.h> 97#include <asm/unaligned.h>
96#include <rdma/ib_verbs.h> 98#include <rdma/ib_verbs.h>
97#include <rdma/rdma_cm.h> 99#include <rdma/rdma_cm.h>
98 100
99#include <linux/spinlock.h>
100
101#include <linux/sunrpc/xdr.h> 101#include <linux/sunrpc/xdr.h>
102#include <linux/sunrpc/debug.h> 102#include <linux/sunrpc/debug.h>
103#include <linux/sunrpc/rpc_rdma.h> 103#include <linux/sunrpc/rpc_rdma.h>
104#include <linux/sunrpc/svc_rdma.h> 104#include <linux/sunrpc/svc_rdma.h>
105 105
106#include "xprt_rdma.h"
107#include <trace/events/rpcrdma.h>
108
106#define RPCDBG_FACILITY RPCDBG_SVCXPRT 109#define RPCDBG_FACILITY RPCDBG_SVCXPRT
107 110
108/* 111static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);
109 * Replace the pages in the rq_argpages array with the pages from the SGE in 112
110 * the RDMA_RECV completion. The SGL should contain full pages up until the 113static inline struct svc_rdma_recv_ctxt *
111 * last one. 114svc_rdma_next_recv_ctxt(struct list_head *list)
115{
116 return list_first_entry_or_null(list, struct svc_rdma_recv_ctxt,
117 rc_list);
118}
119
120static struct svc_rdma_recv_ctxt *
121svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
122{
123 struct svc_rdma_recv_ctxt *ctxt;
124 dma_addr_t addr;
125 void *buffer;
126
127 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
128 if (!ctxt)
129 goto fail0;
130 buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
131 if (!buffer)
132 goto fail1;
133 addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
134 rdma->sc_max_req_size, DMA_FROM_DEVICE);
135 if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
136 goto fail2;
137
138 ctxt->rc_recv_wr.next = NULL;
139 ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
140 ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge;
141 ctxt->rc_recv_wr.num_sge = 1;
142 ctxt->rc_cqe.done = svc_rdma_wc_receive;
143 ctxt->rc_recv_sge.addr = addr;
144 ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
145 ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
146 ctxt->rc_recv_buf = buffer;
147 ctxt->rc_temp = false;
148 return ctxt;
149
150fail2:
151 kfree(buffer);
152fail1:
153 kfree(ctxt);
154fail0:
155 return NULL;
156}
157
158static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
159 struct svc_rdma_recv_ctxt *ctxt)
160{
161 ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
162 ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
163 kfree(ctxt->rc_recv_buf);
164 kfree(ctxt);
165}
166
167/**
168 * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
169 * @rdma: svcxprt_rdma being torn down
170 *
112 */ 171 */
113static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, 172void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
114 struct svc_rdma_op_ctxt *ctxt)
115{ 173{
116 struct page *page; 174 struct svc_rdma_recv_ctxt *ctxt;
117 int sge_no;
118 u32 len;
119 175
120 /* The reply path assumes the Call's transport header resides 176 while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
121 * in rqstp->rq_pages[0]. 177 list_del(&ctxt->rc_list);
122 */ 178 svc_rdma_recv_ctxt_destroy(rdma, ctxt);
123 page = ctxt->pages[0];
124 put_page(rqstp->rq_pages[0]);
125 rqstp->rq_pages[0] = page;
126
127 /* Set up the XDR head */
128 rqstp->rq_arg.head[0].iov_base = page_address(page);
129 rqstp->rq_arg.head[0].iov_len =
130 min_t(size_t, ctxt->byte_len, ctxt->sge[0].length);
131 rqstp->rq_arg.len = ctxt->byte_len;
132 rqstp->rq_arg.buflen = ctxt->byte_len;
133
134 /* Compute bytes past head in the SGL */
135 len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len;
136
137 /* If data remains, store it in the pagelist */
138 rqstp->rq_arg.page_len = len;
139 rqstp->rq_arg.page_base = 0;
140
141 sge_no = 1;
142 while (len && sge_no < ctxt->count) {
143 page = ctxt->pages[sge_no];
144 put_page(rqstp->rq_pages[sge_no]);
145 rqstp->rq_pages[sge_no] = page;
146 len -= min_t(u32, len, ctxt->sge[sge_no].length);
147 sge_no++;
148 } 179 }
149 rqstp->rq_respages = &rqstp->rq_pages[sge_no]; 180}
150 rqstp->rq_next_page = rqstp->rq_respages + 1; 181
182static struct svc_rdma_recv_ctxt *
183svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
184{
185 struct svc_rdma_recv_ctxt *ctxt;
186
187 spin_lock(&rdma->sc_recv_lock);
188 ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
189 if (!ctxt)
190 goto out_empty;
191 list_del(&ctxt->rc_list);
192 spin_unlock(&rdma->sc_recv_lock);
193
194out:
195 ctxt->rc_page_count = 0;
196 return ctxt;
197
198out_empty:
199 spin_unlock(&rdma->sc_recv_lock);
200
201 ctxt = svc_rdma_recv_ctxt_alloc(rdma);
202 if (!ctxt)
203 return NULL;
204 goto out;
205}
206
207/**
208 * svc_rdma_recv_ctxt_put - Return recv_ctxt to free list
209 * @rdma: controlling svcxprt_rdma
210 * @ctxt: object to return to the free list
211 *
212 */
213void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
214 struct svc_rdma_recv_ctxt *ctxt)
215{
216 unsigned int i;
217
218 for (i = 0; i < ctxt->rc_page_count; i++)
219 put_page(ctxt->rc_pages[i]);
220
221 if (!ctxt->rc_temp) {
222 spin_lock(&rdma->sc_recv_lock);
223 list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
224 spin_unlock(&rdma->sc_recv_lock);
225 } else
226 svc_rdma_recv_ctxt_destroy(rdma, ctxt);
227}
228
229static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
230 struct svc_rdma_recv_ctxt *ctxt)
231{
232 struct ib_recv_wr *bad_recv_wr;
233 int ret;
234
235 svc_xprt_get(&rdma->sc_xprt);
236 ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr);
237 trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
238 if (ret)
239 goto err_post;
240 return 0;
241
242err_post:
243 svc_rdma_recv_ctxt_put(rdma, ctxt);
244 svc_xprt_put(&rdma->sc_xprt);
245 return ret;
246}
151 247
152 /* If not all pages were used from the SGL, free the remaining ones */ 248static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
153 len = sge_no; 249{
154 while (sge_no < ctxt->count) { 250 struct svc_rdma_recv_ctxt *ctxt;
155 page = ctxt->pages[sge_no++]; 251
156 put_page(page); 252 ctxt = svc_rdma_recv_ctxt_get(rdma);
253 if (!ctxt)
254 return -ENOMEM;
255 return __svc_rdma_post_recv(rdma, ctxt);
256}
257
258/**
259 * svc_rdma_post_recvs - Post initial set of Recv WRs
260 * @rdma: fresh svcxprt_rdma
261 *
262 * Returns true if successful, otherwise false.
263 */
264bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
265{
266 struct svc_rdma_recv_ctxt *ctxt;
267 unsigned int i;
268 int ret;
269
270 for (i = 0; i < rdma->sc_max_requests; i++) {
271 ctxt = svc_rdma_recv_ctxt_get(rdma);
272 if (!ctxt)
273 return false;
274 ctxt->rc_temp = true;
275 ret = __svc_rdma_post_recv(rdma, ctxt);
276 if (ret) {
277 pr_err("svcrdma: failure posting recv buffers: %d\n",
278 ret);
279 return false;
280 }
157 } 281 }
158 ctxt->count = len; 282 return true;
283}
159 284
160 /* Set up tail */ 285/**
161 rqstp->rq_arg.tail[0].iov_base = NULL; 286 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
162 rqstp->rq_arg.tail[0].iov_len = 0; 287 * @cq: Completion Queue context
288 * @wc: Work Completion object
289 *
290 * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
291 * the Receive completion handler could be running.
292 */
293static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
294{
295 struct svcxprt_rdma *rdma = cq->cq_context;
296 struct ib_cqe *cqe = wc->wr_cqe;
297 struct svc_rdma_recv_ctxt *ctxt;
298
299 trace_svcrdma_wc_receive(wc);
300
301 /* WARNING: Only wc->wr_cqe and wc->status are reliable */
302 ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
303
304 if (wc->status != IB_WC_SUCCESS)
305 goto flushed;
306
307 if (svc_rdma_post_recv(rdma))
308 goto post_err;
309
310 /* All wc fields are now known to be valid */
311 ctxt->rc_byte_len = wc->byte_len;
312 ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
313 ctxt->rc_recv_sge.addr,
314 wc->byte_len, DMA_FROM_DEVICE);
315
316 spin_lock(&rdma->sc_rq_dto_lock);
317 list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
318 spin_unlock(&rdma->sc_rq_dto_lock);
319 set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
320 if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
321 svc_xprt_enqueue(&rdma->sc_xprt);
322 goto out;
323
324flushed:
325 if (wc->status != IB_WC_WR_FLUSH_ERR)
326 pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
327 ib_wc_status_msg(wc->status),
328 wc->status, wc->vendor_err);
329post_err:
330 svc_rdma_recv_ctxt_put(rdma, ctxt);
331 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
332 svc_xprt_enqueue(&rdma->sc_xprt);
333out:
334 svc_xprt_put(&rdma->sc_xprt);
335}
336
337/**
338 * svc_rdma_flush_recv_queues - Drain pending Receive work
339 * @rdma: svcxprt_rdma being shut down
340 *
341 */
342void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
343{
344 struct svc_rdma_recv_ctxt *ctxt;
345
346 while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
347 list_del(&ctxt->rc_list);
348 svc_rdma_recv_ctxt_put(rdma, ctxt);
349 }
350 while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
351 list_del(&ctxt->rc_list);
352 svc_rdma_recv_ctxt_put(rdma, ctxt);
353 }
354}
355
356static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
357 struct svc_rdma_recv_ctxt *ctxt)
358{
359 struct xdr_buf *arg = &rqstp->rq_arg;
360
361 arg->head[0].iov_base = ctxt->rc_recv_buf;
362 arg->head[0].iov_len = ctxt->rc_byte_len;
363 arg->tail[0].iov_base = NULL;
364 arg->tail[0].iov_len = 0;
365 arg->page_len = 0;
366 arg->page_base = 0;
367 arg->buflen = ctxt->rc_byte_len;
368 arg->len = ctxt->rc_byte_len;
369
370 rqstp->rq_respages = &rqstp->rq_pages[0];
371 rqstp->rq_next_page = rqstp->rq_respages + 1;
163} 372}
164 373
165/* This accommodates the largest possible Write chunk, 374/* This accommodates the largest possible Write chunk,
@@ -294,7 +503,6 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
294{ 503{
295 __be32 *p, *end, *rdma_argp; 504 __be32 *p, *end, *rdma_argp;
296 unsigned int hdr_len; 505 unsigned int hdr_len;
297 char *proc;
298 506
299 /* Verify that there's enough bytes for header + something */ 507 /* Verify that there's enough bytes for header + something */
300 if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) 508 if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
@@ -306,10 +514,8 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
306 514
307 switch (*(rdma_argp + 3)) { 515 switch (*(rdma_argp + 3)) {
308 case rdma_msg: 516 case rdma_msg:
309 proc = "RDMA_MSG";
310 break; 517 break;
311 case rdma_nomsg: 518 case rdma_nomsg:
312 proc = "RDMA_NOMSG";
313 break; 519 break;
314 520
315 case rdma_done: 521 case rdma_done:
@@ -339,103 +545,94 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
339 hdr_len = (unsigned long)p - (unsigned long)rdma_argp; 545 hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
340 rq_arg->head[0].iov_len -= hdr_len; 546 rq_arg->head[0].iov_len -= hdr_len;
341 rq_arg->len -= hdr_len; 547 rq_arg->len -= hdr_len;
342 dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n", 548 trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
343 proc, be32_to_cpup(rdma_argp), hdr_len);
344 return hdr_len; 549 return hdr_len;
345 550
346out_short: 551out_short:
347 dprintk("svcrdma: header too short = %d\n", rq_arg->len); 552 trace_svcrdma_decode_short(rq_arg->len);
348 return -EINVAL; 553 return -EINVAL;
349 554
350out_version: 555out_version:
351 dprintk("svcrdma: bad xprt version: %u\n", 556 trace_svcrdma_decode_badvers(rdma_argp);
352 be32_to_cpup(rdma_argp + 1));
353 return -EPROTONOSUPPORT; 557 return -EPROTONOSUPPORT;
354 558
355out_drop: 559out_drop:
356 dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n"); 560 trace_svcrdma_decode_drop(rdma_argp);
357 return 0; 561 return 0;
358 562
359out_proc: 563out_proc:
360 dprintk("svcrdma: bad rdma procedure (%u)\n", 564 trace_svcrdma_decode_badproc(rdma_argp);
361 be32_to_cpup(rdma_argp + 3));
362 return -EINVAL; 565 return -EINVAL;
363 566
364out_inval: 567out_inval:
365 dprintk("svcrdma: failed to parse transport header\n"); 568 trace_svcrdma_decode_parse(rdma_argp);
366 return -EINVAL; 569 return -EINVAL;
367} 570}
368 571
369static void rdma_read_complete(struct svc_rqst *rqstp, 572static void rdma_read_complete(struct svc_rqst *rqstp,
370 struct svc_rdma_op_ctxt *head) 573 struct svc_rdma_recv_ctxt *head)
371{ 574{
372 int page_no; 575 int page_no;
373 576
374 /* Copy RPC pages */ 577 /* Move Read chunk pages to rqstp so that they will be released
375 for (page_no = 0; page_no < head->count; page_no++) { 578 * when svc_process is done with them.
579 */
580 for (page_no = 0; page_no < head->rc_page_count; page_no++) {
376 put_page(rqstp->rq_pages[page_no]); 581 put_page(rqstp->rq_pages[page_no]);
377 rqstp->rq_pages[page_no] = head->pages[page_no]; 582 rqstp->rq_pages[page_no] = head->rc_pages[page_no];
378 } 583 }
584 head->rc_page_count = 0;
379 585
380 /* Point rq_arg.pages past header */ 586 /* Point rq_arg.pages past header */
381 rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; 587 rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count];
382 rqstp->rq_arg.page_len = head->arg.page_len; 588 rqstp->rq_arg.page_len = head->rc_arg.page_len;
383 589
384 /* rq_respages starts after the last arg page */ 590 /* rq_respages starts after the last arg page */
385 rqstp->rq_respages = &rqstp->rq_pages[page_no]; 591 rqstp->rq_respages = &rqstp->rq_pages[page_no];
386 rqstp->rq_next_page = rqstp->rq_respages + 1; 592 rqstp->rq_next_page = rqstp->rq_respages + 1;
387 593
388 /* Rebuild rq_arg head and tail. */ 594 /* Rebuild rq_arg head and tail. */
389 rqstp->rq_arg.head[0] = head->arg.head[0]; 595 rqstp->rq_arg.head[0] = head->rc_arg.head[0];
390 rqstp->rq_arg.tail[0] = head->arg.tail[0]; 596 rqstp->rq_arg.tail[0] = head->rc_arg.tail[0];
391 rqstp->rq_arg.len = head->arg.len; 597 rqstp->rq_arg.len = head->rc_arg.len;
392 rqstp->rq_arg.buflen = head->arg.buflen; 598 rqstp->rq_arg.buflen = head->rc_arg.buflen;
393} 599}
394 600
395static void svc_rdma_send_error(struct svcxprt_rdma *xprt, 601static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
396 __be32 *rdma_argp, int status) 602 __be32 *rdma_argp, int status)
397{ 603{
398 struct svc_rdma_op_ctxt *ctxt; 604 struct svc_rdma_send_ctxt *ctxt;
399 __be32 *p, *err_msgp;
400 unsigned int length; 605 unsigned int length;
401 struct page *page; 606 __be32 *p;
402 int ret; 607 int ret;
403 608
404 page = alloc_page(GFP_KERNEL); 609 ctxt = svc_rdma_send_ctxt_get(xprt);
405 if (!page) 610 if (!ctxt)
406 return; 611 return;
407 err_msgp = page_address(page);
408 612
409 p = err_msgp; 613 p = ctxt->sc_xprt_buf;
410 *p++ = *rdma_argp; 614 *p++ = *rdma_argp;
411 *p++ = *(rdma_argp + 1); 615 *p++ = *(rdma_argp + 1);
412 *p++ = xprt->sc_fc_credits; 616 *p++ = xprt->sc_fc_credits;
413 *p++ = rdma_error; 617 *p++ = rdma_error;
414 if (status == -EPROTONOSUPPORT) { 618 switch (status) {
619 case -EPROTONOSUPPORT:
415 *p++ = err_vers; 620 *p++ = err_vers;
416 *p++ = rpcrdma_version; 621 *p++ = rpcrdma_version;
417 *p++ = rpcrdma_version; 622 *p++ = rpcrdma_version;
418 } else { 623 trace_svcrdma_err_vers(*rdma_argp);
624 break;
625 default:
419 *p++ = err_chunk; 626 *p++ = err_chunk;
627 trace_svcrdma_err_chunk(*rdma_argp);
420 } 628 }
421 length = (unsigned long)p - (unsigned long)err_msgp; 629 length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf;
422 630 svc_rdma_sync_reply_hdr(xprt, ctxt, length);
423 /* Map transport header; no RPC message payload */
424 ctxt = svc_rdma_get_context(xprt);
425 ret = svc_rdma_map_reply_hdr(xprt, ctxt, err_msgp, length);
426 if (ret) {
427 dprintk("svcrdma: Error %d mapping send for protocol error\n",
428 ret);
429 return;
430 }
431 631
432 ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0); 632 ctxt->sc_send_wr.opcode = IB_WR_SEND;
433 if (ret) { 633 ret = svc_rdma_send(xprt, &ctxt->sc_send_wr);
434 dprintk("svcrdma: Error %d posting send for protocol error\n", 634 if (ret)
435 ret); 635 svc_rdma_send_ctxt_put(xprt, ctxt);
436 svc_rdma_unmap_dma(ctxt);
437 svc_rdma_put_context(ctxt, 1);
438 }
439} 636}
440 637
441/* By convention, backchannel calls arrive via rdma_msg type 638/* By convention, backchannel calls arrive via rdma_msg type
@@ -507,32 +704,28 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
507 struct svc_xprt *xprt = rqstp->rq_xprt; 704 struct svc_xprt *xprt = rqstp->rq_xprt;
508 struct svcxprt_rdma *rdma_xprt = 705 struct svcxprt_rdma *rdma_xprt =
509 container_of(xprt, struct svcxprt_rdma, sc_xprt); 706 container_of(xprt, struct svcxprt_rdma, sc_xprt);
510 struct svc_rdma_op_ctxt *ctxt; 707 struct svc_rdma_recv_ctxt *ctxt;
511 __be32 *p; 708 __be32 *p;
512 int ret; 709 int ret;
513 710
514 spin_lock(&rdma_xprt->sc_rq_dto_lock); 711 spin_lock(&rdma_xprt->sc_rq_dto_lock);
515 if (!list_empty(&rdma_xprt->sc_read_complete_q)) { 712 ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
516 ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q, 713 if (ctxt) {
517 struct svc_rdma_op_ctxt, list); 714 list_del(&ctxt->rc_list);
518 list_del(&ctxt->list);
519 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 715 spin_unlock(&rdma_xprt->sc_rq_dto_lock);
520 rdma_read_complete(rqstp, ctxt); 716 rdma_read_complete(rqstp, ctxt);
521 goto complete; 717 goto complete;
522 } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { 718 }
523 ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q, 719 ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
524 struct svc_rdma_op_ctxt, list); 720 if (!ctxt) {
525 list_del(&ctxt->list);
526 } else {
527 /* No new incoming requests, terminate the loop */ 721 /* No new incoming requests, terminate the loop */
528 clear_bit(XPT_DATA, &xprt->xpt_flags); 722 clear_bit(XPT_DATA, &xprt->xpt_flags);
529 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 723 spin_unlock(&rdma_xprt->sc_rq_dto_lock);
530 return 0; 724 return 0;
531 } 725 }
726 list_del(&ctxt->rc_list);
532 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 727 spin_unlock(&rdma_xprt->sc_rq_dto_lock);
533 728
534 dprintk("svcrdma: recvfrom: ctxt=%p on xprt=%p, rqstp=%p\n",
535 ctxt, rdma_xprt, rqstp);
536 atomic_inc(&rdma_stat_recv); 729 atomic_inc(&rdma_stat_recv);
537 730
538 svc_rdma_build_arg_xdr(rqstp, ctxt); 731 svc_rdma_build_arg_xdr(rqstp, ctxt);
@@ -548,7 +741,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
548 if (svc_rdma_is_backchannel_reply(xprt, p)) { 741 if (svc_rdma_is_backchannel_reply(xprt, p)) {
549 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, 742 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
550 &rqstp->rq_arg); 743 &rqstp->rq_arg);
551 svc_rdma_put_context(ctxt, 0); 744 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
552 return ret; 745 return ret;
553 } 746 }
554 747
@@ -557,9 +750,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
557 goto out_readchunk; 750 goto out_readchunk;
558 751
559complete: 752complete:
560 svc_rdma_put_context(ctxt, 0); 753 rqstp->rq_xprt_ctxt = ctxt;
561 dprintk("svcrdma: recvfrom: xprt=%p, rqstp=%p, rq_arg.len=%u\n",
562 rdma_xprt, rqstp, rqstp->rq_arg.len);
563 rqstp->rq_prot = IPPROTO_MAX; 754 rqstp->rq_prot = IPPROTO_MAX;
564 svc_xprt_copy_addrs(rqstp, xprt); 755 svc_xprt_copy_addrs(rqstp, xprt);
565 return rqstp->rq_arg.len; 756 return rqstp->rq_arg.len;
@@ -572,16 +763,16 @@ out_readchunk:
572 763
573out_err: 764out_err:
574 svc_rdma_send_error(rdma_xprt, p, ret); 765 svc_rdma_send_error(rdma_xprt, p, ret);
575 svc_rdma_put_context(ctxt, 0); 766 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
576 return 0; 767 return 0;
577 768
578out_postfail: 769out_postfail:
579 if (ret == -EINVAL) 770 if (ret == -EINVAL)
580 svc_rdma_send_error(rdma_xprt, p, ret); 771 svc_rdma_send_error(rdma_xprt, p, ret);
581 svc_rdma_put_context(ctxt, 1); 772 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
582 return ret; 773 return ret;
583 774
584out_drop: 775out_drop:
585 svc_rdma_put_context(ctxt, 1); 776 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
586 return 0; 777 return 0;
587} 778}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 12b9a7e0b6d2..ce3ea8419704 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1,15 +1,18 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * Copyright (c) 2016 Oracle. All rights reserved. 3 * Copyright (c) 2016-2018 Oracle. All rights reserved.
4 * 4 *
5 * Use the core R/W API to move RPC-over-RDMA Read and Write chunks. 5 * Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
6 */ 6 */
7 7
8#include <rdma/rw.h>
9
8#include <linux/sunrpc/rpc_rdma.h> 10#include <linux/sunrpc/rpc_rdma.h>
9#include <linux/sunrpc/svc_rdma.h> 11#include <linux/sunrpc/svc_rdma.h>
10#include <linux/sunrpc/debug.h> 12#include <linux/sunrpc/debug.h>
11 13
12#include <rdma/rw.h> 14#include "xprt_rdma.h"
15#include <trace/events/rpcrdma.h>
13 16
14#define RPCDBG_FACILITY RPCDBG_SVCXPRT 17#define RPCDBG_FACILITY RPCDBG_SVCXPRT
15 18
@@ -205,6 +208,8 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
205 struct svc_rdma_write_info *info = 208 struct svc_rdma_write_info *info =
206 container_of(cc, struct svc_rdma_write_info, wi_cc); 209 container_of(cc, struct svc_rdma_write_info, wi_cc);
207 210
211 trace_svcrdma_wc_write(wc);
212
208 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 213 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
209 wake_up(&rdma->sc_send_wait); 214 wake_up(&rdma->sc_send_wait);
210 215
@@ -222,7 +227,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
222/* State for pulling a Read chunk. 227/* State for pulling a Read chunk.
223 */ 228 */
224struct svc_rdma_read_info { 229struct svc_rdma_read_info {
225 struct svc_rdma_op_ctxt *ri_readctxt; 230 struct svc_rdma_recv_ctxt *ri_readctxt;
226 unsigned int ri_position; 231 unsigned int ri_position;
227 unsigned int ri_pageno; 232 unsigned int ri_pageno;
228 unsigned int ri_pageoff; 233 unsigned int ri_pageoff;
@@ -266,6 +271,8 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
266 struct svc_rdma_read_info *info = 271 struct svc_rdma_read_info *info =
267 container_of(cc, struct svc_rdma_read_info, ri_cc); 272 container_of(cc, struct svc_rdma_read_info, ri_cc);
268 273
274 trace_svcrdma_wc_read(wc);
275
269 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 276 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
270 wake_up(&rdma->sc_send_wait); 277 wake_up(&rdma->sc_send_wait);
271 278
@@ -275,10 +282,10 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
275 pr_err("svcrdma: read ctx: %s (%u/0x%x)\n", 282 pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
276 ib_wc_status_msg(wc->status), 283 ib_wc_status_msg(wc->status),
277 wc->status, wc->vendor_err); 284 wc->status, wc->vendor_err);
278 svc_rdma_put_context(info->ri_readctxt, 1); 285 svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
279 } else { 286 } else {
280 spin_lock(&rdma->sc_rq_dto_lock); 287 spin_lock(&rdma->sc_rq_dto_lock);
281 list_add_tail(&info->ri_readctxt->list, 288 list_add_tail(&info->ri_readctxt->rc_list,
282 &rdma->sc_read_complete_q); 289 &rdma->sc_read_complete_q);
283 spin_unlock(&rdma->sc_rq_dto_lock); 290 spin_unlock(&rdma->sc_rq_dto_lock);
284 291
@@ -323,18 +330,20 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
323 if (atomic_sub_return(cc->cc_sqecount, 330 if (atomic_sub_return(cc->cc_sqecount,
324 &rdma->sc_sq_avail) > 0) { 331 &rdma->sc_sq_avail) > 0) {
325 ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); 332 ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
333 trace_svcrdma_post_rw(&cc->cc_cqe,
334 cc->cc_sqecount, ret);
326 if (ret) 335 if (ret)
327 break; 336 break;
328 return 0; 337 return 0;
329 } 338 }
330 339
331 atomic_inc(&rdma_stat_sq_starve); 340 trace_svcrdma_sq_full(rdma);
332 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 341 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
333 wait_event(rdma->sc_send_wait, 342 wait_event(rdma->sc_send_wait,
334 atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount); 343 atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
344 trace_svcrdma_sq_retry(rdma);
335 } while (1); 345 } while (1);
336 346
337 pr_err("svcrdma: ib_post_send failed (%d)\n", ret);
338 set_bit(XPT_CLOSE, &xprt->xpt_flags); 347 set_bit(XPT_CLOSE, &xprt->xpt_flags);
339 348
340 /* If even one was posted, there will be a completion. */ 349 /* If even one was posted, there will be a completion. */
@@ -437,6 +446,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
437 if (ret < 0) 446 if (ret < 0)
438 goto out_initerr; 447 goto out_initerr;
439 448
449 trace_svcrdma_encode_wseg(seg_handle, write_len, seg_offset);
440 list_add(&ctxt->rw_list, &cc->cc_rwctxts); 450 list_add(&ctxt->rw_list, &cc->cc_rwctxts);
441 cc->cc_sqecount += ret; 451 cc->cc_sqecount += ret;
442 if (write_len == seg_length - info->wi_seg_off) { 452 if (write_len == seg_length - info->wi_seg_off) {
@@ -462,7 +472,7 @@ out_noctx:
462 472
463out_initerr: 473out_initerr:
464 svc_rdma_put_rw_ctxt(rdma, ctxt); 474 svc_rdma_put_rw_ctxt(rdma, ctxt);
465 pr_err("svcrdma: failed to map pagelist (%d)\n", ret); 475 trace_svcrdma_dma_map_rwctx(rdma, ret);
466 return -EIO; 476 return -EIO;
467} 477}
468 478
@@ -526,6 +536,8 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
526 ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); 536 ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
527 if (ret < 0) 537 if (ret < 0)
528 goto out_err; 538 goto out_err;
539
540 trace_svcrdma_encode_write(xdr->page_len);
529 return xdr->page_len; 541 return xdr->page_len;
530 542
531out_err: 543out_err:
@@ -582,6 +594,8 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
582 ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); 594 ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
583 if (ret < 0) 595 if (ret < 0)
584 goto out_err; 596 goto out_err;
597
598 trace_svcrdma_encode_reply(consumed);
585 return consumed; 599 return consumed;
586 600
587out_err: 601out_err:
@@ -593,7 +607,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
593 struct svc_rqst *rqstp, 607 struct svc_rqst *rqstp,
594 u32 rkey, u32 len, u64 offset) 608 u32 rkey, u32 len, u64 offset)
595{ 609{
596 struct svc_rdma_op_ctxt *head = info->ri_readctxt; 610 struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
597 struct svc_rdma_chunk_ctxt *cc = &info->ri_cc; 611 struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
598 struct svc_rdma_rw_ctxt *ctxt; 612 struct svc_rdma_rw_ctxt *ctxt;
599 unsigned int sge_no, seg_len; 613 unsigned int sge_no, seg_len;
@@ -606,18 +620,15 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
606 goto out_noctx; 620 goto out_noctx;
607 ctxt->rw_nents = sge_no; 621 ctxt->rw_nents = sge_no;
608 622
609 dprintk("svcrdma: reading segment %u@0x%016llx:0x%08x (%u sges)\n",
610 len, offset, rkey, sge_no);
611
612 sg = ctxt->rw_sg_table.sgl; 623 sg = ctxt->rw_sg_table.sgl;
613 for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) { 624 for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
614 seg_len = min_t(unsigned int, len, 625 seg_len = min_t(unsigned int, len,
615 PAGE_SIZE - info->ri_pageoff); 626 PAGE_SIZE - info->ri_pageoff);
616 627
617 head->arg.pages[info->ri_pageno] = 628 head->rc_arg.pages[info->ri_pageno] =
618 rqstp->rq_pages[info->ri_pageno]; 629 rqstp->rq_pages[info->ri_pageno];
619 if (!info->ri_pageoff) 630 if (!info->ri_pageoff)
620 head->count++; 631 head->rc_page_count++;
621 632
622 sg_set_page(sg, rqstp->rq_pages[info->ri_pageno], 633 sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
623 seg_len, info->ri_pageoff); 634 seg_len, info->ri_pageoff);
@@ -656,8 +667,8 @@ out_overrun:
656 return -EINVAL; 667 return -EINVAL;
657 668
658out_initerr: 669out_initerr:
670 trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret);
659 svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt); 671 svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
660 pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
661 return -EIO; 672 return -EIO;
662} 673}
663 674
@@ -686,6 +697,7 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
686 if (ret < 0) 697 if (ret < 0)
687 break; 698 break;
688 699
700 trace_svcrdma_encode_rseg(rs_handle, rs_length, rs_offset);
689 info->ri_chunklen += rs_length; 701 info->ri_chunklen += rs_length;
690 } 702 }
691 703
@@ -693,9 +705,9 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
693} 705}
694 706
695/* Construct RDMA Reads to pull over a normal Read chunk. The chunk 707/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
696 * data lands in the page list of head->arg.pages. 708 * data lands in the page list of head->rc_arg.pages.
697 * 709 *
698 * Currently NFSD does not look at the head->arg.tail[0] iovec. 710 * Currently NFSD does not look at the head->rc_arg.tail[0] iovec.
699 * Therefore, XDR round-up of the Read chunk and trailing 711 * Therefore, XDR round-up of the Read chunk and trailing
700 * inline content must both be added at the end of the pagelist. 712 * inline content must both be added at the end of the pagelist.
701 */ 713 */
@@ -703,29 +715,27 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
703 struct svc_rdma_read_info *info, 715 struct svc_rdma_read_info *info,
704 __be32 *p) 716 __be32 *p)
705{ 717{
706 struct svc_rdma_op_ctxt *head = info->ri_readctxt; 718 struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
707 int ret; 719 int ret;
708 720
709 dprintk("svcrdma: Reading Read chunk at position %u\n",
710 info->ri_position);
711
712 info->ri_pageno = head->hdr_count;
713 info->ri_pageoff = 0;
714
715 ret = svc_rdma_build_read_chunk(rqstp, info, p); 721 ret = svc_rdma_build_read_chunk(rqstp, info, p);
716 if (ret < 0) 722 if (ret < 0)
717 goto out; 723 goto out;
718 724
725 trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position);
726
727 head->rc_hdr_count = 0;
728
719 /* Split the Receive buffer between the head and tail 729 /* Split the Receive buffer between the head and tail
720 * buffers at Read chunk's position. XDR roundup of the 730 * buffers at Read chunk's position. XDR roundup of the
721 * chunk is not included in either the pagelist or in 731 * chunk is not included in either the pagelist or in
722 * the tail. 732 * the tail.
723 */ 733 */
724 head->arg.tail[0].iov_base = 734 head->rc_arg.tail[0].iov_base =
725 head->arg.head[0].iov_base + info->ri_position; 735 head->rc_arg.head[0].iov_base + info->ri_position;
726 head->arg.tail[0].iov_len = 736 head->rc_arg.tail[0].iov_len =
727 head->arg.head[0].iov_len - info->ri_position; 737 head->rc_arg.head[0].iov_len - info->ri_position;
728 head->arg.head[0].iov_len = info->ri_position; 738 head->rc_arg.head[0].iov_len = info->ri_position;
729 739
730 /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2). 740 /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
731 * 741 *
@@ -738,9 +748,9 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
738 */ 748 */
739 info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2; 749 info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2;
740 750
741 head->arg.page_len = info->ri_chunklen; 751 head->rc_arg.page_len = info->ri_chunklen;
742 head->arg.len += info->ri_chunklen; 752 head->rc_arg.len += info->ri_chunklen;
743 head->arg.buflen += info->ri_chunklen; 753 head->rc_arg.buflen += info->ri_chunklen;
744 754
745out: 755out:
746 return ret; 756 return ret;
@@ -749,7 +759,7 @@ out:
749/* Construct RDMA Reads to pull over a Position Zero Read chunk. 759/* Construct RDMA Reads to pull over a Position Zero Read chunk.
750 * The start of the data lands in the first page just after 760 * The start of the data lands in the first page just after
751 * the Transport header, and the rest lands in the page list of 761 * the Transport header, and the rest lands in the page list of
752 * head->arg.pages. 762 * head->rc_arg.pages.
753 * 763 *
754 * Assumptions: 764 * Assumptions:
755 * - A PZRC has an XDR-aligned length (no implicit round-up). 765 * - A PZRC has an XDR-aligned length (no implicit round-up).
@@ -761,35 +771,25 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
761 struct svc_rdma_read_info *info, 771 struct svc_rdma_read_info *info,
762 __be32 *p) 772 __be32 *p)
763{ 773{
764 struct svc_rdma_op_ctxt *head = info->ri_readctxt; 774 struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
765 int ret; 775 int ret;
766 776
767 dprintk("svcrdma: Reading Position Zero Read chunk\n");
768
769 info->ri_pageno = head->hdr_count - 1;
770 info->ri_pageoff = offset_in_page(head->byte_len);
771
772 ret = svc_rdma_build_read_chunk(rqstp, info, p); 777 ret = svc_rdma_build_read_chunk(rqstp, info, p);
773 if (ret < 0) 778 if (ret < 0)
774 goto out; 779 goto out;
775 780
776 head->arg.len += info->ri_chunklen; 781 trace_svcrdma_encode_pzr(info->ri_chunklen);
777 head->arg.buflen += info->ri_chunklen;
778 782
779 if (head->arg.buflen <= head->sge[0].length) { 783 head->rc_arg.len += info->ri_chunklen;
780 /* Transport header and RPC message fit entirely 784 head->rc_arg.buflen += info->ri_chunklen;
781 * in page where head iovec resides. 785
782 */ 786 head->rc_hdr_count = 1;
783 head->arg.head[0].iov_len = info->ri_chunklen; 787 head->rc_arg.head[0].iov_base = page_address(head->rc_pages[0]);
784 } else { 788 head->rc_arg.head[0].iov_len = min_t(size_t, PAGE_SIZE,
785 /* Transport header and part of RPC message reside 789 info->ri_chunklen);
786 * in the head iovec's page. 790
787 */ 791 head->rc_arg.page_len = info->ri_chunklen -
788 head->arg.head[0].iov_len = 792 head->rc_arg.head[0].iov_len;
789 head->sge[0].length - head->byte_len;
790 head->arg.page_len =
791 info->ri_chunklen - head->arg.head[0].iov_len;
792 }
793 793
794out: 794out:
795 return ret; 795 return ret;
@@ -813,29 +813,30 @@ out:
813 * - All Read segments in @p have the same Position value. 813 * - All Read segments in @p have the same Position value.
814 */ 814 */
815int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, 815int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
816 struct svc_rdma_op_ctxt *head, __be32 *p) 816 struct svc_rdma_recv_ctxt *head, __be32 *p)
817{ 817{
818 struct svc_rdma_read_info *info; 818 struct svc_rdma_read_info *info;
819 struct page **page; 819 struct page **page;
820 int ret; 820 int ret;
821 821
822 /* The request (with page list) is constructed in 822 /* The request (with page list) is constructed in
823 * head->arg. Pages involved with RDMA Read I/O are 823 * head->rc_arg. Pages involved with RDMA Read I/O are
824 * transferred there. 824 * transferred there.
825 */ 825 */
826 head->hdr_count = head->count; 826 head->rc_arg.head[0] = rqstp->rq_arg.head[0];
827 head->arg.head[0] = rqstp->rq_arg.head[0]; 827 head->rc_arg.tail[0] = rqstp->rq_arg.tail[0];
828 head->arg.tail[0] = rqstp->rq_arg.tail[0]; 828 head->rc_arg.pages = head->rc_pages;
829 head->arg.pages = head->pages; 829 head->rc_arg.page_base = 0;
830 head->arg.page_base = 0; 830 head->rc_arg.page_len = 0;
831 head->arg.page_len = 0; 831 head->rc_arg.len = rqstp->rq_arg.len;
832 head->arg.len = rqstp->rq_arg.len; 832 head->rc_arg.buflen = rqstp->rq_arg.buflen;
833 head->arg.buflen = rqstp->rq_arg.buflen;
834 833
835 info = svc_rdma_read_info_alloc(rdma); 834 info = svc_rdma_read_info_alloc(rdma);
836 if (!info) 835 if (!info)
837 return -ENOMEM; 836 return -ENOMEM;
838 info->ri_readctxt = head; 837 info->ri_readctxt = head;
838 info->ri_pageno = 0;
839 info->ri_pageoff = 0;
839 840
840 info->ri_position = be32_to_cpup(p + 1); 841 info->ri_position = be32_to_cpup(p + 1);
841 if (info->ri_position) 842 if (info->ri_position)
@@ -856,7 +857,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
856 857
857out: 858out:
858 /* Read sink pages have been moved from rqstp->rq_pages to 859 /* Read sink pages have been moved from rqstp->rq_pages to
859 * head->arg.pages. Force svc_recv to refill those slots 860 * head->rc_arg.pages. Force svc_recv to refill those slots
860 * in rq_pages. 861 * in rq_pages.
861 */ 862 */
862 for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++) 863 for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++)
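
Both svc_rdma_post_chunk_ctxt() above and the new svc_rdma_send() below account for Send Queue space with the same credit scheme: the poster subtracts the number of WRs it is about to post from sc_sq_avail, gives the credits back and sleeps on sc_send_wait when the SQ is full, and the completion handlers return the credits and wake the waiters. A condensed sketch of that pattern; num_wrs stands in for either 1 (svc_rdma_send) or cc->cc_sqecount (chunk contexts), and tracing and error handling are elided:

	do {
		if (atomic_sub_return(num_wrs, &rdma->sc_sq_avail) > 0) {
			ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
			break;
		}
		/* SQ full: give the credits back and wait for completions. */
		atomic_add(num_wrs, &rdma->sc_sq_avail);
		wait_event(rdma->sc_send_wait,
			   atomic_read(&rdma->sc_sq_avail) > num_wrs);
	} while (1);

	/* Completion handlers return the credits and wake the waiters: */
	atomic_add(num_wrs, &rdma->sc_sq_avail);
	wake_up(&rdma->sc_send_wait);
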
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 649441d5087d..4a3efaea277c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,5 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
1/* 2/*
2 * Copyright (c) 2016 Oracle. All rights reserved. 3 * Copyright (c) 2016-2018 Oracle. All rights reserved.
3 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
4 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 5 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
5 * 6 *
@@ -74,11 +75,11 @@
74 * DMA-unmap the pages under I/O for that Write segment. The Write 75 * DMA-unmap the pages under I/O for that Write segment. The Write
75 * completion handler does not release any pages. 76 * completion handler does not release any pages.
76 * 77 *
77 * When the Send WR is constructed, it also gets its own svc_rdma_op_ctxt. 78 * When the Send WR is constructed, it also gets its own svc_rdma_send_ctxt.
78 * The ownership of all of the Reply's pages are transferred into that 79 * The ownership of all of the Reply's pages are transferred into that
79 * ctxt, the Send WR is posted, and sendto returns. 80 * ctxt, the Send WR is posted, and sendto returns.
80 * 81 *
81 * The svc_rdma_op_ctxt is presented when the Send WR completes. The 82 * The svc_rdma_send_ctxt is presented when the Send WR completes. The
82 * Send completion handler finally releases the Reply's pages. 83 * Send completion handler finally releases the Reply's pages.
83 * 84 *
84 * This mechanism also assumes that completions on the transport's Send 85 * This mechanism also assumes that completions on the transport's Send
@@ -98,16 +99,230 @@
98 * where two different Write segments send portions of the same page. 99 * where two different Write segments send portions of the same page.
99 */ 100 */
100 101
101#include <linux/sunrpc/debug.h>
102#include <linux/sunrpc/rpc_rdma.h>
103#include <linux/spinlock.h> 102#include <linux/spinlock.h>
104#include <asm/unaligned.h> 103#include <asm/unaligned.h>
104
105#include <rdma/ib_verbs.h> 105#include <rdma/ib_verbs.h>
106#include <rdma/rdma_cm.h> 106#include <rdma/rdma_cm.h>
107
108#include <linux/sunrpc/debug.h>
109#include <linux/sunrpc/rpc_rdma.h>
107#include <linux/sunrpc/svc_rdma.h> 110#include <linux/sunrpc/svc_rdma.h>
108 111
112#include "xprt_rdma.h"
113#include <trace/events/rpcrdma.h>
114
109#define RPCDBG_FACILITY RPCDBG_SVCXPRT 115#define RPCDBG_FACILITY RPCDBG_SVCXPRT
110 116
117static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
118
119static inline struct svc_rdma_send_ctxt *
120svc_rdma_next_send_ctxt(struct list_head *list)
121{
122 return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
123 sc_list);
124}
125
126static struct svc_rdma_send_ctxt *
127svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
128{
129 struct svc_rdma_send_ctxt *ctxt;
130 dma_addr_t addr;
131 void *buffer;
132 size_t size;
133 int i;
134
135 size = sizeof(*ctxt);
136 size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
137 ctxt = kmalloc(size, GFP_KERNEL);
138 if (!ctxt)
139 goto fail0;
140 buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
141 if (!buffer)
142 goto fail1;
143 addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
144 rdma->sc_max_req_size, DMA_TO_DEVICE);
145 if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
146 goto fail2;
147
148 ctxt->sc_send_wr.next = NULL;
149 ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
150 ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
151 ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
152 ctxt->sc_cqe.done = svc_rdma_wc_send;
153 ctxt->sc_xprt_buf = buffer;
154 ctxt->sc_sges[0].addr = addr;
155
156 for (i = 0; i < rdma->sc_max_send_sges; i++)
157 ctxt->sc_sges[i].lkey = rdma->sc_pd->local_dma_lkey;
158 return ctxt;
159
160fail2:
161 kfree(buffer);
162fail1:
163 kfree(ctxt);
164fail0:
165 return NULL;
166}
167
168/**
169 * svc_rdma_send_ctxts_destroy - Release all send_ctxt's for an xprt
170 * @rdma: svcxprt_rdma being torn down
171 *
172 */
173void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
174{
175 struct svc_rdma_send_ctxt *ctxt;
176
177 while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) {
178 list_del(&ctxt->sc_list);
179 ib_dma_unmap_single(rdma->sc_pd->device,
180 ctxt->sc_sges[0].addr,
181 rdma->sc_max_req_size,
182 DMA_TO_DEVICE);
183 kfree(ctxt->sc_xprt_buf);
184 kfree(ctxt);
185 }
186}
187
188/**
189 * svc_rdma_send_ctxt_get - Get a free send_ctxt
190 * @rdma: controlling svcxprt_rdma
191 *
192 * Returns a ready-to-use send_ctxt, or NULL if none are
193 * available and a fresh one cannot be allocated.
194 */
195struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
196{
197 struct svc_rdma_send_ctxt *ctxt;
198
199 spin_lock(&rdma->sc_send_lock);
200 ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts);
201 if (!ctxt)
202 goto out_empty;
203 list_del(&ctxt->sc_list);
204 spin_unlock(&rdma->sc_send_lock);
205
206out:
207 ctxt->sc_send_wr.num_sge = 0;
208 ctxt->sc_cur_sge_no = 0;
209 ctxt->sc_page_count = 0;
210 return ctxt;
211
212out_empty:
213 spin_unlock(&rdma->sc_send_lock);
214 ctxt = svc_rdma_send_ctxt_alloc(rdma);
215 if (!ctxt)
216 return NULL;
217 goto out;
218}
219
220/**
221 * svc_rdma_send_ctxt_put - Return send_ctxt to free list
222 * @rdma: controlling svcxprt_rdma
223 * @ctxt: object to return to the free list
224 *
225 * Pages left in sc_pages are DMA unmapped and released.
226 */
227void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
228 struct svc_rdma_send_ctxt *ctxt)
229{
230 struct ib_device *device = rdma->sc_cm_id->device;
231 unsigned int i;
232
233 /* The first SGE contains the transport header, which
234 * remains mapped until @ctxt is destroyed.
235 */
236 for (i = 1; i < ctxt->sc_send_wr.num_sge; i++)
237 ib_dma_unmap_page(device,
238 ctxt->sc_sges[i].addr,
239 ctxt->sc_sges[i].length,
240 DMA_TO_DEVICE);
241
242 for (i = 0; i < ctxt->sc_page_count; ++i)
243 put_page(ctxt->sc_pages[i]);
244
245 spin_lock(&rdma->sc_send_lock);
246 list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
247 spin_unlock(&rdma->sc_send_lock);
248}
249
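The two helpers above form a lock-protected free list: svc_rdma_send_ctxt_get() pops a cached context from sc_send_ctxts (or falls back to svc_rdma_send_ctxt_alloc()), and svc_rdma_send_ctxt_put() unmaps the extra SGEs, releases the saved pages, and pushes the context back onto the list. What follows is a minimal userspace sketch of that reuse pattern only; the demo_* names, pthread locking, and calloc() fallback are stand-ins for the kernel's svc_rdma_* symbols, not part of this patch.

/* Sketch: reuse-or-allocate free list, modelled after the get/put pair above. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_ctxt {
	struct demo_ctxt *next;		/* plays the role of sc_list */
	unsigned int num_sge;		/* per-use state, reset on reuse */
};

static struct demo_ctxt *free_list;
static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;

static struct demo_ctxt *demo_ctxt_get(void)
{
	struct demo_ctxt *ctxt;

	pthread_mutex_lock(&free_lock);
	ctxt = free_list;
	if (ctxt)
		free_list = ctxt->next;
	pthread_mutex_unlock(&free_lock);

	if (!ctxt)
		ctxt = calloc(1, sizeof(*ctxt));	/* like the _alloc() fallback */
	if (ctxt)
		ctxt->num_sge = 0;			/* reset reused state */
	return ctxt;
}

static void demo_ctxt_put(struct demo_ctxt *ctxt)
{
	pthread_mutex_lock(&free_lock);
	ctxt->next = free_list;
	free_list = ctxt;
	pthread_mutex_unlock(&free_lock);
}

int main(void)
{
	struct demo_ctxt *c = demo_ctxt_get();

	if (!c)
		return 1;
	printf("got ctxt %p\n", (void *)c);
	demo_ctxt_put(c);
	return 0;
}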
250/**
251 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
252 * @cq: Completion Queue context
253 * @wc: Work Completion object
254 *
255 * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
256 * the Send completion handler could be running.
257 */
258static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
259{
260 struct svcxprt_rdma *rdma = cq->cq_context;
261 struct ib_cqe *cqe = wc->wr_cqe;
262 struct svc_rdma_send_ctxt *ctxt;
263
264 trace_svcrdma_wc_send(wc);
265
266 atomic_inc(&rdma->sc_sq_avail);
267 wake_up(&rdma->sc_send_wait);
268
269 ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
270 svc_rdma_send_ctxt_put(rdma, ctxt);
271
272 if (unlikely(wc->status != IB_WC_SUCCESS)) {
273 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
274 svc_xprt_enqueue(&rdma->sc_xprt);
275 if (wc->status != IB_WC_WR_FLUSH_ERR)
276 pr_err("svcrdma: Send: %s (%u/0x%x)\n",
277 ib_wc_status_msg(wc->status),
278 wc->status, wc->vendor_err);
279 }
280
281 svc_xprt_put(&rdma->sc_xprt);
282}
283
284/**
285 * svc_rdma_send - Post a single Send WR
286 * @rdma: transport on which to post the WR
287 * @wr: prepared Send WR to post
288 *
 289 * Returns zero if the Send WR was posted successfully. Otherwise, a
290 * negative errno is returned.
291 */
292int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
293{
294 struct ib_send_wr *bad_wr;
295 int ret;
296
297 might_sleep();
298
299 /* If the SQ is full, wait until an SQ entry is available */
300 while (1) {
301 if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
302 atomic_inc(&rdma_stat_sq_starve);
303 trace_svcrdma_sq_full(rdma);
304 atomic_inc(&rdma->sc_sq_avail);
305 wait_event(rdma->sc_send_wait,
306 atomic_read(&rdma->sc_sq_avail) > 1);
307 if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
308 return -ENOTCONN;
309 trace_svcrdma_sq_retry(rdma);
310 continue;
311 }
312
313 svc_xprt_get(&rdma->sc_xprt);
314 ret = ib_post_send(rdma->sc_qp, wr, &bad_wr);
315 trace_svcrdma_post_send(wr, ret);
316 if (ret) {
317 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
318 svc_xprt_put(&rdma->sc_xprt);
319 wake_up(&rdma->sc_send_wait);
320 }
321 break;
322 }
323 return ret;
324}
325
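svc_rdma_send() above now posts exactly one Send WR and accounts for Send Queue space one entry at a time: sc_sq_avail is decremented, and if it goes negative the credit is put back and the caller sleeps on sc_send_wait until svc_rdma_wc_send() returns a credit and wakes it. Below is a rough userspace model of that accounting loop, offered only to illustrate the pattern; the names, the pthread condition variable, and the fixed depth of 4 are assumptions, not kernel interfaces.

/* Sketch: one-credit-per-WR SQ accounting with a blocking reserve. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int sq_avail = 4;	/* pretend the Send Queue depth is 4 */
static pthread_mutex_t sq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  sq_cv   = PTHREAD_COND_INITIALIZER;

static bool reserve_sq_slot(void)
{
	for (;;) {
		if (atomic_fetch_sub(&sq_avail, 1) - 1 < 0) {
			/* Over-subscribed: undo the decrement and wait
			 * for a Send completion to return a credit. */
			atomic_fetch_add(&sq_avail, 1);
			pthread_mutex_lock(&sq_lock);
			while (atomic_load(&sq_avail) <= 1)
				pthread_cond_wait(&sq_cv, &sq_lock);
			pthread_mutex_unlock(&sq_lock);
			continue;
		}
		return true;	/* caller may post one Send WR */
	}
}

static void complete_sq_slot(void)
{
	/* Mirrors the completion handler: give back the credit, wake waiters. */
	atomic_fetch_add(&sq_avail, 1);
	pthread_mutex_lock(&sq_lock);
	pthread_cond_broadcast(&sq_cv);
	pthread_mutex_unlock(&sq_lock);
}

int main(void)
{
	if (reserve_sq_slot())
		printf("posted; %d credits left\n", atomic_load(&sq_avail));
	complete_sq_slot();
	return 0;
}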
111static u32 xdr_padsize(u32 len) 326static u32 xdr_padsize(u32 len)
112{ 327{
113 return (len & 3) ? (4 - (len & 3)) : 0; 328 return (len & 3) ? (4 - (len & 3)) : 0;
@@ -296,41 +511,10 @@ static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
296 return be32_to_cpup(p); 511 return be32_to_cpup(p);
297} 512}
298 513
299/* ib_dma_map_page() is used here because svc_rdma_dma_unmap()
300 * is used during completion to DMA-unmap this memory, and
301 * it uses ib_dma_unmap_page() exclusively.
302 */
303static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
304 struct svc_rdma_op_ctxt *ctxt,
305 unsigned int sge_no,
306 unsigned char *base,
307 unsigned int len)
308{
309 unsigned long offset = (unsigned long)base & ~PAGE_MASK;
310 struct ib_device *dev = rdma->sc_cm_id->device;
311 dma_addr_t dma_addr;
312
313 dma_addr = ib_dma_map_page(dev, virt_to_page(base),
314 offset, len, DMA_TO_DEVICE);
315 if (ib_dma_mapping_error(dev, dma_addr))
316 goto out_maperr;
317
318 ctxt->sge[sge_no].addr = dma_addr;
319 ctxt->sge[sge_no].length = len;
320 ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
321 svc_rdma_count_mappings(rdma, ctxt);
322 return 0;
323
324out_maperr:
325 pr_err("svcrdma: failed to map buffer\n");
326 return -EIO;
327}
328
329static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, 514static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
330 struct svc_rdma_op_ctxt *ctxt, 515 struct svc_rdma_send_ctxt *ctxt,
331 unsigned int sge_no,
332 struct page *page, 516 struct page *page,
333 unsigned int offset, 517 unsigned long offset,
334 unsigned int len) 518 unsigned int len)
335{ 519{
336 struct ib_device *dev = rdma->sc_cm_id->device; 520 struct ib_device *dev = rdma->sc_cm_id->device;
@@ -340,58 +524,71 @@ static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
340 if (ib_dma_mapping_error(dev, dma_addr)) 524 if (ib_dma_mapping_error(dev, dma_addr))
341 goto out_maperr; 525 goto out_maperr;
342 526
343 ctxt->sge[sge_no].addr = dma_addr; 527 ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
344 ctxt->sge[sge_no].length = len; 528 ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
345 ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; 529 ctxt->sc_send_wr.num_sge++;
346 svc_rdma_count_mappings(rdma, ctxt);
347 return 0; 530 return 0;
348 531
349out_maperr: 532out_maperr:
350 pr_err("svcrdma: failed to map page\n"); 533 trace_svcrdma_dma_map_page(rdma, page);
351 return -EIO; 534 return -EIO;
352} 535}
353 536
 537/* ib_dma_map_page() is used here because svc_rdma_send_ctxt_put()
 538 * handles the DMA-unmap, and it uses ib_dma_unmap_page() exclusively.
539 */
540static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
541 struct svc_rdma_send_ctxt *ctxt,
542 unsigned char *base,
543 unsigned int len)
544{
545 return svc_rdma_dma_map_page(rdma, ctxt, virt_to_page(base),
546 offset_in_page(base), len);
547}
548
354/** 549/**
355 * svc_rdma_map_reply_hdr - DMA map the transport header buffer 550 * svc_rdma_sync_reply_hdr - DMA sync the transport header buffer
356 * @rdma: controlling transport 551 * @rdma: controlling transport
357 * @ctxt: op_ctxt for the Send WR 552 * @ctxt: send_ctxt for the Send WR
358 * @rdma_resp: buffer containing transport header
359 * @len: length of transport header 553 * @len: length of transport header
360 * 554 *
361 * Returns:
362 * %0 if the header is DMA mapped,
363 * %-EIO if DMA mapping failed.
364 */ 555 */
365int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, 556void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
366 struct svc_rdma_op_ctxt *ctxt, 557 struct svc_rdma_send_ctxt *ctxt,
367 __be32 *rdma_resp, 558 unsigned int len)
368 unsigned int len)
369{ 559{
370 ctxt->direction = DMA_TO_DEVICE; 560 ctxt->sc_sges[0].length = len;
371 ctxt->pages[0] = virt_to_page(rdma_resp); 561 ctxt->sc_send_wr.num_sge++;
372 ctxt->count = 1; 562 ib_dma_sync_single_for_device(rdma->sc_pd->device,
373 return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len); 563 ctxt->sc_sges[0].addr, len,
564 DMA_TO_DEVICE);
374} 565}
375 566
376/* Load the xdr_buf into the ctxt's sge array, and DMA map each 567/* svc_rdma_map_reply_msg - Map the buffer holding RPC message
568 * @rdma: controlling transport
569 * @ctxt: send_ctxt for the Send WR
570 * @xdr: prepared xdr_buf containing RPC message
571 * @wr_lst: pointer to Call header's Write list, or NULL
572 *
573 * Load the xdr_buf into the ctxt's sge array, and DMA map each
377 * element as it is added. 574 * element as it is added.
378 * 575 *
379 * Returns the number of sge elements loaded on success, or 576 * Returns zero on success, or a negative errno on failure.
380 * a negative errno on failure.
381 */ 577 */
382static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 578int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
383 struct svc_rdma_op_ctxt *ctxt, 579 struct svc_rdma_send_ctxt *ctxt,
384 struct xdr_buf *xdr, __be32 *wr_lst) 580 struct xdr_buf *xdr, __be32 *wr_lst)
385{ 581{
386 unsigned int len, sge_no, remaining, page_off; 582 unsigned int len, remaining;
583 unsigned long page_off;
387 struct page **ppages; 584 struct page **ppages;
388 unsigned char *base; 585 unsigned char *base;
389 u32 xdr_pad; 586 u32 xdr_pad;
390 int ret; 587 int ret;
391 588
392 sge_no = 1; 589 if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
393 590 return -EIO;
394 ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, 591 ret = svc_rdma_dma_map_buf(rdma, ctxt,
395 xdr->head[0].iov_base, 592 xdr->head[0].iov_base,
396 xdr->head[0].iov_len); 593 xdr->head[0].iov_len);
397 if (ret < 0) 594 if (ret < 0)
@@ -421,8 +618,10 @@ static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
421 while (remaining) { 618 while (remaining) {
422 len = min_t(u32, PAGE_SIZE - page_off, remaining); 619 len = min_t(u32, PAGE_SIZE - page_off, remaining);
423 620
424 ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++, 621 if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
425 *ppages++, page_off, len); 622 return -EIO;
623 ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
624 page_off, len);
426 if (ret < 0) 625 if (ret < 0)
427 return ret; 626 return ret;
428 627
@@ -434,12 +633,14 @@ static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
434 len = xdr->tail[0].iov_len; 633 len = xdr->tail[0].iov_len;
435tail: 634tail:
436 if (len) { 635 if (len) {
437 ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len); 636 if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
637 return -EIO;
638 ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
438 if (ret < 0) 639 if (ret < 0)
439 return ret; 640 return ret;
440 } 641 }
441 642
442 return sge_no - 1; 643 return 0;
443} 644}
444 645
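svc_rdma_map_reply_msg() now tracks the next free SGE in sc_cur_sge_no and tests it against sc_max_send_sges before mapping the head, each page fragment, and the tail, returning -EIO rather than writing past the end of the SGE array. A small standalone sketch of that bounds-checked, page-at-a-time loop follows; DEMO_MAX_SGES, the fixed page size, and the demo_* names are invented for illustration.

/* Sketch: split a byte range into page-sized "SGEs", refusing to overrun
 * a fixed SGE budget, in the spirit of the checks added above. */
#include <errno.h>
#include <stdio.h>

#define DEMO_MAX_SGES	4
#define DEMO_PAGE_SIZE	4096u

static int demo_map_range(unsigned int *cur_sge, unsigned long offset,
			  unsigned int remaining)
{
	while (remaining) {
		unsigned int len = DEMO_PAGE_SIZE - (offset & (DEMO_PAGE_SIZE - 1));

		if (len > remaining)
			len = remaining;
		if (++(*cur_sge) >= DEMO_MAX_SGES)
			return -EIO;	/* would overrun the SGE array */
		printf("sge[%u]: offset=%lu len=%u\n", *cur_sge, offset, len);
		offset += len;
		remaining -= len;
	}
	return 0;
}

int main(void)
{
	unsigned int sge = 0;	/* SGE 0 is reserved for the transport header */

	return demo_map_range(&sge, 100, 9000) ? 1 : 0;
}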
445/* The svc_rqst and all resources it owns are released as soon as 646/* The svc_rqst and all resources it owns are released as soon as
@@ -447,62 +648,25 @@ tail:
447 * so they are released by the Send completion handler. 648 * so they are released by the Send completion handler.
448 */ 649 */
449static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, 650static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
450 struct svc_rdma_op_ctxt *ctxt) 651 struct svc_rdma_send_ctxt *ctxt)
451{ 652{
452 int i, pages = rqstp->rq_next_page - rqstp->rq_respages; 653 int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
453 654
454 ctxt->count += pages; 655 ctxt->sc_page_count += pages;
455 for (i = 0; i < pages; i++) { 656 for (i = 0; i < pages; i++) {
456 ctxt->pages[i + 1] = rqstp->rq_respages[i]; 657 ctxt->sc_pages[i] = rqstp->rq_respages[i];
457 rqstp->rq_respages[i] = NULL; 658 rqstp->rq_respages[i] = NULL;
458 } 659 }
459 rqstp->rq_next_page = rqstp->rq_respages + 1; 660 rqstp->rq_next_page = rqstp->rq_respages + 1;
460} 661}
461 662
462/**
463 * svc_rdma_post_send_wr - Set up and post one Send Work Request
464 * @rdma: controlling transport
465 * @ctxt: op_ctxt for transmitting the Send WR
466 * @num_sge: number of SGEs to send
467 * @inv_rkey: R_key argument to Send With Invalidate, or zero
468 *
469 * Returns:
470 * %0 if the Send* was posted successfully,
471 * %-ENOTCONN if the connection was lost or dropped,
472 * %-EINVAL if there was a problem with the Send we built,
473 * %-ENOMEM if ib_post_send failed.
474 */
475int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
476 struct svc_rdma_op_ctxt *ctxt, int num_sge,
477 u32 inv_rkey)
478{
479 struct ib_send_wr *send_wr = &ctxt->send_wr;
480
481 dprintk("svcrdma: posting Send WR with %u sge(s)\n", num_sge);
482
483 send_wr->next = NULL;
484 ctxt->cqe.done = svc_rdma_wc_send;
485 send_wr->wr_cqe = &ctxt->cqe;
486 send_wr->sg_list = ctxt->sge;
487 send_wr->num_sge = num_sge;
488 send_wr->send_flags = IB_SEND_SIGNALED;
489 if (inv_rkey) {
490 send_wr->opcode = IB_WR_SEND_WITH_INV;
491 send_wr->ex.invalidate_rkey = inv_rkey;
492 } else {
493 send_wr->opcode = IB_WR_SEND;
494 }
495
496 return svc_rdma_send(rdma, send_wr);
497}
498
499/* Prepare the portion of the RPC Reply that will be transmitted 663/* Prepare the portion of the RPC Reply that will be transmitted
500 * via RDMA Send. The RPC-over-RDMA transport header is prepared 664 * via RDMA Send. The RPC-over-RDMA transport header is prepared
501 * in sge[0], and the RPC xdr_buf is prepared in following sges. 665 * in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
502 * 666 *
503 * Depending on whether a Write list or Reply chunk is present, 667 * Depending on whether a Write list or Reply chunk is present,
504 * the server may send all, a portion of, or none of the xdr_buf. 668 * the server may send all, a portion of, or none of the xdr_buf.
505 * In the latter case, only the transport header (sge[0]) is 669 * In the latter case, only the transport header (sc_sges[0]) is
506 * transmitted. 670 * transmitted.
507 * 671 *
508 * RDMA Send is the last step of transmitting an RPC reply. Pages 672 * RDMA Send is the last step of transmitting an RPC reply. Pages
@@ -515,49 +679,32 @@ int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
515 * - The Reply's transport header will never be larger than a page. 679 * - The Reply's transport header will never be larger than a page.
516 */ 680 */
517static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, 681static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
518 __be32 *rdma_argp, __be32 *rdma_resp, 682 struct svc_rdma_send_ctxt *ctxt,
683 __be32 *rdma_argp,
519 struct svc_rqst *rqstp, 684 struct svc_rqst *rqstp,
520 __be32 *wr_lst, __be32 *rp_ch) 685 __be32 *wr_lst, __be32 *rp_ch)
521{ 686{
522 struct svc_rdma_op_ctxt *ctxt;
523 u32 inv_rkey;
524 int ret; 687 int ret;
525 688
526 dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n",
527 (rp_ch ? "RDMA_NOMSG" : "RDMA_MSG"),
528 rqstp->rq_res.head[0].iov_len,
529 rqstp->rq_res.page_len,
530 rqstp->rq_res.tail[0].iov_len);
531
532 ctxt = svc_rdma_get_context(rdma);
533
534 ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp,
535 svc_rdma_reply_hdr_len(rdma_resp));
536 if (ret < 0)
537 goto err;
538
539 if (!rp_ch) { 689 if (!rp_ch) {
540 ret = svc_rdma_map_reply_msg(rdma, ctxt, 690 ret = svc_rdma_map_reply_msg(rdma, ctxt,
541 &rqstp->rq_res, wr_lst); 691 &rqstp->rq_res, wr_lst);
542 if (ret < 0) 692 if (ret < 0)
543 goto err; 693 return ret;
544 } 694 }
545 695
546 svc_rdma_save_io_pages(rqstp, ctxt); 696 svc_rdma_save_io_pages(rqstp, ctxt);
547 697
548 inv_rkey = 0; 698 ctxt->sc_send_wr.opcode = IB_WR_SEND;
549 if (rdma->sc_snd_w_inv) 699 if (rdma->sc_snd_w_inv) {
550 inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch); 700 ctxt->sc_send_wr.ex.invalidate_rkey =
551 ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, inv_rkey); 701 svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
552 if (ret) 702 if (ctxt->sc_send_wr.ex.invalidate_rkey)
553 goto err; 703 ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
554 704 }
555 return 0; 705 dprintk("svcrdma: posting Send WR with %u sge(s)\n",
556 706 ctxt->sc_send_wr.num_sge);
557err: 707 return svc_rdma_send(rdma, &ctxt->sc_send_wr);
558 svc_rdma_unmap_dma(ctxt);
559 svc_rdma_put_context(ctxt, 1);
560 return ret;
561} 708}
562 709
563/* Given the client-provided Write and Reply chunks, the server was not 710/* Given the client-provided Write and Reply chunks, the server was not
@@ -568,38 +715,29 @@ err:
568 * Remote Invalidation is skipped for simplicity. 715 * Remote Invalidation is skipped for simplicity.
569 */ 716 */
570static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, 717static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
571 __be32 *rdma_resp, struct svc_rqst *rqstp) 718 struct svc_rdma_send_ctxt *ctxt,
719 struct svc_rqst *rqstp)
572{ 720{
573 struct svc_rdma_op_ctxt *ctxt;
574 __be32 *p; 721 __be32 *p;
575 int ret; 722 int ret;
576 723
577 ctxt = svc_rdma_get_context(rdma); 724 p = ctxt->sc_xprt_buf;
578 725 trace_svcrdma_err_chunk(*p);
579 /* Replace the original transport header with an 726 p += 3;
580 * RDMA_ERROR response. XID etc are preserved.
581 */
582 p = rdma_resp + 3;
583 *p++ = rdma_error; 727 *p++ = rdma_error;
584 *p = err_chunk; 728 *p = err_chunk;
585 729 svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_ERR);
586 ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 20);
587 if (ret < 0)
588 goto err;
589 730
590 svc_rdma_save_io_pages(rqstp, ctxt); 731 svc_rdma_save_io_pages(rqstp, ctxt);
591 732
592 ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, 0); 733 ctxt->sc_send_wr.opcode = IB_WR_SEND;
593 if (ret) 734 ret = svc_rdma_send(rdma, &ctxt->sc_send_wr);
594 goto err; 735 if (ret) {
736 svc_rdma_send_ctxt_put(rdma, ctxt);
737 return ret;
738 }
595 739
596 return 0; 740 return 0;
597
598err:
599 pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
600 svc_rdma_unmap_dma(ctxt);
601 svc_rdma_put_context(ctxt, 1);
602 return ret;
603} 741}
604 742
605void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) 743void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -623,20 +761,15 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
623 struct svc_xprt *xprt = rqstp->rq_xprt; 761 struct svc_xprt *xprt = rqstp->rq_xprt;
624 struct svcxprt_rdma *rdma = 762 struct svcxprt_rdma *rdma =
625 container_of(xprt, struct svcxprt_rdma, sc_xprt); 763 container_of(xprt, struct svcxprt_rdma, sc_xprt);
764 struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
626 __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch; 765 __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch;
627 struct xdr_buf *xdr = &rqstp->rq_res; 766 struct xdr_buf *xdr = &rqstp->rq_res;
628 struct page *res_page; 767 struct svc_rdma_send_ctxt *sctxt;
629 int ret; 768 int ret;
630 769
631 /* Find the call's chunk lists to decide how to send the reply. 770 rdma_argp = rctxt->rc_recv_buf;
632 * Receive places the Call's xprt header at the start of page 0.
633 */
634 rdma_argp = page_address(rqstp->rq_pages[0]);
635 svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch); 771 svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);
636 772
637 dprintk("svcrdma: preparing response for XID 0x%08x\n",
638 be32_to_cpup(rdma_argp));
639
640 /* Create the RDMA response header. xprt->xpt_mutex, 773 /* Create the RDMA response header. xprt->xpt_mutex,
641 * acquired in svc_send(), serializes RPC replies. The 774 * acquired in svc_send(), serializes RPC replies. The
642 * code path below that inserts the credit grant value 775 * code path below that inserts the credit grant value
@@ -644,10 +777,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
644 * critical section. 777 * critical section.
645 */ 778 */
646 ret = -ENOMEM; 779 ret = -ENOMEM;
647 res_page = alloc_page(GFP_KERNEL); 780 sctxt = svc_rdma_send_ctxt_get(rdma);
648 if (!res_page) 781 if (!sctxt)
649 goto err0; 782 goto err0;
650 rdma_resp = page_address(res_page); 783 rdma_resp = sctxt->sc_xprt_buf;
651 784
652 p = rdma_resp; 785 p = rdma_resp;
653 *p++ = *rdma_argp; 786 *p++ = *rdma_argp;
@@ -674,26 +807,33 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
674 svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); 807 svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
675 } 808 }
676 809
677 ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp, 810 svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
811 ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
678 wr_lst, rp_ch); 812 wr_lst, rp_ch);
679 if (ret < 0) 813 if (ret < 0)
680 goto err0; 814 goto err1;
681 return 0; 815 ret = 0;
816
817out:
818 rqstp->rq_xprt_ctxt = NULL;
819 svc_rdma_recv_ctxt_put(rdma, rctxt);
820 return ret;
682 821
683 err2: 822 err2:
684 if (ret != -E2BIG && ret != -EINVAL) 823 if (ret != -E2BIG && ret != -EINVAL)
685 goto err1; 824 goto err1;
686 825
687 ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp); 826 ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
688 if (ret < 0) 827 if (ret < 0)
689 goto err0; 828 goto err1;
690 return 0; 829 ret = 0;
830 goto out;
691 831
692 err1: 832 err1:
693 put_page(res_page); 833 svc_rdma_send_ctxt_put(rdma, sctxt);
694 err0: 834 err0:
695 pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n", 835 trace_svcrdma_send_failed(rqstp, ret);
696 ret);
697 set_bit(XPT_CLOSE, &xprt->xpt_flags); 836 set_bit(XPT_CLOSE, &xprt->xpt_flags);
698 return -ENOTCONN; 837 ret = -ENOTCONN;
838 goto out;
699} 839}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 96cc8f6597d3..e9535a66bab0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1,4 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
1/* 2/*
3 * Copyright (c) 2015-2018 Oracle. All rights reserved.
2 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
3 * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. 5 * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
4 * 6 *
@@ -40,26 +42,30 @@
40 * Author: Tom Tucker <tom@opengridcomputing.com> 42 * Author: Tom Tucker <tom@opengridcomputing.com>
41 */ 43 */
42 44
43#include <linux/sunrpc/svc_xprt.h>
44#include <linux/sunrpc/addr.h>
45#include <linux/sunrpc/debug.h>
46#include <linux/sunrpc/rpc_rdma.h>
47#include <linux/interrupt.h> 45#include <linux/interrupt.h>
48#include <linux/sched.h> 46#include <linux/sched.h>
49#include <linux/slab.h> 47#include <linux/slab.h>
50#include <linux/spinlock.h> 48#include <linux/spinlock.h>
51#include <linux/workqueue.h> 49#include <linux/workqueue.h>
50#include <linux/export.h>
51
52#include <rdma/ib_verbs.h> 52#include <rdma/ib_verbs.h>
53#include <rdma/rdma_cm.h> 53#include <rdma/rdma_cm.h>
54#include <rdma/rw.h> 54#include <rdma/rw.h>
55
56#include <linux/sunrpc/addr.h>
57#include <linux/sunrpc/debug.h>
58#include <linux/sunrpc/rpc_rdma.h>
59#include <linux/sunrpc/svc_xprt.h>
55#include <linux/sunrpc/svc_rdma.h> 60#include <linux/sunrpc/svc_rdma.h>
56#include <linux/export.h> 61
57#include "xprt_rdma.h" 62#include "xprt_rdma.h"
63#include <trace/events/rpcrdma.h>
58 64
59#define RPCDBG_FACILITY RPCDBG_SVCXPRT 65#define RPCDBG_FACILITY RPCDBG_SVCXPRT
60 66
61static int svc_rdma_post_recv(struct svcxprt_rdma *xprt); 67static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
62static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int); 68 struct net *net);
63static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 69static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
64 struct net *net, 70 struct net *net,
65 struct sockaddr *sa, int salen, 71 struct sockaddr *sa, int salen,
@@ -123,7 +129,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
123 struct svcxprt_rdma *cma_xprt; 129 struct svcxprt_rdma *cma_xprt;
124 struct svc_xprt *xprt; 130 struct svc_xprt *xprt;
125 131
126 cma_xprt = rdma_create_xprt(serv, 0); 132 cma_xprt = svc_rdma_create_xprt(serv, net);
127 if (!cma_xprt) 133 if (!cma_xprt)
128 return ERR_PTR(-ENOMEM); 134 return ERR_PTR(-ENOMEM);
129 xprt = &cma_xprt->sc_xprt; 135 xprt = &cma_xprt->sc_xprt;
@@ -152,133 +158,20 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
152} 158}
153#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 159#endif /* CONFIG_SUNRPC_BACKCHANNEL */
154 160
155static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
156 gfp_t flags)
157{
158 struct svc_rdma_op_ctxt *ctxt;
159
160 ctxt = kmalloc(sizeof(*ctxt), flags);
161 if (ctxt) {
162 ctxt->xprt = xprt;
163 INIT_LIST_HEAD(&ctxt->list);
164 }
165 return ctxt;
166}
167
168static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
169{
170 unsigned int i;
171
172 /* Each RPC/RDMA credit can consume one Receive and
173 * one Send WQE at the same time.
174 */
175 i = xprt->sc_sq_depth + xprt->sc_rq_depth;
176
177 while (i--) {
178 struct svc_rdma_op_ctxt *ctxt;
179
180 ctxt = alloc_ctxt(xprt, GFP_KERNEL);
181 if (!ctxt) {
182 dprintk("svcrdma: No memory for RDMA ctxt\n");
183 return false;
184 }
185 list_add(&ctxt->list, &xprt->sc_ctxts);
186 }
187 return true;
188}
189
190struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
191{
192 struct svc_rdma_op_ctxt *ctxt = NULL;
193
194 spin_lock(&xprt->sc_ctxt_lock);
195 xprt->sc_ctxt_used++;
196 if (list_empty(&xprt->sc_ctxts))
197 goto out_empty;
198
199 ctxt = list_first_entry(&xprt->sc_ctxts,
200 struct svc_rdma_op_ctxt, list);
201 list_del(&ctxt->list);
202 spin_unlock(&xprt->sc_ctxt_lock);
203
204out:
205 ctxt->count = 0;
206 ctxt->mapped_sges = 0;
207 return ctxt;
208
209out_empty:
210 /* Either pre-allocation missed the mark, or send
211 * queue accounting is broken.
212 */
213 spin_unlock(&xprt->sc_ctxt_lock);
214
215 ctxt = alloc_ctxt(xprt, GFP_NOIO);
216 if (ctxt)
217 goto out;
218
219 spin_lock(&xprt->sc_ctxt_lock);
220 xprt->sc_ctxt_used--;
221 spin_unlock(&xprt->sc_ctxt_lock);
222 WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
223 return NULL;
224}
225
226void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
227{
228 struct svcxprt_rdma *xprt = ctxt->xprt;
229 struct ib_device *device = xprt->sc_cm_id->device;
230 unsigned int i;
231
232 for (i = 0; i < ctxt->mapped_sges; i++)
233 ib_dma_unmap_page(device,
234 ctxt->sge[i].addr,
235 ctxt->sge[i].length,
236 ctxt->direction);
237 ctxt->mapped_sges = 0;
238}
239
240void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
241{
242 struct svcxprt_rdma *xprt = ctxt->xprt;
243 int i;
244
245 if (free_pages)
246 for (i = 0; i < ctxt->count; i++)
247 put_page(ctxt->pages[i]);
248
249 spin_lock(&xprt->sc_ctxt_lock);
250 xprt->sc_ctxt_used--;
251 list_add(&ctxt->list, &xprt->sc_ctxts);
252 spin_unlock(&xprt->sc_ctxt_lock);
253}
254
255static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
256{
257 while (!list_empty(&xprt->sc_ctxts)) {
258 struct svc_rdma_op_ctxt *ctxt;
259
260 ctxt = list_first_entry(&xprt->sc_ctxts,
261 struct svc_rdma_op_ctxt, list);
262 list_del(&ctxt->list);
263 kfree(ctxt);
264 }
265}
266
267/* QP event handler */ 161/* QP event handler */
268static void qp_event_handler(struct ib_event *event, void *context) 162static void qp_event_handler(struct ib_event *event, void *context)
269{ 163{
270 struct svc_xprt *xprt = context; 164 struct svc_xprt *xprt = context;
271 165
166 trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote);
272 switch (event->event) { 167 switch (event->event) {
273 /* These are considered benign events */ 168 /* These are considered benign events */
274 case IB_EVENT_PATH_MIG: 169 case IB_EVENT_PATH_MIG:
275 case IB_EVENT_COMM_EST: 170 case IB_EVENT_COMM_EST:
276 case IB_EVENT_SQ_DRAINED: 171 case IB_EVENT_SQ_DRAINED:
277 case IB_EVENT_QP_LAST_WQE_REACHED: 172 case IB_EVENT_QP_LAST_WQE_REACHED:
278 dprintk("svcrdma: QP event %s (%d) received for QP=%p\n",
279 ib_event_msg(event->event), event->event,
280 event->element.qp);
281 break; 173 break;
174
282 /* These are considered fatal events */ 175 /* These are considered fatal events */
283 case IB_EVENT_PATH_MIG_ERR: 176 case IB_EVENT_PATH_MIG_ERR:
284 case IB_EVENT_QP_FATAL: 177 case IB_EVENT_QP_FATAL:
@@ -286,111 +179,34 @@ static void qp_event_handler(struct ib_event *event, void *context)
286 case IB_EVENT_QP_ACCESS_ERR: 179 case IB_EVENT_QP_ACCESS_ERR:
287 case IB_EVENT_DEVICE_FATAL: 180 case IB_EVENT_DEVICE_FATAL:
288 default: 181 default:
289 dprintk("svcrdma: QP ERROR event %s (%d) received for QP=%p, "
290 "closing transport\n",
291 ib_event_msg(event->event), event->event,
292 event->element.qp);
293 set_bit(XPT_CLOSE, &xprt->xpt_flags); 182 set_bit(XPT_CLOSE, &xprt->xpt_flags);
294 svc_xprt_enqueue(xprt); 183 svc_xprt_enqueue(xprt);
295 break; 184 break;
296 } 185 }
297} 186}
298 187
299/** 188static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
300 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 189 struct net *net)
301 * @cq: completion queue
302 * @wc: completed WR
303 *
304 */
305static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
306{
307 struct svcxprt_rdma *xprt = cq->cq_context;
308 struct ib_cqe *cqe = wc->wr_cqe;
309 struct svc_rdma_op_ctxt *ctxt;
310
311 /* WARNING: Only wc->wr_cqe and wc->status are reliable */
312 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
313 svc_rdma_unmap_dma(ctxt);
314
315 if (wc->status != IB_WC_SUCCESS)
316 goto flushed;
317
318 /* All wc fields are now known to be valid */
319 ctxt->byte_len = wc->byte_len;
320 spin_lock(&xprt->sc_rq_dto_lock);
321 list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
322 spin_unlock(&xprt->sc_rq_dto_lock);
323
324 svc_rdma_post_recv(xprt);
325
326 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
327 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
328 goto out;
329 goto out_enqueue;
330
331flushed:
332 if (wc->status != IB_WC_WR_FLUSH_ERR)
333 pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
334 ib_wc_status_msg(wc->status),
335 wc->status, wc->vendor_err);
336 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
337 svc_rdma_put_context(ctxt, 1);
338
339out_enqueue:
340 svc_xprt_enqueue(&xprt->sc_xprt);
341out:
342 svc_xprt_put(&xprt->sc_xprt);
343}
344
345/**
346 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
347 * @cq: completion queue
348 * @wc: completed WR
349 *
350 */
351void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
352{
353 struct svcxprt_rdma *xprt = cq->cq_context;
354 struct ib_cqe *cqe = wc->wr_cqe;
355 struct svc_rdma_op_ctxt *ctxt;
356
357 atomic_inc(&xprt->sc_sq_avail);
358 wake_up(&xprt->sc_send_wait);
359
360 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
361 svc_rdma_unmap_dma(ctxt);
362 svc_rdma_put_context(ctxt, 1);
363
364 if (unlikely(wc->status != IB_WC_SUCCESS)) {
365 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
366 svc_xprt_enqueue(&xprt->sc_xprt);
367 if (wc->status != IB_WC_WR_FLUSH_ERR)
368 pr_err("svcrdma: Send: %s (%u/0x%x)\n",
369 ib_wc_status_msg(wc->status),
370 wc->status, wc->vendor_err);
371 }
372
373 svc_xprt_put(&xprt->sc_xprt);
374}
375
376static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
377 int listener)
378{ 190{
379 struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL); 191 struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);
380 192
381 if (!cma_xprt) 193 if (!cma_xprt) {
194 dprintk("svcrdma: failed to create new transport\n");
382 return NULL; 195 return NULL;
383 svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); 196 }
197 svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
384 INIT_LIST_HEAD(&cma_xprt->sc_accept_q); 198 INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
385 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 199 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
386 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); 200 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
387 INIT_LIST_HEAD(&cma_xprt->sc_ctxts); 201 INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
202 INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
388 INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts); 203 INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
389 init_waitqueue_head(&cma_xprt->sc_send_wait); 204 init_waitqueue_head(&cma_xprt->sc_send_wait);
390 205
391 spin_lock_init(&cma_xprt->sc_lock); 206 spin_lock_init(&cma_xprt->sc_lock);
392 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 207 spin_lock_init(&cma_xprt->sc_rq_dto_lock);
393 spin_lock_init(&cma_xprt->sc_ctxt_lock); 208 spin_lock_init(&cma_xprt->sc_send_lock);
209 spin_lock_init(&cma_xprt->sc_recv_lock);
394 spin_lock_init(&cma_xprt->sc_rw_ctxt_lock); 210 spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
395 211
396 /* 212 /*
@@ -401,70 +217,9 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
401 */ 217 */
402 set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags); 218 set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
403 219
404 if (listener) {
405 strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
406 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
407 }
408
409 return cma_xprt; 220 return cma_xprt;
410} 221}
411 222
412static int
413svc_rdma_post_recv(struct svcxprt_rdma *xprt)
414{
415 struct ib_recv_wr recv_wr, *bad_recv_wr;
416 struct svc_rdma_op_ctxt *ctxt;
417 struct page *page;
418 dma_addr_t pa;
419 int sge_no;
420 int buflen;
421 int ret;
422
423 ctxt = svc_rdma_get_context(xprt);
424 buflen = 0;
425 ctxt->direction = DMA_FROM_DEVICE;
426 ctxt->cqe.done = svc_rdma_wc_receive;
427 for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
428 if (sge_no >= xprt->sc_max_sge) {
429 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
430 goto err_put_ctxt;
431 }
432 page = alloc_page(GFP_KERNEL);
433 if (!page)
434 goto err_put_ctxt;
435 ctxt->pages[sge_no] = page;
436 pa = ib_dma_map_page(xprt->sc_cm_id->device,
437 page, 0, PAGE_SIZE,
438 DMA_FROM_DEVICE);
439 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
440 goto err_put_ctxt;
441 svc_rdma_count_mappings(xprt, ctxt);
442 ctxt->sge[sge_no].addr = pa;
443 ctxt->sge[sge_no].length = PAGE_SIZE;
444 ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
445 ctxt->count = sge_no + 1;
446 buflen += PAGE_SIZE;
447 }
448 recv_wr.next = NULL;
449 recv_wr.sg_list = &ctxt->sge[0];
450 recv_wr.num_sge = ctxt->count;
451 recv_wr.wr_cqe = &ctxt->cqe;
452
453 svc_xprt_get(&xprt->sc_xprt);
454 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
455 if (ret) {
456 svc_rdma_unmap_dma(ctxt);
457 svc_rdma_put_context(ctxt, 1);
458 svc_xprt_put(&xprt->sc_xprt);
459 }
460 return ret;
461
462 err_put_ctxt:
463 svc_rdma_unmap_dma(ctxt);
464 svc_rdma_put_context(ctxt, 1);
465 return -ENOMEM;
466}
467
468static void 223static void
469svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt, 224svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
470 struct rdma_conn_param *param) 225 struct rdma_conn_param *param)
@@ -504,15 +259,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
504 struct sockaddr *sa; 259 struct sockaddr *sa;
505 260
506 /* Create a new transport */ 261 /* Create a new transport */
507 newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); 262 newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
508 if (!newxprt) { 263 listen_xprt->sc_xprt.xpt_net);
509 dprintk("svcrdma: failed to create new transport\n"); 264 if (!newxprt)
510 return; 265 return;
511 }
512 newxprt->sc_cm_id = new_cma_id; 266 newxprt->sc_cm_id = new_cma_id;
513 new_cma_id->context = newxprt; 267 new_cma_id->context = newxprt;
514 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
515 newxprt, newxprt->sc_cm_id, listen_xprt);
516 svc_rdma_parse_connect_private(newxprt, param); 268 svc_rdma_parse_connect_private(newxprt, param);
517 269
518 /* Save client advertised inbound read limit for use later in accept. */ 270 /* Save client advertised inbound read limit for use later in accept. */
@@ -543,9 +295,11 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
543static int rdma_listen_handler(struct rdma_cm_id *cma_id, 295static int rdma_listen_handler(struct rdma_cm_id *cma_id,
544 struct rdma_cm_event *event) 296 struct rdma_cm_event *event)
545{ 297{
546 struct svcxprt_rdma *xprt = cma_id->context; 298 struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr;
547 int ret = 0; 299 int ret = 0;
548 300
301 trace_svcrdma_cm_event(event, sap);
302
549 switch (event->event) { 303 switch (event->event) {
550 case RDMA_CM_EVENT_CONNECT_REQUEST: 304 case RDMA_CM_EVENT_CONNECT_REQUEST:
551 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " 305 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
@@ -553,23 +307,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
553 rdma_event_msg(event->event), event->event); 307 rdma_event_msg(event->event), event->event);
554 handle_connect_req(cma_id, &event->param.conn); 308 handle_connect_req(cma_id, &event->param.conn);
555 break; 309 break;
556
557 case RDMA_CM_EVENT_ESTABLISHED:
558 /* Accept complete */
559 dprintk("svcrdma: Connection completed on LISTEN xprt=%p, "
560 "cm_id=%p\n", xprt, cma_id);
561 break;
562
563 case RDMA_CM_EVENT_DEVICE_REMOVAL:
564 dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
565 xprt, cma_id);
566 if (xprt) {
567 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
568 svc_xprt_enqueue(&xprt->sc_xprt);
569 }
570 break;
571
572 default: 310 default:
311 /* NB: No device removal upcall for INADDR_ANY listeners */
573 dprintk("svcrdma: Unexpected event on listening endpoint %p, " 312 dprintk("svcrdma: Unexpected event on listening endpoint %p, "
574 "event = %s (%d)\n", cma_id, 313 "event = %s (%d)\n", cma_id,
575 rdma_event_msg(event->event), event->event); 314 rdma_event_msg(event->event), event->event);
@@ -582,9 +321,12 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
582static int rdma_cma_handler(struct rdma_cm_id *cma_id, 321static int rdma_cma_handler(struct rdma_cm_id *cma_id,
583 struct rdma_cm_event *event) 322 struct rdma_cm_event *event)
584{ 323{
585 struct svc_xprt *xprt = cma_id->context; 324 struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.dst_addr;
586 struct svcxprt_rdma *rdma = 325 struct svcxprt_rdma *rdma = cma_id->context;
587 container_of(xprt, struct svcxprt_rdma, sc_xprt); 326 struct svc_xprt *xprt = &rdma->sc_xprt;
327
328 trace_svcrdma_cm_event(event, sap);
329
588 switch (event->event) { 330 switch (event->event) {
589 case RDMA_CM_EVENT_ESTABLISHED: 331 case RDMA_CM_EVENT_ESTABLISHED:
590 /* Accept complete */ 332 /* Accept complete */
@@ -597,21 +339,17 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
597 case RDMA_CM_EVENT_DISCONNECTED: 339 case RDMA_CM_EVENT_DISCONNECTED:
598 dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n", 340 dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
599 xprt, cma_id); 341 xprt, cma_id);
600 if (xprt) { 342 set_bit(XPT_CLOSE, &xprt->xpt_flags);
601 set_bit(XPT_CLOSE, &xprt->xpt_flags); 343 svc_xprt_enqueue(xprt);
602 svc_xprt_enqueue(xprt); 344 svc_xprt_put(xprt);
603 svc_xprt_put(xprt);
604 }
605 break; 345 break;
606 case RDMA_CM_EVENT_DEVICE_REMOVAL: 346 case RDMA_CM_EVENT_DEVICE_REMOVAL:
607 dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, " 347 dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
608 "event = %s (%d)\n", cma_id, xprt, 348 "event = %s (%d)\n", cma_id, xprt,
609 rdma_event_msg(event->event), event->event); 349 rdma_event_msg(event->event), event->event);
610 if (xprt) { 350 set_bit(XPT_CLOSE, &xprt->xpt_flags);
611 set_bit(XPT_CLOSE, &xprt->xpt_flags); 351 svc_xprt_enqueue(xprt);
612 svc_xprt_enqueue(xprt); 352 svc_xprt_put(xprt);
613 svc_xprt_put(xprt);
614 }
615 break; 353 break;
616 default: 354 default:
617 dprintk("svcrdma: Unexpected event on DTO endpoint %p, " 355 dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
@@ -634,16 +372,18 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
634 struct svcxprt_rdma *cma_xprt; 372 struct svcxprt_rdma *cma_xprt;
635 int ret; 373 int ret;
636 374
637 dprintk("svcrdma: Creating RDMA socket\n"); 375 dprintk("svcrdma: Creating RDMA listener\n");
638 if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) { 376 if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
639 dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family); 377 dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
640 return ERR_PTR(-EAFNOSUPPORT); 378 return ERR_PTR(-EAFNOSUPPORT);
641 } 379 }
642 cma_xprt = rdma_create_xprt(serv, 1); 380 cma_xprt = svc_rdma_create_xprt(serv, net);
643 if (!cma_xprt) 381 if (!cma_xprt)
644 return ERR_PTR(-ENOMEM); 382 return ERR_PTR(-ENOMEM);
383 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
384 strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
645 385
646 listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt, 386 listen_id = rdma_create_id(net, rdma_listen_handler, cma_xprt,
647 RDMA_PS_TCP, IB_QPT_RC); 387 RDMA_PS_TCP, IB_QPT_RC);
648 if (IS_ERR(listen_id)) { 388 if (IS_ERR(listen_id)) {
649 ret = PTR_ERR(listen_id); 389 ret = PTR_ERR(listen_id);
@@ -708,9 +448,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
708 struct rdma_conn_param conn_param; 448 struct rdma_conn_param conn_param;
709 struct rpcrdma_connect_private pmsg; 449 struct rpcrdma_connect_private pmsg;
710 struct ib_qp_init_attr qp_attr; 450 struct ib_qp_init_attr qp_attr;
451 unsigned int ctxts, rq_depth;
711 struct ib_device *dev; 452 struct ib_device *dev;
712 struct sockaddr *sap; 453 struct sockaddr *sap;
713 unsigned int i, ctxts;
714 int ret = 0; 454 int ret = 0;
715 455
716 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); 456 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
@@ -736,24 +476,28 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
736 476
737 /* Qualify the transport resource defaults with the 477 /* Qualify the transport resource defaults with the
738 * capabilities of this particular device */ 478 * capabilities of this particular device */
739 newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge, 479 newxprt->sc_max_send_sges = dev->attrs.max_sge;
740 (size_t)RPCSVC_MAXPAGES); 480 /* transport hdr, head iovec, one page list entry, tail iovec */
481 if (newxprt->sc_max_send_sges < 4) {
482 pr_err("svcrdma: too few Send SGEs available (%d)\n",
483 newxprt->sc_max_send_sges);
484 goto errout;
485 }
741 newxprt->sc_max_req_size = svcrdma_max_req_size; 486 newxprt->sc_max_req_size = svcrdma_max_req_size;
742 newxprt->sc_max_requests = svcrdma_max_requests; 487 newxprt->sc_max_requests = svcrdma_max_requests;
743 newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; 488 newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
744 newxprt->sc_rq_depth = newxprt->sc_max_requests + 489 rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests;
745 newxprt->sc_max_bc_requests; 490 if (rq_depth > dev->attrs.max_qp_wr) {
746 if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) {
747 pr_warn("svcrdma: reducing receive depth to %d\n", 491 pr_warn("svcrdma: reducing receive depth to %d\n",
748 dev->attrs.max_qp_wr); 492 dev->attrs.max_qp_wr);
749 newxprt->sc_rq_depth = dev->attrs.max_qp_wr; 493 rq_depth = dev->attrs.max_qp_wr;
750 newxprt->sc_max_requests = newxprt->sc_rq_depth - 2; 494 newxprt->sc_max_requests = rq_depth - 2;
751 newxprt->sc_max_bc_requests = 2; 495 newxprt->sc_max_bc_requests = 2;
752 } 496 }
753 newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); 497 newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
754 ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES); 498 ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
755 ctxts *= newxprt->sc_max_requests; 499 ctxts *= newxprt->sc_max_requests;
756 newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts; 500 newxprt->sc_sq_depth = rq_depth + ctxts;
757 if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) { 501 if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
758 pr_warn("svcrdma: reducing send depth to %d\n", 502 pr_warn("svcrdma: reducing send depth to %d\n",
759 dev->attrs.max_qp_wr); 503 dev->attrs.max_qp_wr);
@@ -761,9 +505,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
761 } 505 }
762 atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); 506 atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
763 507
764 if (!svc_rdma_prealloc_ctxts(newxprt))
765 goto errout;
766
767 newxprt->sc_pd = ib_alloc_pd(dev, 0); 508 newxprt->sc_pd = ib_alloc_pd(dev, 0);
768 if (IS_ERR(newxprt->sc_pd)) { 509 if (IS_ERR(newxprt->sc_pd)) {
769 dprintk("svcrdma: error creating PD for connect request\n"); 510 dprintk("svcrdma: error creating PD for connect request\n");
@@ -775,7 +516,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
775 dprintk("svcrdma: error creating SQ CQ for connect request\n"); 516 dprintk("svcrdma: error creating SQ CQ for connect request\n");
776 goto errout; 517 goto errout;
777 } 518 }
778 newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth, 519 newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
779 0, IB_POLL_WORKQUEUE); 520 0, IB_POLL_WORKQUEUE);
780 if (IS_ERR(newxprt->sc_rq_cq)) { 521 if (IS_ERR(newxprt->sc_rq_cq)) {
781 dprintk("svcrdma: error creating RQ CQ for connect request\n"); 522 dprintk("svcrdma: error creating RQ CQ for connect request\n");
@@ -788,9 +529,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
788 qp_attr.port_num = newxprt->sc_port_num; 529 qp_attr.port_num = newxprt->sc_port_num;
789 qp_attr.cap.max_rdma_ctxs = ctxts; 530 qp_attr.cap.max_rdma_ctxs = ctxts;
790 qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts; 531 qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
791 qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth; 532 qp_attr.cap.max_recv_wr = rq_depth;
792 qp_attr.cap.max_send_sge = newxprt->sc_max_sge; 533 qp_attr.cap.max_send_sge = newxprt->sc_max_send_sges;
793 qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; 534 qp_attr.cap.max_recv_sge = 1;
794 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 535 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
795 qp_attr.qp_type = IB_QPT_RC; 536 qp_attr.qp_type = IB_QPT_RC;
796 qp_attr.send_cq = newxprt->sc_sq_cq; 537 qp_attr.send_cq = newxprt->sc_sq_cq;
@@ -815,14 +556,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
815 !rdma_ib_or_roce(dev, newxprt->sc_port_num)) 556 !rdma_ib_or_roce(dev, newxprt->sc_port_num))
816 goto errout; 557 goto errout;
817 558
818 /* Post receive buffers */ 559 if (!svc_rdma_post_recvs(newxprt))
819 for (i = 0; i < newxprt->sc_max_requests; i++) { 560 goto errout;
820 ret = svc_rdma_post_recv(newxprt);
821 if (ret) {
822 dprintk("svcrdma: failure posting receive buffers\n");
823 goto errout;
824 }
825 }
826 561
827 /* Swap out the handler */ 562 /* Swap out the handler */
828 newxprt->sc_cm_id->event_handler = rdma_cma_handler; 563 newxprt->sc_cm_id->event_handler = rdma_cma_handler;
@@ -856,16 +591,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
856 dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); 591 dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
857 sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; 592 sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
858 dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); 593 dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
859 dprintk(" max_sge : %d\n", newxprt->sc_max_sge); 594 dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges);
860 dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); 595 dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
861 dprintk(" rdma_rw_ctxs : %d\n", ctxts); 596 dprintk(" rdma_rw_ctxs : %d\n", ctxts);
862 dprintk(" max_requests : %d\n", newxprt->sc_max_requests); 597 dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
863 dprintk(" ord : %d\n", conn_param.initiator_depth); 598 dprintk(" ord : %d\n", conn_param.initiator_depth);
864 599
600 trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
865 return &newxprt->sc_xprt; 601 return &newxprt->sc_xprt;
866 602
867 errout: 603 errout:
868 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); 604 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
605 trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
869 /* Take a reference in case the DTO handler runs */ 606 /* Take a reference in case the DTO handler runs */
870 svc_xprt_get(&newxprt->sc_xprt); 607 svc_xprt_get(&newxprt->sc_xprt);
871 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) 608 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
@@ -896,7 +633,6 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
896{ 633{
897 struct svcxprt_rdma *rdma = 634 struct svcxprt_rdma *rdma =
898 container_of(xprt, struct svcxprt_rdma, sc_xprt); 635 container_of(xprt, struct svcxprt_rdma, sc_xprt);
899 dprintk("svc: svc_rdma_detach(%p)\n", xprt);
900 636
901 /* Disconnect and flush posted WQE */ 637 /* Disconnect and flush posted WQE */
902 rdma_disconnect(rdma->sc_cm_id); 638 rdma_disconnect(rdma->sc_cm_id);
@@ -908,7 +644,7 @@ static void __svc_rdma_free(struct work_struct *work)
908 container_of(work, struct svcxprt_rdma, sc_work); 644 container_of(work, struct svcxprt_rdma, sc_work);
909 struct svc_xprt *xprt = &rdma->sc_xprt; 645 struct svc_xprt *xprt = &rdma->sc_xprt;
910 646
911 dprintk("svcrdma: %s(%p)\n", __func__, rdma); 647 trace_svcrdma_xprt_free(xprt);
912 648
913 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 649 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
914 ib_drain_qp(rdma->sc_qp); 650 ib_drain_qp(rdma->sc_qp);
@@ -918,25 +654,7 @@ static void __svc_rdma_free(struct work_struct *work)
918 pr_err("svcrdma: sc_xprt still in use? (%d)\n", 654 pr_err("svcrdma: sc_xprt still in use? (%d)\n",
919 kref_read(&xprt->xpt_ref)); 655 kref_read(&xprt->xpt_ref));
920 656
921 while (!list_empty(&rdma->sc_read_complete_q)) { 657 svc_rdma_flush_recv_queues(rdma);
922 struct svc_rdma_op_ctxt *ctxt;
923 ctxt = list_first_entry(&rdma->sc_read_complete_q,
924 struct svc_rdma_op_ctxt, list);
925 list_del(&ctxt->list);
926 svc_rdma_put_context(ctxt, 1);
927 }
928 while (!list_empty(&rdma->sc_rq_dto_q)) {
929 struct svc_rdma_op_ctxt *ctxt;
930 ctxt = list_first_entry(&rdma->sc_rq_dto_q,
931 struct svc_rdma_op_ctxt, list);
932 list_del(&ctxt->list);
933 svc_rdma_put_context(ctxt, 1);
934 }
935
936 /* Warn if we leaked a resource or under-referenced */
937 if (rdma->sc_ctxt_used != 0)
938 pr_err("svcrdma: ctxt still in use? (%d)\n",
939 rdma->sc_ctxt_used);
940 658
941 /* Final put of backchannel client transport */ 659 /* Final put of backchannel client transport */
942 if (xprt->xpt_bc_xprt) { 660 if (xprt->xpt_bc_xprt) {
@@ -945,7 +663,8 @@ static void __svc_rdma_free(struct work_struct *work)
945 } 663 }
946 664
947 svc_rdma_destroy_rw_ctxts(rdma); 665 svc_rdma_destroy_rw_ctxts(rdma);
948 svc_rdma_destroy_ctxts(rdma); 666 svc_rdma_send_ctxts_destroy(rdma);
667 svc_rdma_recv_ctxts_destroy(rdma);
949 668
950 /* Destroy the QP if present (not a listener) */ 669 /* Destroy the QP if present (not a listener) */
951 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 670 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -998,51 +717,3 @@ static void svc_rdma_secure_port(struct svc_rqst *rqstp)
998static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) 717static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
999{ 718{
1000} 719}
1001
1002int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1003{
1004 struct ib_send_wr *bad_wr, *n_wr;
1005 int wr_count;
1006 int i;
1007 int ret;
1008
1009 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1010 return -ENOTCONN;
1011
1012 wr_count = 1;
1013 for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
1014 wr_count++;
1015
1016 /* If the SQ is full, wait until an SQ entry is available */
1017 while (1) {
1018 if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) {
1019 atomic_inc(&rdma_stat_sq_starve);
1020
1021 /* Wait until SQ WR available if SQ still full */
1022 atomic_add(wr_count, &xprt->sc_sq_avail);
1023 wait_event(xprt->sc_send_wait,
1024 atomic_read(&xprt->sc_sq_avail) > wr_count);
1025 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1026 return -ENOTCONN;
1027 continue;
1028 }
1029 /* Take a transport ref for each WR posted */
1030 for (i = 0; i < wr_count; i++)
1031 svc_xprt_get(&xprt->sc_xprt);
1032
1033 /* Bump used SQ WR count and post */
1034 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1035 if (ret) {
1036 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
1037 for (i = 0; i < wr_count; i ++)
1038 svc_xprt_put(&xprt->sc_xprt);
1039 dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret);
1040 dprintk(" sc_sq_avail=%d, sc_sq_depth=%d\n",
1041 atomic_read(&xprt->sc_sq_avail),
1042 xprt->sc_sq_depth);
1043 wake_up(&xprt->sc_send_wait);
1044 }
1045 break;
1046 }
1047 return ret;
1048}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index cc1aad325496..143ce2579ba9 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
1/* 2/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved. 3 * Copyright (c) 2014-2017 Oracle. All rights reserved.
3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -51,9 +52,13 @@
51#include <linux/module.h> 52#include <linux/module.h>
52#include <linux/slab.h> 53#include <linux/slab.h>
53#include <linux/seq_file.h> 54#include <linux/seq_file.h>
55#include <linux/smp.h>
56
54#include <linux/sunrpc/addr.h> 57#include <linux/sunrpc/addr.h>
58#include <linux/sunrpc/svc_rdma.h>
55 59
56#include "xprt_rdma.h" 60#include "xprt_rdma.h"
61#include <trace/events/rpcrdma.h>
57 62
58#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 63#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
59# define RPCDBG_FACILITY RPCDBG_TRANS 64# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -330,9 +335,7 @@ xprt_setup_rdma(struct xprt_create *args)
330 return ERR_PTR(-EBADF); 335 return ERR_PTR(-EBADF);
331 } 336 }
332 337
333 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 338 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
334 xprt_rdma_slot_table_entries,
335 xprt_rdma_slot_table_entries);
336 if (xprt == NULL) { 339 if (xprt == NULL) {
337 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", 340 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
338 __func__); 341 __func__);
@@ -364,7 +367,7 @@ xprt_setup_rdma(struct xprt_create *args)
364 xprt_set_bound(xprt); 367 xprt_set_bound(xprt);
365 xprt_rdma_format_addresses(xprt, sap); 368 xprt_rdma_format_addresses(xprt, sap);
366 369
367 cdata.max_requests = xprt->max_reqs; 370 cdata.max_requests = xprt_rdma_slot_table_entries;
368 371
369 cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ 372 cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
370 cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ 373 cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
@@ -537,6 +540,47 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
537 } 540 }
538} 541}
539 542
543/**
544 * xprt_rdma_alloc_slot - allocate an rpc_rqst
545 * @xprt: controlling RPC transport
546 * @task: RPC task requesting a fresh rpc_rqst
547 *
548 * tk_status values:
549 * %0 if task->tk_rqstp points to a fresh rpc_rqst
550 * %-EAGAIN if no rpc_rqst is available; queued on backlog
551 */
552static void
553xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
554{
555 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
556 struct rpcrdma_req *req;
557
558 req = rpcrdma_buffer_get(&r_xprt->rx_buf);
559 if (!req)
560 goto out_sleep;
561 task->tk_rqstp = &req->rl_slot;
562 task->tk_status = 0;
563 return;
564
565out_sleep:
566 rpc_sleep_on(&xprt->backlog, task, NULL);
567 task->tk_status = -EAGAIN;
568}
569
570/**
571 * xprt_rdma_free_slot - release an rpc_rqst
572 * @xprt: controlling RPC transport
573 * @rqst: rpc_rqst to release
574 *
575 */
576static void
577xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
578{
579 memset(rqst, 0, sizeof(*rqst));
580 rpcrdma_buffer_put(rpcr_to_rdmar(rqst));
581 rpc_wake_up_next(&xprt->backlog);
582}
583
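The new xprt_rdma_alloc_slot() and xprt_rdma_free_slot() bypass the generic slot table: the rpc_rqst is embedded in an rpcrdma_req (rl_slot), a task that cannot get one is queued on xprt->backlog with -EAGAIN, and freeing a request wakes the next waiter. The snippet below is a toy, single-threaded model of that allocate-or-backlog behaviour; the pool, counters, and names are invented for the example and do not correspond to RPC client internals.

/* Sketch: fixed request pool with a backlog counter standing in for
 * rpc_sleep_on()/rpc_wake_up_next(). */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define POOL_SIZE 2

static bool slot_busy[POOL_SIZE];
static unsigned int backlog;		/* tasks waiting for a request */

static int demo_alloc_slot(int *out)
{
	for (int i = 0; i < POOL_SIZE; i++) {
		if (!slot_busy[i]) {
			slot_busy[i] = true;
			*out = i;
			return 0;
		}
	}
	backlog++;			/* like rpc_sleep_on(&xprt->backlog, ...) */
	return -EAGAIN;
}

static void demo_free_slot(int slot)
{
	slot_busy[slot] = false;
	if (backlog) {			/* like rpc_wake_up_next(&xprt->backlog) */
		backlog--;
		printf("woke one backlogged task\n");
	}
}

int main(void)
{
	int a, b, c;

	demo_alloc_slot(&a);
	demo_alloc_slot(&b);
	if (demo_alloc_slot(&c) == -EAGAIN)
		printf("pool exhausted, task queued\n");
	demo_free_slot(a);
	return 0;
}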
540static bool 584static bool
541rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 585rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
542 size_t size, gfp_t flags) 586 size_t size, gfp_t flags)
@@ -607,13 +651,9 @@ xprt_rdma_allocate(struct rpc_task *task)
607{ 651{
608 struct rpc_rqst *rqst = task->tk_rqstp; 652 struct rpc_rqst *rqst = task->tk_rqstp;
609 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 653 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
610 struct rpcrdma_req *req; 654 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
611 gfp_t flags; 655 gfp_t flags;
612 656
613 req = rpcrdma_buffer_get(&r_xprt->rx_buf);
614 if (req == NULL)
615 goto out_get;
616
617 flags = RPCRDMA_DEF_GFP; 657 flags = RPCRDMA_DEF_GFP;
618 if (RPC_IS_SWAPPER(task)) 658 if (RPC_IS_SWAPPER(task))
619 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; 659 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -623,15 +663,12 @@ xprt_rdma_allocate(struct rpc_task *task)
623 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) 663 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
624 goto out_fail; 664 goto out_fail;
625 665
626 rpcrdma_set_xprtdata(rqst, req);
627 rqst->rq_buffer = req->rl_sendbuf->rg_base; 666 rqst->rq_buffer = req->rl_sendbuf->rg_base;
628 rqst->rq_rbuffer = req->rl_recvbuf->rg_base; 667 rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
629 trace_xprtrdma_allocate(task, req); 668 trace_xprtrdma_allocate(task, req);
630 return 0; 669 return 0;
631 670
632out_fail: 671out_fail:
633 rpcrdma_buffer_put(req);
634out_get:
635 trace_xprtrdma_allocate(task, NULL); 672 trace_xprtrdma_allocate(task, NULL);
636 return -ENOMEM; 673 return -ENOMEM;
637} 674}
@@ -652,7 +689,6 @@ xprt_rdma_free(struct rpc_task *task)
652 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) 689 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
653 rpcrdma_release_rqst(r_xprt, req); 690 rpcrdma_release_rqst(r_xprt, req);
654 trace_xprtrdma_rpc_done(task, req); 691 trace_xprtrdma_rpc_done(task, req);
655 rpcrdma_buffer_put(req);
656} 692}
657 693
658/** 694/**
@@ -690,9 +726,6 @@ xprt_rdma_send_request(struct rpc_task *task)
690 if (rc < 0) 726 if (rc < 0)
691 goto failed_marshal; 727 goto failed_marshal;
692 728
693 if (req->rl_reply == NULL) /* e.g. reconnection */
694 rpcrdma_recv_buffer_get(req);
695
696 /* Must suppress retransmit to maintain credits */ 729 /* Must suppress retransmit to maintain credits */
697 if (rqst->rq_connect_cookie == xprt->connect_cookie) 730 if (rqst->rq_connect_cookie == xprt->connect_cookie)
698 goto drop_connection; 731 goto drop_connection;
@@ -779,7 +812,8 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt)
779static const struct rpc_xprt_ops xprt_rdma_procs = { 812static const struct rpc_xprt_ops xprt_rdma_procs = {
780 .reserve_xprt = xprt_reserve_xprt_cong, 813 .reserve_xprt = xprt_reserve_xprt_cong,
781 .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ 814 .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
782 .alloc_slot = xprt_alloc_slot, 815 .alloc_slot = xprt_rdma_alloc_slot,
816 .free_slot = xprt_rdma_free_slot,
783 .release_request = xprt_release_rqst_cong, /* ditto */ 817 .release_request = xprt_release_rqst_cong, /* ditto */
784 .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ 818 .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
785 .timer = xprt_rdma_timer, 819 .timer = xprt_rdma_timer,
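
The transport.c hunks above move xprtrdma off the generic slot table: xprt_alloc() is no longer asked to pre-allocate rpc_rqst slots, cdata.max_requests is taken straight from xprt_rdma_slot_table_entries, and the new .alloc_slot/.free_slot callbacks hand out the rpc_rqst that now lives inside each rpcrdma_req, parking tasks on xprt->backlog when the send-buffer pool is empty. A minimal sketch of that pattern follows; "my_req", "my_pool_get" and "my_pool_put" are illustrative stand-ins for rpcrdma_req and rpcrdma_buffer_get/put, not kernel APIs.

    #include <linux/sunrpc/sched.h>
    #include <linux/sunrpc/xprt.h>

    struct my_req {
            struct rpc_rqst rl_slot;        /* embedded slot, cf. rpcrdma_req */
            /* ... transport-private request state ... */
    };

    /* assumed transport-private pool helpers (placeholders) */
    struct my_req *my_pool_get(struct rpc_xprt *xprt);
    void my_pool_put(struct my_req *req);

    static void my_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
    {
            struct my_req *req = my_pool_get(xprt);

            if (!req) {
                    /* no resources: queue the task and report -EAGAIN */
                    rpc_sleep_on(&xprt->backlog, task, NULL);
                    task->tk_status = -EAGAIN;
                    return;
            }
            task->tk_rqstp = &req->rl_slot;         /* slot lives in the req */
            task->tk_status = 0;
    }

    static void my_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
    {
            memset(rqst, 0, sizeof(*rqst));         /* slot is recycled as-is */
            my_pool_put(container_of(rqst, struct my_req, rl_slot));
            rpc_wake_up_next(&xprt->backlog);       /* let a waiting task retry */
    }
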
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index c345d365af88..16161a36dc73 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
1/* 2/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved. 3 * Copyright (c) 2014-2017 Oracle. All rights reserved.
3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -59,6 +60,7 @@
59#include <rdma/ib_cm.h> 60#include <rdma/ib_cm.h>
60 61
61#include "xprt_rdma.h" 62#include "xprt_rdma.h"
63#include <trace/events/rpcrdma.h>
62 64
63/* 65/*
64 * Globals/Macros 66 * Globals/Macros
@@ -71,8 +73,10 @@
71/* 73/*
72 * internal functions 74 * internal functions
73 */ 75 */
76static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
74static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 77static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
75static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); 78static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
79static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 80static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
77 81
78struct workqueue_struct *rpcrdma_receive_wq __read_mostly; 82struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -159,7 +163,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
159 rr_cqe); 163 rr_cqe);
160 164
161 /* WARNING: Only wr_id and status are reliable at this point */ 165 /* WARNING: Only wr_id and status are reliable at this point */
162 trace_xprtrdma_wc_receive(rep, wc); 166 trace_xprtrdma_wc_receive(wc);
163 if (wc->status != IB_WC_SUCCESS) 167 if (wc->status != IB_WC_SUCCESS)
164 goto out_fail; 168 goto out_fail;
165 169
@@ -231,7 +235,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
231 complete(&ia->ri_done); 235 complete(&ia->ri_done);
232 break; 236 break;
233 case RDMA_CM_EVENT_ADDR_ERROR: 237 case RDMA_CM_EVENT_ADDR_ERROR:
234 ia->ri_async_rc = -EHOSTUNREACH; 238 ia->ri_async_rc = -EPROTO;
235 complete(&ia->ri_done); 239 complete(&ia->ri_done);
236 break; 240 break;
237 case RDMA_CM_EVENT_ROUTE_ERROR: 241 case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -262,7 +266,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
262 connstate = -ENOTCONN; 266 connstate = -ENOTCONN;
263 goto connected; 267 goto connected;
264 case RDMA_CM_EVENT_UNREACHABLE: 268 case RDMA_CM_EVENT_UNREACHABLE:
265 connstate = -ENETDOWN; 269 connstate = -ENETUNREACH;
266 goto connected; 270 goto connected;
267 case RDMA_CM_EVENT_REJECTED: 271 case RDMA_CM_EVENT_REJECTED:
268 dprintk("rpcrdma: connection to %s:%s rejected: %s\n", 272 dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
@@ -305,8 +309,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
305 init_completion(&ia->ri_done); 309 init_completion(&ia->ri_done);
306 init_completion(&ia->ri_remove_done); 310 init_completion(&ia->ri_remove_done);
307 311
308 id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, 312 id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
309 IB_QPT_RC); 313 xprt, RDMA_PS_TCP, IB_QPT_RC);
310 if (IS_ERR(id)) { 314 if (IS_ERR(id)) {
311 rc = PTR_ERR(id); 315 rc = PTR_ERR(id);
312 dprintk("RPC: %s: rdma_create_id() failed %i\n", 316 dprintk("RPC: %s: rdma_create_id() failed %i\n",
@@ -500,8 +504,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
500 struct rpcrdma_create_data_internal *cdata) 504 struct rpcrdma_create_data_internal *cdata)
501{ 505{
502 struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; 506 struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
503 unsigned int max_qp_wr, max_sge;
504 struct ib_cq *sendcq, *recvcq; 507 struct ib_cq *sendcq, *recvcq;
508 unsigned int max_sge;
505 int rc; 509 int rc;
506 510
507 max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, 511 max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
@@ -512,29 +516,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
512 } 516 }
513 ia->ri_max_send_sges = max_sge; 517 ia->ri_max_send_sges = max_sge;
514 518
515 if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { 519 rc = ia->ri_ops->ro_open(ia, ep, cdata);
516 dprintk("RPC: %s: insufficient wqe's available\n", 520 if (rc)
517 __func__); 521 return rc;
518 return -ENOMEM;
519 }
520 max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;
521
522 /* check provider's send/recv wr limits */
523 if (cdata->max_requests > max_qp_wr)
524 cdata->max_requests = max_qp_wr;
525 522
526 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; 523 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
527 ep->rep_attr.qp_context = ep; 524 ep->rep_attr.qp_context = ep;
528 ep->rep_attr.srq = NULL; 525 ep->rep_attr.srq = NULL;
529 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
530 ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
531 ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */
532 rc = ia->ri_ops->ro_open(ia, ep, cdata);
533 if (rc)
534 return rc;
535 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
536 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
537 ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
538 ep->rep_attr.cap.max_send_sge = max_sge; 526 ep->rep_attr.cap.max_send_sge = max_sge;
539 ep->rep_attr.cap.max_recv_sge = 1; 527 ep->rep_attr.cap.max_recv_sge = 1;
540 ep->rep_attr.cap.max_inline_data = 0; 528 ep->rep_attr.cap.max_inline_data = 0;
@@ -741,7 +729,6 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
741{ 729{
742 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, 730 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
743 rx_ia); 731 rx_ia);
744 unsigned int extras;
745 int rc; 732 int rc;
746 733
747retry: 734retry:
@@ -785,9 +772,8 @@ retry:
785 } 772 }
786 773
787 dprintk("RPC: %s: connected\n", __func__); 774 dprintk("RPC: %s: connected\n", __func__);
788 extras = r_xprt->rx_buf.rb_bc_srv_max_requests; 775
789 if (extras) 776 rpcrdma_post_recvs(r_xprt, true);
790 rpcrdma_ep_post_extra_recv(r_xprt, extras);
791 777
792out: 778out:
793 if (rc) 779 if (rc)
@@ -893,6 +879,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
893 sc->sc_xprt = r_xprt; 879 sc->sc_xprt = r_xprt;
894 buf->rb_sc_ctxs[i] = sc; 880 buf->rb_sc_ctxs[i] = sc;
895 } 881 }
882 buf->rb_flags = 0;
896 883
897 return 0; 884 return 0;
898 885
@@ -950,7 +937,7 @@ out_emptyq:
950 * completions recently. This is a sign the Send Queue is 937 * completions recently. This is a sign the Send Queue is
951 * backing up. Cause the caller to pause and try again. 938 * backing up. Cause the caller to pause and try again.
952 */ 939 */
953 dprintk("RPC: %s: empty sendctx queue\n", __func__); 940 set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
954 r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); 941 r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
955 r_xprt->rx_stats.empty_sendctx_q++; 942 r_xprt->rx_stats.empty_sendctx_q++;
956 return NULL; 943 return NULL;
@@ -965,7 +952,8 @@ out_emptyq:
965 * 952 *
966 * The caller serializes calls to this function (per rpcrdma_buffer). 953 * The caller serializes calls to this function (per rpcrdma_buffer).
967 */ 954 */
968void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) 955static void
956rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
969{ 957{
970 struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; 958 struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
971 unsigned long next_tail; 959 unsigned long next_tail;
@@ -984,6 +972,11 @@ void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
984 972
985 /* Paired with READ_ONCE */ 973 /* Paired with READ_ONCE */
986 smp_store_release(&buf->rb_sc_tail, next_tail); 974 smp_store_release(&buf->rb_sc_tail, next_tail);
975
976 if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
977 smp_mb__after_atomic();
978 xprt_write_space(&sc->sc_xprt->rx_xprt);
979 }
987} 980}
988 981
989static void 982static void
@@ -1097,14 +1090,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
1097 return req; 1090 return req;
1098} 1091}
1099 1092
1100/** 1093static int
1101 * rpcrdma_create_rep - Allocate an rpcrdma_rep object 1094rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
1102 * @r_xprt: controlling transport
1103 *
1104 * Returns 0 on success or a negative errno on failure.
1105 */
1106int
1107rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1108{ 1095{
1109 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 1096 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1110 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1097 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1132,6 +1119,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1132 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 1119 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
1133 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1120 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1134 rep->rr_recv_wr.num_sge = 1; 1121 rep->rr_recv_wr.num_sge = 1;
1122 rep->rr_temp = temp;
1135 1123
1136 spin_lock(&buf->rb_lock); 1124 spin_lock(&buf->rb_lock);
1137 list_add(&rep->rr_list, &buf->rb_recv_bufs); 1125 list_add(&rep->rr_list, &buf->rb_recv_bufs);
@@ -1183,12 +1171,8 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1183 list_add(&req->rl_list, &buf->rb_send_bufs); 1171 list_add(&req->rl_list, &buf->rb_send_bufs);
1184 } 1172 }
1185 1173
1174 buf->rb_posted_receives = 0;
1186 INIT_LIST_HEAD(&buf->rb_recv_bufs); 1175 INIT_LIST_HEAD(&buf->rb_recv_bufs);
1187 for (i = 0; i <= buf->rb_max_requests; i++) {
1188 rc = rpcrdma_create_rep(r_xprt);
1189 if (rc)
1190 goto out;
1191 }
1192 1176
1193 rc = rpcrdma_sendctxs_create(r_xprt); 1177 rc = rpcrdma_sendctxs_create(r_xprt);
1194 if (rc) 1178 if (rc)
@@ -1200,28 +1184,6 @@ out:
1200 return rc; 1184 return rc;
1201} 1185}
1202 1186
1203static struct rpcrdma_req *
1204rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
1205{
1206 struct rpcrdma_req *req;
1207
1208 req = list_first_entry(&buf->rb_send_bufs,
1209 struct rpcrdma_req, rl_list);
1210 list_del_init(&req->rl_list);
1211 return req;
1212}
1213
1214static struct rpcrdma_rep *
1215rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
1216{
1217 struct rpcrdma_rep *rep;
1218
1219 rep = list_first_entry(&buf->rb_recv_bufs,
1220 struct rpcrdma_rep, rr_list);
1221 list_del(&rep->rr_list);
1222 return rep;
1223}
1224
1225static void 1187static void
1226rpcrdma_destroy_rep(struct rpcrdma_rep *rep) 1188rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
1227{ 1189{
@@ -1280,10 +1242,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1280 while (!list_empty(&buf->rb_recv_bufs)) { 1242 while (!list_empty(&buf->rb_recv_bufs)) {
1281 struct rpcrdma_rep *rep; 1243 struct rpcrdma_rep *rep;
1282 1244
1283 rep = rpcrdma_buffer_get_rep_locked(buf); 1245 rep = list_first_entry(&buf->rb_recv_bufs,
1246 struct rpcrdma_rep, rr_list);
1247 list_del(&rep->rr_list);
1284 rpcrdma_destroy_rep(rep); 1248 rpcrdma_destroy_rep(rep);
1285 } 1249 }
1286 buf->rb_send_count = 0;
1287 1250
1288 spin_lock(&buf->rb_reqslock); 1251 spin_lock(&buf->rb_reqslock);
1289 while (!list_empty(&buf->rb_allreqs)) { 1252 while (!list_empty(&buf->rb_allreqs)) {
@@ -1298,7 +1261,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1298 spin_lock(&buf->rb_reqslock); 1261 spin_lock(&buf->rb_reqslock);
1299 } 1262 }
1300 spin_unlock(&buf->rb_reqslock); 1263 spin_unlock(&buf->rb_reqslock);
1301 buf->rb_recv_count = 0;
1302 1264
1303 rpcrdma_mrs_destroy(buf); 1265 rpcrdma_mrs_destroy(buf);
1304} 1266}
@@ -1371,27 +1333,11 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
1371 __rpcrdma_mr_put(&r_xprt->rx_buf, mr); 1333 __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
1372} 1334}
1373 1335
1374static struct rpcrdma_rep * 1336/**
1375rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) 1337 * rpcrdma_buffer_get - Get a request buffer
1376{ 1338 * @buffers: Buffer pool from which to obtain a buffer
1377 /* If an RPC previously completed without a reply (say, a
1378 * credential problem or a soft timeout occurs) then hold off
1379 * on supplying more Receive buffers until the number of new
1380 * pending RPCs catches up to the number of posted Receives.
1381 */
1382 if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
1383 return NULL;
1384
1385 if (unlikely(list_empty(&buffers->rb_recv_bufs)))
1386 return NULL;
1387 buffers->rb_recv_count++;
1388 return rpcrdma_buffer_get_rep_locked(buffers);
1389}
1390
1391/*
1392 * Get a set of request/reply buffers.
1393 * 1339 *
1394 * Reply buffer (if available) is attached to send buffer upon return. 1340 * Returns a fresh rpcrdma_req, or NULL if none are available.
1395 */ 1341 */
1396struct rpcrdma_req * 1342struct rpcrdma_req *
1397rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) 1343rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
@@ -1399,23 +1345,18 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1399 struct rpcrdma_req *req; 1345 struct rpcrdma_req *req;
1400 1346
1401 spin_lock(&buffers->rb_lock); 1347 spin_lock(&buffers->rb_lock);
1402 if (list_empty(&buffers->rb_send_bufs)) 1348 req = list_first_entry_or_null(&buffers->rb_send_bufs,
1403 goto out_reqbuf; 1349 struct rpcrdma_req, rl_list);
1404 buffers->rb_send_count++; 1350 if (req)
1405 req = rpcrdma_buffer_get_req_locked(buffers); 1351 list_del_init(&req->rl_list);
1406 req->rl_reply = rpcrdma_buffer_get_rep(buffers);
1407 spin_unlock(&buffers->rb_lock); 1352 spin_unlock(&buffers->rb_lock);
1408
1409 return req; 1353 return req;
1410
1411out_reqbuf:
1412 spin_unlock(&buffers->rb_lock);
1413 return NULL;
1414} 1354}
1415 1355
1416/* 1356/**
1417 * Put request/reply buffers back into pool. 1357 * rpcrdma_buffer_put - Put request/reply buffers back into pool
1418 * Pre-decrement counter/array index. 1358 * @req: object to return
1359 *
1419 */ 1360 */
1420void 1361void
1421rpcrdma_buffer_put(struct rpcrdma_req *req) 1362rpcrdma_buffer_put(struct rpcrdma_req *req)
@@ -1426,27 +1367,16 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1426 req->rl_reply = NULL; 1367 req->rl_reply = NULL;
1427 1368
1428 spin_lock(&buffers->rb_lock); 1369 spin_lock(&buffers->rb_lock);
1429 buffers->rb_send_count--; 1370 list_add(&req->rl_list, &buffers->rb_send_bufs);
1430 list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
1431 if (rep) { 1371 if (rep) {
1432 buffers->rb_recv_count--; 1372 if (!rep->rr_temp) {
1433 list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); 1373 list_add(&rep->rr_list, &buffers->rb_recv_bufs);
1374 rep = NULL;
1375 }
1434 } 1376 }
1435 spin_unlock(&buffers->rb_lock); 1377 spin_unlock(&buffers->rb_lock);
1436} 1378 if (rep)
1437 1379 rpcrdma_destroy_rep(rep);
1438/*
1439 * Recover reply buffers from pool.
1440 * This happens when recovering from disconnect.
1441 */
1442void
1443rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1444{
1445 struct rpcrdma_buffer *buffers = req->rl_buffer;
1446
1447 spin_lock(&buffers->rb_lock);
1448 req->rl_reply = rpcrdma_buffer_get_rep(buffers);
1449 spin_unlock(&buffers->rb_lock);
1450} 1380}
1451 1381
1452/* 1382/*
@@ -1458,10 +1388,13 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1458{ 1388{
1459 struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; 1389 struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
1460 1390
1461 spin_lock(&buffers->rb_lock); 1391 if (!rep->rr_temp) {
1462 buffers->rb_recv_count--; 1392 spin_lock(&buffers->rb_lock);
1463 list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); 1393 list_add(&rep->rr_list, &buffers->rb_recv_bufs);
1464 spin_unlock(&buffers->rb_lock); 1394 spin_unlock(&buffers->rb_lock);
1395 } else {
1396 rpcrdma_destroy_rep(rep);
1397 }
1465} 1398}
1466 1399
1467/** 1400/**
@@ -1557,13 +1490,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1557 struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; 1490 struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
1558 int rc; 1491 int rc;
1559 1492
1560 if (req->rl_reply) {
1561 rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
1562 if (rc)
1563 return rc;
1564 req->rl_reply = NULL;
1565 }
1566
1567 if (!ep->rep_send_count || 1493 if (!ep->rep_send_count ||
1568 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { 1494 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1569 send_wr->send_flags |= IB_SEND_SIGNALED; 1495 send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -1580,61 +1506,69 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1580 return 0; 1506 return 0;
1581} 1507}
1582 1508
1583int
1584rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1585 struct rpcrdma_rep *rep)
1586{
1587 struct ib_recv_wr *recv_wr_fail;
1588 int rc;
1589
1590 if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
1591 goto out_map;
1592 rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
1593 trace_xprtrdma_post_recv(rep, rc);
1594 if (rc)
1595 return -ENOTCONN;
1596 return 0;
1597
1598out_map:
1599 pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
1600 return -EIO;
1601}
1602
1603/** 1509/**
1604 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests 1510 * rpcrdma_post_recvs - Maybe post some Receive buffers
1605 * @r_xprt: transport associated with these backchannel resources 1511 * @r_xprt: controlling transport
1606 * @count: minimum number of incoming requests expected 1512 * @temp: when true, allocate temp rpcrdma_rep objects
1607 * 1513 *
1608 * Returns zero if all requested buffers were posted, or a negative errno.
1609 */ 1514 */
1610int 1515void
1611rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) 1516rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
1612{ 1517{
1613 struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; 1518 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1614 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1519 struct ib_recv_wr *wr, *bad_wr;
1615 struct rpcrdma_rep *rep; 1520 int needed, count, rc;
1616 int rc;
1617 1521
1618 while (count--) { 1522 needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
1619 spin_lock(&buffers->rb_lock); 1523 if (buf->rb_posted_receives > needed)
1620 if (list_empty(&buffers->rb_recv_bufs)) 1524 return;
1621 goto out_reqbuf; 1525 needed -= buf->rb_posted_receives;
1622 rep = rpcrdma_buffer_get_rep_locked(buffers);
1623 spin_unlock(&buffers->rb_lock);
1624 1526
1625 rc = rpcrdma_ep_post_recv(ia, rep); 1527 count = 0;
1626 if (rc) 1528 wr = NULL;
1627 goto out_rc; 1529 while (needed) {
1628 } 1530 struct rpcrdma_regbuf *rb;
1531 struct rpcrdma_rep *rep;
1629 1532
1630 return 0; 1533 spin_lock(&buf->rb_lock);
1534 rep = list_first_entry_or_null(&buf->rb_recv_bufs,
1535 struct rpcrdma_rep, rr_list);
1536 if (likely(rep))
1537 list_del(&rep->rr_list);
1538 spin_unlock(&buf->rb_lock);
1539 if (!rep) {
1540 if (rpcrdma_create_rep(r_xprt, temp))
1541 break;
1542 continue;
1543 }
1631 1544
1632out_reqbuf: 1545 rb = rep->rr_rdmabuf;
1633 spin_unlock(&buffers->rb_lock); 1546 if (!rpcrdma_regbuf_is_mapped(rb)) {
1634 trace_xprtrdma_noreps(r_xprt); 1547 if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
1635 return -ENOMEM; 1548 rpcrdma_recv_buffer_put(rep);
1549 break;
1550 }
1551 }
1636 1552
1637out_rc: 1553 trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
1638 rpcrdma_recv_buffer_put(rep); 1554 rep->rr_recv_wr.next = wr;
1639 return rc; 1555 wr = &rep->rr_recv_wr;
1556 ++count;
1557 --needed;
1558 }
1559 if (!count)
1560 return;
1561
1562 rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr);
1563 if (rc) {
1564 for (wr = bad_wr; wr; wr = wr->next) {
1565 struct rpcrdma_rep *rep;
1566
1567 rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
1568 rpcrdma_recv_buffer_put(rep);
1569 --count;
1570 }
1571 }
1572 buf->rb_posted_receives += count;
1573 trace_xprtrdma_post_recvs(r_xprt, count, rc);
1640} 1574}
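
In verbs.c, Receives are no longer posted one per request: rpcrdma_post_recvs() tops the receive queue up to rb_credits plus twice rb_bc_srv_max_requests, chains the rpcrdma_rep receive WRs together and hands the whole chain to a single ib_post_recv() call, and rpcrdma_sendctx_put_locked() now clears RPCRDMA_BUF_F_EMPTY_SCQ and calls xprt_write_space() so senders blocked on an empty sendctx queue are woken. The chaining idiom, reduced to a sketch ("my_rep", "post_recv_chain" and "nreps" are illustrative, not from the patch):

    #include <rdma/ib_verbs.h>

    struct my_rep {
            struct ib_recv_wr rr_recv_wr;   /* one WR per receive buffer */
            /* ... receive buffer state ... */
    };

    static int post_recv_chain(struct ib_qp *qp, struct my_rep *reps, int nreps)
    {
            struct ib_recv_wr *chain = NULL, *bad_wr;
            int i;

            for (i = 0; i < nreps; i++) {
                    reps[i].rr_recv_wr.next = chain;  /* prepend to the chain */
                    chain = &reps[i].rr_recv_wr;
            }
            /* one verb call posts every WR in the chain */
            return chain ? ib_post_recv(qp, chain, &bad_wr) : 0;
    }
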
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cb41b12a3bf8..2ca14f7c2d51 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
1/* 2/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved. 3 * Copyright (c) 2014-2017 Oracle. All rights reserved.
3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -196,6 +197,7 @@ struct rpcrdma_rep {
196 __be32 rr_proc; 197 __be32 rr_proc;
197 int rr_wc_flags; 198 int rr_wc_flags;
198 u32 rr_inv_rkey; 199 u32 rr_inv_rkey;
200 bool rr_temp;
199 struct rpcrdma_regbuf *rr_rdmabuf; 201 struct rpcrdma_regbuf *rr_rdmabuf;
200 struct rpcrdma_xprt *rr_rxprt; 202 struct rpcrdma_xprt *rr_rxprt;
201 struct work_struct rr_work; 203 struct work_struct rr_work;
@@ -334,6 +336,7 @@ enum {
334struct rpcrdma_buffer; 336struct rpcrdma_buffer;
335struct rpcrdma_req { 337struct rpcrdma_req {
336 struct list_head rl_list; 338 struct list_head rl_list;
339 struct rpc_rqst rl_slot;
337 struct rpcrdma_buffer *rl_buffer; 340 struct rpcrdma_buffer *rl_buffer;
338 struct rpcrdma_rep *rl_reply; 341 struct rpcrdma_rep *rl_reply;
339 struct xdr_stream rl_stream; 342 struct xdr_stream rl_stream;
@@ -356,16 +359,10 @@ enum {
356 RPCRDMA_REQ_F_TX_RESOURCES, 359 RPCRDMA_REQ_F_TX_RESOURCES,
357}; 360};
358 361
359static inline void
360rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
361{
362 rqst->rq_xprtdata = req;
363}
364
365static inline struct rpcrdma_req * 362static inline struct rpcrdma_req *
366rpcr_to_rdmar(const struct rpc_rqst *rqst) 363rpcr_to_rdmar(const struct rpc_rqst *rqst)
367{ 364{
368 return rqst->rq_xprtdata; 365 return container_of(rqst, struct rpcrdma_req, rl_slot);
369} 366}
370 367
371static inline void 368static inline void
@@ -401,11 +398,12 @@ struct rpcrdma_buffer {
401 struct rpcrdma_sendctx **rb_sc_ctxs; 398 struct rpcrdma_sendctx **rb_sc_ctxs;
402 399
403 spinlock_t rb_lock; /* protect buf lists */ 400 spinlock_t rb_lock; /* protect buf lists */
404 int rb_send_count, rb_recv_count;
405 struct list_head rb_send_bufs; 401 struct list_head rb_send_bufs;
406 struct list_head rb_recv_bufs; 402 struct list_head rb_recv_bufs;
403 unsigned long rb_flags;
407 u32 rb_max_requests; 404 u32 rb_max_requests;
408 u32 rb_credits; /* most recent credit grant */ 405 u32 rb_credits; /* most recent credit grant */
406 int rb_posted_receives;
409 407
410 u32 rb_bc_srv_max_requests; 408 u32 rb_bc_srv_max_requests;
411 spinlock_t rb_reqslock; /* protect rb_allreqs */ 409 spinlock_t rb_reqslock; /* protect rb_allreqs */
@@ -420,6 +418,11 @@ struct rpcrdma_buffer {
420}; 418};
421#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) 419#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
422 420
421/* rb_flags */
422enum {
423 RPCRDMA_BUF_F_EMPTY_SCQ = 0,
424};
425
423/* 426/*
424 * Internal structure for transport instance creation. This 427 * Internal structure for transport instance creation. This
425 * exists primarily for modularity. 428 * exists primarily for modularity.
@@ -561,18 +564,16 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
561 564
562int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, 565int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
563 struct rpcrdma_req *); 566 struct rpcrdma_req *);
564int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); 567void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
565 568
566/* 569/*
567 * Buffer calls - xprtrdma/verbs.c 570 * Buffer calls - xprtrdma/verbs.c
568 */ 571 */
569struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); 572struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
570void rpcrdma_destroy_req(struct rpcrdma_req *); 573void rpcrdma_destroy_req(struct rpcrdma_req *);
571int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
572int rpcrdma_buffer_create(struct rpcrdma_xprt *); 574int rpcrdma_buffer_create(struct rpcrdma_xprt *);
573void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 575void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
574struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); 576struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
575void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
576 577
577struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); 578struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
578void rpcrdma_mr_put(struct rpcrdma_mr *mr); 579void rpcrdma_mr_put(struct rpcrdma_mr *mr);
@@ -581,7 +582,6 @@ void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
581 582
582struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 583struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
583void rpcrdma_buffer_put(struct rpcrdma_req *); 584void rpcrdma_buffer_put(struct rpcrdma_req *);
584void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
585void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); 585void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
586 586
587struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, 587struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
@@ -603,8 +603,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
603 return __rpcrdma_dma_map_regbuf(ia, rb); 603 return __rpcrdma_dma_map_regbuf(ia, rb);
604} 604}
605 605
606int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
607
608int rpcrdma_alloc_wq(void); 606int rpcrdma_alloc_wq(void);
609void rpcrdma_destroy_wq(void); 607void rpcrdma_destroy_wq(void);
610 608
@@ -675,5 +673,3 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
675extern struct xprt_class xprt_rdma_bc; 673extern struct xprt_class xprt_rdma_bc;
676 674
677#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 675#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
678
679#include <trace/events/rpcrdma.h>
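
The header change completes the slot rework: struct rpcrdma_req gains an embedded rpc_rqst (rl_slot), rpcrdma_set_xprtdata() and the rq_xprtdata pointer go away, and rpcr_to_rdmar() becomes a plain container_of(). The same idiom in a generic, illustrative form (the struct names here are placeholders):

    #include <linux/kernel.h>

    struct inner { long x; };

    struct outer {
            int cookie;
            struct inner member;    /* embedded object, cf. rl_slot */
    };

    static struct outer *outer_from_inner(struct inner *in)
    {
            /* recover the enclosing object from a pointer to its member */
            return container_of(in, struct outer, member);
    }
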
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c8902f11efdd..9e1c5024aba9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2763,6 +2763,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
2763 .reserve_xprt = xprt_reserve_xprt, 2763 .reserve_xprt = xprt_reserve_xprt,
2764 .release_xprt = xs_tcp_release_xprt, 2764 .release_xprt = xs_tcp_release_xprt,
2765 .alloc_slot = xprt_alloc_slot, 2765 .alloc_slot = xprt_alloc_slot,
2766 .free_slot = xprt_free_slot,
2766 .rpcbind = xs_local_rpcbind, 2767 .rpcbind = xs_local_rpcbind,
2767 .set_port = xs_local_set_port, 2768 .set_port = xs_local_set_port,
2768 .connect = xs_local_connect, 2769 .connect = xs_local_connect,
@@ -2782,6 +2783,7 @@ static const struct rpc_xprt_ops xs_udp_ops = {
2782 .reserve_xprt = xprt_reserve_xprt_cong, 2783 .reserve_xprt = xprt_reserve_xprt_cong,
2783 .release_xprt = xprt_release_xprt_cong, 2784 .release_xprt = xprt_release_xprt_cong,
2784 .alloc_slot = xprt_alloc_slot, 2785 .alloc_slot = xprt_alloc_slot,
2786 .free_slot = xprt_free_slot,
2785 .rpcbind = rpcb_getport_async, 2787 .rpcbind = rpcb_getport_async,
2786 .set_port = xs_set_port, 2788 .set_port = xs_set_port,
2787 .connect = xs_connect, 2789 .connect = xs_connect,
@@ -2803,6 +2805,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
2803 .reserve_xprt = xprt_reserve_xprt, 2805 .reserve_xprt = xprt_reserve_xprt,
2804 .release_xprt = xs_tcp_release_xprt, 2806 .release_xprt = xs_tcp_release_xprt,
2805 .alloc_slot = xprt_lock_and_alloc_slot, 2807 .alloc_slot = xprt_lock_and_alloc_slot,
2808 .free_slot = xprt_free_slot,
2806 .rpcbind = rpcb_getport_async, 2809 .rpcbind = rpcb_getport_async,
2807 .set_port = xs_set_port, 2810 .set_port = xs_set_port,
2808 .connect = xs_connect, 2811 .connect = xs_connect,
@@ -2834,6 +2837,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = {
2834 .reserve_xprt = xprt_reserve_xprt, 2837 .reserve_xprt = xprt_reserve_xprt,
2835 .release_xprt = xprt_release_xprt, 2838 .release_xprt = xprt_release_xprt,
2836 .alloc_slot = xprt_alloc_slot, 2839 .alloc_slot = xprt_alloc_slot,
2840 .free_slot = xprt_free_slot,
2837 .buf_alloc = bc_malloc, 2841 .buf_alloc = bc_malloc,
2838 .buf_free = bc_free, 2842 .buf_free = bc_free,
2839 .send_request = bc_send_request, 2843 .send_request = bc_send_request,
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4492cda45566..a2f76743c73a 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -285,8 +285,9 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
285 if (!trans_buf) 285 if (!trans_buf)
286 return -ENOMEM; 286 return -ENOMEM;
287 287
288 attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * 288 attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1,
289 sizeof(struct nlattr *), GFP_KERNEL); 289 sizeof(struct nlattr *),
290 GFP_KERNEL);
290 if (!attrbuf) { 291 if (!attrbuf) {
291 err = -ENOMEM; 292 err = -ENOMEM;
292 goto trans_out; 293 goto trans_out;
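
The tipc compat change replaces an open-coded "count * size" kmalloc() with kmalloc_array(), which returns NULL on multiplication overflow instead of silently under-allocating. The same idiom as a sketch (the function name and use of maxattr are illustrative):

    #include <linux/slab.h>
    #include <net/netlink.h>

    static struct nlattr **alloc_attr_table(unsigned int maxattr)
    {
            /* maxattr + 1 pointers, with the multiplication overflow-checked */
            return kmalloc_array(maxattr + 1, sizeof(struct nlattr *),
                                 GFP_KERNEL);
    }
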
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 14a5d055717d..930852c54d7a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -692,9 +692,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
692} 692}
693 693
694/** 694/**
695 * tipc_poll - read pollmask 695 * tipc_poll - read and possibly block on pollmask
696 * @file: file structure associated with the socket 696 * @file: file structure associated with the socket
697 * @sock: socket for which to calculate the poll bits 697 * @sock: socket for which to calculate the poll bits
698 * @wait: ???
698 * 699 *
699 * Returns pollmask value 700 * Returns pollmask value
700 * 701 *
@@ -708,12 +709,15 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
708 * imply that the operation will succeed, merely that it should be performed 709 * imply that the operation will succeed, merely that it should be performed
709 * and will not block. 710 * and will not block.
710 */ 711 */
711static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events) 712static __poll_t tipc_poll(struct file *file, struct socket *sock,
713 poll_table *wait)
712{ 714{
713 struct sock *sk = sock->sk; 715 struct sock *sk = sock->sk;
714 struct tipc_sock *tsk = tipc_sk(sk); 716 struct tipc_sock *tsk = tipc_sk(sk);
715 __poll_t revents = 0; 717 __poll_t revents = 0;
716 718
719 sock_poll_wait(file, sk_sleep(sk), wait);
720
717 if (sk->sk_shutdown & RCV_SHUTDOWN) 721 if (sk->sk_shutdown & RCV_SHUTDOWN)
718 revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 722 revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
719 if (sk->sk_shutdown == SHUTDOWN_MASK) 723 if (sk->sk_shutdown == SHUTDOWN_MASK)
@@ -3033,7 +3037,7 @@ static const struct proto_ops msg_ops = {
3033 .socketpair = tipc_socketpair, 3037 .socketpair = tipc_socketpair,
3034 .accept = sock_no_accept, 3038 .accept = sock_no_accept,
3035 .getname = tipc_getname, 3039 .getname = tipc_getname,
3036 .poll_mask = tipc_poll_mask, 3040 .poll = tipc_poll,
3037 .ioctl = tipc_ioctl, 3041 .ioctl = tipc_ioctl,
3038 .listen = sock_no_listen, 3042 .listen = sock_no_listen,
3039 .shutdown = tipc_shutdown, 3043 .shutdown = tipc_shutdown,
@@ -3054,7 +3058,7 @@ static const struct proto_ops packet_ops = {
3054 .socketpair = tipc_socketpair, 3058 .socketpair = tipc_socketpair,
3055 .accept = tipc_accept, 3059 .accept = tipc_accept,
3056 .getname = tipc_getname, 3060 .getname = tipc_getname,
3057 .poll_mask = tipc_poll_mask, 3061 .poll = tipc_poll,
3058 .ioctl = tipc_ioctl, 3062 .ioctl = tipc_ioctl,
3059 .listen = tipc_listen, 3063 .listen = tipc_listen,
3060 .shutdown = tipc_shutdown, 3064 .shutdown = tipc_shutdown,
@@ -3075,7 +3079,7 @@ static const struct proto_ops stream_ops = {
3075 .socketpair = tipc_socketpair, 3079 .socketpair = tipc_socketpair,
3076 .accept = tipc_accept, 3080 .accept = tipc_accept,
3077 .getname = tipc_getname, 3081 .getname = tipc_getname,
3078 .poll_mask = tipc_poll_mask, 3082 .poll = tipc_poll,
3079 .ioctl = tipc_ioctl, 3083 .ioctl = tipc_ioctl,
3080 .listen = tipc_listen, 3084 .listen = tipc_listen,
3081 .shutdown = tipc_shutdown, 3085 .shutdown = tipc_shutdown,
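
The tipc, unix, vsock, x25 and xsk hunks in this series all convert the experimental ->poll_mask() hook back to a classic ->poll() method: the handler registers itself on the socket's wait queue with sock_poll_wait() and then computes the event mask, reading the requested events from the poll_table when it wants to skip work. The general shape, sketched with illustrative names and simplified event tests:

    #include <linux/poll.h>
    #include <net/sock.h>

    static __poll_t my_poll(struct file *file, struct socket *sock,
                            poll_table *wait)
    {
            struct sock *sk = sock->sk;
            __poll_t mask = 0;

            /* hook this socket's wait queue into the caller's poll table */
            sock_poll_wait(file, sk_sleep(sk), wait);

            if (sk->sk_shutdown & RCV_SHUTDOWN)
                    mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
            if (!skb_queue_empty(&sk->sk_receive_queue))
                    mask |= EPOLLIN | EPOLLRDNORM;

            /* skip (possibly expensive) write checks unless they were asked for;
             * a real handler would test writability here instead of assuming it
             */
            if (poll_requested_events(wait) & (EPOLLOUT | EPOLLWRNORM))
                    mask |= EPOLLOUT | EPOLLWRNORM;

            return mask;
    }
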
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 8ca57d01b18f..d2380548f8f6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -191,18 +191,12 @@ static void tls_free_both_sg(struct sock *sk)
191} 191}
192 192
193static int tls_do_encryption(struct tls_context *tls_ctx, 193static int tls_do_encryption(struct tls_context *tls_ctx,
194 struct tls_sw_context_tx *ctx, size_t data_len, 194 struct tls_sw_context_tx *ctx,
195 gfp_t flags) 195 struct aead_request *aead_req,
196 size_t data_len)
196{ 197{
197 unsigned int req_size = sizeof(struct aead_request) +
198 crypto_aead_reqsize(ctx->aead_send);
199 struct aead_request *aead_req;
200 int rc; 198 int rc;
201 199
202 aead_req = kzalloc(req_size, flags);
203 if (!aead_req)
204 return -ENOMEM;
205
206 ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size; 200 ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size;
207 ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size; 201 ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size;
208 202
@@ -219,7 +213,6 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
219 ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size; 213 ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size;
220 ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size; 214 ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size;
221 215
222 kfree(aead_req);
223 return rc; 216 return rc;
224} 217}
225 218
@@ -228,8 +221,14 @@ static int tls_push_record(struct sock *sk, int flags,
228{ 221{
229 struct tls_context *tls_ctx = tls_get_ctx(sk); 222 struct tls_context *tls_ctx = tls_get_ctx(sk);
230 struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); 223 struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
224 struct aead_request *req;
231 int rc; 225 int rc;
232 226
227 req = kzalloc(sizeof(struct aead_request) +
228 crypto_aead_reqsize(ctx->aead_send), sk->sk_allocation);
229 if (!req)
230 return -ENOMEM;
231
233 sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1); 232 sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
234 sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1); 233 sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
235 234
@@ -245,15 +244,14 @@ static int tls_push_record(struct sock *sk, int flags,
245 tls_ctx->pending_open_record_frags = 0; 244 tls_ctx->pending_open_record_frags = 0;
246 set_bit(TLS_PENDING_CLOSED_RECORD, &tls_ctx->flags); 245 set_bit(TLS_PENDING_CLOSED_RECORD, &tls_ctx->flags);
247 246
248 rc = tls_do_encryption(tls_ctx, ctx, ctx->sg_plaintext_size, 247 rc = tls_do_encryption(tls_ctx, ctx, req, ctx->sg_plaintext_size);
249 sk->sk_allocation);
250 if (rc < 0) { 248 if (rc < 0) {
251 /* If we are called from write_space and 249 /* If we are called from write_space and
252 * we fail, we need to set this SOCK_NOSPACE 250 * we fail, we need to set this SOCK_NOSPACE
253 * to trigger another write_space in the future. 251 * to trigger another write_space in the future.
254 */ 252 */
255 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 253 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
256 return rc; 254 goto out_req;
257 } 255 }
258 256
259 free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem, 257 free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem,
@@ -268,6 +266,8 @@ static int tls_push_record(struct sock *sk, int flags,
268 tls_err_abort(sk, EBADMSG); 266 tls_err_abort(sk, EBADMSG);
269 267
270 tls_advance_record_sn(sk, &tls_ctx->tx); 268 tls_advance_record_sn(sk, &tls_ctx->tx);
269out_req:
270 kfree(req);
271 return rc; 271 return rc;
272} 272}
273 273
@@ -754,7 +754,7 @@ int tls_sw_recvmsg(struct sock *sk,
754 struct sk_buff *skb; 754 struct sk_buff *skb;
755 ssize_t copied = 0; 755 ssize_t copied = 0;
756 bool cmsg = false; 756 bool cmsg = false;
757 int err = 0; 757 int target, err = 0;
758 long timeo; 758 long timeo;
759 759
760 flags |= nonblock; 760 flags |= nonblock;
@@ -764,6 +764,7 @@ int tls_sw_recvmsg(struct sock *sk,
764 764
765 lock_sock(sk); 765 lock_sock(sk);
766 766
767 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
767 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 768 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
768 do { 769 do {
769 bool zc = false; 770 bool zc = false;
@@ -856,6 +857,9 @@ fallback_to_reg_recv:
856 goto recv_end; 857 goto recv_end;
857 } 858 }
858 } 859 }
860 /* If we have a new message from strparser, continue now. */
861 if (copied >= target && !ctx->recv_pkt)
862 break;
859 } while (len); 863 } while (len);
860 864
861recv_end: 865recv_end:
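
Two separate tls_sw changes are visible above: the aead_request is now allocated once in tls_push_record(), sized with crypto_aead_reqsize() and freed on every exit path, rather than being allocated per call inside tls_do_encryption(); and tls_sw_recvmsg() honours the receive low-water mark via sock_rcvlowat(), breaking out of the loop once "target" bytes are copied and no further record is queued. A sketch of the request-sizing idiom; "alloc_aead_req" is illustrative and "tfm" is assumed to be a live crypto_aead handle:

    #include <crypto/aead.h>
    #include <linux/slab.h>

    static struct aead_request *alloc_aead_req(struct crypto_aead *tfm,
                                               gfp_t gfp)
    {
            struct aead_request *req;

            /* leave room after the request for the driver's private context */
            req = kzalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp);
            if (req)
                    aead_request_set_tfm(req, tfm);
            return req;
    }
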
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 95b02a71fd47..e5473c03d667 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -638,8 +638,9 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
638static int unix_socketpair(struct socket *, struct socket *); 638static int unix_socketpair(struct socket *, struct socket *);
639static int unix_accept(struct socket *, struct socket *, int, bool); 639static int unix_accept(struct socket *, struct socket *, int, bool);
640static int unix_getname(struct socket *, struct sockaddr *, int); 640static int unix_getname(struct socket *, struct sockaddr *, int);
641static __poll_t unix_poll_mask(struct socket *, __poll_t); 641static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
642static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t); 642static __poll_t unix_dgram_poll(struct file *, struct socket *,
643 poll_table *);
643static int unix_ioctl(struct socket *, unsigned int, unsigned long); 644static int unix_ioctl(struct socket *, unsigned int, unsigned long);
644static int unix_shutdown(struct socket *, int); 645static int unix_shutdown(struct socket *, int);
645static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 646static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -680,7 +681,7 @@ static const struct proto_ops unix_stream_ops = {
680 .socketpair = unix_socketpair, 681 .socketpair = unix_socketpair,
681 .accept = unix_accept, 682 .accept = unix_accept,
682 .getname = unix_getname, 683 .getname = unix_getname,
683 .poll_mask = unix_poll_mask, 684 .poll = unix_poll,
684 .ioctl = unix_ioctl, 685 .ioctl = unix_ioctl,
685 .listen = unix_listen, 686 .listen = unix_listen,
686 .shutdown = unix_shutdown, 687 .shutdown = unix_shutdown,
@@ -703,7 +704,7 @@ static const struct proto_ops unix_dgram_ops = {
703 .socketpair = unix_socketpair, 704 .socketpair = unix_socketpair,
704 .accept = sock_no_accept, 705 .accept = sock_no_accept,
705 .getname = unix_getname, 706 .getname = unix_getname,
706 .poll_mask = unix_dgram_poll_mask, 707 .poll = unix_dgram_poll,
707 .ioctl = unix_ioctl, 708 .ioctl = unix_ioctl,
708 .listen = sock_no_listen, 709 .listen = sock_no_listen,
709 .shutdown = unix_shutdown, 710 .shutdown = unix_shutdown,
@@ -725,7 +726,7 @@ static const struct proto_ops unix_seqpacket_ops = {
725 .socketpair = unix_socketpair, 726 .socketpair = unix_socketpair,
726 .accept = unix_accept, 727 .accept = unix_accept,
727 .getname = unix_getname, 728 .getname = unix_getname,
728 .poll_mask = unix_dgram_poll_mask, 729 .poll = unix_dgram_poll,
729 .ioctl = unix_ioctl, 730 .ioctl = unix_ioctl,
730 .listen = unix_listen, 731 .listen = unix_listen,
731 .shutdown = unix_shutdown, 732 .shutdown = unix_shutdown,
@@ -2629,10 +2630,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2629 return err; 2630 return err;
2630} 2631}
2631 2632
2632static __poll_t unix_poll_mask(struct socket *sock, __poll_t events) 2633static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2633{ 2634{
2634 struct sock *sk = sock->sk; 2635 struct sock *sk = sock->sk;
2635 __poll_t mask = 0; 2636 __poll_t mask;
2637
2638 sock_poll_wait(file, sk_sleep(sk), wait);
2639 mask = 0;
2636 2640
2637 /* exceptional events? */ 2641 /* exceptional events? */
2638 if (sk->sk_err) 2642 if (sk->sk_err)
@@ -2661,11 +2665,15 @@ static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
2661 return mask; 2665 return mask;
2662} 2666}
2663 2667
2664static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events) 2668static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2669 poll_table *wait)
2665{ 2670{
2666 struct sock *sk = sock->sk, *other; 2671 struct sock *sk = sock->sk, *other;
2667 int writable; 2672 unsigned int writable;
2668 __poll_t mask = 0; 2673 __poll_t mask;
2674
2675 sock_poll_wait(file, sk_sleep(sk), wait);
2676 mask = 0;
2669 2677
2670 /* exceptional events? */ 2678 /* exceptional events? */
2671 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 2679 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -2691,7 +2699,7 @@ static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
2691 } 2699 }
2692 2700
2693 /* No write status requested, avoid expensive OUT tests. */ 2701 /* No write status requested, avoid expensive OUT tests. */
2694 if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 2702 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2695 return mask; 2703 return mask;
2696 2704
2697 writable = unix_writable(sk); 2705 writable = unix_writable(sk);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index bb5d5fa68c35..c1076c19b858 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,11 +850,18 @@ static int vsock_shutdown(struct socket *sock, int mode)
850 return err; 850 return err;
851} 851}
852 852
853static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events) 853static __poll_t vsock_poll(struct file *file, struct socket *sock,
854 poll_table *wait)
854{ 855{
855 struct sock *sk = sock->sk; 856 struct sock *sk;
856 struct vsock_sock *vsk = vsock_sk(sk); 857 __poll_t mask;
857 __poll_t mask = 0; 858 struct vsock_sock *vsk;
859
860 sk = sock->sk;
861 vsk = vsock_sk(sk);
862
863 poll_wait(file, sk_sleep(sk), wait);
864 mask = 0;
858 865
859 if (sk->sk_err) 866 if (sk->sk_err)
860 /* Signify that there has been an error on this socket. */ 867 /* Signify that there has been an error on this socket. */
@@ -1084,7 +1091,7 @@ static const struct proto_ops vsock_dgram_ops = {
1084 .socketpair = sock_no_socketpair, 1091 .socketpair = sock_no_socketpair,
1085 .accept = sock_no_accept, 1092 .accept = sock_no_accept,
1086 .getname = vsock_getname, 1093 .getname = vsock_getname,
1087 .poll_mask = vsock_poll_mask, 1094 .poll = vsock_poll,
1088 .ioctl = sock_no_ioctl, 1095 .ioctl = sock_no_ioctl,
1089 .listen = sock_no_listen, 1096 .listen = sock_no_listen,
1090 .shutdown = vsock_shutdown, 1097 .shutdown = vsock_shutdown,
@@ -1842,7 +1849,7 @@ static const struct proto_ops vsock_stream_ops = {
1842 .socketpair = sock_no_socketpair, 1849 .socketpair = sock_no_socketpair,
1843 .accept = vsock_accept, 1850 .accept = vsock_accept,
1844 .getname = vsock_getname, 1851 .getname = vsock_getname,
1845 .poll_mask = vsock_poll_mask, 1852 .poll = vsock_poll,
1846 .ioctl = sock_no_ioctl, 1853 .ioctl = sock_no_ioctl,
1847 .listen = vsock_listen, 1854 .listen = vsock_listen,
1848 .shutdown = vsock_shutdown, 1855 .shutdown = vsock_shutdown,
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 8e03bd3f3668..5d3cce9e8744 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -201,7 +201,7 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
201 return -ENODEV; 201 return -ENODEV;
202 } 202 }
203 203
204 if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) 204 if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
205 return virtio_transport_send_pkt_loopback(vsock, pkt); 205 return virtio_transport_send_pkt_loopback(vsock, pkt);
206 206
207 if (pkt->reply) 207 if (pkt->reply)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5fe35aafdd9c..48e8097339ab 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1012,6 +1012,7 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
1012 nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); 1012 nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
1013 1013
1014 list_del_rcu(&wdev->list); 1014 list_del_rcu(&wdev->list);
1015 synchronize_rcu();
1015 rdev->devlist_generation++; 1016 rdev->devlist_generation++;
1016 1017
1017 switch (wdev->iftype) { 1018 switch (wdev->iftype) {
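
cfg80211_unregister_wdev() now inserts an RCU grace period between unlinking the wdev and the rest of the teardown, so lockless readers walking the device list under rcu_read_lock() cannot still be dereferencing the entry when it is reused or torn down. The generic unlink-then-wait pattern, sketched with placeholder types (the real patch does not free the wdev at this point):

    #include <linux/rculist.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct my_dev {
            struct list_head list;
            /* ... device state ... */
    };

    static void my_unregister(struct my_dev *dev)
    {
            list_del_rcu(&dev->list);   /* readers may still see the entry */
            synchronize_rcu();          /* wait until no reader can */
            kfree(dev);                 /* now safe to tear down or free */
    }
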
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 07514ca011b2..4eece06be1e7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6231,7 +6231,7 @@ do { \
6231 nl80211_check_s32); 6231 nl80211_check_s32);
6232 /* 6232 /*
6233 * Check HT operation mode based on 6233 * Check HT operation mode based on
6234 * IEEE 802.11 2012 8.4.2.59 HT Operation element. 6234 * IEEE 802.11-2016 9.4.2.57 HT Operation element.
6235 */ 6235 */
6236 if (tb[NL80211_MESHCONF_HT_OPMODE]) { 6236 if (tb[NL80211_MESHCONF_HT_OPMODE]) {
6237 ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]); 6237 ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
@@ -6241,22 +6241,9 @@ do { \
6241 IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) 6241 IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
6242 return -EINVAL; 6242 return -EINVAL;
6243 6243
6244 if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) && 6244 /* NON_HT_STA bit is reserved, but some programs set it */
6245 (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) 6245 ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
6246 return -EINVAL;
6247 6246
6248 switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
6249 case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
6250 case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
6251 if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
6252 return -EINVAL;
6253 break;
6254 case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
6255 case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
6256 if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
6257 return -EINVAL;
6258 break;
6259 }
6260 cfg->ht_opmode = ht_opmode; 6247 cfg->ht_opmode = ht_opmode;
6261 mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1)); 6248 mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1));
6262 } 6249 }
@@ -10833,7 +10820,7 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev,
10833 struct nlattr **tb; 10820 struct nlattr **tb;
10834 int err; 10821 int err;
10835 10822
10836 tb = kzalloc(NUM_NL80211_ATTR * sizeof(*tb), GFP_KERNEL); 10823 tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL);
10837 if (!tb) 10824 if (!tb)
10838 return -ENOMEM; 10825 return -ENOMEM;
10839 10826
@@ -10962,9 +10949,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
10962 rem) { 10949 rem) {
10963 u8 *mask_pat; 10950 u8 *mask_pat;
10964 10951
10965 nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, 10952 err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
10966 nl80211_packet_pattern_policy, 10953 nl80211_packet_pattern_policy,
10967 info->extack); 10954 info->extack);
10955 if (err)
10956 goto error;
10957
10968 err = -EINVAL; 10958 err = -EINVAL;
10969 if (!pat_tb[NL80211_PKTPAT_MASK] || 10959 if (!pat_tb[NL80211_PKTPAT_MASK] ||
10970 !pat_tb[NL80211_PKTPAT_PATTERN]) 10960 !pat_tb[NL80211_PKTPAT_PATTERN])
@@ -11213,8 +11203,11 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
11213 rem) { 11203 rem) {
11214 u8 *mask_pat; 11204 u8 *mask_pat;
11215 11205
11216 nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, 11206 err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
11217 nl80211_packet_pattern_policy, NULL); 11207 nl80211_packet_pattern_policy, NULL);
11208 if (err)
11209 return err;
11210
11218 if (!pat_tb[NL80211_PKTPAT_MASK] || 11211 if (!pat_tb[NL80211_PKTPAT_MASK] ||
11219 !pat_tb[NL80211_PKTPAT_PATTERN]) 11212 !pat_tb[NL80211_PKTPAT_PATTERN])
11220 return -EINVAL; 11213 return -EINVAL;
@@ -11793,7 +11786,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
11793 11786
11794 func->srf_num_macs = n_entries; 11787 func->srf_num_macs = n_entries;
11795 func->srf_macs = 11788 func->srf_macs =
11796 kzalloc(sizeof(*func->srf_macs) * n_entries, 11789 kcalloc(n_entries, sizeof(*func->srf_macs),
11797 GFP_KERNEL); 11790 GFP_KERNEL);
11798 if (!func->srf_macs) { 11791 if (!func->srf_macs) {
11799 err = -ENOMEM; 11792 err = -ENOMEM;
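
The nl80211 hunks stop ignoring the return value of nla_parse_nested() in the WoWLAN and coalesce packet-pattern paths, and convert the remaining "n * size" allocations to kcalloc(); a failed nested parse now unwinds instead of continuing with an attribute table of unknown contents. The checked-parse idiom, as a sketch (MY_ATTR_MAX and my_policy are placeholders):

    #include <net/netlink.h>

    #define MY_ATTR_MAX 3                           /* placeholder attr count */
    static const struct nla_policy my_policy[MY_ATTR_MAX + 1];

    static int parse_nested_checked(struct nlattr *nest,
                                    struct netlink_ext_ack *extack)
    {
            struct nlattr *tb[MY_ATTR_MAX + 1];
            int err;

            err = nla_parse_nested(tb, MY_ATTR_MAX, nest, my_policy, extack);
            if (err)
                    return err;     /* do not trust tb[] after a failed parse */

            /* ... use tb[...] attributes here ... */
            return 0;
    }
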
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b5bb1c309914..3c654cd7ba56 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1746,6 +1746,8 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
1746 if (!rdev->ops->get_station) 1746 if (!rdev->ops->get_station)
1747 return -EOPNOTSUPP; 1747 return -EOPNOTSUPP;
1748 1748
1749 memset(sinfo, 0, sizeof(*sinfo));
1750
1749 return rdev_get_station(rdev, dev, mac_addr, sinfo); 1751 return rdev_get_station(rdev, dev, mac_addr, sinfo);
1750} 1752}
1751EXPORT_SYMBOL(cfg80211_get_station); 1753EXPORT_SYMBOL(cfg80211_get_station);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f93365ae0fdd..d49aa79b7997 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1750,7 +1750,7 @@ static const struct proto_ops x25_proto_ops = {
1750 .socketpair = sock_no_socketpair, 1750 .socketpair = sock_no_socketpair,
1751 .accept = x25_accept, 1751 .accept = x25_accept,
1752 .getname = x25_getname, 1752 .getname = x25_getname,
1753 .poll_mask = datagram_poll_mask, 1753 .poll = datagram_poll,
1754 .ioctl = x25_ioctl, 1754 .ioctl = x25_ioctl,
1755#ifdef CONFIG_COMPAT 1755#ifdef CONFIG_COMPAT
1756 .compat_ioctl = compat_x25_ioctl, 1756 .compat_ioctl = compat_x25_ioctl,
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 7eb4948a38d2..f47abb46c587 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -132,8 +132,10 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
132 132
133static void xdp_umem_unaccount_pages(struct xdp_umem *umem) 133static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
134{ 134{
135 atomic_long_sub(umem->npgs, &umem->user->locked_vm); 135 if (umem->user) {
136 free_uid(umem->user); 136 atomic_long_sub(umem->npgs, &umem->user->locked_vm);
137 free_uid(umem->user);
138 }
137} 139}
138 140
139static void xdp_umem_release(struct xdp_umem *umem) 141static void xdp_umem_release(struct xdp_umem *umem)
@@ -202,7 +204,8 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem)
202 long npgs; 204 long npgs;
203 int err; 205 int err;
204 206
205 umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL); 207 umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
208 GFP_KERNEL | __GFP_NOWARN);
206 if (!umem->pgs) 209 if (!umem->pgs)
207 return -ENOMEM; 210 return -ENOMEM;
208 211
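
xdp_umem_unaccount_pages() can now run against a umem whose pages were never accounted (umem->user unset), so the locked_vm adjustment and free_uid() are guarded; the page-array kcalloc() also adds __GFP_NOWARN, since an oversized user-supplied region is an expected failure rather than something worth a kernel warning. The guarded-teardown idiom, sketched with an illustrative helper:

    #include <linux/sched/user.h>
    #include <linux/atomic.h>

    static void unaccount_pages(struct user_struct *user, unsigned long npgs)
    {
            if (!user)                      /* nothing was ever accounted */
                    return;
            atomic_long_sub(npgs, &user->locked_vm);
            free_uid(user);                 /* drop the reference we held */
    }
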
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c6ed2454f7ce..59fb7d3c36a3 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -118,6 +118,9 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
118 u64 addr; 118 u64 addr;
119 int err; 119 int err;
120 120
121 if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
122 return -EINVAL;
123
121 if (!xskq_peek_addr(xs->umem->fq, &addr) || 124 if (!xskq_peek_addr(xs->umem->fq, &addr) ||
122 len > xs->umem->chunk_size_nohr) { 125 len > xs->umem->chunk_size_nohr) {
123 xs->rx_dropped++; 126 xs->rx_dropped++;
@@ -300,9 +303,10 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
300 return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len); 303 return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
301} 304}
302 305
303static __poll_t xsk_poll_mask(struct socket *sock, __poll_t events) 306static unsigned int xsk_poll(struct file *file, struct socket *sock,
307 struct poll_table_struct *wait)
304{ 308{
305 __poll_t mask = datagram_poll_mask(sock, events); 309 unsigned int mask = datagram_poll(file, sock, wait);
306 struct sock *sk = sock->sk; 310 struct sock *sk = sock->sk;
307 struct xdp_sock *xs = xdp_sk(sk); 311 struct xdp_sock *xs = xdp_sk(sk);
308 312
@@ -643,7 +647,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
643static int xsk_mmap(struct file *file, struct socket *sock, 647static int xsk_mmap(struct file *file, struct socket *sock,
644 struct vm_area_struct *vma) 648 struct vm_area_struct *vma)
645{ 649{
646 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; 650 loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
647 unsigned long size = vma->vm_end - vma->vm_start; 651 unsigned long size = vma->vm_end - vma->vm_start;
648 struct xdp_sock *xs = xdp_sk(sock->sk); 652 struct xdp_sock *xs = xdp_sk(sock->sk);
649 struct xsk_queue *q = NULL; 653 struct xsk_queue *q = NULL;
@@ -693,7 +697,7 @@ static const struct proto_ops xsk_proto_ops = {
693 .socketpair = sock_no_socketpair, 697 .socketpair = sock_no_socketpair,
694 .accept = sock_no_accept, 698 .accept = sock_no_accept,
695 .getname = sock_no_getname, 699 .getname = sock_no_getname,
696 .poll_mask = xsk_poll_mask, 700 .poll = xsk_poll,
697 .ioctl = sock_no_ioctl, 701 .ioctl = sock_no_ioctl,
698 .listen = sock_no_listen, 702 .listen = sock_no_listen,
699 .shutdown = sock_no_shutdown, 703 .shutdown = sock_no_shutdown,
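
The final xsk hunks make xsk_generic_rcv() reject frames whose receive queue does not match the device and queue_id the socket is bound to, and widen the mmap offset computation so "vm_pgoff << PAGE_SHIFT" is done in 64 bits rather than truncating in unsigned long on 32-bit builds. The offset idiom, as a sketch (the helper name is illustrative):

    #include <linux/mm.h>

    static loff_t xsk_style_mmap_offset(const struct vm_area_struct *vma)
    {
            /* widen before shifting so large page offsets survive on 32-bit */
            return (loff_t)vma->vm_pgoff << PAGE_SHIFT;
    }
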