Diffstat (limited to 'net')
-rw-r--r-- net/8021q/vlan.c | 7
-rw-r--r-- net/9p/trans_fd.c | 61
-rw-r--r-- net/9p/trans_xen.c | 4
-rw-r--r-- net/atm/common.c | 4
-rw-r--r-- net/atm/common.h | 2
-rw-r--r-- net/batman-adv/bat_iv_ogm.c | 4
-rw-r--r-- net/batman-adv/bat_v.c | 2
-rw-r--r-- net/batman-adv/fragmentation.c | 2
-rw-r--r-- net/batman-adv/icmp_socket.c | 2
-rw-r--r-- net/batman-adv/log.c | 2
-rw-r--r-- net/batman-adv/tp_meter.c | 4
-rw-r--r-- net/bluetooth/af_bluetooth.c | 6
-rw-r--r-- net/bluetooth/l2cap_core.c | 20
-rw-r--r-- net/bridge/br_netlink.c | 11
-rw-r--r-- net/caif/caif_dev.c | 5
-rw-r--r-- net/caif/caif_socket.c | 4
-rw-r--r-- net/caif/caif_usb.c | 4
-rw-r--r-- net/caif/cfcnfg.c | 10
-rw-r--r-- net/caif/cfctrl.c | 4
-rw-r--r-- net/can/af_can.c | 36
-rw-r--r-- net/core/datagram.c | 8
-rw-r--r-- net/core/dev.c | 37
-rw-r--r-- net/core/ethtool.c | 15
-rw-r--r-- net/core/filter.c | 10
-rw-r--r-- net/core/flow_dissector.c | 3
-rw-r--r-- net/core/neighbour.c | 4
-rw-r--r-- net/core/net_namespace.c | 2
-rw-r--r-- net/core/netprio_cgroup.c | 1
-rw-r--r-- net/core/rtnetlink.c | 10
-rw-r--r-- net/core/skbuff.c | 17
-rw-r--r-- net/core/sock.c | 2
-rw-r--r-- net/core/sock_diag.c | 2
-rw-r--r-- net/core/sysctl_net_core.c | 6
-rw-r--r-- net/dccp/ccids/ccid2.c | 3
-rw-r--r-- net/dccp/dccp.h | 2
-rw-r--r-- net/dccp/minisocks.c | 6
-rw-r--r-- net/dccp/proto.c | 9
-rw-r--r-- net/decnet/af_decnet.c | 4
-rw-r--r-- net/dsa/dsa2.c | 25
-rw-r--r-- net/dsa/slave.c | 1
-rw-r--r-- net/ipv4/arp.c | 7
-rw-r--r-- net/ipv4/devinet.c | 2
-rw-r--r-- net/ipv4/esp4.c | 1
-rw-r--r-- net/ipv4/esp4_offload.c | 6
-rw-r--r-- net/ipv4/fib_frontend.c | 9
-rw-r--r-- net/ipv4/fib_semantics.c | 8
-rw-r--r-- net/ipv4/igmp.c | 44
-rw-r--r-- net/ipv4/inet_timewait_sock.c | 6
-rw-r--r-- net/ipv4/ip_gre.c | 3
-rw-r--r-- net/ipv4/ip_tunnel.c | 7
-rw-r--r-- net/ipv4/ip_vti.c | 2
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 8
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 8
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 3
-rw-r--r-- net/ipv4/raw.c | 17
-rw-r--r-- net/ipv4/route.c | 1
-rw-r--r-- net/ipv4/tcp.c | 8
-rw-r--r-- net/ipv4/tcp_bbr.c | 12
-rw-r--r-- net/ipv4/tcp_input.c | 22
-rw-r--r-- net/ipv4/tcp_ipv4.c | 61
-rw-r--r-- net/ipv4/tcp_minisocks.c | 6
-rw-r--r-- net/ipv4/tcp_offload.c | 3
-rw-r--r-- net/ipv4/tcp_rate.c | 10
-rw-r--r-- net/ipv4/tcp_recovery.c | 28
-rw-r--r-- net/ipv4/tcp_timer.c | 17
-rw-r--r-- net/ipv4/udp.c | 4
-rw-r--r-- net/ipv4/udp_offload.c | 3
-rw-r--r-- net/ipv4/xfrm4_input.c | 12
-rw-r--r-- net/ipv4/xfrm4_mode_tunnel.c | 1
-rw-r--r-- net/ipv6/af_inet6.c | 1
-rw-r--r-- net/ipv6/esp6.c | 3
-rw-r--r-- net/ipv6/esp6_offload.c | 6
-rw-r--r-- net/ipv6/exthdrs.c | 9
-rw-r--r-- net/ipv6/ip6_fib.c | 83
-rw-r--r-- net/ipv6/ip6_gre.c | 72
-rw-r--r-- net/ipv6/ip6_output.c | 24
-rw-r--r-- net/ipv6/ip6_tunnel.c | 32
-rw-r--r-- net/ipv6/ip6_vti.c | 2
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 3
-rw-r--r-- net/ipv6/mcast.c | 25
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 8
-rw-r--r-- net/ipv6/netfilter/ip6t_MASQUERADE.c | 8
-rw-r--r-- net/ipv6/route.c | 20
-rw-r--r-- net/ipv6/sit.c | 5
-rw-r--r-- net/ipv6/tcp_ipv6.c | 13
-rw-r--r-- net/ipv6/tcpv6_offload.c | 3
-rw-r--r-- net/ipv6/udp_offload.c | 3
-rw-r--r-- net/ipv6/xfrm6_input.c | 10
-rw-r--r-- net/ipv6/xfrm6_mode_tunnel.c | 1
-rw-r--r-- net/iucv/af_iucv.c | 6
-rw-r--r-- net/kcm/kcmsock.c | 93
-rw-r--r-- net/key/af_key.c | 12
-rw-r--r-- net/mac80211/ht.c | 7
-rw-r--r-- net/mac80211/mesh_hwmp.c | 15
-rw-r--r-- net/mac80211/mlme.c | 2
-rw-r--r-- net/mac80211/rx.c | 2
-rw-r--r-- net/mac80211/tx.c | 29
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 9
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_h323_asn1.c | 128
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 13
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 3
-rw-r--r-- net/netfilter/nf_tables_api.c | 15
-rw-r--r-- net/netfilter/nfnetlink_cthelper.c | 10
-rw-r--r-- net/netfilter/nfnetlink_log.c | 5
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 5
-rw-r--r-- net/netfilter/nft_exthdr.c | 2
-rw-r--r-- net/netfilter/x_tables.c | 9
-rw-r--r-- net/netfilter/xt_bpf.c | 20
-rw-r--r-- net/netfilter/xt_osf.c | 7
-rw-r--r-- net/netlink/af_netlink.c | 6
-rw-r--r-- net/nfc/llcp_sock.c | 6
-rw-r--r-- net/nfc/nci/uart.c | 2
-rw-r--r-- net/openvswitch/datapath.c | 2
-rw-r--r-- net/openvswitch/flow.c | 15
-rw-r--r-- net/openvswitch/flow_netlink.c | 67
-rw-r--r-- net/packet/af_packet.c | 41
-rw-r--r-- net/packet/internal.h | 1
-rw-r--r-- net/phonet/socket.c | 4
-rw-r--r-- net/rds/af_rds.c | 4
-rw-r--r-- net/rds/ib.c | 6
-rw-r--r-- net/rds/rdma.c | 6
-rw-r--r-- net/rds/send.c | 3
-rw-r--r-- net/rds/tcp.c | 5
-rw-r--r-- net/rds/tcp.h | 2
-rw-r--r-- net/rds/tcp_send.c | 4
-rw-r--r-- net/rfkill/core.c | 4
-rw-r--r-- net/rxrpc/af_rxrpc.c | 28
-rw-r--r-- net/rxrpc/ar-internal.h | 103
-rw-r--r-- net/rxrpc/call_accept.c | 2
-rw-r--r-- net/rxrpc/call_event.c | 229
-rw-r--r-- net/rxrpc/call_object.c | 62
-rw-r--r-- net/rxrpc/conn_client.c | 54
-rw-r--r-- net/rxrpc/conn_event.c | 124
-rw-r--r-- net/rxrpc/conn_object.c | 76
-rw-r--r-- net/rxrpc/input.c | 76
-rw-r--r-- net/rxrpc/misc.c | 19
-rw-r--r-- net/rxrpc/net_ns.c | 33
-rw-r--r-- net/rxrpc/output.c | 43
-rw-r--r-- net/rxrpc/recvmsg.c | 12
-rw-r--r-- net/rxrpc/sendmsg.c | 126
-rw-r--r-- net/rxrpc/sysctl.c | 60
-rw-r--r-- net/sched/act_gact.c | 2
-rw-r--r-- net/sched/act_meta_mark.c | 1
-rw-r--r-- net/sched/act_meta_skbtcindex.c | 1
-rw-r--r-- net/sched/act_mirred.c | 2
-rw-r--r-- net/sched/act_sample.c | 14
-rw-r--r-- net/sched/cls_api.c | 20
-rw-r--r-- net/sched/cls_bpf.c | 123
-rw-r--r-- net/sched/cls_u32.c | 1
-rw-r--r-- net/sched/em_nbyte.c | 2
-rw-r--r-- net/sched/sch_api.c | 17
-rw-r--r-- net/sched/sch_cbq.c | 9
-rw-r--r-- net/sched/sch_choke.c | 3
-rw-r--r-- net/sched/sch_generic.c | 25
-rw-r--r-- net/sched/sch_gred.c | 3
-rw-r--r-- net/sched/sch_ingress.c | 32
-rw-r--r-- net/sched/sch_red.c | 33
-rw-r--r-- net/sched/sch_sfq.c | 4
-rw-r--r-- net/sctp/chunk.c | 11
-rw-r--r-- net/sctp/debug.c | 3
-rw-r--r-- net/sctp/input.c | 28
-rw-r--r-- net/sctp/ipv6.c | 1
-rw-r--r-- net/sctp/offload.c | 3
-rw-r--r-- net/sctp/outqueue.c | 23
-rw-r--r-- net/sctp/protocol.c | 1
-rw-r--r-- net/sctp/socket.c | 148
-rw-r--r-- net/sctp/stream.c | 101
-rw-r--r-- net/sctp/stream_sched.c | 25
-rw-r--r-- net/sctp/stream_sched_prio.c | 7
-rw-r--r-- net/sctp/stream_sched_rr.c | 7
-rw-r--r-- net/sctp/transport.c | 29
-rw-r--r-- net/sctp/ulpqueue.c | 24
-rw-r--r-- net/smc/af_smc.c | 6
-rw-r--r-- net/smc/smc_clc.c | 18
-rw-r--r-- net/socket.c | 129
-rw-r--r-- net/strparser/strparser.c | 2
-rw-r--r-- net/sunrpc/auth_gss/gss_rpc_xdr.c | 1
-rw-r--r-- net/sunrpc/auth_gss/svcauth_gss.c | 5
-rw-r--r-- net/sunrpc/cache.c | 10
-rw-r--r-- net/sunrpc/clnt.c | 21
-rw-r--r-- net/sunrpc/rpc_pipe.c | 4
-rw-r--r-- net/sunrpc/sched.c | 26
-rw-r--r-- net/sunrpc/svcauth_unix.c | 6
-rw-r--r-- net/sunrpc/svcsock.c | 4
-rw-r--r-- net/sunrpc/xprt.c | 30
-rw-r--r-- net/sunrpc/xprtrdma/backchannel.c | 78
-rw-r--r-- net/sunrpc/xprtrdma/fmr_ops.c | 157
-rw-r--r-- net/sunrpc/xprtrdma/frwr_ops.c | 329
-rw-r--r-- net/sunrpc/xprtrdma/module.c | 12
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c | 168
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 130
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c | 282
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h | 117
-rw-r--r-- net/sunrpc/xprtsock.c | 38
-rw-r--r-- net/tipc/bearer.c | 5
-rw-r--r-- net/tipc/group.c | 71
-rw-r--r-- net/tipc/monitor.c | 6
-rw-r--r-- net/tipc/node.c | 26
-rw-r--r-- net/tipc/server.c | 7
-rw-r--r-- net/tipc/socket.c | 8
-rw-r--r-- net/tipc/udp_media.c | 4
-rw-r--r-- net/tls/tls_main.c | 17
-rw-r--r-- net/tls/tls_sw.c | 18
-rw-r--r-- net/unix/af_unix.c | 15
-rw-r--r-- net/vmw_vsock/af_vsock.c | 6
-rw-r--r-- net/vmw_vsock/hyperv_transport.c | 2
-rw-r--r-- net/vmw_vsock/vmci_transport.c | 14
-rw-r--r-- net/wireless/Kconfig | 7
-rw-r--r-- net/wireless/Makefile | 39
-rw-r--r-- net/wireless/certs/sforshee.hex | 86
-rw-r--r-- net/wireless/certs/sforshee.x509 | bin 680 -> 0 bytes
-rw-r--r-- net/wireless/core.c | 8
-rw-r--r-- net/wireless/core.h | 2
-rw-r--r-- net/wireless/nl80211.c | 20
-rw-r--r-- net/wireless/reg.c | 3
-rw-r--r-- net/wireless/wext-compat.c | 3
-rw-r--r-- net/xfrm/xfrm_device.c | 1
-rw-r--r-- net/xfrm/xfrm_input.c | 69
-rw-r--r-- net/xfrm/xfrm_policy.c | 24
-rw-r--r-- net/xfrm/xfrm_state.c | 24
-rw-r--r-- net/xfrm/xfrm_user.c | 44
222 files changed, 3284 insertions, 2151 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8dfdd94e430f..bad01b14a4ad 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
111 vlan_gvrp_uninit_applicant(real_dev); 111 vlan_gvrp_uninit_applicant(real_dev);
112 } 112 }
113 113
114 /* Take it out of our own structures, but be sure to interlock with 114 vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
115 * HW accelerating devices or SW vlan input packet processing if
116 * VLAN is not 0 (leave it there for 802.1p).
117 */
118 if (vlan_id)
119 vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
120 115
121 /* Get rid of the vlan's reference to real_dev */ 116 /* Get rid of the vlan's reference to real_dev */
122 dev_put(real_dev); 117 dev_put(real_dev);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 985046ae4231..d6f7f7cb79c4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -228,32 +228,31 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
228 } 228 }
229} 229}
230 230
231static int 231static __poll_t
232p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) 232p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
233{ 233{
234 int ret, n; 234 __poll_t ret, n;
235 struct p9_trans_fd *ts = NULL; 235 struct p9_trans_fd *ts = NULL;
236 236
237 if (client && client->status == Connected) 237 if (client && client->status == Connected)
238 ts = client->trans; 238 ts = client->trans;
239 239
240 if (!ts) 240 if (!ts) {
241 return -EREMOTEIO; 241 if (err)
242 *err = -EREMOTEIO;
243 return POLLERR;
244 }
242 245
243 if (!ts->rd->f_op->poll) 246 if (!ts->rd->f_op->poll)
244 return -EIO; 247 ret = DEFAULT_POLLMASK;
245 248 else
246 if (!ts->wr->f_op->poll) 249 ret = ts->rd->f_op->poll(ts->rd, pt);
247 return -EIO;
248
249 ret = ts->rd->f_op->poll(ts->rd, pt);
250 if (ret < 0)
251 return ret;
252 250
253 if (ts->rd != ts->wr) { 251 if (ts->rd != ts->wr) {
254 n = ts->wr->f_op->poll(ts->wr, pt); 252 if (!ts->wr->f_op->poll)
255 if (n < 0) 253 n = DEFAULT_POLLMASK;
256 return n; 254 else
255 n = ts->wr->f_op->poll(ts->wr, pt);
257 ret = (ret & ~POLLOUT) | (n & ~POLLIN); 256 ret = (ret & ~POLLOUT) | (n & ~POLLIN);
258 } 257 }
259 258
@@ -298,7 +297,8 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
298 297
299static void p9_read_work(struct work_struct *work) 298static void p9_read_work(struct work_struct *work)
300{ 299{
301 int n, err; 300 __poll_t n;
301 int err;
302 struct p9_conn *m; 302 struct p9_conn *m;
303 int status = REQ_STATUS_ERROR; 303 int status = REQ_STATUS_ERROR;
304 304
@@ -398,7 +398,7 @@ end_clear:
398 if (test_and_clear_bit(Rpending, &m->wsched)) 398 if (test_and_clear_bit(Rpending, &m->wsched))
399 n = POLLIN; 399 n = POLLIN;
400 else 400 else
401 n = p9_fd_poll(m->client, NULL); 401 n = p9_fd_poll(m->client, NULL, NULL);
402 402
403 if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { 403 if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
404 p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); 404 p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
@@ -448,7 +448,8 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
448 448
449static void p9_write_work(struct work_struct *work) 449static void p9_write_work(struct work_struct *work)
450{ 450{
451 int n, err; 451 __poll_t n;
452 int err;
452 struct p9_conn *m; 453 struct p9_conn *m;
453 struct p9_req_t *req; 454 struct p9_req_t *req;
454 455
@@ -506,7 +507,7 @@ end_clear:
506 if (test_and_clear_bit(Wpending, &m->wsched)) 507 if (test_and_clear_bit(Wpending, &m->wsched))
507 n = POLLOUT; 508 n = POLLOUT;
508 else 509 else
509 n = p9_fd_poll(m->client, NULL); 510 n = p9_fd_poll(m->client, NULL, NULL);
510 511
511 if ((n & POLLOUT) && 512 if ((n & POLLOUT) &&
512 !test_and_set_bit(Wworksched, &m->wsched)) { 513 !test_and_set_bit(Wworksched, &m->wsched)) {
@@ -581,7 +582,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
581 582
582static void p9_conn_create(struct p9_client *client) 583static void p9_conn_create(struct p9_client *client)
583{ 584{
584 int n; 585 __poll_t n;
585 struct p9_trans_fd *ts = client->trans; 586 struct p9_trans_fd *ts = client->trans;
586 struct p9_conn *m = &ts->conn; 587 struct p9_conn *m = &ts->conn;
587 588
@@ -597,7 +598,7 @@ static void p9_conn_create(struct p9_client *client)
597 INIT_LIST_HEAD(&m->poll_pending_link); 598 INIT_LIST_HEAD(&m->poll_pending_link);
598 init_poll_funcptr(&m->pt, p9_pollwait); 599 init_poll_funcptr(&m->pt, p9_pollwait);
599 600
600 n = p9_fd_poll(client, &m->pt); 601 n = p9_fd_poll(client, &m->pt, NULL);
601 if (n & POLLIN) { 602 if (n & POLLIN) {
602 p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); 603 p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
603 set_bit(Rpending, &m->wsched); 604 set_bit(Rpending, &m->wsched);
@@ -617,17 +618,16 @@ static void p9_conn_create(struct p9_client *client)
617 618
618static void p9_poll_mux(struct p9_conn *m) 619static void p9_poll_mux(struct p9_conn *m)
619{ 620{
620 int n; 621 __poll_t n;
622 int err = -ECONNRESET;
621 623
622 if (m->err < 0) 624 if (m->err < 0)
623 return; 625 return;
624 626
625 n = p9_fd_poll(m->client, NULL); 627 n = p9_fd_poll(m->client, NULL, &err);
626 if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { 628 if (n & (POLLERR | POLLHUP | POLLNVAL)) {
627 p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); 629 p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
628 if (n >= 0) 630 p9_conn_cancel(m, err);
629 n = -ECONNRESET;
630 p9_conn_cancel(m, n);
631 } 631 }
632 632
633 if (n & POLLIN) { 633 if (n & POLLIN) {
@@ -663,7 +663,7 @@ static void p9_poll_mux(struct p9_conn *m)
663 663
664static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) 664static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
665{ 665{
666 int n; 666 __poll_t n;
667 struct p9_trans_fd *ts = client->trans; 667 struct p9_trans_fd *ts = client->trans;
668 struct p9_conn *m = &ts->conn; 668 struct p9_conn *m = &ts->conn;
669 669
@@ -680,7 +680,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
680 if (test_and_clear_bit(Wpending, &m->wsched)) 680 if (test_and_clear_bit(Wpending, &m->wsched))
681 n = POLLOUT; 681 n = POLLOUT;
682 else 682 else
683 n = p9_fd_poll(m->client, NULL); 683 n = p9_fd_poll(m->client, NULL, NULL);
684 684
685 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) 685 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
686 schedule_work(&m->wq); 686 schedule_work(&m->wq);
@@ -839,7 +839,6 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
839 if (IS_ERR(file)) { 839 if (IS_ERR(file)) {
840 pr_err("%s (%d): failed to map fd\n", 840 pr_err("%s (%d): failed to map fd\n",
841 __func__, task_pid_nr(current)); 841 __func__, task_pid_nr(current));
842 sock_release(csocket);
843 kfree(p); 842 kfree(p);
844 return PTR_ERR(file); 843 return PTR_ERR(file);
845 } 844 }
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 325c56043007..086a4abdfa7c 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -543,3 +543,7 @@ static void p9_trans_xen_exit(void)
543 return xenbus_unregister_driver(&xen_9pfs_front_driver); 543 return xenbus_unregister_driver(&xen_9pfs_front_driver);
544} 544}
545module_exit(p9_trans_xen_exit); 545module_exit(p9_trans_xen_exit);
546
547MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
548MODULE_DESCRIPTION("Xen Transport for 9P");
549MODULE_LICENSE("GPL");
diff --git a/net/atm/common.c b/net/atm/common.c
index 8a4f99114cd2..8f12f1c6fa14 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -648,11 +648,11 @@ out:
648 return error; 648 return error;
649} 649}
650 650
651unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) 651__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
652{ 652{
653 struct sock *sk = sock->sk; 653 struct sock *sk = sock->sk;
654 struct atm_vcc *vcc; 654 struct atm_vcc *vcc;
655 unsigned int mask; 655 __poll_t mask;
656 656
657 sock_poll_wait(file, sk_sleep(sk), wait); 657 sock_poll_wait(file, sk_sleep(sk), wait);
658 mask = 0; 658 mask = 0;
diff --git a/net/atm/common.h b/net/atm/common.h
index d9d583712a91..5850649068bb 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
17int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 17int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
18 int flags); 18 int flags);
19int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); 19int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
20unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait); 20__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
21int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 21int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
22int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 22int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
23int vcc_setsockopt(struct socket *sock, int level, int optname, 23int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 1b659ab652fb..bbe8414b6ee7 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1214 orig_node->last_seen = jiffies; 1214 orig_node->last_seen = jiffies;
1215 1215
1216 /* find packet count of corresponding one hop neighbor */ 1216 /* find packet count of corresponding one hop neighbor */
1217 spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); 1217 spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
1218 if_num = if_incoming->if_num; 1218 if_num = if_incoming->if_num;
1219 orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; 1219 orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
1220 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); 1220 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1224 } else { 1224 } else {
1225 neigh_rq_count = 0; 1225 neigh_rq_count = 0;
1226 } 1226 }
1227 spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); 1227 spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
1228 1228
1229 /* pay attention to not get a value bigger than 100 % */ 1229 /* pay attention to not get a value bigger than 100 % */
1230 if (orig_eq_count > neigh_rq_count) 1230 if (orig_eq_count > neigh_rq_count)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 341ceab8338d..e0e2bfcd6b3e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
814 } 814 }
815 815
816 orig_gw = batadv_gw_node_get(bat_priv, orig_node); 816 orig_gw = batadv_gw_node_get(bat_priv, orig_node);
817 if (!orig_node) 817 if (!orig_gw)
818 goto out; 818 goto out;
819 819
820 if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) 820 if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a98cf1104a30..ebe6e38934e4 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
499 */ 499 */
500 if (skb->priority >= 256 && skb->priority <= 263) 500 if (skb->priority >= 256 && skb->priority <= 263)
501 frag_header.priority = skb->priority - 256; 501 frag_header.priority = skb->priority - 256;
502 else
503 frag_header.priority = 0;
502 504
503 ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); 505 ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
504 ether_addr_copy(frag_header.dest, orig_node->orig); 506 ether_addr_copy(frag_header.dest, orig_node->orig);
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bded31121d12..a98e0a986cef 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -292,7 +292,7 @@ out:
292 return len; 292 return len;
293} 293}
294 294
295static unsigned int batadv_socket_poll(struct file *file, poll_table *wait) 295static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
296{ 296{
297 struct batadv_socket_client *socket_client = file->private_data; 297 struct batadv_socket_client *socket_client = file->private_data;
298 298
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 4ef4bde2cc2d..76451460c98d 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -176,7 +176,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf,
176 return error; 176 return error;
177} 177}
178 178
179static unsigned int batadv_log_poll(struct file *file, poll_table *wait) 179static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
180{ 180{
181 struct batadv_priv *bat_priv = file->private_data; 181 struct batadv_priv *bat_priv = file->private_data;
182 struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; 182 struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 15cd2139381e..ebc4e2241c77 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
482 482
483/** 483/**
484 * batadv_tp_sender_timeout - timer that fires in case of packet loss 484 * batadv_tp_sender_timeout - timer that fires in case of packet loss
485 * @arg: address of the related tp_vars 485 * @t: address to timer_list inside tp_vars
486 * 486 *
487 * If fired it means that there was packet loss. 487 * If fired it means that there was packet loss.
488 * Switch to Slow Start, set the ss_threshold to half of the current cwnd and 488 * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
1106/** 1106/**
1107 * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is 1107 * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
1108 * reached without received ack 1108 * reached without received ack
1109 * @arg: address of the related tp_vars 1109 * @t: address to timer_list inside tp_vars
1110 */ 1110 */
1111static void batadv_tp_receiver_shutdown(struct timer_list *t) 1111static void batadv_tp_receiver_shutdown(struct timer_list *t)
1112{ 1112{
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 91e3ba280706..671b907ba678 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -421,7 +421,7 @@ out:
421} 421}
422EXPORT_SYMBOL(bt_sock_stream_recvmsg); 422EXPORT_SYMBOL(bt_sock_stream_recvmsg);
423 423
424static inline unsigned int bt_accept_poll(struct sock *parent) 424static inline __poll_t bt_accept_poll(struct sock *parent)
425{ 425{
426 struct bt_sock *s, *n; 426 struct bt_sock *s, *n;
427 struct sock *sk; 427 struct sock *sk;
@@ -437,11 +437,11 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
437 return 0; 437 return 0;
438} 438}
439 439
440unsigned int bt_sock_poll(struct file *file, struct socket *sock, 440__poll_t bt_sock_poll(struct file *file, struct socket *sock,
441 poll_table *wait) 441 poll_table *wait)
442{ 442{
443 struct sock *sk = sock->sk; 443 struct sock *sk = sock->sk;
444 unsigned int mask = 0; 444 __poll_t mask = 0;
445 445
446 BT_DBG("sock %p, sk %p", sock, sk); 446 BT_DBG("sock %p, sk %p", sock, sk);
447 447
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 43ba91c440bc..fc6615d59165 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
3363 break; 3363 break;
3364 3364
3365 case L2CAP_CONF_EFS: 3365 case L2CAP_CONF_EFS:
3366 remote_efs = 1; 3366 if (olen == sizeof(efs)) {
3367 if (olen == sizeof(efs)) 3367 remote_efs = 1;
3368 memcpy(&efs, (void *) val, olen); 3368 memcpy(&efs, (void *) val, olen);
3369 }
3369 break; 3370 break;
3370 3371
3371 case L2CAP_CONF_EWS: 3372 case L2CAP_CONF_EWS:
@@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
3584 break; 3585 break;
3585 3586
3586 case L2CAP_CONF_EFS: 3587 case L2CAP_CONF_EFS:
3587 if (olen == sizeof(efs)) 3588 if (olen == sizeof(efs)) {
3588 memcpy(&efs, (void *)val, olen); 3589 memcpy(&efs, (void *)val, olen);
3589 3590
3590 if (chan->local_stype != L2CAP_SERV_NOTRAFIC && 3591 if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
3591 efs.stype != L2CAP_SERV_NOTRAFIC && 3592 efs.stype != L2CAP_SERV_NOTRAFIC &&
3592 efs.stype != chan->local_stype) 3593 efs.stype != chan->local_stype)
3593 return -ECONNREFUSED; 3594 return -ECONNREFUSED;
3594 3595
3595 l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), 3596 l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
3596 (unsigned long) &efs, endptr - ptr); 3597 (unsigned long) &efs, endptr - ptr);
3598 }
3597 break; 3599 break;
3598 3600
3599 case L2CAP_CONF_FCS: 3601 case L2CAP_CONF_FCS:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
1262 struct net_bridge *br = netdev_priv(dev); 1262 struct net_bridge *br = netdev_priv(dev);
1263 int err; 1263 int err;
1264 1264
1265 err = register_netdevice(dev);
1266 if (err)
1267 return err;
1268
1265 if (tb[IFLA_ADDRESS]) { 1269 if (tb[IFLA_ADDRESS]) {
1266 spin_lock_bh(&br->lock); 1270 spin_lock_bh(&br->lock);
1267 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); 1271 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
1268 spin_unlock_bh(&br->lock); 1272 spin_unlock_bh(&br->lock);
1269 } 1273 }
1270 1274
1271 err = register_netdevice(dev);
1272 if (err)
1273 return err;
1274
1275 err = br_changelink(dev, tb, data, extack); 1275 err = br_changelink(dev, tb, data, extack);
1276 if (err) 1276 if (err)
1277 unregister_netdevice(dev); 1277 br_dev_delete(dev, NULL);
1278
1278 return err; 1279 return err;
1279} 1280}
1280 1281
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 2d38b6e34203..e0adcd123f48 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
334 mutex_lock(&caifdevs->lock); 334 mutex_lock(&caifdevs->lock);
335 list_add_rcu(&caifd->list, &caifdevs->list); 335 list_add_rcu(&caifd->list, &caifdevs->list);
336 336
337 strncpy(caifd->layer.name, dev->name, 337 strlcpy(caifd->layer.name, dev->name,
338 sizeof(caifd->layer.name) - 1); 338 sizeof(caifd->layer.name));
339 caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
340 caifd->layer.transmit = transmit; 339 caifd->layer.transmit = transmit;
341 cfcnfg_add_phy_layer(cfg, 340 cfcnfg_add_phy_layer(cfg,
342 dev, 341 dev,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 632d5a416d97..64048cec41e0 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,11 +934,11 @@ static int caif_release(struct socket *sock)
934} 934}
935 935
936/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ 936/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
937static unsigned int caif_poll(struct file *file, 937static __poll_t caif_poll(struct file *file,
938 struct socket *sock, poll_table *wait) 938 struct socket *sock, poll_table *wait)
939{ 939{
940 struct sock *sk = sock->sk; 940 struct sock *sk = sock->sk;
941 unsigned int mask; 941 __poll_t mask;
942 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 942 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
943 943
944 sock_poll_wait(file, sk_sleep(sk), wait); 944 sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 5cd44f001f64..1a082a946045 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
176 dev_add_pack(&caif_usb_type); 176 dev_add_pack(&caif_usb_type);
177 pack_added = true; 177 pack_added = true;
178 178
179 strncpy(layer->name, dev->name, 179 strlcpy(layer->name, dev->name, sizeof(layer->name));
180 sizeof(layer->name) - 1);
181 layer->name[sizeof(layer->name) - 1] = 0;
182 180
183 return 0; 181 return 0;
184} 182}
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 273cb07f57d8..8f00bea093b9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
268 case CAIFPROTO_RFM: 268 case CAIFPROTO_RFM:
269 l->linktype = CFCTRL_SRV_RFM; 269 l->linktype = CFCTRL_SRV_RFM;
270 l->u.datagram.connid = s->sockaddr.u.rfm.connection_id; 270 l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
271 strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume, 271 strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
272 sizeof(l->u.rfm.volume)-1); 272 sizeof(l->u.rfm.volume));
273 l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
274 break; 273 break;
275 case CAIFPROTO_UTIL: 274 case CAIFPROTO_UTIL:
276 l->linktype = CFCTRL_SRV_UTIL; 275 l->linktype = CFCTRL_SRV_UTIL;
277 l->endpoint = 0x00; 276 l->endpoint = 0x00;
278 l->chtype = 0x00; 277 l->chtype = 0x00;
279 strncpy(l->u.utility.name, s->sockaddr.u.util.service, 278 strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
280 sizeof(l->u.utility.name)-1); 279 sizeof(l->u.utility.name));
281 l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
282 caif_assert(sizeof(l->u.utility.name) > 10); 280 caif_assert(sizeof(l->u.utility.name) > 10);
283 l->u.utility.paramlen = s->param.size; 281 l->u.utility.paramlen = s->param.size;
284 if (l->u.utility.paramlen > sizeof(l->u.utility.params)) 282 if (l->u.utility.paramlen > sizeof(l->u.utility.params))
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index f5afda1abc76..655ed7032150 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
258 tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs); 258 tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
259 cfpkt_add_body(pkt, &tmp16, 2); 259 cfpkt_add_body(pkt, &tmp16, 2);
260 memset(utility_name, 0, sizeof(utility_name)); 260 memset(utility_name, 0, sizeof(utility_name));
261 strncpy(utility_name, param->u.utility.name, 261 strlcpy(utility_name, param->u.utility.name,
262 UTILITY_NAME_LENGTH - 1); 262 UTILITY_NAME_LENGTH);
263 cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH); 263 cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
264 tmp8 = param->u.utility.paramlen; 264 tmp8 = param->u.utility.paramlen;
265 cfpkt_add_body(pkt, &tmp8, 1); 265 cfpkt_add_body(pkt, &tmp8, 1);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 003b2d6d655f..4d7f988a3130 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -721,20 +721,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
721{ 721{
722 struct canfd_frame *cfd = (struct canfd_frame *)skb->data; 722 struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
723 723
724 if (WARN_ONCE(dev->type != ARPHRD_CAN || 724 if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
725 skb->len != CAN_MTU || 725 cfd->len > CAN_MAX_DLEN)) {
726 cfd->len > CAN_MAX_DLEN, 726 pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
727 "PF_CAN: dropped non conform CAN skbuf: " 727 dev->type, skb->len, cfd->len);
728 "dev type %d, len %d, datalen %d\n", 728 kfree_skb(skb);
729 dev->type, skb->len, cfd->len)) 729 return NET_RX_DROP;
730 goto drop; 730 }
731 731
732 can_receive(skb, dev); 732 can_receive(skb, dev);
733 return NET_RX_SUCCESS; 733 return NET_RX_SUCCESS;
734
735drop:
736 kfree_skb(skb);
737 return NET_RX_DROP;
738} 734}
739 735
740static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, 736static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -742,20 +738,16 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
742{ 738{
743 struct canfd_frame *cfd = (struct canfd_frame *)skb->data; 739 struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
744 740
745 if (WARN_ONCE(dev->type != ARPHRD_CAN || 741 if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
746 skb->len != CANFD_MTU || 742 cfd->len > CANFD_MAX_DLEN)) {
747 cfd->len > CANFD_MAX_DLEN, 743 pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
748 "PF_CAN: dropped non conform CAN FD skbuf: " 744 dev->type, skb->len, cfd->len);
749 "dev type %d, len %d, datalen %d\n", 745 kfree_skb(skb);
750 dev->type, skb->len, cfd->len)) 746 return NET_RX_DROP;
751 goto drop; 747 }
752 748
753 can_receive(skb, dev); 749 can_receive(skb, dev);
754 return NET_RX_SUCCESS; 750 return NET_RX_SUCCESS;
755
756drop:
757 kfree_skb(skb);
758 return NET_RX_DROP;
759} 751}
760 752
761/* 753/*
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 522873ed120b..b7d9293940b5 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -72,12 +72,10 @@ static inline int connection_based(struct sock *sk)
72static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync, 72static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
73 void *key) 73 void *key)
74{ 74{
75 unsigned long bits = (unsigned long)key;
76
77 /* 75 /*
78 * Avoid a wakeup if event not interesting for us 76 * Avoid a wakeup if event not interesting for us
79 */ 77 */
80 if (bits && !(bits & (POLLIN | POLLERR))) 78 if (key && !(key_to_poll(key) & (POLLIN | POLLERR)))
81 return 0; 79 return 0;
82 return autoremove_wake_function(wait, mode, sync, key); 80 return autoremove_wake_function(wait, mode, sync, key);
83} 81}
@@ -833,11 +831,11 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
833 * and you use a different write policy from sock_writeable() 831 * and you use a different write policy from sock_writeable()
834 * then please supply your own write_space callback. 832 * then please supply your own write_space callback.
835 */ 833 */
836unsigned int datagram_poll(struct file *file, struct socket *sock, 834__poll_t datagram_poll(struct file *file, struct socket *sock,
837 poll_table *wait) 835 poll_table *wait)
838{ 836{
839 struct sock *sk = sock->sk; 837 struct sock *sk = sock->sk;
840 unsigned int mask; 838 __poll_t mask;
841 839
842 sock_poll_wait(file, sk_sleep(sk), wait); 840 sock_poll_wait(file, sk_sleep(sk), wait);
843 mask = 0; 841 mask = 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 07ed21d64f92..613fb4066be7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1106 * when the name is long and there isn't enough space left 1106 * when the name is long and there isn't enough space left
1107 * for the digits, or if all bits are used. 1107 * for the digits, or if all bits are used.
1108 */ 1108 */
1109 return p ? -ENFILE : -EEXIST; 1109 return -ENFILE;
1110} 1110}
1111 1111
1112static int dev_alloc_name_ns(struct net *net, 1112static int dev_alloc_name_ns(struct net *net,
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
1146int dev_get_valid_name(struct net *net, struct net_device *dev, 1146int dev_get_valid_name(struct net *net, struct net_device *dev,
1147 const char *name) 1147 const char *name)
1148{ 1148{
1149 return dev_alloc_name_ns(net, dev, name); 1149 BUG_ON(!net);
1150
1151 if (!dev_valid_name(name))
1152 return -EINVAL;
1153
1154 if (strchr(name, '%'))
1155 return dev_alloc_name_ns(net, dev, name);
1156 else if (__dev_get_by_name(net, name))
1157 return -EEXIST;
1158 else if (dev->name != name)
1159 strlcpy(dev->name, name, IFNAMSIZ);
1160
1161 return 0;
1150} 1162}
1151EXPORT_SYMBOL(dev_get_valid_name); 1163EXPORT_SYMBOL(dev_get_valid_name);
1152 1164
@@ -3139,10 +3151,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
3139 hdr_len = skb_transport_header(skb) - skb_mac_header(skb); 3151 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
3140 3152
3141 /* + transport layer */ 3153 /* + transport layer */
3142 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) 3154 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
3143 hdr_len += tcp_hdrlen(skb); 3155 const struct tcphdr *th;
3144 else 3156 struct tcphdr _tcphdr;
3145 hdr_len += sizeof(struct udphdr); 3157
3158 th = skb_header_pointer(skb, skb_transport_offset(skb),
3159 sizeof(_tcphdr), &_tcphdr);
3160 if (likely(th))
3161 hdr_len += __tcp_hdrlen(th);
3162 } else {
3163 struct udphdr _udphdr;
3164
3165 if (skb_header_pointer(skb, skb_transport_offset(skb),
3166 sizeof(_udphdr), &_udphdr))
3167 hdr_len += sizeof(struct udphdr);
3168 }
3146 3169
3147 if (shinfo->gso_type & SKB_GSO_DODGY) 3170 if (shinfo->gso_type & SKB_GSO_DODGY)
3148 gso_segs = DIV_ROUND_UP(skb->len - hdr_len, 3171 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
@@ -3904,7 +3927,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3904 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, 3927 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
3905 troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) 3928 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
3906 goto do_drop; 3929 goto do_drop;
3907 if (troom > 0 && __skb_linearize(skb)) 3930 if (skb_linearize(skb))
3908 goto do_drop; 3931 goto do_drop;
3909 } 3932 }
3910 3933
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f8fcf450a36e..8225416911ae 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
770 return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); 770 return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
771} 771}
772 772
773static void
774warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
775{
776 char name[sizeof(current->comm)];
777
778 pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
779 get_task_comm(name, current), details);
780}
781
782/* Query device for its ethtool_cmd settings. 773/* Query device for its ethtool_cmd settings.
783 * 774 *
784 * Backward compatibility note: for compatibility with legacy ethtool, 775 * Backward compatibility note: for compatibility with legacy ethtool,
@@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
805 &link_ksettings); 796 &link_ksettings);
806 if (err < 0) 797 if (err < 0)
807 return err; 798 return err;
808 if (!convert_link_ksettings_to_legacy_settings(&cmd, 799 convert_link_ksettings_to_legacy_settings(&cmd,
809 &link_ksettings)) 800 &link_ksettings);
810 warn_incomplete_ethtool_legacy_settings_conversion(
811 "link modes are only partially reported");
812 801
813 /* send a sensible cmd tag back to user */ 802 /* send a sensible cmd tag back to user */
814 cmd.cmd = ETHTOOL_GSET; 803 cmd.cmd = ETHTOOL_GSET;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..1c0eb436671f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -458,6 +458,10 @@ do_pass:
458 convert_bpf_extensions(fp, &insn)) 458 convert_bpf_extensions(fp, &insn))
459 break; 459 break;
460 460
461 if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
462 fp->code == (BPF_ALU | BPF_MOD | BPF_X))
463 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
464
461 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); 465 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
462 break; 466 break;
463 467
@@ -1054,11 +1058,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1054 */ 1058 */
1055 goto out_err_free; 1059 goto out_err_free;
1056 1060
1057 /* We are guaranteed to never error here with cBPF to eBPF
1058 * transitions, since there's no issue with type compatibility
1059 * checks on program arrays.
1060 */
1061 fp = bpf_prog_select_runtime(fp, &err); 1061 fp = bpf_prog_select_runtime(fp, &err);
1062 if (err)
1063 goto out_err_free;
1062 1064
1063 kfree(old_prog); 1065 kfree(old_prog);
1064 return fp; 1066 return fp;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15ce30063765..544bddf08e13 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -976,8 +976,8 @@ ip_proto_again:
976out_good: 976out_good:
977 ret = true; 977 ret = true;
978 978
979 key_control->thoff = (u16)nhoff;
980out: 979out:
980 key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
981 key_basic->n_proto = proto; 981 key_basic->n_proto = proto;
982 key_basic->ip_proto = ip_proto; 982 key_basic->ip_proto = ip_proto;
983 983
@@ -985,7 +985,6 @@ out:
985 985
986out_bad: 986out_bad:
987 ret = false; 987 ret = false;
988 key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
989 goto out; 988 goto out;
990} 989}
991EXPORT_SYMBOL(__skb_flow_dissect); 990EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d1f5fe986edd..7f831711b6e0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -532,7 +532,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
532 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) 532 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
533 nht = neigh_hash_grow(tbl, nht->hash_shift + 1); 533 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
534 534
535 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); 535 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
536 536
537 if (n->parms->dead) { 537 if (n->parms->dead) {
538 rc = ERR_PTR(-EINVAL); 538 rc = ERR_PTR(-EINVAL);
@@ -544,7 +544,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
544 n1 != NULL; 544 n1 != NULL;
545 n1 = rcu_dereference_protected(n1->next, 545 n1 = rcu_dereference_protected(n1->next,
546 lockdep_is_held(&tbl->lock))) { 546 lockdep_is_held(&tbl->lock))) {
547 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { 547 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
548 if (want_ref) 548 if (want_ref)
549 neigh_hold(n1); 549 neigh_hold(n1);
550 rc = n1; 550 rc = n1;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..60a71be75aea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
267 spin_lock_bh(&net->nsid_lock); 267 spin_lock_bh(&net->nsid_lock);
268 peer = idr_find(&net->netns_ids, id); 268 peer = idr_find(&net->netns_ids, id);
269 if (peer) 269 if (peer)
270 get_net(peer); 270 peer = maybe_get_net(peer);
271 spin_unlock_bh(&net->nsid_lock); 271 spin_unlock_bh(&net->nsid_lock);
272 rcu_read_unlock(); 272 rcu_read_unlock();
273 273
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 1c4810919a0a..b9057478d69c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -14,7 +14,6 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/module.h>
18#include <linux/string.h> 17#include <linux/string.h>
19#include <linux/errno.h> 18#include <linux/errno.h>
20#include <linux/skbuff.h> 19#include <linux/skbuff.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8..778d7f03404a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev,
1681 return false; 1681 return false;
1682} 1682}
1683 1683
1684static struct net *get_target_net(struct sk_buff *skb, int netnsid) 1684static struct net *get_target_net(struct sock *sk, int netnsid)
1685{ 1685{
1686 struct net *net; 1686 struct net *net;
1687 1687
1688 net = get_net_ns_by_id(sock_net(skb->sk), netnsid); 1688 net = get_net_ns_by_id(sock_net(sk), netnsid);
1689 if (!net) 1689 if (!net)
1690 return ERR_PTR(-EINVAL); 1690 return ERR_PTR(-EINVAL);
1691 1691
1692 /* For now, the caller is required to have CAP_NET_ADMIN in 1692 /* For now, the caller is required to have CAP_NET_ADMIN in
1693 * the user namespace owning the target net ns. 1693 * the user namespace owning the target net ns.
1694 */ 1694 */
1695 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { 1695 if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
1696 put_net(net); 1696 put_net(net);
1697 return ERR_PTR(-EACCES); 1697 return ERR_PTR(-EACCES);
1698 } 1698 }
@@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1733 ifla_policy, NULL) >= 0) { 1733 ifla_policy, NULL) >= 0) {
1734 if (tb[IFLA_IF_NETNSID]) { 1734 if (tb[IFLA_IF_NETNSID]) {
1735 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); 1735 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
1736 tgt_net = get_target_net(skb, netnsid); 1736 tgt_net = get_target_net(skb->sk, netnsid);
1737 if (IS_ERR(tgt_net)) { 1737 if (IS_ERR(tgt_net)) {
1738 tgt_net = net; 1738 tgt_net = net;
1739 netnsid = -1; 1739 netnsid = -1;
@@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2883 2883
2884 if (tb[IFLA_IF_NETNSID]) { 2884 if (tb[IFLA_IF_NETNSID]) {
2885 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); 2885 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
2886 tgt_net = get_target_net(skb, netnsid); 2886 tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
2887 if (IS_ERR(tgt_net)) 2887 if (IS_ERR(tgt_net))
2888 return PTR_ERR(tgt_net); 2888 return PTR_ERR(tgt_net);
2889 } 2889 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b0ff396fa9d..08f574081315 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1177 int i, new_frags; 1177 int i, new_frags;
1178 u32 d_off; 1178 u32 d_off;
1179 1179
1180 if (!num_frags)
1181 return 0;
1182
1183 if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) 1180 if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
1184 return -EINVAL; 1181 return -EINVAL;
1185 1182
1183 if (!num_frags)
1184 goto release;
1185
1186 new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; 1186 new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1187 for (i = 0; i < new_frags; i++) { 1187 for (i = 0; i < new_frags; i++) {
1188 page = alloc_page(gfp_mask); 1188 page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); 1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
1239 skb_shinfo(skb)->nr_frags = new_frags; 1239 skb_shinfo(skb)->nr_frags = new_frags;
1240 1240
1241release:
1241 skb_zcopy_clear(skb, false); 1242 skb_zcopy_clear(skb, false);
1242 return 0; 1243 return 0;
1243} 1244}
@@ -3654,8 +3655,6 @@ normal:
3654 3655
3655 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & 3656 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
3656 SKBTX_SHARED_FRAG; 3657 SKBTX_SHARED_FRAG;
3657 if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
3658 goto err;
3659 3658
3660 while (pos < offset + len) { 3659 while (pos < offset + len) {
3661 if (i >= nfrags) { 3660 if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
3681 3680
3682 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) 3681 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
3683 goto err; 3682 goto err;
3683 if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
3684 goto err;
3684 3685
3685 *nskb_frag = *frag; 3686 *nskb_frag = *frag;
3686 __skb_frag_ref(nskb_frag); 3687 __skb_frag_ref(nskb_frag);
@@ -4293,7 +4294,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
4293 struct sock *sk = skb->sk; 4294 struct sock *sk = skb->sk;
4294 4295
4295 if (!skb_may_tx_timestamp(sk, false)) 4296 if (!skb_may_tx_timestamp(sk, false))
4296 return; 4297 goto err;
4297 4298
4298 /* Take a reference to prevent skb_orphan() from freeing the socket, 4299 /* Take a reference to prevent skb_orphan() from freeing the socket,
4299 * but only if the socket refcount is not zero. 4300 * but only if the socket refcount is not zero.
@@ -4302,7 +4303,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
4302 *skb_hwtstamps(skb) = *hwtstamps; 4303 *skb_hwtstamps(skb) = *hwtstamps;
4303 __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); 4304 __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
4304 sock_put(sk); 4305 sock_put(sk);
4306 return;
4305 } 4307 }
4308
4309err:
4310 kfree_skb(skb);
4306} 4311}
4307EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); 4312EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
4308 4313
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f17412..1211159718ad 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2496,7 +2496,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2496} 2496}
2497EXPORT_SYMBOL(sock_no_getname); 2497EXPORT_SYMBOL(sock_no_getname);
2498 2498
2499unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) 2499__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2500{ 2500{
2501 return 0; 2501 return 0;
2502} 2502}
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 217f4e3b82f6..146b50e30659 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
288 case SKNLGRP_INET6_UDP_DESTROY: 288 case SKNLGRP_INET6_UDP_DESTROY:
289 if (!sock_diag_handlers[AF_INET6]) 289 if (!sock_diag_handlers[AF_INET6])
290 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, 290 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
291 NETLINK_SOCK_DIAG, AF_INET); 291 NETLINK_SOCK_DIAG, AF_INET6);
292 break; 292 break;
293 } 293 }
294 return 0; 294 return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcc..a47ad6cd41c0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
325 .data = &bpf_jit_enable, 325 .data = &bpf_jit_enable,
326 .maxlen = sizeof(int), 326 .maxlen = sizeof(int),
327 .mode = 0644, 327 .mode = 0644,
328#ifndef CONFIG_BPF_JIT_ALWAYS_ON
328 .proc_handler = proc_dointvec 329 .proc_handler = proc_dointvec
330#else
331 .proc_handler = proc_dointvec_minmax,
332 .extra1 = &one,
333 .extra2 = &one,
334#endif
329 }, 335 },
330# ifdef CONFIG_HAVE_EBPF_JIT 336# ifdef CONFIG_HAVE_EBPF_JIT
331 { 337 {
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 1c75cd1255f6..92d016e87816 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t)
140 140
141 ccid2_pr_debug("RTO_EXPIRE\n"); 141 ccid2_pr_debug("RTO_EXPIRE\n");
142 142
143 if (sk->sk_state == DCCP_CLOSED)
144 goto out;
145
143 /* back-off timer */ 146 /* back-off timer */
144 hc->tx_rto <<= 1; 147 hc->tx_rto <<= 1;
145 if (hc->tx_rto > DCCP_RTO_MAX) 148 if (hc->tx_rto > DCCP_RTO_MAX)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0c55ffb859bf..f91e3816806b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,7 +316,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
316 int flags, int *addr_len); 316 int flags, int *addr_len);
317void dccp_shutdown(struct sock *sk, int how); 317void dccp_shutdown(struct sock *sk, int how);
318int inet_dccp_listen(struct socket *sock, int backlog); 318int inet_dccp_listen(struct socket *sock, int backlog);
319unsigned int dccp_poll(struct file *file, struct socket *sock, 319__poll_t dccp_poll(struct file *file, struct socket *sock,
320 poll_table *wait); 320 poll_table *wait);
321int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); 321int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
322void dccp_req_err(struct sock *sk, u64 seq); 322void dccp_req_err(struct sock *sk, u64 seq);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index abd07a443219..178bb9833311 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
57 if (state == DCCP_TIME_WAIT) 57 if (state == DCCP_TIME_WAIT)
58 timeo = DCCP_TIMEWAIT_LEN; 58 timeo = DCCP_TIMEWAIT_LEN;
59 59
60 /* tw_timer is pinned, so we need to make sure BH are disabled
61 * in following section, otherwise timer handler could run before
62 * we complete the initialization.
63 */
64 local_bh_disable();
60 inet_twsk_schedule(tw, timeo); 65 inet_twsk_schedule(tw, timeo);
61 /* Linkage updates. */ 66 /* Linkage updates. */
62 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); 67 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
63 inet_twsk_put(tw); 68 inet_twsk_put(tw);
69 local_bh_enable();
64 } else { 70 } else {
65 /* Sorry, if we're out of memory, just CLOSE this 71 /* Sorry, if we're out of memory, just CLOSE this
66 * socket up. We've got bigger problems than 72 * socket up. We've got bigger problems than
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168fcc06a..8b8db3d481bd 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags)
259{ 259{
260 struct inet_connection_sock *icsk = inet_csk(sk); 260 struct inet_connection_sock *icsk = inet_csk(sk);
261 struct inet_sock *inet = inet_sk(sk); 261 struct inet_sock *inet = inet_sk(sk);
262 struct dccp_sock *dp = dccp_sk(sk);
262 int err = 0; 263 int err = 0;
263 const int old_state = sk->sk_state; 264 const int old_state = sk->sk_state;
264 265
@@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags)
278 sk->sk_err = ECONNRESET; 279 sk->sk_err = ECONNRESET;
279 280
280 dccp_clear_xmit_timers(sk); 281 dccp_clear_xmit_timers(sk);
282 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
283 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
284 dp->dccps_hc_rx_ccid = NULL;
285 dp->dccps_hc_tx_ccid = NULL;
281 286
282 __skb_queue_purge(&sk->sk_receive_queue); 287 __skb_queue_purge(&sk->sk_receive_queue);
283 __skb_queue_purge(&sk->sk_write_queue); 288 __skb_queue_purge(&sk->sk_write_queue);
@@ -313,10 +318,10 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
313 * take care of normal races (between the test and the event) and we don't 318 * take care of normal races (between the test and the event) and we don't
314 * go look at any of the socket buffers directly. 319 * go look at any of the socket buffers directly.
315 */ 320 */
316unsigned int dccp_poll(struct file *file, struct socket *sock, 321__poll_t dccp_poll(struct file *file, struct socket *sock,
317 poll_table *wait) 322 poll_table *wait)
318{ 323{
319 unsigned int mask; 324 __poll_t mask;
320 struct sock *sk = sock->sk; 325 struct sock *sk = sock->sk;
321 326
322 sock_poll_wait(file, sk_sleep(sk), wait); 327 sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 518cea17b811..9c2dde819817 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1209,11 +1209,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
1209} 1209}
1210 1210
1211 1211
1212static unsigned int dn_poll(struct file *file, struct socket *sock, poll_table *wait) 1212static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
1213{ 1213{
1214 struct sock *sk = sock->sk; 1214 struct sock *sk = sock->sk;
1215 struct dn_scp *scp = DN_SK(sk); 1215 struct dn_scp *scp = DN_SK(sk);
1216 int mask = datagram_poll(file, sock, wait); 1216 __poll_t mask = datagram_poll(file, sock, wait);
1217 1217
1218 if (!skb_queue_empty(&scp->other_receive_queue)) 1218 if (!skb_queue_empty(&scp->other_receive_queue))
1219 mask |= POLLRDBAND; 1219 mask |= POLLRDBAND;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 44e3fb7dec8c..1e287420ff49 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -51,9 +51,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
51 INIT_LIST_HEAD(&dst->list); 51 INIT_LIST_HEAD(&dst->list);
52 list_add_tail(&dsa_tree_list, &dst->list); 52 list_add_tail(&dsa_tree_list, &dst->list);
53 53
54 /* Initialize the reference counter to the number of switches, not 1 */
55 kref_init(&dst->refcount); 54 kref_init(&dst->refcount);
56 refcount_set(&dst->refcount.refcount, 0);
57 55
58 return dst; 56 return dst;
59} 57}
@@ -64,20 +62,23 @@ static void dsa_tree_free(struct dsa_switch_tree *dst)
64 kfree(dst); 62 kfree(dst);
65} 63}
66 64
67static struct dsa_switch_tree *dsa_tree_touch(int index) 65static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst)
68{ 66{
69 struct dsa_switch_tree *dst; 67 if (dst)
70 68 kref_get(&dst->refcount);
71 dst = dsa_tree_find(index);
72 if (!dst)
73 dst = dsa_tree_alloc(index);
74 69
75 return dst; 70 return dst;
76} 71}
77 72
78static void dsa_tree_get(struct dsa_switch_tree *dst) 73static struct dsa_switch_tree *dsa_tree_touch(int index)
79{ 74{
80 kref_get(&dst->refcount); 75 struct dsa_switch_tree *dst;
76
77 dst = dsa_tree_find(index);
78 if (dst)
79 return dsa_tree_get(dst);
80 else
81 return dsa_tree_alloc(index);
81} 82}
82 83
83static void dsa_tree_release(struct kref *ref) 84static void dsa_tree_release(struct kref *ref)
@@ -91,7 +92,8 @@ static void dsa_tree_release(struct kref *ref)
91 92
92static void dsa_tree_put(struct dsa_switch_tree *dst) 93static void dsa_tree_put(struct dsa_switch_tree *dst)
93{ 94{
94 kref_put(&dst->refcount, dsa_tree_release); 95 if (dst)
96 kref_put(&dst->refcount, dsa_tree_release);
95} 97}
96 98
97static bool dsa_port_is_dsa(struct dsa_port *port) 99static bool dsa_port_is_dsa(struct dsa_port *port)
@@ -765,6 +767,7 @@ int dsa_register_switch(struct dsa_switch *ds)
765 767
766 mutex_lock(&dsa2_mutex); 768 mutex_lock(&dsa2_mutex);
767 err = dsa_switch_probe(ds); 769 err = dsa_switch_probe(ds);
770 dsa_tree_put(ds->dst);
768 mutex_unlock(&dsa2_mutex); 771 mutex_unlock(&dsa2_mutex);
769 772
770 return err; 773 return err;
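
Note on the dsa2.c hunk above: the reordered helpers make dsa_tree_get() and dsa_tree_put() tolerate a NULL tree, and dsa_tree_touch() either takes a reference on an existing tree or allocates a new one whose kref starts at 1, instead of forcing the freshly initialized kref back to 0. A minimal userspace sketch of that get-or-allocate pattern, with a plain atomic counter standing in for struct kref and all names invented for illustration:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct tree {
        int index;
        atomic_int refcount;        /* stands in for struct kref */
        struct tree *next;          /* toy registry instead of a list_head */
    };

    static struct tree *registry;   /* single-threaded toy registry */

    static struct tree *tree_find(int index)
    {
        for (struct tree *t = registry; t; t = t->next)
            if (t->index == index)
                return t;
        return NULL;
    }

    static struct tree *tree_get(struct tree *t)
    {
        if (t)                      /* NULL-tolerant, like dsa_tree_get() */
            atomic_fetch_add(&t->refcount, 1);
        return t;
    }

    static void tree_put(struct tree *t)
    {
        if (t && atomic_fetch_sub(&t->refcount, 1) == 1) {
            /* last reference dropped: unlink and free */
            struct tree **pp = &registry;

            while (*pp && *pp != t)
                pp = &(*pp)->next;
            if (*pp)
                *pp = t->next;
            free(t);
        }
    }

    static struct tree *tree_touch(int index)
    {
        struct tree *t = tree_find(index);

        if (t)
            return tree_get(t);     /* existing tree: take a reference */

        t = calloc(1, sizeof(*t));  /* new tree: born with one reference */
        if (!t)
            return NULL;
        t->index = index;
        atomic_init(&t->refcount, 1);
        t->next = registry;
        registry = t;
        return t;
    }

    int main(void)
    {
        struct tree *a = tree_touch(0);   /* allocates, refcount 1 */
        struct tree *b = tree_touch(0);   /* same tree, refcount 2 */

        printf("same tree: %d\n", a == b);
        tree_put(b);
        tree_put(a);                      /* last put frees the tree */
        return 0;
    }

The NULL checks in tree_get()/tree_put() mirror why dsa_register_switch() can unconditionally call dsa_tree_put(ds->dst) even when probing failed before a tree was attached.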
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6e7a642493b..a95a55f79137 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -16,7 +16,6 @@
16#include <linux/of_net.h> 16#include <linux/of_net.h>
17#include <linux/of_mdio.h> 17#include <linux/of_mdio.h>
18#include <linux/mdio.h> 18#include <linux/mdio.h>
19#include <linux/list.h>
20#include <net/rtnetlink.h> 19#include <net/rtnetlink.h>
21#include <net/pkt_cls.h> 20#include <net/pkt_cls.h>
22#include <net/tc_act/tc_mirred.h> 21#include <net/tc_act/tc_mirred.h>
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a8d7c5a9fb05..6c231b43974d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
223 223
224static int arp_constructor(struct neighbour *neigh) 224static int arp_constructor(struct neighbour *neigh)
225{ 225{
226 __be32 addr = *(__be32 *)neigh->primary_key; 226 __be32 addr;
227 struct net_device *dev = neigh->dev; 227 struct net_device *dev = neigh->dev;
228 struct in_device *in_dev; 228 struct in_device *in_dev;
229 struct neigh_parms *parms; 229 struct neigh_parms *parms;
230 u32 inaddr_any = INADDR_ANY;
230 231
232 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
233 memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
234
235 addr = *(__be32 *)neigh->primary_key;
231 rcu_read_lock(); 236 rcu_read_lock();
232 in_dev = __in_dev_get_rcu(dev); 237 in_dev = __in_dev_get_rcu(dev);
233 if (!in_dev) { 238 if (!in_dev) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a4573bccd6da..7a93359fbc72 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1428,7 +1428,7 @@ skip:
1428 1428
1429static bool inetdev_valid_mtu(unsigned int mtu) 1429static bool inetdev_valid_mtu(unsigned int mtu)
1430{ 1430{
1431 return mtu >= 68; 1431 return mtu >= IPV4_MIN_MTU;
1432} 1432}
1433 1433
1434static void inetdev_send_gratuitous_arp(struct net_device *dev, 1434static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d57aa64fa7c7..61fe6e4d23fc 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -981,6 +981,7 @@ static int esp_init_state(struct xfrm_state *x)
981 981
982 switch (encap->encap_type) { 982 switch (encap->encap_type) {
983 default: 983 default:
984 err = -EINVAL;
984 goto error; 985 goto error;
985 case UDP_ENCAP_ESPINUDP: 986 case UDP_ENCAP_ESPINUDP:
986 x->props.header_len += sizeof(struct udphdr); 987 x->props.header_len += sizeof(struct udphdr);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index f8b918c766b0..29b333a62ab0 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -38,7 +38,8 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
38 __be32 spi; 38 __be32 spi;
39 int err; 39 int err;
40 40
41 skb_pull(skb, offset); 41 if (!pskb_pull(skb, offset))
42 return NULL;
42 43
43 if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) 44 if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
44 goto out; 45 goto out;
@@ -121,6 +122,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
121 if (!xo) 122 if (!xo)
122 goto out; 123 goto out;
123 124
125 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
126 goto out;
127
124 seq = xo->seq.low; 128 seq = xo->seq.low;
125 129
126 x = skb->sp->xvec[skb->sp->len - 1]; 130 x = skb->sp->xvec[skb->sp->len - 1];
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..08259d078b1c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
1298 1298
1299static void ip_fib_net_exit(struct net *net) 1299static void ip_fib_net_exit(struct net *net)
1300{ 1300{
1301 unsigned int i; 1301 int i;
1302 1302
1303 rtnl_lock(); 1303 rtnl_lock();
1304#ifdef CONFIG_IP_MULTIPLE_TABLES 1304#ifdef CONFIG_IP_MULTIPLE_TABLES
1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL); 1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL); 1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
1307#endif 1307#endif
1308 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1308 /* Destroy the tables in reverse order to guarantee that the
1309 * local table, ID 255, is destroyed before the main table, ID
1310 * 254. This is necessary as the local table may contain
1311 * references to data contained in the main table.
1312 */
1313 for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
1309 struct hlist_head *head = &net->ipv4.fib_table_hash[i]; 1314 struct hlist_head *head = &net->ipv4.fib_table_hash[i];
1310 struct hlist_node *tmp; 1315 struct hlist_node *tmp;
1311 struct fib_table *tb; 1316 struct fib_table *tb;
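
Two details of the ip_fib_net_exit() hunk are worth spelling out: the loop now counts down so that, assuming the usual ID-to-bucket mapping where table 255 (local) sits in a higher bucket than 254 (main), the local table is torn down first; and the counter changes from unsigned int to int because a downward loop guarded by "i >= 0" never terminates with an unsigned counter. A tiny standalone illustration of both points (bucket count and prints are illustrative only):

    #include <stdio.h>

    #define HASHSZ 256

    int main(void)
    {
        /* With an unsigned counter, "i >= 0" is always true: decrementing
         * past 0 wraps to UINT_MAX.  A signed int makes the downward loop
         * terminate as intended.
         */
        for (int i = HASHSZ - 1; i >= 0; i--) {
            /* destroy bucket i; higher-numbered tables go first, so the
             * local table (255) is gone before the main table (254) that
             * it may reference.
             */
            if (i == HASHSZ - 1 || i == 0)
                printf("destroying bucket %d\n", i);
        }
        return 0;
    }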
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe..c586597da20d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
698 698
699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
700 int type = nla_type(nla); 700 int type = nla_type(nla);
701 u32 val; 701 u32 fi_val, val;
702 702
703 if (!type) 703 if (!type)
704 continue; 704 continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
715 val = nla_get_u32(nla); 715 val = nla_get_u32(nla);
716 } 716 }
717 717
718 if (fi->fib_metrics->metrics[type - 1] != val) 718 fi_val = fi->fib_metrics->metrics[type - 1];
719 if (type == RTAX_FEATURES)
720 fi_val &= ~DST_FEATURE_ECN_CA;
721
722 if (fi_val != val)
719 return false; 723 return false;
720 } 724 }
721 725
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d1f8f302dbf3..2d49717a7421 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
89#include <linux/rtnetlink.h> 89#include <linux/rtnetlink.h>
90#include <linux/times.h> 90#include <linux/times.h>
91#include <linux/pkt_sched.h> 91#include <linux/pkt_sched.h>
92#include <linux/byteorder/generic.h>
92 93
93#include <net/net_namespace.h> 94#include <net/net_namespace.h>
94#include <net/arp.h> 95#include <net/arp.h>
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
321 return scount; 322 return scount;
322} 323}
323 324
325/* source address selection per RFC 3376 section 4.2.13 */
326static __be32 igmpv3_get_srcaddr(struct net_device *dev,
327 const struct flowi4 *fl4)
328{
329 struct in_device *in_dev = __in_dev_get_rcu(dev);
330
331 if (!in_dev)
332 return htonl(INADDR_ANY);
333
334 for_ifa(in_dev) {
335 if (fl4->saddr == ifa->ifa_local)
336 return fl4->saddr;
337 } endfor_ifa(in_dev);
338
339 return htonl(INADDR_ANY);
340}
341
324static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) 342static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
325{ 343{
326 struct sk_buff *skb; 344 struct sk_buff *skb;
@@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
368 pip->frag_off = htons(IP_DF); 386 pip->frag_off = htons(IP_DF);
369 pip->ttl = 1; 387 pip->ttl = 1;
370 pip->daddr = fl4.daddr; 388 pip->daddr = fl4.daddr;
371 pip->saddr = fl4.saddr; 389 pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
372 pip->protocol = IPPROTO_IGMP; 390 pip->protocol = IPPROTO_IGMP;
373 pip->tot_len = 0; /* filled in later */ 391 pip->tot_len = 0; /* filled in later */
374 ip_select_ident(net, skb, NULL); 392 ip_select_ident(net, skb, NULL);
@@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
404} 422}
405 423
406static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, 424static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
407 int type, struct igmpv3_grec **ppgr) 425 int type, struct igmpv3_grec **ppgr, unsigned int mtu)
408{ 426{
409 struct net_device *dev = pmc->interface->dev; 427 struct net_device *dev = pmc->interface->dev;
410 struct igmpv3_report *pih; 428 struct igmpv3_report *pih;
411 struct igmpv3_grec *pgr; 429 struct igmpv3_grec *pgr;
412 430
413 if (!skb) 431 if (!skb) {
414 skb = igmpv3_newpack(dev, dev->mtu); 432 skb = igmpv3_newpack(dev, mtu);
415 if (!skb) 433 if (!skb)
416 return NULL; 434 return NULL;
435 }
417 pgr = skb_put(skb, sizeof(struct igmpv3_grec)); 436 pgr = skb_put(skb, sizeof(struct igmpv3_grec));
418 pgr->grec_type = type; 437 pgr->grec_type = type;
419 pgr->grec_auxwords = 0; 438 pgr->grec_auxwords = 0;
@@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
436 struct igmpv3_grec *pgr = NULL; 455 struct igmpv3_grec *pgr = NULL;
437 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; 456 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
438 int scount, stotal, first, isquery, truncate; 457 int scount, stotal, first, isquery, truncate;
458 unsigned int mtu;
439 459
440 if (pmc->multiaddr == IGMP_ALL_HOSTS) 460 if (pmc->multiaddr == IGMP_ALL_HOSTS)
441 return skb; 461 return skb;
442 if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) 462 if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
443 return skb; 463 return skb;
444 464
465 mtu = READ_ONCE(dev->mtu);
466 if (mtu < IPV4_MIN_MTU)
467 return skb;
468
445 isquery = type == IGMPV3_MODE_IS_INCLUDE || 469 isquery = type == IGMPV3_MODE_IS_INCLUDE ||
446 type == IGMPV3_MODE_IS_EXCLUDE; 470 type == IGMPV3_MODE_IS_EXCLUDE;
447 truncate = type == IGMPV3_MODE_IS_EXCLUDE || 471 truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
462 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { 486 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
463 if (skb) 487 if (skb)
464 igmpv3_sendpack(skb); 488 igmpv3_sendpack(skb);
465 skb = igmpv3_newpack(dev, dev->mtu); 489 skb = igmpv3_newpack(dev, mtu);
466 } 490 }
467 } 491 }
468 first = 1; 492 first = 1;
@@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
498 pgr->grec_nsrcs = htons(scount); 522 pgr->grec_nsrcs = htons(scount);
499 if (skb) 523 if (skb)
500 igmpv3_sendpack(skb); 524 igmpv3_sendpack(skb);
501 skb = igmpv3_newpack(dev, dev->mtu); 525 skb = igmpv3_newpack(dev, mtu);
502 first = 1; 526 first = 1;
503 scount = 0; 527 scount = 0;
504 } 528 }
505 if (first) { 529 if (first) {
506 skb = add_grhead(skb, pmc, type, &pgr); 530 skb = add_grhead(skb, pmc, type, &pgr, mtu);
507 first = 0; 531 first = 0;
508 } 532 }
509 if (!skb) 533 if (!skb)
@@ -538,7 +562,7 @@ empty_source:
538 igmpv3_sendpack(skb); 562 igmpv3_sendpack(skb);
539 skb = NULL; /* add_grhead will get a new one */ 563 skb = NULL; /* add_grhead will get a new one */
540 } 564 }
541 skb = add_grhead(skb, pmc, type, &pgr); 565 skb = add_grhead(skb, pmc, type, &pgr, mtu);
542 } 566 }
543 } 567 }
544 if (pgr) 568 if (pgr)
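
The igmp.c change above captures dev->mtu once with READ_ONCE(), rejects it if it is below IPV4_MIN_MTU, and then sizes every packet in the report against that single snapshot, so a concurrent MTU change cannot produce a report built against two different values. A hedged userspace sketch of the same snapshot-then-validate pattern, using a C11 atomic in place of READ_ONCE() and invented names:

    #include <stdatomic.h>
    #include <stdio.h>

    #define IPV4_MIN_MTU 68                       /* same floor the patch checks */

    static _Atomic unsigned int dev_mtu = 1500;   /* stands in for dev->mtu */

    /* Build the "packets" of one report against a single MTU snapshot,
     * the way add_grec() now reads dev->mtu exactly once.
     */
    static int build_report(void)
    {
        unsigned int mtu = atomic_load(&dev_mtu); /* one snapshot */

        if (mtu < IPV4_MIN_MTU)
            return -1;                            /* bail out early, as the patch does */

        for (int pkt = 0; pkt < 3; pkt++)
            printf("packet %d sized against mtu %u\n", pkt, mtu);
        return 0;
    }

    int main(void)
    {
        build_report();
        atomic_store(&dev_mtu, 60);               /* e.g. an admin shrinking the MTU */
        if (build_report() < 0)
            printf("mtu below minimum, report skipped\n");
        return 0;
    }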
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c690cd0d9b3f..b563e0c46bac 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -93,7 +93,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
93} 93}
94 94
95/* 95/*
96 * Enter the time wait state. 96 * Enter the time wait state. This is called with locally disabled BH.
97 * Essentially we whip up a timewait bucket, copy the relevant info into it 97 * Essentially we whip up a timewait bucket, copy the relevant info into it
98 * from the SK, and mess with hash chains and list linkage. 98 * from the SK, and mess with hash chains and list linkage.
99 */ 99 */
@@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
111 */ 111 */
112 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, 112 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
113 hashinfo->bhash_size)]; 113 hashinfo->bhash_size)];
114 spin_lock_bh(&bhead->lock); 114 spin_lock(&bhead->lock);
115 tw->tw_tb = icsk->icsk_bind_hash; 115 tw->tw_tb = icsk->icsk_bind_hash;
116 WARN_ON(!icsk->icsk_bind_hash); 116 WARN_ON(!icsk->icsk_bind_hash);
117 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 117 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
@@ -137,7 +137,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
137 if (__sk_nulls_del_node_init_rcu(sk)) 137 if (__sk_nulls_del_node_init_rcu(sk))
138 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 138 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
139 139
140 spin_unlock_bh(lock); 140 spin_unlock(lock);
141} 141}
142EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 142EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
143 143
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bb6239169b1a..45ffd3d045d2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
266 len = gre_hdr_len + sizeof(*ershdr); 266 len = gre_hdr_len + sizeof(*ershdr);
267 267
268 if (unlikely(!pskb_may_pull(skb, len))) 268 if (unlikely(!pskb_may_pull(skb, len)))
269 return -ENOMEM; 269 return PACKET_REJECT;
270 270
271 iph = ip_hdr(skb); 271 iph = ip_hdr(skb);
272 ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); 272 ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
1310static void ipgre_tap_setup(struct net_device *dev) 1310static void ipgre_tap_setup(struct net_device *dev)
1311{ 1311{
1312 ether_setup(dev); 1312 ether_setup(dev);
1313 dev->max_mtu = 0;
1313 dev->netdev_ops = &gre_tap_netdev_ops; 1314 dev->netdev_ops = &gre_tap_netdev_ops;
1314 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1315 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1315 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1316 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe6fee728ce4..6d21068f9b55 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
349 dev->needed_headroom = t_hlen + hlen; 349 dev->needed_headroom = t_hlen + hlen;
350 mtu -= (dev->hard_header_len + t_hlen); 350 mtu -= (dev->hard_header_len + t_hlen);
351 351
352 if (mtu < 68) 352 if (mtu < IPV4_MIN_MTU)
353 mtu = 68; 353 mtu = IPV4_MIN_MTU;
354 354
355 return mtu; 355 return mtu;
356} 356}
@@ -520,8 +520,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
520 else 520 else
521 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 521 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
522 522
523 if (skb_dst(skb)) 523 skb_dst_update_pmtu(skb, mtu);
524 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
525 524
526 if (skb->protocol == htons(ETH_P_IP)) { 525 if (skb->protocol == htons(ETH_P_IP)) {
527 if (!skb_is_gso(skb) && 526 if (!skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 949f432a5f04..51b1669334fe 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -200,7 +200,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
200 200
201 mtu = dst_mtu(dst); 201 mtu = dst_mtu(dst);
202 if (skb->len > mtu) { 202 if (skb->len > mtu) {
203 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 203 skb_dst_update_pmtu(skb, mtu);
204 if (skb->protocol == htons(ETH_P_IP)) { 204 if (skb->protocol == htons(ETH_P_IP)) {
205 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 205 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
206 htonl(mtu)); 206 htonl(mtu));
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f88221aebc9d..eb8246c39de0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -202,13 +202,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
202 202
203 local_bh_disable(); 203 local_bh_disable();
204 addend = xt_write_recseq_begin(); 204 addend = xt_write_recseq_begin();
205 private = table->private; 205 private = READ_ONCE(table->private); /* Address dependency. */
206 cpu = smp_processor_id(); 206 cpu = smp_processor_id();
207 /*
208 * Ensure we load private-> members after we've fetched the base
209 * pointer.
210 */
211 smp_read_barrier_depends();
212 table_base = private->entries; 207 table_base = private->entries;
213 jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; 208 jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
214 209
@@ -373,7 +368,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
373 if (!xt_find_jump_offset(offsets, newpos, 368 if (!xt_find_jump_offset(offsets, newpos,
374 newinfo->number)) 369 newinfo->number))
375 return 0; 370 return 0;
376 e = entry0 + newpos;
377 } else { 371 } else {
378 /* ... this is a fallthru */ 372 /* ... this is a fallthru */
379 newpos = pos + e->next_offset; 373 newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4cbe5e80f3bf..cc984d0e0c69 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -260,13 +260,8 @@ ipt_do_table(struct sk_buff *skb,
260 WARN_ON(!(table->valid_hooks & (1 << hook))); 260 WARN_ON(!(table->valid_hooks & (1 << hook)));
261 local_bh_disable(); 261 local_bh_disable();
262 addend = xt_write_recseq_begin(); 262 addend = xt_write_recseq_begin();
263 private = table->private; 263 private = READ_ONCE(table->private); /* Address dependency. */
264 cpu = smp_processor_id(); 264 cpu = smp_processor_id();
265 /*
266 * Ensure we load private-> members after we've fetched the base
267 * pointer.
268 */
269 smp_read_barrier_depends();
270 table_base = private->entries; 265 table_base = private->entries;
271 jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; 266 jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
272 267
@@ -439,7 +434,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
439 if (!xt_find_jump_offset(offsets, newpos, 434 if (!xt_find_jump_offset(offsets, newpos,
440 newinfo->number)) 435 newinfo->number))
441 return 0; 436 return 0;
442 e = entry0 + newpos;
443 } else { 437 } else {
444 /* ... this is a fallthru */ 438 /* ... this is a fallthru */
445 newpos = pos + e->next_offset; 439 newpos = pos + e->next_offset;
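
In both the arp_tables.c and ip_tables.c hunks, the plain load of table->private followed by smp_read_barrier_depends() becomes a single READ_ONCE(), which carries the address dependency needed so that later loads through the pointer observe a fully initialized structure. Expressed very roughly in portable C11 terms (this is an approximation of the kernel primitives, not their definition), the writer publishes with a release store and the reader performs one careful load before dereferencing:

    #include <stdatomic.h>
    #include <stdio.h>

    struct info {
        int entries;
    };

    static struct info table_a = { .entries = 42 };
    static _Atomic(struct info *) table_private;  /* stands in for table->private */

    /* Writer: fully initialize the structure, then publish the pointer. */
    static void publish(void)
    {
        atomic_store_explicit(&table_private, &table_a, memory_order_release);
    }

    /* Reader: one load of the pointer, then dereference through it.  This is
     * roughly the guarantee ipt_do_table() now gets from READ_ONCE(), which
     * is why the explicit dependency barrier could be dropped.
     */
    static void consume(void)
    {
        struct info *p = atomic_load_explicit(&table_private,
                                              memory_order_consume);

        if (p)
            printf("entries = %d\n", p->entries);
    }

    int main(void)
    {
        publish();
        consume();
        return 0;
    }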
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 17b4ca562944..69060e3abe85 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -813,12 +813,13 @@ static int clusterip_net_init(struct net *net)
813 813
814static void clusterip_net_exit(struct net *net) 814static void clusterip_net_exit(struct net *net)
815{ 815{
816#ifdef CONFIG_PROC_FS
817 struct clusterip_net *cn = net_generic(net, clusterip_net_id); 816 struct clusterip_net *cn = net_generic(net, clusterip_net_id);
817#ifdef CONFIG_PROC_FS
818 proc_remove(cn->procdir); 818 proc_remove(cn->procdir);
819 cn->procdir = NULL; 819 cn->procdir = NULL;
820#endif 820#endif
821 nf_unregister_net_hook(net, &cip_arp_ops); 821 nf_unregister_net_hook(net, &cip_arp_ops);
822 WARN_ON_ONCE(!list_empty(&cn->configs));
822} 823}
823 824
824static struct pernet_operations clusterip_net_ops = { 825static struct pernet_operations clusterip_net_ops = {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33b70bfd1122..5e570aa9e43b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -513,11 +513,18 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
513 int err; 513 int err;
514 struct ip_options_data opt_copy; 514 struct ip_options_data opt_copy;
515 struct raw_frag_vec rfv; 515 struct raw_frag_vec rfv;
516 int hdrincl;
516 517
517 err = -EMSGSIZE; 518 err = -EMSGSIZE;
518 if (len > 0xFFFF) 519 if (len > 0xFFFF)
519 goto out; 520 goto out;
520 521
522 /* hdrincl should be READ_ONCE(inet->hdrincl)
523 * but READ_ONCE() doesn't work with bit fields.
524 * Doing this indirectly yields the same result.
525 */
526 hdrincl = inet->hdrincl;
527 hdrincl = READ_ONCE(hdrincl);
521 /* 528 /*
522 * Check the flags. 529 * Check the flags.
523 */ 530 */
@@ -593,7 +600,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
593 /* Linux does not mangle headers on raw sockets, 600 /* Linux does not mangle headers on raw sockets,
594 * so that IP options + IP_HDRINCL is non-sense. 601 * so that IP options + IP_HDRINCL is non-sense.
595 */ 602 */
596 if (inet->hdrincl) 603 if (hdrincl)
597 goto done; 604 goto done;
598 if (ipc.opt->opt.srr) { 605 if (ipc.opt->opt.srr) {
599 if (!daddr) 606 if (!daddr)
@@ -615,12 +622,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
615 622
616 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, 623 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
617 RT_SCOPE_UNIVERSE, 624 RT_SCOPE_UNIVERSE,
618 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 625 hdrincl ? IPPROTO_RAW : sk->sk_protocol,
619 inet_sk_flowi_flags(sk) | 626 inet_sk_flowi_flags(sk) |
620 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), 627 (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
621 daddr, saddr, 0, 0, sk->sk_uid); 628 daddr, saddr, 0, 0, sk->sk_uid);
622 629
623 if (!inet->hdrincl) { 630 if (!hdrincl) {
624 rfv.msg = msg; 631 rfv.msg = msg;
625 rfv.hlen = 0; 632 rfv.hlen = 0;
626 633
@@ -645,7 +652,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
645 goto do_confirm; 652 goto do_confirm;
646back_from_confirm: 653back_from_confirm:
647 654
648 if (inet->hdrincl) 655 if (hdrincl)
649 err = raw_send_hdrinc(sk, &fl4, msg, len, 656 err = raw_send_hdrinc(sk, &fl4, msg, len,
650 &rt, msg->msg_flags, &ipc.sockc); 657 &rt, msg->msg_flags, &ipc.sockc);
651 658
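
The raw_sendmsg() hunk works around the fact that READ_ONCE() cannot be applied to a bit field: inet->hdrincl is copied into a local int once, and every later decision (routing flags, whether the caller builds the IP header) uses that snapshot rather than re-reading a field another thread may flip via setsockopt(). A small sketch of the idea, with invented structure and function names:

    #include <stdio.h>

    struct inet_opts {
        unsigned int hdrincl:1;    /* bit field: not a valid READ_ONCE() target */
        unsigned int other:31;
    };

    static void send_one(const struct inet_opts *inet)
    {
        /* Take one snapshot up front and use it for every later check,
         * instead of re-reading the bit field between the checks.
         */
        int hdrincl = inet->hdrincl;

        printf("flow protocol: %s\n",
               hdrincl ? "IPPROTO_RAW" : "socket protocol");
        printf("%s builds the IP header\n", hdrincl ? "caller" : "kernel");
    }

    int main(void)
    {
        struct inet_opts opts = { .hdrincl = 1 };

        send_one(&opts);
        return 0;
    }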
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 43b69af242e1..4e153b23bcec 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2762,6 +2762,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2762 if (err == 0 && rt->dst.error) 2762 if (err == 0 && rt->dst.error)
2763 err = -rt->dst.error; 2763 err = -rt->dst.error;
2764 } else { 2764 } else {
2765 fl4.flowi4_iif = LOOPBACK_IFINDEX;
2765 rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); 2766 rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
2766 err = 0; 2767 err = 0;
2767 if (IS_ERR(rt)) 2768 if (IS_ERR(rt))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf97317e6c97..1b38b4282cc9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -493,9 +493,9 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
493 * take care of normal races (between the test and the event) and we don't 493 * take care of normal races (between the test and the event) and we don't
494 * go look at any of the socket buffers directly. 494 * go look at any of the socket buffers directly.
495 */ 495 */
496unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) 496__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
497{ 497{
498 unsigned int mask; 498 __poll_t mask;
499 struct sock *sk = sock->sk; 499 struct sock *sk = sock->sk;
500 const struct tcp_sock *tp = tcp_sk(sk); 500 const struct tcp_sock *tp = tcp_sk(sk);
501 int state; 501 int state;
@@ -2298,6 +2298,9 @@ adjudge_to_death:
2298 tcp_send_active_reset(sk, GFP_ATOMIC); 2298 tcp_send_active_reset(sk, GFP_ATOMIC);
2299 __NET_INC_STATS(sock_net(sk), 2299 __NET_INC_STATS(sock_net(sk),
2300 LINUX_MIB_TCPABORTONMEMORY); 2300 LINUX_MIB_TCPABORTONMEMORY);
2301 } else if (!check_net(sock_net(sk))) {
2302 /* Not possible to send reset; just close */
2303 tcp_set_state(sk, TCP_CLOSE);
2301 } 2304 }
2302 } 2305 }
2303 2306
@@ -2412,6 +2415,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2412 tp->snd_cwnd_cnt = 0; 2415 tp->snd_cwnd_cnt = 0;
2413 tp->window_clamp = 0; 2416 tp->window_clamp = 0;
2414 tcp_set_ca_state(sk, TCP_CA_Open); 2417 tcp_set_ca_state(sk, TCP_CA_Open);
2418 tp->is_sack_reneg = 0;
2415 tcp_clear_retrans(tp); 2419 tcp_clear_retrans(tp);
2416 inet_csk_delack_init(sk); 2420 inet_csk_delack_init(sk);
2417 /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 2421 /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 69ee877574d0..8322f26e770e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -110,7 +110,8 @@ struct bbr {
110 u32 lt_last_lost; /* LT intvl start: tp->lost */ 110 u32 lt_last_lost; /* LT intvl start: tp->lost */
111 u32 pacing_gain:10, /* current gain for setting pacing rate */ 111 u32 pacing_gain:10, /* current gain for setting pacing rate */
112 cwnd_gain:10, /* current gain for setting cwnd */ 112 cwnd_gain:10, /* current gain for setting cwnd */
113 full_bw_cnt:3, /* number of rounds without large bw gains */ 113 full_bw_reached:1, /* reached full bw in Startup? */
114 full_bw_cnt:2, /* number of rounds without large bw gains */
114 cycle_idx:3, /* current index in pacing_gain cycle array */ 115 cycle_idx:3, /* current index in pacing_gain cycle array */
115 has_seen_rtt:1, /* have we seen an RTT sample yet? */ 116 has_seen_rtt:1, /* have we seen an RTT sample yet? */
116 unused_b:5; 117 unused_b:5;
@@ -180,7 +181,7 @@ static bool bbr_full_bw_reached(const struct sock *sk)
180{ 181{
181 const struct bbr *bbr = inet_csk_ca(sk); 182 const struct bbr *bbr = inet_csk_ca(sk);
182 183
183 return bbr->full_bw_cnt >= bbr_full_bw_cnt; 184 return bbr->full_bw_reached;
184} 185}
185 186
186/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ 187/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
@@ -717,6 +718,7 @@ static void bbr_check_full_bw_reached(struct sock *sk,
717 return; 718 return;
718 } 719 }
719 ++bbr->full_bw_cnt; 720 ++bbr->full_bw_cnt;
721 bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
720} 722}
721 723
722/* If pipe is probably full, drain the queue and then enter steady-state. */ 724/* If pipe is probably full, drain the queue and then enter steady-state. */
@@ -850,6 +852,7 @@ static void bbr_init(struct sock *sk)
850 bbr->restore_cwnd = 0; 852 bbr->restore_cwnd = 0;
851 bbr->round_start = 0; 853 bbr->round_start = 0;
852 bbr->idle_restart = 0; 854 bbr->idle_restart = 0;
855 bbr->full_bw_reached = 0;
853 bbr->full_bw = 0; 856 bbr->full_bw = 0;
854 bbr->full_bw_cnt = 0; 857 bbr->full_bw_cnt = 0;
855 bbr->cycle_mstamp = 0; 858 bbr->cycle_mstamp = 0;
@@ -871,6 +874,11 @@ static u32 bbr_sndbuf_expand(struct sock *sk)
871 */ 874 */
872static u32 bbr_undo_cwnd(struct sock *sk) 875static u32 bbr_undo_cwnd(struct sock *sk)
873{ 876{
877 struct bbr *bbr = inet_csk_ca(sk);
878
879 bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
880 bbr->full_bw_cnt = 0;
881 bbr_reset_lt_bw_sampling(sk);
874 return tcp_sk(sk)->snd_cwnd; 882 return tcp_sk(sk)->snd_cwnd;
875} 883}
876 884
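
The tcp_bbr.c hunks replace the implicit "full_bw_cnt >= threshold" test with a dedicated sticky full_bw_reached bit (letting full_bw_cnt shrink to two bits) and reset the full-pipe detector in the undo path after a spurious slowdown. A simplified standalone sketch of the sticky-flag logic, with the 25% growth threshold and sample values chosen only for illustration:

    #include <stdio.h>

    #define FULL_BW_CNT 3    /* rounds without ~25% growth before "pipe full" */

    struct bbr_state {
        unsigned int full_bw_reached:1;  /* latched once set */
        unsigned int full_bw_cnt:2;      /* two bits are enough to count to 3 */
        unsigned int full_bw;            /* best bandwidth seen at last growth */
    };

    static void check_full_bw(struct bbr_state *b, unsigned int bw)
    {
        if (b->full_bw_reached)
            return;                              /* never re-enter startup probing */

        if (bw >= b->full_bw + b->full_bw / 4) { /* still growing ~25% per round */
            b->full_bw = bw;
            b->full_bw_cnt = 0;
            return;
        }
        ++b->full_bw_cnt;
        b->full_bw_reached = b->full_bw_cnt >= FULL_BW_CNT;
    }

    int main(void)
    {
        struct bbr_state b = { 0 };
        unsigned int samples[] = { 100, 130, 132, 133, 133 };

        for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            check_full_bw(&b, samples[i]);
            printf("bw=%u full=%u cnt=%u\n",
                   samples[i], b.full_bw_reached, b.full_bw_cnt);
        }
        return 0;
    }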
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 734cfc8ff76e..45f750e85714 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
508 u32 new_sample = tp->rcv_rtt_est.rtt_us; 508 u32 new_sample = tp->rcv_rtt_est.rtt_us;
509 long m = sample; 509 long m = sample;
510 510
511 if (m == 0)
512 m = 1;
513
514 if (new_sample != 0) { 511 if (new_sample != 0) {
515 /* If we sample in larger samples in the non-timestamp 512 /* If we sample in larger samples in the non-timestamp
516 * case, we could grossly overestimate the RTT especially 513 * case, we could grossly overestimate the RTT especially
@@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
547 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) 544 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
548 return; 545 return;
549 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time); 546 delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
547 if (!delta_us)
548 delta_us = 1;
550 tcp_rcv_rtt_update(tp, delta_us, 1); 549 tcp_rcv_rtt_update(tp, delta_us, 1);
551 550
552new_measure: 551new_measure:
@@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
563 (TCP_SKB_CB(skb)->end_seq - 562 (TCP_SKB_CB(skb)->end_seq -
564 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { 563 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
565 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; 564 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
566 u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); 565 u32 delta_us;
567 566
567 if (!delta)
568 delta = 1;
569 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
568 tcp_rcv_rtt_update(tp, delta_us, 0); 570 tcp_rcv_rtt_update(tp, delta_us, 0);
569 } 571 }
570} 572}
@@ -579,6 +581,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
579 int time; 581 int time;
580 int copied; 582 int copied;
581 583
584 tcp_mstamp_refresh(tp);
582 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); 585 time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
583 if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) 586 if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
584 return; 587 return;
@@ -1941,6 +1944,8 @@ void tcp_enter_loss(struct sock *sk)
1941 if (is_reneg) { 1944 if (is_reneg) {
1942 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); 1945 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1943 tp->sacked_out = 0; 1946 tp->sacked_out = 0;
1947 /* Mark SACK reneging until we recover from this loss event. */
1948 tp->is_sack_reneg = 1;
1944 } 1949 }
1945 tcp_clear_all_retrans_hints(tp); 1950 tcp_clear_all_retrans_hints(tp);
1946 1951
@@ -2326,6 +2331,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2326 } 2331 }
2327 tp->snd_cwnd_stamp = tcp_jiffies32; 2332 tp->snd_cwnd_stamp = tcp_jiffies32;
2328 tp->undo_marker = 0; 2333 tp->undo_marker = 0;
2334 tp->rack.advanced = 1; /* Force RACK to re-exam losses */
2329} 2335}
2330 2336
2331static inline bool tcp_may_undo(const struct tcp_sock *tp) 2337static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2364,6 +2370,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
2364 return true; 2370 return true;
2365 } 2371 }
2366 tcp_set_ca_state(sk, TCP_CA_Open); 2372 tcp_set_ca_state(sk, TCP_CA_Open);
2373 tp->is_sack_reneg = 0;
2367 return false; 2374 return false;
2368} 2375}
2369 2376
@@ -2397,8 +2404,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2397 NET_INC_STATS(sock_net(sk), 2404 NET_INC_STATS(sock_net(sk),
2398 LINUX_MIB_TCPSPURIOUSRTOS); 2405 LINUX_MIB_TCPSPURIOUSRTOS);
2399 inet_csk(sk)->icsk_retransmits = 0; 2406 inet_csk(sk)->icsk_retransmits = 0;
2400 if (frto_undo || tcp_is_sack(tp)) 2407 if (frto_undo || tcp_is_sack(tp)) {
2401 tcp_set_ca_state(sk, TCP_CA_Open); 2408 tcp_set_ca_state(sk, TCP_CA_Open);
2409 tp->is_sack_reneg = 0;
2410 }
2402 return true; 2411 return true;
2403 } 2412 }
2404 return false; 2413 return false;
@@ -3495,6 +3504,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3495 struct tcp_sacktag_state sack_state; 3504 struct tcp_sacktag_state sack_state;
3496 struct rate_sample rs = { .prior_delivered = 0 }; 3505 struct rate_sample rs = { .prior_delivered = 0 };
3497 u32 prior_snd_una = tp->snd_una; 3506 u32 prior_snd_una = tp->snd_una;
3507 bool is_sack_reneg = tp->is_sack_reneg;
3498 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3508 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3499 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3509 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3500 bool is_dupack = false; 3510 bool is_dupack = false;
@@ -3611,7 +3621,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3611 3621
3612 delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ 3622 delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
3613 lost = tp->lost - lost; /* freshly marked lost */ 3623 lost = tp->lost - lost; /* freshly marked lost */
3614 tcp_rate_gen(sk, delivered, lost, sack_state.rate); 3624 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3615 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); 3625 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3616 tcp_xmit_recovery(sk, rexmit); 3626 tcp_xmit_recovery(sk, rexmit);
3617 return 1; 3627 return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c6bc0c4d19c6..94e28350f420 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
848 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 848 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
849 req->ts_recent, 849 req->ts_recent,
850 0, 850 0,
851 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 851 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
852 AF_INET), 852 AF_INET),
853 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, 853 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
854 ip_hdr(skb)->tos); 854 ip_hdr(skb)->tos);
@@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb)
1591} 1591}
1592EXPORT_SYMBOL(tcp_filter); 1592EXPORT_SYMBOL(tcp_filter);
1593 1593
1594static void tcp_v4_restore_cb(struct sk_buff *skb)
1595{
1596 memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
1597 sizeof(struct inet_skb_parm));
1598}
1599
1600static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
1601 const struct tcphdr *th)
1602{
1603 /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1604 * barrier() makes sure compiler wont play fool^Waliasing games.
1605 */
1606 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1607 sizeof(struct inet_skb_parm));
1608 barrier();
1609
1610 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1611 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1612 skb->len - th->doff * 4);
1613 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1614 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1615 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1616 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1617 TCP_SKB_CB(skb)->sacked = 0;
1618 TCP_SKB_CB(skb)->has_rxtstamp =
1619 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1620}
1621
1594/* 1622/*
1595 * From tcp_input.c 1623 * From tcp_input.c
1596 */ 1624 */
@@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
1631 1659
1632 th = (const struct tcphdr *)skb->data; 1660 th = (const struct tcphdr *)skb->data;
1633 iph = ip_hdr(skb); 1661 iph = ip_hdr(skb);
1634 /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1635 * barrier() makes sure compiler wont play fool^Waliasing games.
1636 */
1637 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1638 sizeof(struct inet_skb_parm));
1639 barrier();
1640
1641 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1642 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1643 skb->len - th->doff * 4);
1644 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1645 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1646 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1647 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1648 TCP_SKB_CB(skb)->sacked = 0;
1649 TCP_SKB_CB(skb)->has_rxtstamp =
1650 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1651
1652lookup: 1662lookup:
1653 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, 1663 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1654 th->dest, sdif, &refcounted); 1664 th->dest, sdif, &refcounted);
@@ -1679,14 +1689,19 @@ process:
1679 sock_hold(sk); 1689 sock_hold(sk);
1680 refcounted = true; 1690 refcounted = true;
1681 nsk = NULL; 1691 nsk = NULL;
1682 if (!tcp_filter(sk, skb)) 1692 if (!tcp_filter(sk, skb)) {
1693 th = (const struct tcphdr *)skb->data;
1694 iph = ip_hdr(skb);
1695 tcp_v4_fill_cb(skb, iph, th);
1683 nsk = tcp_check_req(sk, skb, req, false); 1696 nsk = tcp_check_req(sk, skb, req, false);
1697 }
1684 if (!nsk) { 1698 if (!nsk) {
1685 reqsk_put(req); 1699 reqsk_put(req);
1686 goto discard_and_relse; 1700 goto discard_and_relse;
1687 } 1701 }
1688 if (nsk == sk) { 1702 if (nsk == sk) {
1689 reqsk_put(req); 1703 reqsk_put(req);
1704 tcp_v4_restore_cb(skb);
1690 } else if (tcp_child_process(sk, nsk, skb)) { 1705 } else if (tcp_child_process(sk, nsk, skb)) {
1691 tcp_v4_send_reset(nsk, skb); 1706 tcp_v4_send_reset(nsk, skb);
1692 goto discard_and_relse; 1707 goto discard_and_relse;
@@ -1712,6 +1727,7 @@ process:
1712 goto discard_and_relse; 1727 goto discard_and_relse;
1713 th = (const struct tcphdr *)skb->data; 1728 th = (const struct tcphdr *)skb->data;
1714 iph = ip_hdr(skb); 1729 iph = ip_hdr(skb);
1730 tcp_v4_fill_cb(skb, iph, th);
1715 1731
1716 skb->dev = NULL; 1732 skb->dev = NULL;
1717 1733
@@ -1742,6 +1758,8 @@ no_tcp_socket:
1742 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1758 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1743 goto discard_it; 1759 goto discard_it;
1744 1760
1761 tcp_v4_fill_cb(skb, iph, th);
1762
1745 if (tcp_checksum_complete(skb)) { 1763 if (tcp_checksum_complete(skb)) {
1746csum_error: 1764csum_error:
1747 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1765 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
@@ -1768,6 +1786,8 @@ do_time_wait:
1768 goto discard_it; 1786 goto discard_it;
1769 } 1787 }
1770 1788
1789 tcp_v4_fill_cb(skb, iph, th);
1790
1771 if (tcp_checksum_complete(skb)) { 1791 if (tcp_checksum_complete(skb)) {
1772 inet_twsk_put(inet_twsk(sk)); 1792 inet_twsk_put(inet_twsk(sk));
1773 goto csum_error; 1793 goto csum_error;
@@ -1784,6 +1804,7 @@ do_time_wait:
1784 if (sk2) { 1804 if (sk2) {
1785 inet_twsk_deschedule_put(inet_twsk(sk)); 1805 inet_twsk_deschedule_put(inet_twsk(sk));
1786 sk = sk2; 1806 sk = sk2;
1807 tcp_v4_restore_cb(skb);
1787 refcounted = false; 1808 refcounted = false;
1788 goto process; 1809 goto process;
1789 } 1810 }
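
The structural change in tcp_v4_rcv() is that the IP control block is no longer overwritten with TCP metadata before the socket filter runs; the conversion moves into tcp_v4_fill_cb(), called once the skb is committed to TCP processing, with tcp_v4_restore_cb() putting the IP view back when the packet is re-dispatched. A very rough userspace sketch of that save-and-restore of per-packet metadata in a shared scratch area (all types and sizes are invented, not the kernel's):

    #include <stdio.h>
    #include <string.h>

    struct ip_cb  { int iif; int opts; };                 /* IP-layer metadata */
    struct tcp_cb { struct ip_cb header; unsigned int seq; unsigned int ack; };

    struct pkt { char cb[48]; };   /* shared scratch area, like skb->cb[] */

    /* Overlay the TCP view while preserving the IP view in its "header"
     * slot, roughly what tcp_v4_fill_cb() does after the filter has run.
     */
    static void fill_cb(struct pkt *p, unsigned int seq, unsigned int ack)
    {
        struct tcp_cb tcb;

        memcpy(&tcb.header, p->cb, sizeof(struct ip_cb));
        tcb.seq = seq;
        tcb.ack = ack;
        memcpy(p->cb, &tcb, sizeof(tcb));
    }

    /* Put the IP view back, as tcp_v4_restore_cb() does before the packet
     * is handed to another socket.
     */
    static void restore_cb(struct pkt *p)
    {
        struct tcp_cb tcb;

        memcpy(&tcb, p->cb, sizeof(tcb));
        memmove(p->cb, &tcb.header, sizeof(struct ip_cb));
    }

    int main(void)
    {
        struct pkt p;
        struct ip_cb ipcb = { .iif = 2, .opts = 0 }, out;

        memcpy(p.cb, &ipcb, sizeof(ipcb));
        fill_cb(&p, 1000, 2000);
        restore_cb(&p);
        memcpy(&out, p.cb, sizeof(out));
        printf("iif survives the round trip: %d\n", out.iif);
        return 0;
    }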
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e36eff0403f4..b079b619b60c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
310 if (state == TCP_TIME_WAIT) 310 if (state == TCP_TIME_WAIT)
311 timeo = TCP_TIMEWAIT_LEN; 311 timeo = TCP_TIMEWAIT_LEN;
312 312
313 /* tw_timer is pinned, so we need to make sure BH are disabled
314 * in following section, otherwise timer handler could run before
315 * we complete the initialization.
316 */
317 local_bh_disable();
313 inet_twsk_schedule(tw, timeo); 318 inet_twsk_schedule(tw, timeo);
314 /* Linkage updates. */ 319 /* Linkage updates. */
315 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); 320 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
316 inet_twsk_put(tw); 321 inet_twsk_put(tw);
322 local_bh_enable();
317 } else { 323 } else {
318 /* Sorry, if we're out of memory, just CLOSE this 324 /* Sorry, if we're out of memory, just CLOSE this
319 * socket up. We've got bigger problems than 325 * socket up. We've got bigger problems than
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b6a2aa1dcf56..4d58e2ce0b5b 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
32static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, 32static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
33 netdev_features_t features) 33 netdev_features_t features)
34{ 34{
35 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
36 return ERR_PTR(-EINVAL);
37
35 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 38 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
36 return ERR_PTR(-EINVAL); 39 return ERR_PTR(-EINVAL);
37 40
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 3330a370d306..c61240e43923 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
106 106
107/* Update the connection delivery information and generate a rate sample. */ 107/* Update the connection delivery information and generate a rate sample. */
108void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, 108void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
109 struct rate_sample *rs) 109 bool is_sack_reneg, struct rate_sample *rs)
110{ 110{
111 struct tcp_sock *tp = tcp_sk(sk); 111 struct tcp_sock *tp = tcp_sk(sk);
112 u32 snd_us, ack_us; 112 u32 snd_us, ack_us;
@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
124 124
125 rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ 125 rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
126 rs->losses = lost; /* freshly marked lost */ 126 rs->losses = lost; /* freshly marked lost */
127 /* Return an invalid sample if no timing information is available. */ 127 /* Return an invalid sample if no timing information is available or
128 if (!rs->prior_mstamp) { 128 * in recovery from loss with SACK reneging. Rate samples taken during
129 * a SACK reneging event may overestimate bw by including packets that
130 * were SACKed before the reneg.
131 */
132 if (!rs->prior_mstamp || is_sack_reneg) {
129 rs->delivered = -1; 133 rs->delivered = -1;
130 rs->interval_us = -1; 134 rs->interval_us = -1;
131 return; 135 return;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index d3ea89020c69..3a81720ac0c4 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
55 * to queuing or delayed ACKs. 55 * to queuing or delayed ACKs.
56 */ 56 */
57 reo_wnd = 1000; 57 reo_wnd = 1000;
58 if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { 58 if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) &&
59 min_rtt != ~0U) {
59 reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); 60 reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
60 reo_wnd = min(reo_wnd, tp->srtt_us >> 3); 61 reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
61 } 62 }
@@ -79,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
79 */ 80 */
80 remaining = tp->rack.rtt_us + reo_wnd - 81 remaining = tp->rack.rtt_us + reo_wnd -
81 tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp); 82 tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
82 if (remaining < 0) { 83 if (remaining <= 0) {
83 tcp_rack_mark_skb_lost(sk, skb); 84 tcp_rack_mark_skb_lost(sk, skb);
84 list_del_init(&skb->tcp_tsorted_anchor); 85 list_del_init(&skb->tcp_tsorted_anchor);
85 } else { 86 } else {
86 /* Record maximum wait time (+1 to avoid 0) */ 87 /* Record maximum wait time */
87 *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); 88 *reo_timeout = max_t(u32, *reo_timeout, remaining);
88 } 89 }
89 } 90 }
90} 91}
@@ -116,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
116{ 117{
117 u32 rtt_us; 118 u32 rtt_us;
118 119
119 if (tp->rack.mstamp &&
120 !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
121 end_seq, tp->rack.end_seq))
122 return;
123
124 rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); 120 rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
125 if (sacked & TCPCB_RETRANS) { 121 if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
126 /* If the sacked packet was retransmitted, it's ambiguous 122 /* If the sacked packet was retransmitted, it's ambiguous
127 * whether the retransmission or the original (or the prior 123 * whether the retransmission or the original (or the prior
128 * retransmission) was sacked. 124 * retransmission) was sacked.
@@ -133,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
133 * so it's at least one RTT (i.e., retransmission is at least 129 * so it's at least one RTT (i.e., retransmission is at least
134 * an RTT later). 130 * an RTT later).
135 */ 131 */
136 if (rtt_us < tcp_min_rtt(tp)) 132 return;
137 return;
138 } 133 }
139 tp->rack.rtt_us = rtt_us;
140 tp->rack.mstamp = xmit_time;
141 tp->rack.end_seq = end_seq;
142 tp->rack.advanced = 1; 134 tp->rack.advanced = 1;
135 tp->rack.rtt_us = rtt_us;
136 if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
137 end_seq, tp->rack.end_seq)) {
138 tp->rack.mstamp = xmit_time;
139 tp->rack.end_seq = end_seq;
140 }
143} 141}
144 142
145/* We have waited long enough to accommodate reordering. Mark the expired 143/* We have waited long enough to accommodate reordering. Mark the expired
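
The tcp_recovery.c hunks change when the RACK reordering window is widened (now also before recovery has started, not only after reordering was observed), mark a packet lost as soon as its deadline has fully elapsed ("<= 0" instead of "< 0"), and record the RTT even for sequences that are not the most recently sent. A toy recomputation of the window and the lost-or-wait decision, with illustrative numbers and the caveat that the kernel keeps tp->srtt_us left-shifted by 3 (hence its ">> 3"):

    #include <stdio.h>

    /* Start from a 1ms default, widen to (min_rtt / 4) * reo_wnd_steps when
     * reordering has been seen or recovery has not started, and cap at srtt.
     */
    static unsigned int reo_wnd_us(unsigned int min_rtt_us, unsigned int srtt_us,
                                   unsigned int steps, int seen_reord,
                                   int in_recovery)
    {
        unsigned int reo_wnd = 1000;              /* 1 ms default */

        if ((seen_reord || !in_recovery) && min_rtt_us != ~0U) {
            unsigned int grown = (min_rtt_us >> 2) * steps;

            if (grown > reo_wnd)
                reo_wnd = grown;
            if (reo_wnd > srtt_us)
                reo_wnd = srtt_us;
        }
        return reo_wnd;
    }

    int main(void)
    {
        /* A packet sent 9ms ago, with a 6ms RACK RTT and a 2ms window, is
         * overdue: "remaining" is <= 0, so it would be marked lost.
         */
        unsigned int reo = reo_wnd_us(8000, 5000, 1, 1, 1);
        int remaining = 6000 + (int)reo - 9000;

        printf("reo_wnd=%uus remaining=%dus -> %s\n", reo, remaining,
               remaining <= 0 ? "mark lost" : "arm the reordering timer");
        return 0;
    }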
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 16df6dd44b98..388158c9d9f6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -48,11 +48,19 @@ static void tcp_write_err(struct sock *sk)
48 * to prevent DoS attacks. It is called when a retransmission timeout 48 * to prevent DoS attacks. It is called when a retransmission timeout
49 * or zero probe timeout occurs on orphaned socket. 49 * or zero probe timeout occurs on orphaned socket.
50 * 50 *
51 * Also close if our net namespace is exiting; in that case there is no
52 * hope of ever communicating again since all netns interfaces are already
53 * down (or about to be down), and we need to release our dst references,
54 * which have been moved to the netns loopback interface, so the namespace
55 * can finish exiting. This condition is only possible if we are a kernel
56 * socket, as those do not hold references to the namespace.
57 *
51 * Criteria is still not confirmed experimentally and may change. 58 * Criteria is still not confirmed experimentally and may change.
52 * We kill the socket, if: 59 * We kill the socket, if:
53 * 1. If number of orphaned sockets exceeds an administratively configured 60 * 1. If number of orphaned sockets exceeds an administratively configured
54 * limit. 61 * limit.
55 * 2. If we have strong memory pressure. 62 * 2. If we have strong memory pressure.
63 * 3. If our net namespace is exiting.
56 */ 64 */
57static int tcp_out_of_resources(struct sock *sk, bool do_reset) 65static int tcp_out_of_resources(struct sock *sk, bool do_reset)
58{ 66{
@@ -81,6 +89,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
81 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); 89 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
82 return 1; 90 return 1;
83 } 91 }
92
93 if (!check_net(sock_net(sk))) {
94 /* Not possible to send reset; just close */
95 tcp_done(sk);
96 return 1;
97 }
98
84 return 0; 99 return 0;
85} 100}
86 101
@@ -264,6 +279,7 @@ void tcp_delack_timer_handler(struct sock *sk)
264 icsk->icsk_ack.pingpong = 0; 279 icsk->icsk_ack.pingpong = 0;
265 icsk->icsk_ack.ato = TCP_ATO_MIN; 280 icsk->icsk_ack.ato = TCP_ATO_MIN;
266 } 281 }
282 tcp_mstamp_refresh(tcp_sk(sk));
267 tcp_send_ack(sk); 283 tcp_send_ack(sk);
268 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); 284 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
269 } 285 }
@@ -632,6 +648,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
632 goto out; 648 goto out;
633 } 649 }
634 650
651 tcp_mstamp_refresh(tp);
635 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { 652 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
636 if (tp->linger2 >= 0) { 653 if (tp->linger2 >= 0) {
637 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; 654 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e4ff25c947c5..ef45adfc0edb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2502,9 +2502,9 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
2502 * but then block when reading it. Add special case code 2502 * but then block when reading it. Add special case code
2503 * to work around these arguably broken applications. 2503 * to work around these arguably broken applications.
2504 */ 2504 */
2505unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) 2505__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
2506{ 2506{
2507 unsigned int mask = datagram_poll(file, sock, wait); 2507 __poll_t mask = datagram_poll(file, sock, wait);
2508 struct sock *sk = sock->sk; 2508 struct sock *sk = sock->sk;
2509 2509
2510 if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) 2510 if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 01801b77bd0d..ea6e6e7df0ee 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
203 goto out; 203 goto out;
204 } 204 }
205 205
206 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
207 goto out;
208
206 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 209 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
207 goto out; 210 goto out;
208 211
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fea57ee..bcfc00e88756 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
23 return xfrm4_extract_header(skb); 23 return xfrm4_extract_header(skb);
24} 24}
25 25
26static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
27 struct sk_buff *skb)
28{
29 return dst_input(skb);
30}
31
26static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, 32static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
27 struct sk_buff *skb) 33 struct sk_buff *skb)
28{ 34{
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
33 iph->tos, skb->dev)) 39 iph->tos, skb->dev))
34 goto drop; 40 goto drop;
35 } 41 }
36 return dst_input(skb); 42
43 if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
44 goto drop;
45
46 return 0;
37drop: 47drop:
38 kfree_skb(skb); 48 kfree_skb(skb);
39 return NET_RX_DROP; 49 return NET_RX_DROP;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e6265e2c274e..20ca486b3cad 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
92 92
93 skb_reset_network_header(skb); 93 skb_reset_network_header(skb);
94 skb_mac_header_rebuild(skb); 94 skb_mac_header_rebuild(skb);
95 eth_hdr(skb)->h_proto = skb->protocol;
95 96
96 err = 0; 97 err = 0;
97 98
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..c9441ca45399 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; 210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
211 np->mc_loop = 1; 211 np->mc_loop = 1;
212 np->pmtudisc = IPV6_PMTUDISC_WANT; 212 np->pmtudisc = IPV6_PMTUDISC_WANT;
213 np->autoflowlabel = ip6_default_np_autolabel(net);
214 np->repflow = net->ipv6.sysctl.flowlabel_reflect; 213 np->repflow = net->ipv6.sysctl.flowlabel_reflect;
215 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; 214 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
216 215
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a902ff8f59be..1a7f00cd4803 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -890,13 +890,12 @@ static int esp6_init_state(struct xfrm_state *x)
890 x->props.header_len += IPV4_BEET_PHMAXLEN + 890 x->props.header_len += IPV4_BEET_PHMAXLEN +
891 (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); 891 (sizeof(struct ipv6hdr) - sizeof(struct iphdr));
892 break; 892 break;
893 default:
893 case XFRM_MODE_TRANSPORT: 894 case XFRM_MODE_TRANSPORT:
894 break; 895 break;
895 case XFRM_MODE_TUNNEL: 896 case XFRM_MODE_TUNNEL:
896 x->props.header_len += sizeof(struct ipv6hdr); 897 x->props.header_len += sizeof(struct ipv6hdr);
897 break; 898 break;
898 default:
899 goto error;
900 } 899 }
901 900
902 align = ALIGN(crypto_aead_blocksize(aead), 4); 901 align = ALIGN(crypto_aead_blocksize(aead), 4);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 333a478aa161..f52c314d4c97 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -60,7 +60,8 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
60 int nhoff; 60 int nhoff;
61 int err; 61 int err;
62 62
63 skb_pull(skb, offset); 63 if (!pskb_pull(skb, offset))
64 return NULL;
64 65
65 if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) 66 if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
66 goto out; 67 goto out;
@@ -148,6 +149,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
148 if (!xo) 149 if (!xo)
149 goto out; 150 goto out;
150 151
152 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
153 goto out;
154
151 seq = xo->seq.low; 155 seq = xo->seq.low;
152 156
153 x = skb->sp->xvec[skb->sp->len - 1]; 157 x = skb->sp->xvec[skb->sp->len - 1];
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 83bd75713535..bc68eb661970 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
925 sr_phdr->segments[0] = **addr_p; 925 sr_phdr->segments[0] = **addr_p;
926 *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left]; 926 *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
927 927
928 if (sr_ihdr->hdrlen > hops * 2) {
929 int tlvs_offset, tlvs_length;
930
931 tlvs_offset = (1 + hops * 2) << 3;
932 tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
933 memcpy((char *)sr_phdr + tlvs_offset,
934 (char *)sr_ihdr + tlvs_offset, tlvs_length);
935 }
936
928#ifdef CONFIG_IPV6_SEG6_HMAC 937#ifdef CONFIG_IPV6_SEG6_HMAC
929 if (sr_has_hmac(sr_phdr)) { 938 if (sr_has_hmac(sr_phdr)) {
930 struct net *net = NULL; 939 struct net *net = NULL;
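
The exthdrs.c hunk copies whatever TLV data follows the segment list when an SRH is duplicated into a packet. The arithmetic relies on hdrlen counting 8-byte units beyond the first 8 bytes of the header, with each IPv6 segment occupying two such units; anything past hops * 2 units is TLV space. A short check of those offsets with made-up values:

    #include <stdio.h>

    int main(void)
    {
        unsigned int hops = 3;      /* segments in the routing header */
        unsigned int hdrlen = 8;    /* 8-byte units beyond the fixed 8 bytes */

        if (hdrlen > hops * 2) {
            unsigned int tlvs_offset = (1 + hops * 2) << 3;   /* bytes from SRH start */
            unsigned int tlvs_length = (hdrlen - hops * 2) << 3;

            /* 3 segments -> fixed 8 bytes + 48 bytes of addresses = offset 56,
             * leaving (8 - 6) * 8 = 16 bytes of TLVs to copy.
             */
            printf("copy %u TLV bytes starting at offset %u\n",
                   tlvs_length, tlvs_offset);
        } else {
            printf("no TLVs present\n");
        }
        return 0;
    }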
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f5285f4e1d08..217683d40f12 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
640 if (!(fn->fn_flags & RTN_RTINFO)) { 640 if (!(fn->fn_flags & RTN_RTINFO)) {
641 RCU_INIT_POINTER(fn->leaf, NULL); 641 RCU_INIT_POINTER(fn->leaf, NULL);
642 rt6_release(leaf); 642 rt6_release(leaf);
643 /* remove null_entry in the root node */
644 } else if (fn->fn_flags & RTN_TL_ROOT &&
645 rcu_access_pointer(fn->leaf) ==
646 net->ipv6.ip6_null_entry) {
647 RCU_INIT_POINTER(fn->leaf, NULL);
643 } 648 }
644 649
645 return fn; 650 return fn;
@@ -1221,8 +1226,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1221 } 1226 }
1222 1227
1223 if (!rcu_access_pointer(fn->leaf)) { 1228 if (!rcu_access_pointer(fn->leaf)) {
1224 atomic_inc(&rt->rt6i_ref); 1229 if (fn->fn_flags & RTN_TL_ROOT) {
1225 rcu_assign_pointer(fn->leaf, rt); 1230 /* put back null_entry for root node */
1231 rcu_assign_pointer(fn->leaf,
1232 info->nl_net->ipv6.ip6_null_entry);
1233 } else {
1234 atomic_inc(&rt->rt6i_ref);
1235 rcu_assign_pointer(fn->leaf, rt);
1236 }
1226 } 1237 }
1227 fn = sn; 1238 fn = sn;
1228 } 1239 }
@@ -1241,23 +1252,28 @@ out:
1241 * If fib6_add_1 has cleared the old leaf pointer in the 1252 * If fib6_add_1 has cleared the old leaf pointer in the
1242 * super-tree leaf node we have to find a new one for it. 1253 * super-tree leaf node we have to find a new one for it.
1243 */ 1254 */
1244 struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, 1255 if (pn != fn) {
1245 lockdep_is_held(&table->tb6_lock)); 1256 struct rt6_info *pn_leaf =
1246 if (pn != fn && pn_leaf == rt) { 1257 rcu_dereference_protected(pn->leaf,
1247 pn_leaf = NULL; 1258 lockdep_is_held(&table->tb6_lock));
1248 RCU_INIT_POINTER(pn->leaf, NULL); 1259 if (pn_leaf == rt) {
1249 atomic_dec(&rt->rt6i_ref); 1260 pn_leaf = NULL;
1250 } 1261 RCU_INIT_POINTER(pn->leaf, NULL);
1251 if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { 1262 atomic_dec(&rt->rt6i_ref);
1252 pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
1253#if RT6_DEBUG >= 2
1254 if (!pn_leaf) {
1255 WARN_ON(!pn_leaf);
1256 pn_leaf = info->nl_net->ipv6.ip6_null_entry;
1257 } 1263 }
1264 if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
1265 pn_leaf = fib6_find_prefix(info->nl_net, table,
1266 pn);
1267#if RT6_DEBUG >= 2
1268 if (!pn_leaf) {
1269 WARN_ON(!pn_leaf);
1270 pn_leaf =
1271 info->nl_net->ipv6.ip6_null_entry;
1272 }
1258#endif 1273#endif
1259 atomic_inc(&pn_leaf->rt6i_ref); 1274 atomic_inc(&pn_leaf->rt6i_ref);
1260 rcu_assign_pointer(pn->leaf, pn_leaf); 1275 rcu_assign_pointer(pn->leaf, pn_leaf);
1276 }
1261 } 1277 }
1262#endif 1278#endif
1263 goto failure; 1279 goto failure;
@@ -1265,13 +1281,17 @@ out:
1265 return err; 1281 return err;
1266 1282
1267failure: 1283failure:
1268 /* fn->leaf could be NULL if fn is an intermediate node and we 1284 /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
1269 * failed to add the new route to it in both subtree creation 1285 * 1. fn is an intermediate node and we failed to add the new
1270 * failure and fib6_add_rt2node() failure case. 1286 * route to it in both subtree creation failure and fib6_add_rt2node()
1271 * In both cases, fib6_repair_tree() should be called to fix 1287 * failure case.
1272 * fn->leaf. 1288 * 2. fn is the root node in the table and we fail to add the first
1289 * default route to it.
1273 */ 1290 */
1274 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 1291 if (fn &&
1292 (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
1293 (fn->fn_flags & RTN_TL_ROOT &&
1294 !rcu_access_pointer(fn->leaf))))
1275 fib6_repair_tree(info->nl_net, table, fn); 1295 fib6_repair_tree(info->nl_net, table, fn);
1276 /* Always release dst as dst->__refcnt is guaranteed 1296 /* Always release dst as dst->__refcnt is guaranteed
1277 * to be taken before entering this function 1297 * to be taken before entering this function
@@ -1526,6 +1546,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1526 struct fib6_walker *w; 1546 struct fib6_walker *w;
1527 int iter = 0; 1547 int iter = 0;
1528 1548
1549 /* Set fn->leaf to null_entry for root node. */
1550 if (fn->fn_flags & RTN_TL_ROOT) {
1551 rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
1552 return fn;
1553 }
1554
1529 for (;;) { 1555 for (;;) {
1530 struct fib6_node *fn_r = rcu_dereference_protected(fn->right, 1556 struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
1531 lockdep_is_held(&table->tb6_lock)); 1557 lockdep_is_held(&table->tb6_lock));
@@ -1680,10 +1706,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1680 } 1706 }
1681 read_unlock(&net->ipv6.fib6_walker_lock); 1707 read_unlock(&net->ipv6.fib6_walker_lock);
1682 1708
1683 /* If it was last route, expunge its radix tree node */ 1709 /* If it was last route, call fib6_repair_tree() to:
1710 * 1. For root node, put back null_entry as how the table was created.
1711 * 2. For other nodes, expunge its radix tree node.
1712 */
1684 if (!rcu_access_pointer(fn->leaf)) { 1713 if (!rcu_access_pointer(fn->leaf)) {
1685 fn->fn_flags &= ~RTN_RTINFO; 1714 if (!(fn->fn_flags & RTN_TL_ROOT)) {
1686 net->ipv6.rt6_stats->fib_route_nodes--; 1715 fn->fn_flags &= ~RTN_RTINFO;
1716 net->ipv6.rt6_stats->fib_route_nodes--;
1717 }
1687 fn = fib6_repair_tree(net, table, fn); 1718 fn = fib6_repair_tree(net, table, fn);
1688 } 1719 }
1689 1720
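
Note on the ip6_fib.c hunks above: together they keep the invariant that the table root's leaf is never left NULL; it points either at a real route or at the shared null_entry sentinel, which is removed when the first route is added and restored when the last one goes away or insertion fails. The sketch below only illustrates that sentinel-restore idea; the node and route types are simplified stand-ins, not fib6_node.

#include <stdio.h>

struct route { const char *name; };

static struct route null_entry = { "null_entry" };

struct node {
        struct route *leaf;     /* never NULL for the table root */
};

static void root_add(struct node *root, struct route *rt)
{
        if (root->leaf == &null_entry)
                root->leaf = rt;                /* first real route replaces the sentinel */
}

static void root_del(struct node *root, struct route *rt)
{
        if (root->leaf == rt)
                root->leaf = &null_entry;       /* last route removed: restore the sentinel */
}

int main(void)
{
        struct node root = { &null_entry };
        struct route def = { "default" };

        root_add(&root, &def);
        printf("after add: %s\n", root.leaf->name);
        root_del(&root, &def);
        printf("after del: %s\n", root.leaf->name);
        return 0;
}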
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe..873549228ccb 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
337 337
338 nt->dev = dev; 338 nt->dev = dev;
339 nt->net = dev_net(dev); 339 nt->net = dev_net(dev);
340 ip6gre_tnl_link_config(nt, 1);
341 340
342 if (register_netdevice(dev) < 0) 341 if (register_netdevice(dev) < 0)
343 goto failed_free; 342 goto failed_free;
344 343
344 ip6gre_tnl_link_config(nt, 1);
345
345 /* Can use a lockless transmit, unless we generate output sequences */ 346 /* Can use a lockless transmit, unless we generate output sequences */
346 if (!(nt->parms.o_flags & TUNNEL_SEQ)) 347 if (!(nt->parms.o_flags & TUNNEL_SEQ))
347 dev->features |= NETIF_F_LLTX; 348 dev->features |= NETIF_F_LLTX;
@@ -1014,6 +1015,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
1014 eth_random_addr(dev->perm_addr); 1015 eth_random_addr(dev->perm_addr);
1015} 1016}
1016 1017
1018#define GRE6_FEATURES (NETIF_F_SG | \
1019 NETIF_F_FRAGLIST | \
1020 NETIF_F_HIGHDMA | \
1021 NETIF_F_HW_CSUM)
1022
1023static void ip6gre_tnl_init_features(struct net_device *dev)
1024{
1025 struct ip6_tnl *nt = netdev_priv(dev);
1026
1027 dev->features |= GRE6_FEATURES;
1028 dev->hw_features |= GRE6_FEATURES;
1029
1030 if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
1031 /* TCP offload with GRE SEQ is not supported, nor
1032 * can we support 2 levels of outer headers requiring
1033 * an update.
1034 */
1035 if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
1036 nt->encap.type == TUNNEL_ENCAP_NONE) {
1037 dev->features |= NETIF_F_GSO_SOFTWARE;
1038 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1039 }
1040
1041 /* Can use a lockless transmit, unless we generate
1042 * output sequences
1043 */
1044 dev->features |= NETIF_F_LLTX;
1045 }
1046}
1047
1017static int ip6gre_tunnel_init_common(struct net_device *dev) 1048static int ip6gre_tunnel_init_common(struct net_device *dev)
1018{ 1049{
1019 struct ip6_tnl *tunnel; 1050 struct ip6_tnl *tunnel;
@@ -1048,6 +1079,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
1048 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1079 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1049 dev->mtu -= 8; 1080 dev->mtu -= 8;
1050 1081
1082 ip6gre_tnl_init_features(dev);
1083
1051 return 0; 1084 return 0;
1052} 1085}
1053 1086
@@ -1271,7 +1304,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
1271 1304
1272static int ip6gre_tap_init(struct net_device *dev) 1305static int ip6gre_tap_init(struct net_device *dev)
1273{ 1306{
1274 struct ip6_tnl *tunnel;
1275 int ret; 1307 int ret;
1276 1308
1277 ret = ip6gre_tunnel_init_common(dev); 1309 ret = ip6gre_tunnel_init_common(dev);
@@ -1280,10 +1312,6 @@ static int ip6gre_tap_init(struct net_device *dev)
1280 1312
1281 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1313 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1282 1314
1283 tunnel = netdev_priv(dev);
1284
1285 ip6gre_tnl_link_config(tunnel, 1);
1286
1287 return 0; 1315 return 0;
1288} 1316}
1289 1317
@@ -1298,16 +1326,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
1298 .ndo_get_iflink = ip6_tnl_get_iflink, 1326 .ndo_get_iflink = ip6_tnl_get_iflink,
1299}; 1327};
1300 1328
1301#define GRE6_FEATURES (NETIF_F_SG | \
1302 NETIF_F_FRAGLIST | \
1303 NETIF_F_HIGHDMA | \
1304 NETIF_F_HW_CSUM)
1305
1306static void ip6gre_tap_setup(struct net_device *dev) 1329static void ip6gre_tap_setup(struct net_device *dev)
1307{ 1330{
1308 1331
1309 ether_setup(dev); 1332 ether_setup(dev);
1310 1333
1334 dev->max_mtu = 0;
1311 dev->netdev_ops = &ip6gre_tap_netdev_ops; 1335 dev->netdev_ops = &ip6gre_tap_netdev_ops;
1312 dev->needs_free_netdev = true; 1336 dev->needs_free_netdev = true;
1313 dev->priv_destructor = ip6gre_dev_free; 1337 dev->priv_destructor = ip6gre_dev_free;
@@ -1380,32 +1404,16 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1380 1404
1381 nt->dev = dev; 1405 nt->dev = dev;
1382 nt->net = dev_net(dev); 1406 nt->net = dev_net(dev);
1383 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1384
1385 dev->features |= GRE6_FEATURES;
1386 dev->hw_features |= GRE6_FEATURES;
1387
1388 if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
1389 /* TCP offload with GRE SEQ is not supported, nor
1390 * can we support 2 levels of outer headers requiring
1391 * an update.
1392 */
1393 if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
1394 (nt->encap.type == TUNNEL_ENCAP_NONE)) {
1395 dev->features |= NETIF_F_GSO_SOFTWARE;
1396 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1397 }
1398
1399 /* Can use a lockless transmit, unless we generate
1400 * output sequences
1401 */
1402 dev->features |= NETIF_F_LLTX;
1403 }
1404 1407
1405 err = register_netdevice(dev); 1408 err = register_netdevice(dev);
1406 if (err) 1409 if (err)
1407 goto out; 1410 goto out;
1408 1411
1412 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1413
1414 if (tb[IFLA_MTU])
1415 ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
1416
1409 dev_hold(dev); 1417 dev_hold(dev);
1410 ip6gre_tunnel_link(ign, nt); 1418 ip6gre_tunnel_link(ign, nt);
1411 1419
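
Note on the ip6_gre.c hunks above: besides moving ip6gre_tnl_link_config() to after a successful register_netdevice(), they centralize the offload flags in ip6gre_tnl_init_features(), where GSO is only advertised when output sequencing is off and checksumming would not require updating two levels of outer headers. A runnable illustration of that selection logic follows; the flag values are made up for the example and are not the kernel's NETIF_F_*/TUNNEL_* bits.

#include <stdio.h>

#define SEQ_FLAG        0x1
#define CSUM_FLAG       0x2
#define ENCAP_NONE      0

static int want_gso(unsigned int o_flags, int encap_type)
{
        if (o_flags & SEQ_FLAG)
                return 0;       /* TCP offload with GRE SEQ is not supported */
        return !(o_flags & CSUM_FLAG) || encap_type == ENCAP_NONE;
}

int main(void)
{
        printf("seq+csum:            %d\n", want_gso(SEQ_FLAG | CSUM_FLAG, ENCAP_NONE));
        printf("csum only, no encap: %d\n", want_gso(CSUM_FLAG, ENCAP_NONE));
        printf("plain:               %d\n", want_gso(0, ENCAP_NONE));
        return 0;
}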
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..3763dc01e374 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
166 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 166 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
167} 167}
168 168
169bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
170{
171 if (!np->autoflowlabel_set)
172 return ip6_default_np_autolabel(net);
173 else
174 return np->autoflowlabel;
175}
176
169/* 177/*
170 * xmit an sk_buff (used by TCP, SCTP and DCCP) 178 * xmit an sk_buff (used by TCP, SCTP and DCCP)
171 * Note : socket lock is not held for SYNACK packets, but might be modified 179 * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
230 hlimit = ip6_dst_hoplimit(dst); 238 hlimit = ip6_dst_hoplimit(dst);
231 239
232 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 240 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
233 np->autoflowlabel, fl6)); 241 ip6_autoflowlabel(net, np), fl6));
234 242
235 hdr->payload_len = htons(seg_len); 243 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto; 244 hdr->nexthdr = proto;
@@ -1198,14 +1206,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1198 v6_cork->tclass = ipc6->tclass; 1206 v6_cork->tclass = ipc6->tclass;
1199 if (rt->dst.flags & DST_XFRM_TUNNEL) 1207 if (rt->dst.flags & DST_XFRM_TUNNEL)
1200 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1208 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1201 rt->dst.dev->mtu : dst_mtu(&rt->dst); 1209 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1202 else 1210 else
1203 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1211 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1204 rt->dst.dev->mtu : dst_mtu(rt->dst.path); 1212 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
1205 if (np->frag_size < mtu) { 1213 if (np->frag_size < mtu) {
1206 if (np->frag_size) 1214 if (np->frag_size)
1207 mtu = np->frag_size; 1215 mtu = np->frag_size;
1208 } 1216 }
1217 if (mtu < IPV6_MIN_MTU)
1218 return -EINVAL;
1209 cork->base.fragsize = mtu; 1219 cork->base.fragsize = mtu;
1210 if (dst_allfrag(rt->dst.path)) 1220 if (dst_allfrag(rt->dst.path))
1211 cork->base.flags |= IPCORK_ALLFRAG; 1221 cork->base.flags |= IPCORK_ALLFRAG;
@@ -1626,7 +1636,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
1626 1636
1627 ip6_flow_hdr(hdr, v6_cork->tclass, 1637 ip6_flow_hdr(hdr, v6_cork->tclass,
1628 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1638 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1629 np->autoflowlabel, fl6)); 1639 ip6_autoflowlabel(net, np), fl6));
1630 hdr->hop_limit = v6_cork->hop_limit; 1640 hdr->hop_limit = v6_cork->hop_limit;
1631 hdr->nexthdr = proto; 1641 hdr->nexthdr = proto;
1632 hdr->saddr = fl6->saddr; 1642 hdr->saddr = fl6->saddr;
@@ -1725,11 +1735,13 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
1725 cork.base.flags = 0; 1735 cork.base.flags = 0;
1726 cork.base.addr = 0; 1736 cork.base.addr = 0;
1727 cork.base.opt = NULL; 1737 cork.base.opt = NULL;
1738 cork.base.dst = NULL;
1728 v6_cork.opt = NULL; 1739 v6_cork.opt = NULL;
1729 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); 1740 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
1730 if (err) 1741 if (err) {
1742 ip6_cork_release(&cork, &v6_cork);
1731 return ERR_PTR(err); 1743 return ERR_PTR(err);
1732 1744 }
1733 if (ipc6->dontfrag < 0) 1745 if (ipc6->dontfrag < 0)
1734 ipc6->dontfrag = inet6_sk(sk)->dontfrag; 1746 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1735 1747
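
Note on the ip6_output.c hunks above: the new ip6_autoflowlabel() helper makes the per-socket IPV6_AUTOFLOWLABEL value override the per-namespace default only once the socket has actually set it (autoflowlabel_set). A small runnable sketch of that fallback, with simplified stand-ins for ipv6_pinfo and struct net:

#include <stdbool.h>
#include <stdio.h>

struct netns  { bool default_autolabel; };
struct sockv6 { bool autoflowlabel; bool autoflowlabel_set; };

static bool autoflowlabel(const struct netns *net, const struct sockv6 *np)
{
        if (!np->autoflowlabel_set)
                return net->default_autolabel;  /* sysctl default wins */
        return np->autoflowlabel;               /* explicit per-socket choice */
}

int main(void)
{
        struct netns net = { .default_autolabel = true };
        struct sockv6 untouched = { 0 };
        struct sockv6 opted_out = { .autoflowlabel = false, .autoflowlabel_set = true };

        printf("untouched socket follows netns default: %d\n",
               autoflowlabel(&net, &untouched));
        printf("socket that cleared the option:         %d\n",
               autoflowlabel(&net, &opted_out));
        return 0;
}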
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3d3092adf1d2..1ee5584c3555 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -642,8 +642,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
642 if (rel_info > dst_mtu(skb_dst(skb2))) 642 if (rel_info > dst_mtu(skb_dst(skb2)))
643 goto out; 643 goto out;
644 644
645 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, 645 skb_dst_update_pmtu(skb2, rel_info);
646 rel_info);
647 } 646 }
648 647
649 icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); 648 icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -904,7 +903,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
904 if (t->parms.collect_md) { 903 if (t->parms.collect_md) {
905 tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); 904 tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
906 if (!tun_dst) 905 if (!tun_dst)
907 return 0; 906 goto drop;
908 } 907 }
909 ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, 908 ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
910 log_ecn_error); 909 log_ecn_error);
@@ -1074,10 +1073,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
1074 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); 1073 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
1075 neigh_release(neigh); 1074 neigh_release(neigh);
1076 } 1075 }
1077 } else if (!(t->parms.flags & 1076 } else if (t->parms.proto != 0 && !(t->parms.flags &
1078 (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { 1077 (IP6_TNL_F_USE_ORIG_TCLASS |
1079 /* enable the cache only only if the routing decision does 1078 IP6_TNL_F_USE_ORIG_FWMARK))) {
1080 * not depend on the current inner header value 1079 /* enable the cache only if neither the outer protocol nor the
1080 * routing decision depends on the current inner header value
1081 */ 1081 */
1082 use_cache = true; 1082 use_cache = true;
1083 } 1083 }
@@ -1123,10 +1123,14 @@ route_lookup:
1123 max_headroom += 8; 1123 max_headroom += 8;
1124 mtu -= 8; 1124 mtu -= 8;
1125 } 1125 }
1126 if (mtu < IPV6_MIN_MTU) 1126 if (skb->protocol == htons(ETH_P_IPV6)) {
1127 mtu = IPV6_MIN_MTU; 1127 if (mtu < IPV6_MIN_MTU)
1128 if (skb_dst(skb) && !t->parms.collect_md) 1128 mtu = IPV6_MIN_MTU;
1129 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 1129 } else if (mtu < 576) {
1130 mtu = 576;
1131 }
1132
1133 skb_dst_update_pmtu(skb, mtu);
1130 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { 1134 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
1131 *pmtu = mtu; 1135 *pmtu = mtu;
1132 err = -EMSGSIZE; 1136 err = -EMSGSIZE;
@@ -1671,11 +1675,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1671{ 1675{
1672 struct ip6_tnl *tnl = netdev_priv(dev); 1676 struct ip6_tnl *tnl = netdev_priv(dev);
1673 1677
1674 if (tnl->parms.proto == IPPROTO_IPIP) { 1678 if (tnl->parms.proto == IPPROTO_IPV6) {
1675 if (new_mtu < ETH_MIN_MTU) 1679 if (new_mtu < IPV6_MIN_MTU)
1676 return -EINVAL; 1680 return -EINVAL;
1677 } else { 1681 } else {
1678 if (new_mtu < IPV6_MIN_MTU) 1682 if (new_mtu < ETH_MIN_MTU)
1679 return -EINVAL; 1683 return -EINVAL;
1680 } 1684 }
1681 if (new_mtu > 0xFFF8 - dev->hard_header_len) 1685 if (new_mtu > 0xFFF8 - dev->hard_header_len)
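
Note on the ip6_tunnel.c hunks above: the PMTU reported back to the inner flow is now clamped per payload family, never below 1280 for IPv6 payloads and never below 576 otherwise, and ip6_tnl_change_mtu() applies the matching minimum for IPPROTO_IPV6 versus other payloads. A runnable illustration of the clamp; the constants are the literal values from the hunk and the function is only a sketch:

#include <stdio.h>

#define MIN_MTU_V6      1280    /* IPV6_MIN_MTU */
#define MIN_PMTU_OTHER  576

static unsigned int clamp_tunnel_mtu(unsigned int mtu, int payload_is_ipv6)
{
        if (payload_is_ipv6) {
                if (mtu < MIN_MTU_V6)
                        mtu = MIN_MTU_V6;
        } else if (mtu < MIN_PMTU_OTHER) {
                mtu = MIN_PMTU_OTHER;
        }
        return mtu;
}

int main(void)
{
        printf("ipv6 payload, path mtu 1200 -> %u\n", clamp_tunnel_mtu(1200, 1));
        printf("ipv4 payload, path mtu 500  -> %u\n", clamp_tunnel_mtu(500, 0));
        printf("ipv4 payload, path mtu 1400 -> %u\n", clamp_tunnel_mtu(1400, 0));
        return 0;
}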
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index dbb74f3c57a7..8c184f84f353 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
483 483
484 mtu = dst_mtu(dst); 484 mtu = dst_mtu(dst);
485 if (!skb->ignore_df && skb->len > mtu) { 485 if (!skb->ignore_df && skb->len > mtu) {
486 skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); 486 skb_dst_update_pmtu(skb, mtu);
487 487
488 if (skb->protocol == htons(ETH_P_IPV6)) { 488 if (skb->protocol == htons(ETH_P_IPV6)) {
489 if (mtu < IPV6_MIN_MTU) 489 if (mtu < IPV6_MIN_MTU)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..e8ffb5b5d84e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
886 break; 886 break;
887 case IPV6_AUTOFLOWLABEL: 887 case IPV6_AUTOFLOWLABEL:
888 np->autoflowlabel = valbool; 888 np->autoflowlabel = valbool;
889 np->autoflowlabel_set = 1;
889 retv = 0; 890 retv = 0;
890 break; 891 break;
891 case IPV6_RECVFRAGSIZE: 892 case IPV6_RECVFRAGSIZE:
@@ -1335,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1335 break; 1336 break;
1336 1337
1337 case IPV6_AUTOFLOWLABEL: 1338 case IPV6_AUTOFLOWLABEL:
1338 val = np->autoflowlabel; 1339 val = ip6_autoflowlabel(sock_net(sk), np);
1339 break; 1340 break;
1340 1341
1341 case IPV6_RECVFRAGSIZE: 1342 case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fc6d7d143f2c..844642682b83 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
1682} 1682}
1683 1683
1684static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, 1684static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1685 int type, struct mld2_grec **ppgr) 1685 int type, struct mld2_grec **ppgr, unsigned int mtu)
1686{ 1686{
1687 struct net_device *dev = pmc->idev->dev;
1688 struct mld2_report *pmr; 1687 struct mld2_report *pmr;
1689 struct mld2_grec *pgr; 1688 struct mld2_grec *pgr;
1690 1689
1691 if (!skb) 1690 if (!skb) {
1692 skb = mld_newpack(pmc->idev, dev->mtu); 1691 skb = mld_newpack(pmc->idev, mtu);
1693 if (!skb) 1692 if (!skb)
1694 return NULL; 1693 return NULL;
1694 }
1695 pgr = skb_put(skb, sizeof(struct mld2_grec)); 1695 pgr = skb_put(skb, sizeof(struct mld2_grec));
1696 pgr->grec_type = type; 1696 pgr->grec_type = type;
1697 pgr->grec_auxwords = 0; 1697 pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1714 struct mld2_grec *pgr = NULL; 1714 struct mld2_grec *pgr = NULL;
1715 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; 1715 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
1716 int scount, stotal, first, isquery, truncate; 1716 int scount, stotal, first, isquery, truncate;
1717 unsigned int mtu;
1717 1718
1718 if (pmc->mca_flags & MAF_NOREPORT) 1719 if (pmc->mca_flags & MAF_NOREPORT)
1719 return skb; 1720 return skb;
1720 1721
1722 mtu = READ_ONCE(dev->mtu);
1723 if (mtu < IPV6_MIN_MTU)
1724 return skb;
1725
1721 isquery = type == MLD2_MODE_IS_INCLUDE || 1726 isquery = type == MLD2_MODE_IS_INCLUDE ||
1722 type == MLD2_MODE_IS_EXCLUDE; 1727 type == MLD2_MODE_IS_EXCLUDE;
1723 truncate = type == MLD2_MODE_IS_EXCLUDE || 1728 truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1738 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { 1743 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
1739 if (skb) 1744 if (skb)
1740 mld_sendpack(skb); 1745 mld_sendpack(skb);
1741 skb = mld_newpack(idev, dev->mtu); 1746 skb = mld_newpack(idev, mtu);
1742 } 1747 }
1743 } 1748 }
1744 first = 1; 1749 first = 1;
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1774 pgr->grec_nsrcs = htons(scount); 1779 pgr->grec_nsrcs = htons(scount);
1775 if (skb) 1780 if (skb)
1776 mld_sendpack(skb); 1781 mld_sendpack(skb);
1777 skb = mld_newpack(idev, dev->mtu); 1782 skb = mld_newpack(idev, mtu);
1778 first = 1; 1783 first = 1;
1779 scount = 0; 1784 scount = 0;
1780 } 1785 }
1781 if (first) { 1786 if (first) {
1782 skb = add_grhead(skb, pmc, type, &pgr); 1787 skb = add_grhead(skb, pmc, type, &pgr, mtu);
1783 first = 0; 1788 first = 0;
1784 } 1789 }
1785 if (!skb) 1790 if (!skb)
@@ -1814,7 +1819,7 @@ empty_source:
1814 mld_sendpack(skb); 1819 mld_sendpack(skb);
1815 skb = NULL; /* add_grhead will get a new one */ 1820 skb = NULL; /* add_grhead will get a new one */
1816 } 1821 }
1817 skb = add_grhead(skb, pmc, type, &pgr); 1822 skb = add_grhead(skb, pmc, type, &pgr, mtu);
1818 } 1823 }
1819 } 1824 }
1820 if (pgr) 1825 if (pgr)
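
Note on the mcast.c hunks above: add_grec() now reads dev->mtu once with READ_ONCE(), rejects values below IPV6_MIN_MTU, and threads that single snapshot into mld_newpack() and add_grhead(), so a concurrent MTU change cannot yield inconsistently sized report packets within one pass. A minimal sketch of the snapshot-once idea; the volatile read below merely stands in for READ_ONCE().

#include <stdio.h>

#define IPV6_MIN_MTU 1280

static volatile unsigned int dev_mtu = 1500;    /* may be changed concurrently */

static int build_reports(int npackets)
{
        unsigned int mtu = dev_mtu;     /* single snapshot for the whole pass */

        if (mtu < IPV6_MIN_MTU)
                return -1;              /* bail out instead of building bad packets */

        for (int i = 0; i < npackets; i++)
                printf("packet %d sized against mtu %u\n", i, mtu);
        return 0;
}

int main(void)
{
        return build_reports(3) ? 1 : 0;
}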
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f06e25065a34..66a8c69a3db4 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -282,12 +282,7 @@ ip6t_do_table(struct sk_buff *skb,
282 282
283 local_bh_disable(); 283 local_bh_disable();
284 addend = xt_write_recseq_begin(); 284 addend = xt_write_recseq_begin();
285 private = table->private; 285 private = READ_ONCE(table->private); /* Address dependency. */
286 /*
287 * Ensure we load private-> members after we've fetched the base
288 * pointer.
289 */
290 smp_read_barrier_depends();
291 cpu = smp_processor_id(); 286 cpu = smp_processor_id();
292 table_base = private->entries; 287 table_base = private->entries;
293 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; 288 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -458,7 +453,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
458 if (!xt_find_jump_offset(offsets, newpos, 453 if (!xt_find_jump_offset(offsets, newpos,
459 newinfo->number)) 454 newinfo->number))
460 return 0; 455 return 0;
461 e = entry0 + newpos;
462 } else { 456 } else {
463 /* ... this is a fallthru */ 457 /* ... this is a fallthru */
464 newpos = pos + e->next_offset; 458 newpos = pos + e->next_offset;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 2b1a15846f9a..92c0047e7e33 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
33 33
34 if (range->flags & NF_NAT_RANGE_MAP_IPS) 34 if (range->flags & NF_NAT_RANGE_MAP_IPS)
35 return -EINVAL; 35 return -EINVAL;
36 return 0; 36 return nf_ct_netns_get(par->net, par->family);
37}
38
39static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
40{
41 nf_ct_netns_put(par->net, par->family);
37} 42}
38 43
39static struct xt_target masquerade_tg6_reg __read_mostly = { 44static struct xt_target masquerade_tg6_reg __read_mostly = {
40 .name = "MASQUERADE", 45 .name = "MASQUERADE",
41 .family = NFPROTO_IPV6, 46 .family = NFPROTO_IPV6,
42 .checkentry = masquerade_tg6_checkentry, 47 .checkentry = masquerade_tg6_checkentry,
48 .destroy = masquerade_tg6_destroy,
43 .target = masquerade_tg6, 49 .target = masquerade_tg6,
44 .targetsize = sizeof(struct nf_nat_range), 50 .targetsize = sizeof(struct nf_nat_range),
45 .table = "nat", 51 .table = "nat",
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374..0458b761f3c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2336 } 2336 }
2337 2337
2338 rt->dst.flags |= DST_HOST; 2338 rt->dst.flags |= DST_HOST;
2339 rt->dst.input = ip6_input;
2339 rt->dst.output = ip6_output; 2340 rt->dst.output = ip6_output;
2340 rt->rt6i_gateway = fl6->daddr; 2341 rt->rt6i_gateway = fl6->daddr;
2341 rt->rt6i_dst.addr = fl6->daddr; 2342 rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4297 if (!ipv6_addr_any(&fl6.saddr)) 4298 if (!ipv6_addr_any(&fl6.saddr))
4298 flags |= RT6_LOOKUP_F_HAS_SADDR; 4299 flags |= RT6_LOOKUP_F_HAS_SADDR;
4299 4300
4300 if (!fibmatch) 4301 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4301 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4302 else
4303 dst = ip6_route_lookup(net, &fl6, 0);
4304 4302
4305 rcu_read_unlock(); 4303 rcu_read_unlock();
4306 } else { 4304 } else {
4307 fl6.flowi6_oif = oif; 4305 fl6.flowi6_oif = oif;
4308 4306
4309 if (!fibmatch) 4307 dst = ip6_route_output(net, NULL, &fl6);
4310 dst = ip6_route_output(net, NULL, &fl6);
4311 else
4312 dst = ip6_route_lookup(net, &fl6, 0);
4313 } 4308 }
4314 4309
4315 4310
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4326 goto errout; 4321 goto errout;
4327 } 4322 }
4328 4323
4324 if (fibmatch && rt->dst.from) {
4325 struct rt6_info *ort = container_of(rt->dst.from,
4326 struct rt6_info, dst);
4327
4328 dst_hold(&ort->dst);
4329 ip6_rt_put(rt);
4330 rt = ort;
4331 }
4332
4329 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 4333 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4330 if (!skb) { 4334 if (!skb) {
4331 ip6_rt_put(rt); 4335 ip6_rt_put(rt);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d60ddcb0bfe2..3873d3877135 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -934,8 +934,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
934 df = 0; 934 df = 0;
935 } 935 }
936 936
937 if (tunnel->parms.iph.daddr && skb_dst(skb)) 937 if (tunnel->parms.iph.daddr)
938 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 938 skb_dst_update_pmtu(skb, mtu);
939 939
940 if (skb->len > mtu && !skb_is_gso(skb)) { 940 if (skb->len > mtu && !skb_is_gso(skb)) {
941 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 941 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
1098 ipip6_tunnel_link(sitn, t); 1098 ipip6_tunnel_link(sitn, t);
1099 t->parms.iph.ttl = p->iph.ttl; 1099 t->parms.iph.ttl = p->iph.ttl;
1100 t->parms.iph.tos = p->iph.tos; 1100 t->parms.iph.tos = p->iph.tos;
1101 t->parms.iph.frag_off = p->iph.frag_off;
1101 if (t->parms.link != p->link || t->fwmark != fwmark) { 1102 if (t->parms.link != p->link || t->fwmark != fwmark) {
1102 t->parms.link = p->link; 1103 t->parms.link = p->link;
1103 t->fwmark = fwmark; 1104 t->fwmark = fwmark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6bb98c93edfe..7178476b3d2f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
994 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 994 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
995 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 995 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
996 req->ts_recent, sk->sk_bound_dev_if, 996 req->ts_recent, sk->sk_bound_dev_if,
997 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 997 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
998 0, 0); 998 0, 0);
999} 999}
1000 1000
@@ -1454,7 +1454,6 @@ process:
1454 struct sock *nsk; 1454 struct sock *nsk;
1455 1455
1456 sk = req->rsk_listener; 1456 sk = req->rsk_listener;
1457 tcp_v6_fill_cb(skb, hdr, th);
1458 if (tcp_v6_inbound_md5_hash(sk, skb)) { 1457 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1459 sk_drops_add(sk, skb); 1458 sk_drops_add(sk, skb);
1460 reqsk_put(req); 1459 reqsk_put(req);
@@ -1467,8 +1466,12 @@ process:
1467 sock_hold(sk); 1466 sock_hold(sk);
1468 refcounted = true; 1467 refcounted = true;
1469 nsk = NULL; 1468 nsk = NULL;
1470 if (!tcp_filter(sk, skb)) 1469 if (!tcp_filter(sk, skb)) {
1470 th = (const struct tcphdr *)skb->data;
1471 hdr = ipv6_hdr(skb);
1472 tcp_v6_fill_cb(skb, hdr, th);
1471 nsk = tcp_check_req(sk, skb, req, false); 1473 nsk = tcp_check_req(sk, skb, req, false);
1474 }
1472 if (!nsk) { 1475 if (!nsk) {
1473 reqsk_put(req); 1476 reqsk_put(req);
1474 goto discard_and_relse; 1477 goto discard_and_relse;
@@ -1492,8 +1495,6 @@ process:
1492 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1495 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1493 goto discard_and_relse; 1496 goto discard_and_relse;
1494 1497
1495 tcp_v6_fill_cb(skb, hdr, th);
1496
1497 if (tcp_v6_inbound_md5_hash(sk, skb)) 1498 if (tcp_v6_inbound_md5_hash(sk, skb))
1498 goto discard_and_relse; 1499 goto discard_and_relse;
1499 1500
@@ -1501,6 +1502,7 @@ process:
1501 goto discard_and_relse; 1502 goto discard_and_relse;
1502 th = (const struct tcphdr *)skb->data; 1503 th = (const struct tcphdr *)skb->data;
1503 hdr = ipv6_hdr(skb); 1504 hdr = ipv6_hdr(skb);
1505 tcp_v6_fill_cb(skb, hdr, th);
1504 1506
1505 skb->dev = NULL; 1507 skb->dev = NULL;
1506 1508
@@ -1590,7 +1592,6 @@ do_time_wait:
1590 tcp_v6_timewait_ack(sk, skb); 1592 tcp_v6_timewait_ack(sk, skb);
1591 break; 1593 break;
1592 case TCP_TW_RST: 1594 case TCP_TW_RST:
1593 tcp_v6_restore_cb(skb);
1594 tcp_v6_send_reset(sk, skb); 1595 tcp_v6_send_reset(sk, skb);
1595 inet_twsk_deschedule_put(inet_twsk(sk)); 1596 inet_twsk_deschedule_put(inet_twsk(sk));
1596 goto discard_it; 1597 goto discard_it;
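
Note on the tcp_ipv6.c hunks above: tcp_v6_fill_cb() is deferred until after tcp_filter(), and th/hdr are re-derived from the skb, because a BPF filter may reallocate the packet headers and any pointers (or a control block filled from them) taken beforehand would be stale. A small userspace sketch of that refresh-after-possible-reallocation rule; struct pkt and filter() are hypothetical stand-ins.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pkt { char *data; size_t len; };

/* May move the payload to a new address, as tcp_filter() can. */
static void filter(struct pkt *p)
{
        char *moved = malloc(p->len);
        memcpy(moved, p->data, p->len);
        free(p->data);
        p->data = moved;
}

int main(void)
{
        struct pkt p = { .len = 8 };
        p.data = malloc(p.len);
        strcpy(p.data, "headers");

        char *hdr = p.data;     /* pointer taken before the call */
        filter(&p);
        hdr = p.data;           /* must be refreshed, as the hunk does for th and hdr */

        printf("%s\n", hdr);
        free(p.data);
        return 0;
}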
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c9204c01..278e49cd67d4 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
46{ 46{
47 struct tcphdr *th; 47 struct tcphdr *th;
48 48
49 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
50 return ERR_PTR(-EINVAL);
51
49 if (!pskb_may_pull(skb, sizeof(*th))) 52 if (!pskb_may_pull(skb, sizeof(*th)))
50 return ERR_PTR(-EINVAL); 53 return ERR_PTR(-EINVAL);
51 54
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a0f89ad76f9d..2a04dc9c781b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
42 const struct ipv6hdr *ipv6h; 42 const struct ipv6hdr *ipv6h;
43 struct udphdr *uh; 43 struct udphdr *uh;
44 44
45 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
46 goto out;
47
45 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 48 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
46 goto out; 49 goto out;
47 50
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af986..841f4a07438e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
32} 32}
33EXPORT_SYMBOL(xfrm6_rcv_spi); 33EXPORT_SYMBOL(xfrm6_rcv_spi);
34 34
35static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
36 struct sk_buff *skb)
37{
38 if (xfrm_trans_queue(skb, ip6_rcv_finish))
39 __kfree_skb(skb);
40 return -1;
41}
42
35int xfrm6_transport_finish(struct sk_buff *skb, int async) 43int xfrm6_transport_finish(struct sk_buff *skb, int async)
36{ 44{
37 struct xfrm_offload *xo = xfrm_offload(skb); 45 struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
56 64
57 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, 65 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
58 dev_net(skb->dev), NULL, skb, skb->dev, NULL, 66 dev_net(skb->dev), NULL, skb, skb->dev, NULL,
59 ip6_rcv_finish); 67 xfrm6_transport_finish2);
60 return -1; 68 return -1;
61} 69}
62 70
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 02556e356f87..dc93002ff9d1 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
92 92
93 skb_reset_network_header(skb); 93 skb_reset_network_header(skb);
94 skb_mac_header_rebuild(skb); 94 skb_mac_header_rebuild(skb);
95 eth_hdr(skb)->h_proto = skb->protocol;
95 96
96 err = 0; 97 err = 0;
97 98
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 148533169b1d..64331158d693 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1474,7 +1474,7 @@ done:
1474 return copied; 1474 return copied;
1475} 1475}
1476 1476
1477static inline unsigned int iucv_accept_poll(struct sock *parent) 1477static inline __poll_t iucv_accept_poll(struct sock *parent)
1478{ 1478{
1479 struct iucv_sock *isk, *n; 1479 struct iucv_sock *isk, *n;
1480 struct sock *sk; 1480 struct sock *sk;
@@ -1489,11 +1489,11 @@ static inline unsigned int iucv_accept_poll(struct sock *parent)
1489 return 0; 1489 return 0;
1490} 1490}
1491 1491
1492unsigned int iucv_sock_poll(struct file *file, struct socket *sock, 1492__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
1493 poll_table *wait) 1493 poll_table *wait)
1494{ 1494{
1495 struct sock *sk = sock->sk; 1495 struct sock *sk = sock->sk;
1496 unsigned int mask = 0; 1496 __poll_t mask = 0;
1497 1497
1498 sock_poll_wait(file, sk_sleep(sk), wait); 1498 sock_poll_wait(file, sk_sleep(sk), wait);
1499 1499
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 0b750a22c4b9..4a8d407f8902 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
1387 if (!csk) 1387 if (!csk)
1388 return -EINVAL; 1388 return -EINVAL;
1389 1389
1390 /* We must prevent loops or risk deadlock ! */ 1390 /* Only allow TCP sockets to be attached for now */
1391 if (csk->sk_family == PF_KCM) 1391 if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
1392 csk->sk_protocol != IPPROTO_TCP)
1393 return -EOPNOTSUPP;
1394
1395 /* Don't allow listeners or closed sockets */
1396 if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
1392 return -EOPNOTSUPP; 1397 return -EOPNOTSUPP;
1393 1398
1394 psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); 1399 psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
@@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
1405 return err; 1410 return err;
1406 } 1411 }
1407 1412
1408 sock_hold(csk);
1409
1410 write_lock_bh(&csk->sk_callback_lock); 1413 write_lock_bh(&csk->sk_callback_lock);
1414
1415 /* Check if sk_user_data is already in use by KCM or someone else.
1416 * Must be done under lock to prevent race conditions.
1417 */
1418 if (csk->sk_user_data) {
1419 write_unlock_bh(&csk->sk_callback_lock);
1420 strp_done(&psock->strp);
1421 kmem_cache_free(kcm_psockp, psock);
1422 return -EALREADY;
1423 }
1424
1411 psock->save_data_ready = csk->sk_data_ready; 1425 psock->save_data_ready = csk->sk_data_ready;
1412 psock->save_write_space = csk->sk_write_space; 1426 psock->save_write_space = csk->sk_write_space;
1413 psock->save_state_change = csk->sk_state_change; 1427 psock->save_state_change = csk->sk_state_change;
@@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
1415 csk->sk_data_ready = psock_data_ready; 1429 csk->sk_data_ready = psock_data_ready;
1416 csk->sk_write_space = psock_write_space; 1430 csk->sk_write_space = psock_write_space;
1417 csk->sk_state_change = psock_state_change; 1431 csk->sk_state_change = psock_state_change;
1432
1418 write_unlock_bh(&csk->sk_callback_lock); 1433 write_unlock_bh(&csk->sk_callback_lock);
1419 1434
1435 sock_hold(csk);
1436
1420 /* Finished initialization, now add the psock to the MUX. */ 1437 /* Finished initialization, now add the psock to the MUX. */
1421 spin_lock_bh(&mux->lock); 1438 spin_lock_bh(&mux->lock);
1422 head = &mux->psocks; 1439 head = &mux->psocks;
@@ -1625,60 +1642,30 @@ static struct proto kcm_proto = {
1625}; 1642};
1626 1643
1627/* Clone a kcm socket. */ 1644/* Clone a kcm socket. */
1628static int kcm_clone(struct socket *osock, struct kcm_clone *info, 1645static struct file *kcm_clone(struct socket *osock)
1629 struct socket **newsockp)
1630{ 1646{
1631 struct socket *newsock; 1647 struct socket *newsock;
1632 struct sock *newsk; 1648 struct sock *newsk;
1633 struct file *newfile;
1634 int err, newfd;
1635 1649
1636 err = -ENFILE;
1637 newsock = sock_alloc(); 1650 newsock = sock_alloc();
1638 if (!newsock) 1651 if (!newsock)
1639 goto out; 1652 return ERR_PTR(-ENFILE);
1640 1653
1641 newsock->type = osock->type; 1654 newsock->type = osock->type;
1642 newsock->ops = osock->ops; 1655 newsock->ops = osock->ops;
1643 1656
1644 __module_get(newsock->ops->owner); 1657 __module_get(newsock->ops->owner);
1645 1658
1646 newfd = get_unused_fd_flags(0);
1647 if (unlikely(newfd < 0)) {
1648 err = newfd;
1649 goto out_fd_fail;
1650 }
1651
1652 newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
1653 if (IS_ERR(newfile)) {
1654 err = PTR_ERR(newfile);
1655 goto out_sock_alloc_fail;
1656 }
1657
1658 newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, 1659 newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
1659 &kcm_proto, true); 1660 &kcm_proto, true);
1660 if (!newsk) { 1661 if (!newsk) {
1661 err = -ENOMEM; 1662 sock_release(newsock);
1662 goto out_sk_alloc_fail; 1663 return ERR_PTR(-ENOMEM);
1663 } 1664 }
1664
1665 sock_init_data(newsock, newsk); 1665 sock_init_data(newsock, newsk);
1666 init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); 1666 init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
1667 1667
1668 fd_install(newfd, newfile); 1668 return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
1669 *newsockp = newsock;
1670 info->fd = newfd;
1671
1672 return 0;
1673
1674out_sk_alloc_fail:
1675 fput(newfile);
1676out_sock_alloc_fail:
1677 put_unused_fd(newfd);
1678out_fd_fail:
1679 sock_release(newsock);
1680out:
1681 return err;
1682} 1669}
1683 1670
1684static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1671static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1708,17 +1695,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1708 } 1695 }
1709 case SIOCKCMCLONE: { 1696 case SIOCKCMCLONE: {
1710 struct kcm_clone info; 1697 struct kcm_clone info;
1711 struct socket *newsock = NULL; 1698 struct file *file;
1712
1713 err = kcm_clone(sock, &info, &newsock);
1714 if (!err) {
1715 if (copy_to_user((void __user *)arg, &info,
1716 sizeof(info))) {
1717 err = -EFAULT;
1718 sys_close(info.fd);
1719 }
1720 }
1721 1699
1700 info.fd = get_unused_fd_flags(0);
1701 if (unlikely(info.fd < 0))
1702 return info.fd;
1703
1704 file = kcm_clone(sock);
1705 if (IS_ERR(file)) {
1706 put_unused_fd(info.fd);
1707 return PTR_ERR(file);
1708 }
1709 if (copy_to_user((void __user *)arg, &info,
1710 sizeof(info))) {
1711 put_unused_fd(info.fd);
1712 fput(file);
1713 return -EFAULT;
1714 }
1715 fd_install(info.fd, file);
1716 err = 0;
1722 break; 1717 break;
1723 } 1718 }
1724 default: 1719 default:
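
Note on the kcmsock.c SIOCKCMCLONE rework above: the descriptor number is reserved first, the file is created, the number is copied to userspace, and only then is the file installed; any failure releases what was taken so a half-built descriptor never becomes visible. The sketch below is a userspace analogue of that reserve-then-install ordering; the slot table is an illustration, not the kernel fd table.

#include <stdio.h>

#define NSLOTS 4
static void *slots[NSLOTS];

static int reserve_slot(void)
{
        for (int i = 0; i < NSLOTS; i++)
                if (!slots[i]) {
                        slots[i] = (void *)1;   /* reserved but not yet published */
                        return i;
                }
        return -1;
}

static void release_slot(int i)             { slots[i] = NULL; }
static void publish_slot(int i, void *obj)  { slots[i] = obj; }

int main(void)
{
        static int file_obj = 42;
        int copy_to_user_failed = 0;    /* flip to 1 to exercise the error path */
        int fd;

        fd = reserve_slot();            /* mirrors get_unused_fd_flags() */
        if (fd < 0)
                return 1;

        if (copy_to_user_failed) {      /* mirrors the -EFAULT branch above */
                release_slot(fd);
                return 1;
        }

        publish_slot(fd, &file_obj);    /* last step, mirrors fd_install() */
        printf("installed object in slot %d\n", fd);
        return 0;
}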
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3dffb892d52c..7e2e7188e7f4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -401,6 +401,11 @@ static int verify_address_len(const void *p)
401#endif 401#endif
402 int len; 402 int len;
403 403
404 if (sp->sadb_address_len <
405 DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
406 sizeof(uint64_t)))
407 return -EINVAL;
408
404 switch (addr->sa_family) { 409 switch (addr->sa_family) {
405 case AF_INET: 410 case AF_INET:
406 len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); 411 len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
@@ -511,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
511 uint16_t ext_type; 516 uint16_t ext_type;
512 int ext_len; 517 int ext_len;
513 518
519 if (len < sizeof(*ehdr))
520 return -EINVAL;
521
514 ext_len = ehdr->sadb_ext_len; 522 ext_len = ehdr->sadb_ext_len;
515 ext_len *= sizeof(uint64_t); 523 ext_len *= sizeof(uint64_t);
516 ext_type = ehdr->sadb_ext_type; 524 ext_type = ehdr->sadb_ext_type;
@@ -2194,8 +2202,10 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
2194 return PTR_ERR(out_skb); 2202 return PTR_ERR(out_skb);
2195 2203
2196 err = pfkey_xfrm_policy2msg(out_skb, xp, dir); 2204 err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
2197 if (err < 0) 2205 if (err < 0) {
2206 kfree_skb(out_skb);
2198 return err; 2207 return err;
2208 }
2199 2209
2200 out_hdr = (struct sadb_msg *) out_skb->data; 2210 out_hdr = (struct sadb_msg *) out_skb->data;
2201 out_hdr->sadb_msg_version = PF_KEY_V2; 2211 out_hdr->sadb_msg_version = PF_KEY_V2;
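
Note on the af_key.c hunk above: before the address family is even inspected, sadb_address_len (counted in 64-bit words) must cover the fixed sadb_address header plus at least the sa_family field of the embedded sockaddr. A worked example of that minimum; the sizes below are the usual Linux ABI values but are spelled out here only for illustration.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int sadb_address_size = 8;     /* fixed struct sadb_address */
        unsigned int sa_family_end = 2;         /* offsetofend(struct sockaddr, sa_family) */

        unsigned int min_words =
                DIV_ROUND_UP(sadb_address_size + sa_family_end,
                             (unsigned int)sizeof(unsigned long long));

        printf("sadb_address_len must be at least %u 64-bit words\n", min_words);
        return 0;
}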
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 41f5e48f8021..1621b6ab17ba 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -291,13 +291,14 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
291 int i; 291 int i;
292 292
293 mutex_lock(&sta->ampdu_mlme.mtx); 293 mutex_lock(&sta->ampdu_mlme.mtx);
294 for (i = 0; i < IEEE80211_NUM_TIDS; i++) { 294 for (i = 0; i < IEEE80211_NUM_TIDS; i++)
295 ___ieee80211_stop_tx_ba_session(sta, i, reason);
296 ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, 295 ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
297 WLAN_REASON_QSTA_LEAVE_QBSS, 296 WLAN_REASON_QSTA_LEAVE_QBSS,
298 reason != AGG_STOP_DESTROY_STA && 297 reason != AGG_STOP_DESTROY_STA &&
299 reason != AGG_STOP_PEER_REQUEST); 298 reason != AGG_STOP_PEER_REQUEST);
300 } 299
300 for (i = 0; i < IEEE80211_NUM_TIDS; i++)
301 ___ieee80211_stop_tx_ba_session(sta, i, reason);
301 mutex_unlock(&sta->ampdu_mlme.mtx); 302 mutex_unlock(&sta->ampdu_mlme.mtx);
302 303
303 /* stopping might queue the work again - so cancel only afterwards */ 304 /* stopping might queue the work again - so cancel only afterwards */
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 4f7826d7b47c..4394463a0c2e 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
797 struct mesh_path *mpath; 797 struct mesh_path *mpath;
798 u8 ttl, flags, hopcount; 798 u8 ttl, flags, hopcount;
799 const u8 *orig_addr; 799 const u8 *orig_addr;
800 u32 orig_sn, metric, metric_txsta, interval; 800 u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval;
801 bool root_is_gate; 801 bool root_is_gate;
802 802
803 ttl = rann->rann_ttl; 803 ttl = rann->rann_ttl;
@@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
808 interval = le32_to_cpu(rann->rann_interval); 808 interval = le32_to_cpu(rann->rann_interval);
809 hopcount = rann->rann_hopcount; 809 hopcount = rann->rann_hopcount;
810 hopcount++; 810 hopcount++;
811 metric = le32_to_cpu(rann->rann_metric); 811 orig_metric = le32_to_cpu(rann->rann_metric);
812 812
813 /* Ignore our own RANNs */ 813 /* Ignore our own RANNs */
814 if (ether_addr_equal(orig_addr, sdata->vif.addr)) 814 if (ether_addr_equal(orig_addr, sdata->vif.addr))
@@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
825 return; 825 return;
826 } 826 }
827 827
828 metric_txsta = airtime_link_metric_get(local, sta); 828 last_hop_metric = airtime_link_metric_get(local, sta);
829 new_metric = orig_metric + last_hop_metric;
830 if (new_metric < orig_metric)
831 new_metric = MAX_METRIC;
829 832
830 mpath = mesh_path_lookup(sdata, orig_addr); 833 mpath = mesh_path_lookup(sdata, orig_addr);
831 if (!mpath) { 834 if (!mpath) {
@@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
838 } 841 }
839 842
840 if (!(SN_LT(mpath->sn, orig_sn)) && 843 if (!(SN_LT(mpath->sn, orig_sn)) &&
841 !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { 844 !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) {
842 rcu_read_unlock(); 845 rcu_read_unlock();
843 return; 846 return;
844 } 847 }
@@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
856 } 859 }
857 860
858 mpath->sn = orig_sn; 861 mpath->sn = orig_sn;
859 mpath->rann_metric = metric + metric_txsta; 862 mpath->rann_metric = new_metric;
860 mpath->is_root = true; 863 mpath->is_root = true;
861 /* Recording RANNs sender address to send individually 864 /* Recording RANNs sender address to send individually
862 * addressed PREQs destined for root mesh STA */ 865 * addressed PREQs destined for root mesh STA */
@@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
876 mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, 879 mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
877 orig_sn, 0, NULL, 0, broadcast_addr, 880 orig_sn, 0, NULL, 0, broadcast_addr,
878 hopcount, ttl, interval, 881 hopcount, ttl, interval,
879 metric + metric_txsta, 0, sdata); 882 new_metric, 0, sdata);
880 } 883 }
881 884
882 rcu_read_unlock(); 885 rcu_read_unlock();
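
Note on the mesh_hwmp.c hunk above: the propagated RANN metric plus the last-hop metric can wrap a 32-bit counter, and a wrapped value would look like an excellent path, so the sum is checked for overflow and clamped. A runnable illustration; MAX_METRIC below stands in for the kernel constant of the same name.

#include <stdint.h>
#include <stdio.h>

#define MAX_METRIC 0xffffffffu

static uint32_t metric_add(uint32_t orig_metric, uint32_t last_hop_metric)
{
        uint32_t new_metric = orig_metric + last_hop_metric;

        if (new_metric < orig_metric)   /* unsigned wrap-around detected */
                new_metric = MAX_METRIC;
        return new_metric;
}

int main(void)
{
        printf("%u\n", (unsigned)metric_add(1000, 250));
        printf("%u\n", (unsigned)metric_add(0xfffffff0u, 0x100)); /* clamps instead of wrapping */
        return 0;
}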
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 04460440d731..c244691deab9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -895,7 +895,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
895 struct ieee80211_hdr_3addr *nullfunc; 895 struct ieee80211_hdr_3addr *nullfunc;
896 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 896 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
897 897
898 skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); 898 skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
899 if (!skb) 899 if (!skb)
900 return; 900 return;
901 901
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 70e9d2ca8bbe..4daafb07602f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
3632 } 3632 }
3633 return true; 3633 return true;
3634 case NL80211_IFTYPE_MESH_POINT: 3634 case NL80211_IFTYPE_MESH_POINT:
3635 if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
3636 return false;
3635 if (multicast) 3637 if (multicast)
3636 return true; 3638 return true;
3637 return ether_addr_equal(sdata->vif.addr, hdr->addr1); 3639 return ether_addr_equal(sdata->vif.addr, hdr->addr1);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7b8154474b9e..3160954fc406 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4438,13 +4438,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
4438EXPORT_SYMBOL(ieee80211_pspoll_get); 4438EXPORT_SYMBOL(ieee80211_pspoll_get);
4439 4439
4440struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, 4440struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
4441 struct ieee80211_vif *vif) 4441 struct ieee80211_vif *vif,
4442 bool qos_ok)
4442{ 4443{
4443 struct ieee80211_hdr_3addr *nullfunc; 4444 struct ieee80211_hdr_3addr *nullfunc;
4444 struct ieee80211_sub_if_data *sdata; 4445 struct ieee80211_sub_if_data *sdata;
4445 struct ieee80211_if_managed *ifmgd; 4446 struct ieee80211_if_managed *ifmgd;
4446 struct ieee80211_local *local; 4447 struct ieee80211_local *local;
4447 struct sk_buff *skb; 4448 struct sk_buff *skb;
4449 bool qos = false;
4448 4450
4449 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) 4451 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
4450 return NULL; 4452 return NULL;
@@ -4453,7 +4455,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
4453 ifmgd = &sdata->u.mgd; 4455 ifmgd = &sdata->u.mgd;
4454 local = sdata->local; 4456 local = sdata->local;
4455 4457
4456 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); 4458 if (qos_ok) {
4459 struct sta_info *sta;
4460
4461 rcu_read_lock();
4462 sta = sta_info_get(sdata, ifmgd->bssid);
4463 qos = sta && sta->sta.wme;
4464 rcu_read_unlock();
4465 }
4466
4467 skb = dev_alloc_skb(local->hw.extra_tx_headroom +
4468 sizeof(*nullfunc) + 2);
4457 if (!skb) 4469 if (!skb)
4458 return NULL; 4470 return NULL;
4459 4471
@@ -4463,6 +4475,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
4463 nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | 4475 nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
4464 IEEE80211_STYPE_NULLFUNC | 4476 IEEE80211_STYPE_NULLFUNC |
4465 IEEE80211_FCTL_TODS); 4477 IEEE80211_FCTL_TODS);
4478 if (qos) {
4479 __le16 qos = cpu_to_le16(7);
4480
4481 BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC |
4482 IEEE80211_STYPE_NULLFUNC) !=
4483 IEEE80211_STYPE_QOS_NULLFUNC);
4484 nullfunc->frame_control |=
4485 cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC);
4486 skb->priority = 7;
4487 skb_set_queue_mapping(skb, IEEE80211_AC_VO);
4488 skb_put_data(skb, &qos, sizeof(qos));
4489 }
4490
4466 memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); 4491 memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN);
4467 memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); 4492 memcpy(nullfunc->addr2, vif->addr, ETH_ALEN);
4468 memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); 4493 memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 9ee71cb276d7..fbaf3bd05b2e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1636,17 +1636,14 @@ static int
1636ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) 1636ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1637{ 1637{
1638 struct msghdr msg = {NULL,}; 1638 struct msghdr msg = {NULL,};
1639 struct kvec iov; 1639 struct kvec iov = {buffer, buflen};
1640 int len; 1640 int len;
1641 1641
1642 EnterFunction(7); 1642 EnterFunction(7);
1643 1643
1644 /* Receive a packet */ 1644 /* Receive a packet */
1645 iov.iov_base = buffer; 1645 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
1646 iov.iov_len = (size_t)buflen; 1646 len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
1647
1648 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1649
1650 if (len < 0) 1647 if (len < 0)
1651 return len; 1648 return len;
1652 1649
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 85f643c1e227..4efaa3066c78 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1044,7 +1044,7 @@ static void gc_worker(struct work_struct *work)
1044 * we will just continue with next hash slot. 1044 * we will just continue with next hash slot.
1045 */ 1045 */
1046 rcu_read_unlock(); 1046 rcu_read_unlock();
1047 cond_resched_rcu_qs(); 1047 cond_resched();
1048 } while (++buckets < goal); 1048 } while (++buckets < goal);
1049 1049
1050 if (gc_work->exiting) 1050 if (gc_work->exiting)
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index cf1bf2605c10..dc6347342e34 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -103,7 +103,6 @@ struct bitstr {
103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} 103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} 104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} 105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
107static unsigned int get_len(struct bitstr *bs); 106static unsigned int get_len(struct bitstr *bs);
108static unsigned int get_bit(struct bitstr *bs); 107static unsigned int get_bit(struct bitstr *bs);
109static unsigned int get_bits(struct bitstr *bs, unsigned int b); 108static unsigned int get_bits(struct bitstr *bs, unsigned int b);
@@ -165,6 +164,19 @@ static unsigned int get_len(struct bitstr *bs)
165 return v; 164 return v;
166} 165}
167 166
167static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
168{
169 bits += bs->bit;
170 bytes += bits / BITS_PER_BYTE;
171 if (bits % BITS_PER_BYTE > 0)
172 bytes++;
173
174 if (*bs->cur + bytes > *bs->end)
175 return 1;
176
177 return 0;
178}
179
168/****************************************************************************/ 180/****************************************************************************/
169static unsigned int get_bit(struct bitstr *bs) 181static unsigned int get_bit(struct bitstr *bs)
170{ 182{
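
Note on the nf_h323_error_boundary() helper added above: unlike the old CHECK_BOUND macro, it folds the decoder's current bit offset into the request, rounds the total up to whole bytes, and checks that many bytes against the end of the buffer. The runnable sketch below illustrates that arithmetic with a pointer comparison against the buffer end; struct bits is a cut-down stand-in for the decoder's bitstr, not the kernel type.

#include <stdio.h>
#include <stddef.h>

#define BITS_PER_BYTE 8

struct bits {
        const unsigned char *cur;
        const unsigned char *end;
        unsigned int bit;       /* 0..7, bits already consumed in *cur */
};

static int out_of_bounds(const struct bits *bs, size_t bytes, size_t bits)
{
        bits += bs->bit;
        bytes += bits / BITS_PER_BYTE;
        if (bits % BITS_PER_BYTE > 0)
                bytes++;

        return bs->cur + bytes > bs->end;
}

int main(void)
{
        unsigned char buf[4] = { 0 };
        struct bits bs = { .cur = buf, .end = buf + sizeof(buf), .bit = 6 };

        printf("need 2 more bits:  %s\n", out_of_bounds(&bs, 0, 2) ? "out" : "ok");
        printf("need 4 more bytes: %s\n", out_of_bounds(&bs, 4, 0) ? "out" : "ok");
        return 0;
}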
@@ -279,8 +291,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
279 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 291 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
280 292
281 INC_BIT(bs); 293 INC_BIT(bs);
282 294 if (nf_h323_error_boundary(bs, 0, 0))
283 CHECK_BOUND(bs, 0); 295 return H323_ERROR_BOUND;
284 return H323_ERROR_NONE; 296 return H323_ERROR_NONE;
285} 297}
286 298
@@ -293,11 +305,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
293 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 305 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
294 306
295 BYTE_ALIGN(bs); 307 BYTE_ALIGN(bs);
296 CHECK_BOUND(bs, 1); 308 if (nf_h323_error_boundary(bs, 1, 0))
309 return H323_ERROR_BOUND;
310
297 len = *bs->cur++; 311 len = *bs->cur++;
298 bs->cur += len; 312 bs->cur += len;
313 if (nf_h323_error_boundary(bs, 0, 0))
314 return H323_ERROR_BOUND;
299 315
300 CHECK_BOUND(bs, 0);
301 return H323_ERROR_NONE; 316 return H323_ERROR_NONE;
302} 317}
303 318
@@ -319,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
319 bs->cur += 2; 334 bs->cur += 2;
320 break; 335 break;
321 case CONS: /* 64K < Range < 4G */ 336 case CONS: /* 64K < Range < 4G */
337 if (nf_h323_error_boundary(bs, 0, 2))
338 return H323_ERROR_BOUND;
322 len = get_bits(bs, 2) + 1; 339 len = get_bits(bs, 2) + 1;
323 BYTE_ALIGN(bs); 340 BYTE_ALIGN(bs);
324 if (base && (f->attr & DECODE)) { /* timeToLive */ 341 if (base && (f->attr & DECODE)) { /* timeToLive */
@@ -330,7 +347,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
330 break; 347 break;
331 case UNCO: 348 case UNCO:
332 BYTE_ALIGN(bs); 349 BYTE_ALIGN(bs);
333 CHECK_BOUND(bs, 2); 350 if (nf_h323_error_boundary(bs, 2, 0))
351 return H323_ERROR_BOUND;
334 len = get_len(bs); 352 len = get_len(bs);
335 bs->cur += len; 353 bs->cur += len;
336 break; 354 break;
@@ -341,7 +359,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
341 359
342 PRINT("\n"); 360 PRINT("\n");
343 361
344 CHECK_BOUND(bs, 0); 362 if (nf_h323_error_boundary(bs, 0, 0))
363 return H323_ERROR_BOUND;
345 return H323_ERROR_NONE; 364 return H323_ERROR_NONE;
346} 365}
347 366
@@ -357,7 +376,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
357 INC_BITS(bs, f->sz); 376 INC_BITS(bs, f->sz);
358 } 377 }
359 378
360 CHECK_BOUND(bs, 0); 379 if (nf_h323_error_boundary(bs, 0, 0))
380 return H323_ERROR_BOUND;
361 return H323_ERROR_NONE; 381 return H323_ERROR_NONE;
362} 382}
363 383
@@ -375,12 +395,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
375 len = f->lb; 395 len = f->lb;
376 break; 396 break;
377 case WORD: /* 2-byte length */ 397 case WORD: /* 2-byte length */
378 CHECK_BOUND(bs, 2); 398 if (nf_h323_error_boundary(bs, 2, 0))
399 return H323_ERROR_BOUND;
379 len = (*bs->cur++) << 8; 400 len = (*bs->cur++) << 8;
380 len += (*bs->cur++) + f->lb; 401 len += (*bs->cur++) + f->lb;
381 break; 402 break;
382 case SEMI: 403 case SEMI:
383 CHECK_BOUND(bs, 2); 404 if (nf_h323_error_boundary(bs, 2, 0))
405 return H323_ERROR_BOUND;
384 len = get_len(bs); 406 len = get_len(bs);
385 break; 407 break;
386 default: 408 default:
@@ -391,7 +413,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
391 bs->cur += len >> 3; 413 bs->cur += len >> 3;
392 bs->bit = len & 7; 414 bs->bit = len & 7;
393 415
394 CHECK_BOUND(bs, 0); 416 if (nf_h323_error_boundary(bs, 0, 0))
417 return H323_ERROR_BOUND;
395 return H323_ERROR_NONE; 418 return H323_ERROR_NONE;
396} 419}
397 420
@@ -404,12 +427,15 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
404 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 427 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
405 428
406 /* 2 <= Range <= 255 */ 429 /* 2 <= Range <= 255 */
430 if (nf_h323_error_boundary(bs, 0, f->sz))
431 return H323_ERROR_BOUND;
407 len = get_bits(bs, f->sz) + f->lb; 432 len = get_bits(bs, f->sz) + f->lb;
408 433
409 BYTE_ALIGN(bs); 434 BYTE_ALIGN(bs);
410 INC_BITS(bs, (len << 2)); 435 INC_BITS(bs, (len << 2));
411 436
412 CHECK_BOUND(bs, 0); 437 if (nf_h323_error_boundary(bs, 0, 0))
438 return H323_ERROR_BOUND;
413 return H323_ERROR_NONE; 439 return H323_ERROR_NONE;
414} 440}
415 441
@@ -440,15 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
440 break; 466 break;
441 case BYTE: /* Range == 256 */ 467 case BYTE: /* Range == 256 */
442 BYTE_ALIGN(bs); 468 BYTE_ALIGN(bs);
443 CHECK_BOUND(bs, 1); 469 if (nf_h323_error_boundary(bs, 1, 0))
470 return H323_ERROR_BOUND;
444 len = (*bs->cur++) + f->lb; 471 len = (*bs->cur++) + f->lb;
445 break; 472 break;
446 case SEMI: 473 case SEMI:
447 BYTE_ALIGN(bs); 474 BYTE_ALIGN(bs);
448 CHECK_BOUND(bs, 2); 475 if (nf_h323_error_boundary(bs, 2, 0))
476 return H323_ERROR_BOUND;
449 len = get_len(bs) + f->lb; 477 len = get_len(bs) + f->lb;
450 break; 478 break;
451 default: /* 2 <= Range <= 255 */ 479 default: /* 2 <= Range <= 255 */
480 if (nf_h323_error_boundary(bs, 0, f->sz))
481 return H323_ERROR_BOUND;
452 len = get_bits(bs, f->sz) + f->lb; 482 len = get_bits(bs, f->sz) + f->lb;
453 BYTE_ALIGN(bs); 483 BYTE_ALIGN(bs);
454 break; 484 break;
@@ -458,7 +488,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
458 488
459 PRINT("\n"); 489 PRINT("\n");
460 490
461 CHECK_BOUND(bs, 0); 491 if (nf_h323_error_boundary(bs, 0, 0))
492 return H323_ERROR_BOUND;
462 return H323_ERROR_NONE; 493 return H323_ERROR_NONE;
463} 494}
464 495
@@ -473,10 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
473 switch (f->sz) { 504 switch (f->sz) {
474 case BYTE: /* Range == 256 */ 505 case BYTE: /* Range == 256 */
475 BYTE_ALIGN(bs); 506 BYTE_ALIGN(bs);
476 CHECK_BOUND(bs, 1); 507 if (nf_h323_error_boundary(bs, 1, 0))
508 return H323_ERROR_BOUND;
477 len = (*bs->cur++) + f->lb; 509 len = (*bs->cur++) + f->lb;
478 break; 510 break;
479 default: /* 2 <= Range <= 255 */ 511 default: /* 2 <= Range <= 255 */
512 if (nf_h323_error_boundary(bs, 0, f->sz))
513 return H323_ERROR_BOUND;
480 len = get_bits(bs, f->sz) + f->lb; 514 len = get_bits(bs, f->sz) + f->lb;
481 BYTE_ALIGN(bs); 515 BYTE_ALIGN(bs);
482 break; 516 break;
@@ -484,7 +518,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
484 518
485 bs->cur += len << 1; 519 bs->cur += len << 1;
486 520
487 CHECK_BOUND(bs, 0); 521 if (nf_h323_error_boundary(bs, 0, 0))
522 return H323_ERROR_BOUND;
488 return H323_ERROR_NONE; 523 return H323_ERROR_NONE;
489} 524}
490 525
@@ -503,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
503 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; 538 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
504 539
505 /* Extensible? */ 540 /* Extensible? */
541 if (nf_h323_error_boundary(bs, 0, 1))
542 return H323_ERROR_BOUND;
506 ext = (f->attr & EXT) ? get_bit(bs) : 0; 543 ext = (f->attr & EXT) ? get_bit(bs) : 0;
507 544
508 /* Get fields bitmap */ 545 /* Get fields bitmap */
546 if (nf_h323_error_boundary(bs, 0, f->sz))
547 return H323_ERROR_BOUND;
509 bmp = get_bitmap(bs, f->sz); 548 bmp = get_bitmap(bs, f->sz);
510 if (base) 549 if (base)
511 *(unsigned int *)base = bmp; 550 *(unsigned int *)base = bmp;
@@ -525,9 +564,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
525 564
526 /* Decode */ 565 /* Decode */
527 if (son->attr & OPEN) { /* Open field */ 566 if (son->attr & OPEN) { /* Open field */
528 CHECK_BOUND(bs, 2); 567 if (nf_h323_error_boundary(bs, 2, 0))
568 return H323_ERROR_BOUND;
529 len = get_len(bs); 569 len = get_len(bs);
530 CHECK_BOUND(bs, len); 570 if (nf_h323_error_boundary(bs, len, 0))
571 return H323_ERROR_BOUND;
531 if (!base || !(son->attr & DECODE)) { 572 if (!base || !(son->attr & DECODE)) {
532 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, 573 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
533 " ", son->name); 574 " ", son->name);
@@ -555,8 +596,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
555 return H323_ERROR_NONE; 596 return H323_ERROR_NONE;
556 597
557 /* Get the extension bitmap */ 598 /* Get the extension bitmap */
599 if (nf_h323_error_boundary(bs, 0, 7))
600 return H323_ERROR_BOUND;
558 bmp2_len = get_bits(bs, 7) + 1; 601 bmp2_len = get_bits(bs, 7) + 1;
559 CHECK_BOUND(bs, (bmp2_len + 7) >> 3); 602 if (nf_h323_error_boundary(bs, 0, bmp2_len))
603 return H323_ERROR_BOUND;
560 bmp2 = get_bitmap(bs, bmp2_len); 604 bmp2 = get_bitmap(bs, bmp2_len);
561 bmp |= bmp2 >> f->sz; 605 bmp |= bmp2 >> f->sz;
562 if (base) 606 if (base)
@@ -567,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
567 for (opt = 0; opt < bmp2_len; opt++, i++, son++) { 611 for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
568 /* Check Range */ 612 /* Check Range */
569 if (i >= f->ub) { /* Newer Version? */ 613 if (i >= f->ub) { /* Newer Version? */
570 CHECK_BOUND(bs, 2); 614 if (nf_h323_error_boundary(bs, 2, 0))
615 return H323_ERROR_BOUND;
571 len = get_len(bs); 616 len = get_len(bs);
572 CHECK_BOUND(bs, len); 617 if (nf_h323_error_boundary(bs, len, 0))
618 return H323_ERROR_BOUND;
573 bs->cur += len; 619 bs->cur += len;
574 continue; 620 continue;
575 } 621 }
@@ -583,9 +629,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
583 if (!((0x80000000 >> opt) & bmp2)) /* Not present */ 629 if (!((0x80000000 >> opt) & bmp2)) /* Not present */
584 continue; 630 continue;
585 631
586 CHECK_BOUND(bs, 2); 632 if (nf_h323_error_boundary(bs, 2, 0))
633 return H323_ERROR_BOUND;
587 len = get_len(bs); 634 len = get_len(bs);
588 CHECK_BOUND(bs, len); 635 if (nf_h323_error_boundary(bs, len, 0))
636 return H323_ERROR_BOUND;
589 if (!base || !(son->attr & DECODE)) { 637 if (!base || !(son->attr & DECODE)) {
590 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", 638 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
591 son->name); 639 son->name);
@@ -623,22 +671,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
623 switch (f->sz) { 671 switch (f->sz) {
624 case BYTE: 672 case BYTE:
625 BYTE_ALIGN(bs); 673 BYTE_ALIGN(bs);
626 CHECK_BOUND(bs, 1); 674 if (nf_h323_error_boundary(bs, 1, 0))
675 return H323_ERROR_BOUND;
627 count = *bs->cur++; 676 count = *bs->cur++;
628 break; 677 break;
629 case WORD: 678 case WORD:
630 BYTE_ALIGN(bs); 679 BYTE_ALIGN(bs);
631 CHECK_BOUND(bs, 2); 680 if (nf_h323_error_boundary(bs, 2, 0))
681 return H323_ERROR_BOUND;
632 count = *bs->cur++; 682 count = *bs->cur++;
633 count <<= 8; 683 count <<= 8;
634 count += *bs->cur++; 684 count += *bs->cur++;
635 break; 685 break;
636 case SEMI: 686 case SEMI:
637 BYTE_ALIGN(bs); 687 BYTE_ALIGN(bs);
638 CHECK_BOUND(bs, 2); 688 if (nf_h323_error_boundary(bs, 2, 0))
689 return H323_ERROR_BOUND;
639 count = get_len(bs); 690 count = get_len(bs);
640 break; 691 break;
641 default: 692 default:
693 if (nf_h323_error_boundary(bs, 0, f->sz))
694 return H323_ERROR_BOUND;
642 count = get_bits(bs, f->sz); 695 count = get_bits(bs, f->sz);
643 break; 696 break;
644 } 697 }
@@ -658,8 +711,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
658 for (i = 0; i < count; i++) { 711 for (i = 0; i < count; i++) {
659 if (son->attr & OPEN) { 712 if (son->attr & OPEN) {
660 BYTE_ALIGN(bs); 713 BYTE_ALIGN(bs);
714 if (nf_h323_error_boundary(bs, 2, 0))
715 return H323_ERROR_BOUND;
661 len = get_len(bs); 716 len = get_len(bs);
662 CHECK_BOUND(bs, len); 717 if (nf_h323_error_boundary(bs, len, 0))
718 return H323_ERROR_BOUND;
663 if (!base || !(son->attr & DECODE)) { 719 if (!base || !(son->attr & DECODE)) {
664 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, 720 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
665 " ", son->name); 721 " ", son->name);
@@ -710,11 +766,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
710 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; 766 base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
711 767
712 /* Decode the choice index number */ 768 /* Decode the choice index number */
769 if (nf_h323_error_boundary(bs, 0, 1))
770 return H323_ERROR_BOUND;
713 if ((f->attr & EXT) && get_bit(bs)) { 771 if ((f->attr & EXT) && get_bit(bs)) {
714 ext = 1; 772 ext = 1;
773 if (nf_h323_error_boundary(bs, 0, 7))
774 return H323_ERROR_BOUND;
715 type = get_bits(bs, 7) + f->lb; 775 type = get_bits(bs, 7) + f->lb;
716 } else { 776 } else {
717 ext = 0; 777 ext = 0;
778 if (nf_h323_error_boundary(bs, 0, f->sz))
779 return H323_ERROR_BOUND;
718 type = get_bits(bs, f->sz); 780 type = get_bits(bs, f->sz);
719 if (type >= f->lb) 781 if (type >= f->lb)
720 return H323_ERROR_RANGE; 782 return H323_ERROR_RANGE;
@@ -727,8 +789,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
727 /* Check Range */ 789 /* Check Range */
728 if (type >= f->ub) { /* Newer version? */ 790 if (type >= f->ub) { /* Newer version? */
729 BYTE_ALIGN(bs); 791 BYTE_ALIGN(bs);
792 if (nf_h323_error_boundary(bs, 2, 0))
793 return H323_ERROR_BOUND;
730 len = get_len(bs); 794 len = get_len(bs);
731 CHECK_BOUND(bs, len); 795 if (nf_h323_error_boundary(bs, len, 0))
796 return H323_ERROR_BOUND;
732 bs->cur += len; 797 bs->cur += len;
733 return H323_ERROR_NONE; 798 return H323_ERROR_NONE;
734 } 799 }
@@ -742,8 +807,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
742 807
743 if (ext || (son->attr & OPEN)) { 808 if (ext || (son->attr & OPEN)) {
744 BYTE_ALIGN(bs); 809 BYTE_ALIGN(bs);
810 if (nf_h323_error_boundary(bs, len, 0))
811 return H323_ERROR_BOUND;
745 len = get_len(bs); 812 len = get_len(bs);
746 CHECK_BOUND(bs, len); 813 if (nf_h323_error_boundary(bs, len, 0))
814 return H323_ERROR_BOUND;
747 if (!base || !(son->attr & DECODE)) { 815 if (!base || !(son->attr & DECODE)) {
748 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", 816 PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
749 son->name); 817 son->name);
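
The nf_conntrack_h323_asn1.c hunks above replace the old CHECK_BOUND macro with calls to nf_h323_error_boundary(bs, bytes, bits), so the PER decoder can reject fields whose byte and bit counts would run past the end of the packet. As a rough illustration only, here is a minimal userspace sketch of that kind of byte/bit boundary test; the struct bitstr layout and the boundary_exceeded() helper are assumptions made for the example, not the kernel's actual definitions.

/* Minimal userspace sketch of a byte/bit boundary check in the spirit of
 * nf_h323_error_boundary(); layout and semantics are illustrative only. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct bitstr {
	const unsigned char *beg;  /* start of the encoded buffer */
	const unsigned char *cur;  /* current read position (byte) */
	const unsigned char *end;  /* one past the last valid byte */
	unsigned int bit;          /* bit offset within *cur, 0..7 */
};

/* Return true when reading `bytes` whole bytes plus `bits` extra bits,
 * starting at the current bit position, would run past the buffer end. */
static bool boundary_exceeded(const struct bitstr *bs, size_t bytes, size_t bits)
{
	size_t bits_needed = bytes * 8 + bits + bs->bit;
	size_t bits_left = (size_t)(bs->end - bs->cur) * 8;

	return bits_needed > bits_left;
}

int main(void)
{
	unsigned char buf[4] = { 0 };
	struct bitstr bs = { buf, buf, buf + sizeof(buf), 6 };

	printf("need 3 bytes: %s\n", boundary_exceeded(&bs, 3, 0) ? "out of bounds" : "ok");
	printf("need 4 bytes: %s\n", boundary_exceeded(&bs, 4, 0) ? "out of bounds" : "ok");
	return 0;
}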
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59c08997bfdf..382d49792f42 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,6 @@
45#include <net/netfilter/nf_conntrack_zones.h> 45#include <net/netfilter/nf_conntrack_zones.h>
46#include <net/netfilter/nf_conntrack_timestamp.h> 46#include <net/netfilter/nf_conntrack_timestamp.h>
47#include <net/netfilter/nf_conntrack_labels.h> 47#include <net/netfilter/nf_conntrack_labels.h>
48#include <net/netfilter/nf_conntrack_seqadj.h>
49#include <net/netfilter/nf_conntrack_synproxy.h> 48#include <net/netfilter/nf_conntrack_synproxy.h>
50#ifdef CONFIG_NF_NAT_NEEDED 49#ifdef CONFIG_NF_NAT_NEEDED
51#include <net/netfilter/nf_nat_core.h> 50#include <net/netfilter/nf_nat_core.h>
@@ -1566,9 +1565,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
1566static int ctnetlink_change_timeout(struct nf_conn *ct, 1565static int ctnetlink_change_timeout(struct nf_conn *ct,
1567 const struct nlattr * const cda[]) 1566 const struct nlattr * const cda[])
1568{ 1567{
1569 u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); 1568 u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
1570 1569
1571 ct->timeout = nfct_time_stamp + timeout * HZ; 1570 if (timeout > INT_MAX)
1571 timeout = INT_MAX;
1572 ct->timeout = nfct_time_stamp + (u32)timeout;
1572 1573
1573 if (test_bit(IPS_DYING_BIT, &ct->status)) 1574 if (test_bit(IPS_DYING_BIT, &ct->status))
1574 return -ETIME; 1575 return -ETIME;
@@ -1768,6 +1769,7 @@ ctnetlink_create_conntrack(struct net *net,
1768 int err = -EINVAL; 1769 int err = -EINVAL;
1769 struct nf_conntrack_helper *helper; 1770 struct nf_conntrack_helper *helper;
1770 struct nf_conn_tstamp *tstamp; 1771 struct nf_conn_tstamp *tstamp;
1772 u64 timeout;
1771 1773
1772 ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); 1774 ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
1773 if (IS_ERR(ct)) 1775 if (IS_ERR(ct))
@@ -1776,7 +1778,10 @@ ctnetlink_create_conntrack(struct net *net,
1776 if (!cda[CTA_TIMEOUT]) 1778 if (!cda[CTA_TIMEOUT])
1777 goto err1; 1779 goto err1;
1778 1780
1779 ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; 1781 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
1782 if (timeout > INT_MAX)
1783 timeout = INT_MAX;
1784 ct->timeout = (u32)timeout + nfct_time_stamp;
1780 1785
1781 rcu_read_lock(); 1786 rcu_read_lock();
1782 if (cda[CTA_HELP]) { 1787 if (cda[CTA_HELP]) {
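
The ctnetlink hunks widen the CTA_TIMEOUT value to 64 bits before multiplying by HZ and clamp the result to INT_MAX, so a huge userspace-supplied timeout cannot wrap the 32-bit jiffies arithmetic. A small userspace sketch of the same clamp, with HZ and the jiffies base as stand-in values:

/* Overflow-safe timeout clamp, mirroring the change above; HZ and the
 * "now" value are placeholders, not the kernel's. */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#define HZ 1000u

static uint32_t clamp_timeout(uint32_t seconds, uint32_t now_jiffies)
{
	uint64_t timeout = (uint64_t)seconds * HZ;  /* widen before multiplying */

	if (timeout > INT_MAX)                      /* avoid wrapping the 32-bit deadline */
		timeout = INT_MAX;
	return now_jiffies + (uint32_t)timeout;
}

int main(void)
{
	/* 0xffffffff seconds would overflow a 32-bit multiply; the clamp keeps
	 * the relative timeout at INT_MAX jiffies instead. */
	printf("deadline: %u\n", (unsigned int)clamp_timeout(0xffffffffu, 12345u));
	return 0;
}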
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b12fc07111d0..37ef35b861f2 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct,
1039 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && 1039 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1040 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) 1040 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1041 timeout = timeouts[TCP_CONNTRACK_UNACK]; 1041 timeout = timeouts[TCP_CONNTRACK_UNACK];
1042 else if (ct->proto.tcp.last_win == 0 &&
1043 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1044 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1042 else 1045 else
1043 timeout = timeouts[new_state]; 1046 timeout = timeouts[new_state];
1044 spin_unlock_bh(&ct->lock); 1047 spin_unlock_bh(&ct->lock);
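
The tcp_packet() hunk caps the conntrack timeout at the RETRANS value whenever the peer last advertised a zero window, so a stalled connection does not pin a long established-state timeout. A toy version of that selection, with made-up timeout values:

/* Sketch of the timeout selection above: prefer the shorter RETRANS timeout
 * when the remote window is zero; the numbers are illustrative. */
#include <stdio.h>

static unsigned int pick_timeout(unsigned int state_timeout,
				 unsigned int retrans_timeout,
				 unsigned int last_win)
{
	if (last_win == 0 && state_timeout > retrans_timeout)
		return retrans_timeout;   /* zero-window peer: don't pin the entry for long */
	return state_timeout;
}

int main(void)
{
	printf("%u\n", pick_timeout(432000, 300, 0));     /* zero window */
	printf("%u\n", pick_timeout(432000, 300, 1024));  /* normal case */
	return 0;
}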
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d8327b43e4dc..07bd4138c84e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
2072 continue; 2072 continue;
2073 2073
2074 list_for_each_entry_rcu(chain, &table->chains, list) { 2074 list_for_each_entry_rcu(chain, &table->chains, list) {
2075 if (ctx && ctx->chain[0] && 2075 if (ctx && ctx->chain &&
2076 strcmp(ctx->chain, chain->name) != 0) 2076 strcmp(ctx->chain, chain->name) != 0)
2077 continue; 2077 continue;
2078 2078
@@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
4665{ 4665{
4666 struct nft_obj_filter *filter = cb->data; 4666 struct nft_obj_filter *filter = cb->data;
4667 4667
4668 kfree(filter->table); 4668 if (filter) {
4669 kfree(filter); 4669 kfree(filter->table);
4670 kfree(filter);
4671 }
4670 4672
4671 return 0; 4673 return 0;
4672} 4674}
@@ -5847,6 +5849,12 @@ static int __net_init nf_tables_init_net(struct net *net)
5847 return 0; 5849 return 0;
5848} 5850}
5849 5851
5852static void __net_exit nf_tables_exit_net(struct net *net)
5853{
5854 WARN_ON_ONCE(!list_empty(&net->nft.af_info));
5855 WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
5856}
5857
5850int __nft_release_basechain(struct nft_ctx *ctx) 5858int __nft_release_basechain(struct nft_ctx *ctx)
5851{ 5859{
5852 struct nft_rule *rule, *nr; 5860 struct nft_rule *rule, *nr;
@@ -5917,6 +5925,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
5917 5925
5918static struct pernet_operations nf_tables_net_ops = { 5926static struct pernet_operations nf_tables_net_ops = {
5919 .init = nf_tables_init_net, 5927 .init = nf_tables_init_net,
5928 .exit = nf_tables_exit_net,
5920}; 5929};
5921 5930
5922static int __init nf_tables_module_init(void) 5931static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 41628b393673..d33ce6d5ebce 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/capability.h>
20#include <net/netlink.h> 21#include <net/netlink.h>
21#include <net/sock.h> 22#include <net/sock.h>
22 23
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
407 struct nfnl_cthelper *nlcth; 408 struct nfnl_cthelper *nlcth;
408 int ret = 0; 409 int ret = 0;
409 410
411 if (!capable(CAP_NET_ADMIN))
412 return -EPERM;
413
410 if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) 414 if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
411 return -EINVAL; 415 return -EINVAL;
412 416
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
611 struct nfnl_cthelper *nlcth; 615 struct nfnl_cthelper *nlcth;
612 bool tuple_set = false; 616 bool tuple_set = false;
613 617
618 if (!capable(CAP_NET_ADMIN))
619 return -EPERM;
620
614 if (nlh->nlmsg_flags & NLM_F_DUMP) { 621 if (nlh->nlmsg_flags & NLM_F_DUMP) {
615 struct netlink_dump_control c = { 622 struct netlink_dump_control c = {
616 .dump = nfnl_cthelper_dump_table, 623 .dump = nfnl_cthelper_dump_table,
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
678 struct nfnl_cthelper *nlcth, *n; 685 struct nfnl_cthelper *nlcth, *n;
679 int j = 0, ret; 686 int j = 0, ret;
680 687
688 if (!capable(CAP_NET_ADMIN))
689 return -EPERM;
690
681 if (tb[NFCTH_NAME]) 691 if (tb[NFCTH_NAME])
682 helper_name = nla_data(tb[NFCTH_NAME]); 692 helper_name = nla_data(tb[NFCTH_NAME]);
683 693
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e5afab86381c..e955bec0acc6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net)
1093 1093
1094static void __net_exit nfnl_log_net_exit(struct net *net) 1094static void __net_exit nfnl_log_net_exit(struct net *net)
1095{ 1095{
1096 struct nfnl_log_net *log = nfnl_log_pernet(net);
1097 unsigned int i;
1098
1096#ifdef CONFIG_PROC_FS 1099#ifdef CONFIG_PROC_FS
1097 remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); 1100 remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
1098#endif 1101#endif
1099 nf_log_unset(net, &nfulnl_logger); 1102 nf_log_unset(net, &nfulnl_logger);
1103 for (i = 0; i < INSTANCE_BUCKETS; i++)
1104 WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
1100} 1105}
1101 1106
1102static struct pernet_operations nfnl_log_net_ops = { 1107static struct pernet_operations nfnl_log_net_ops = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a16356cacec3..c09b36755ed7 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net)
1512 1512
1513static void __net_exit nfnl_queue_net_exit(struct net *net) 1513static void __net_exit nfnl_queue_net_exit(struct net *net)
1514{ 1514{
1515 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1516 unsigned int i;
1517
1515 nf_unregister_queue_handler(net); 1518 nf_unregister_queue_handler(net);
1516#ifdef CONFIG_PROC_FS 1519#ifdef CONFIG_PROC_FS
1517 remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); 1520 remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
1518#endif 1521#endif
1522 for (i = 0; i < INSTANCE_BUCKETS; i++)
1523 WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
1519} 1524}
1520 1525
1521static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) 1526static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a0a93d987a3b..47ec1046ad11 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
214 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, 214 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
215 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, 215 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
216 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, 216 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
217 [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
218 [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
217}; 219};
218 220
219static int nft_exthdr_init(const struct nft_ctx *ctx, 221static int nft_exthdr_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a77dd514297c..55802e97f906 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net)
1729 return 0; 1729 return 0;
1730} 1730}
1731 1731
1732static void __net_exit xt_net_exit(struct net *net)
1733{
1734 int i;
1735
1736 for (i = 0; i < NFPROTO_NUMPROTO; i++)
1737 WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
1738}
1739
1732static struct pernet_operations xt_net_ops = { 1740static struct pernet_operations xt_net_ops = {
1733 .init = xt_net_init, 1741 .init = xt_net_init,
1742 .exit = xt_net_exit,
1734}; 1743};
1735 1744
1736static int __init xt_init(void) 1745static int __init xt_init(void)
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 041da0d9c06f..06b090d8e901 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
27{ 27{
28 struct sock_fprog_kern program; 28 struct sock_fprog_kern program;
29 29
30 if (len > XT_BPF_MAX_NUM_INSTR)
31 return -EINVAL;
32
30 program.len = len; 33 program.len = len;
31 program.filter = insns; 34 program.filter = insns;
32 35
@@ -52,18 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
52 55
53static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) 56static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
54{ 57{
55 mm_segment_t oldfs = get_fs(); 58 if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
56 int retval, fd; 59 return -EINVAL;
57 60
58 set_fs(KERNEL_DS); 61 *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
59 fd = bpf_obj_get_user(path, 0); 62 return PTR_ERR_OR_ZERO(*ret);
60 set_fs(oldfs);
61 if (fd < 0)
62 return fd;
63
64 retval = __bpf_mt_check_fd(fd, ret);
65 sys_close(fd);
66 return retval;
67} 63}
68 64
69static int bpf_mt_check(const struct xt_mtchk_param *par) 65static int bpf_mt_check(const struct xt_mtchk_param *par)
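
The xt_bpf changes bound both the bytecode length (XT_BPF_MAX_NUM_INSTR) and the pinned-program path, rejecting a path that fills the whole buffer without a NUL terminator, and switch the lookup to bpf_prog_get_type_path() instead of the set_fs()/sys_close() dance. A userspace sketch of the strnlen()-based guard; PATH_MAX_LEN and check_path() are invented for the example:

/* Reject names that fill the whole buffer without a terminator. */
#include <stdio.h>
#include <string.h>

#define PATH_MAX_LEN 64

static int check_path(const char *path)
{
	/* strnlen() returning the full buffer size means no NUL was found
	 * within bounds, so the string cannot be trusted. */
	if (strnlen(path, PATH_MAX_LEN) == PATH_MAX_LEN)
		return -1;
	return 0;
}

int main(void)
{
	char unterminated[PATH_MAX_LEN];

	memset(unterminated, 'a', sizeof(unterminated));
	printf("pinned path ok: %d\n", check_path("/sys/fs/bpf/prog"));
	printf("unterminated:   %d\n", check_path(unterminated));
	return 0;
}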
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 36e14b1f061d..a34f314a8c23 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21 21
22#include <linux/capability.h>
22#include <linux/if.h> 23#include <linux/if.h>
23#include <linux/inetdevice.h> 24#include <linux/inetdevice.h>
24#include <linux/ip.h> 25#include <linux/ip.h>
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
70 struct xt_osf_finger *kf = NULL, *sf; 71 struct xt_osf_finger *kf = NULL, *sf;
71 int err = 0; 72 int err = 0;
72 73
74 if (!capable(CAP_NET_ADMIN))
75 return -EPERM;
76
73 if (!osf_attrs[OSF_ATTR_FINGER]) 77 if (!osf_attrs[OSF_ATTR_FINGER])
74 return -EINVAL; 78 return -EINVAL;
75 79
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
115 struct xt_osf_finger *sf; 119 struct xt_osf_finger *sf;
116 int err = -ENOENT; 120 int err = -ENOENT;
117 121
122 if (!capable(CAP_NET_ADMIN))
123 return -EPERM;
124
118 if (!osf_attrs[OSF_ATTR_FINGER]) 125 if (!osf_attrs[OSF_ATTR_FINGER])
119 return -EINVAL; 126 return -EINVAL;
120 127
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b9e0ee4e22f5..84a4e4c3be4b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
253 struct sock *sk = skb->sk; 253 struct sock *sk = skb->sk;
254 int ret = -ENOMEM; 254 int ret = -ENOMEM;
255 255
256 if (!net_eq(dev_net(dev), sock_net(sk)))
257 return 0;
258
256 dev_hold(dev); 259 dev_hold(dev);
257 260
258 if (is_vmalloc_addr(skb->head)) 261 if (is_vmalloc_addr(skb->head))
@@ -2381,13 +2384,14 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
2381 struct nlmsghdr *, 2384 struct nlmsghdr *,
2382 struct netlink_ext_ack *)) 2385 struct netlink_ext_ack *))
2383{ 2386{
2384 struct netlink_ext_ack extack = {}; 2387 struct netlink_ext_ack extack;
2385 struct nlmsghdr *nlh; 2388 struct nlmsghdr *nlh;
2386 int err; 2389 int err;
2387 2390
2388 while (skb->len >= nlmsg_total_size(0)) { 2391 while (skb->len >= nlmsg_total_size(0)) {
2389 int msglen; 2392 int msglen;
2390 2393
2394 memset(&extack, 0, sizeof(extack));
2391 nlh = nlmsg_hdr(skb); 2395 nlh = nlmsg_hdr(skb);
2392 err = 0; 2396 err = 0;
2393 2397
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index fb7afcaa3004..985909f105eb 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -531,7 +531,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
531 return 0; 531 return 0;
532} 532}
533 533
534static inline unsigned int llcp_accept_poll(struct sock *parent) 534static inline __poll_t llcp_accept_poll(struct sock *parent)
535{ 535{
536 struct nfc_llcp_sock *llcp_sock, *parent_sock; 536 struct nfc_llcp_sock *llcp_sock, *parent_sock;
537 struct sock *sk; 537 struct sock *sk;
@@ -549,11 +549,11 @@ static inline unsigned int llcp_accept_poll(struct sock *parent)
549 return 0; 549 return 0;
550} 550}
551 551
552static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, 552static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
553 poll_table *wait) 553 poll_table *wait)
554{ 554{
555 struct sock *sk = sock->sk; 555 struct sock *sk = sock->sk;
556 unsigned int mask = 0; 556 __poll_t mask = 0;
557 557
558 pr_debug("%p\n", sk); 558 pr_debug("%p\n", sk);
559 559
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 8d104c1db628..a66f102c6c01 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -305,7 +305,7 @@ static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file,
305 return 0; 305 return 0;
306} 306}
307 307
308static unsigned int nci_uart_tty_poll(struct tty_struct *tty, 308static __poll_t nci_uart_tty_poll(struct tty_struct *tty,
309 struct file *filp, poll_table *wait) 309 struct file *filp, poll_table *wait)
310{ 310{
311 return 0; 311 return 0;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 99cfafc2a139..ef38e5aecd28 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -308,7 +308,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
308 const struct dp_upcall_info *upcall_info, 308 const struct dp_upcall_info *upcall_info,
309 uint32_t cutlen) 309 uint32_t cutlen)
310{ 310{
311 unsigned short gso_type = skb_shinfo(skb)->gso_type; 311 unsigned int gso_type = skb_shinfo(skb)->gso_type;
312 struct sw_flow_key later_key; 312 struct sw_flow_key later_key;
313 struct sk_buff *segs, *nskb; 313 struct sk_buff *segs, *nskb;
314 int err; 314 int err;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..f039064ce922 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
579 return -EINVAL; 579 return -EINVAL;
580 580
581 skb_reset_network_header(skb); 581 skb_reset_network_header(skb);
582 key->eth.type = skb->protocol;
582 } else { 583 } else {
583 eth = eth_hdr(skb); 584 eth = eth_hdr(skb);
584 ether_addr_copy(key->eth.src, eth->h_source); 585 ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
592 if (unlikely(parse_vlan(skb, key))) 593 if (unlikely(parse_vlan(skb, key)))
593 return -ENOMEM; 594 return -ENOMEM;
594 595
595 skb->protocol = parse_ethertype(skb); 596 key->eth.type = parse_ethertype(skb);
596 if (unlikely(skb->protocol == htons(0))) 597 if (unlikely(key->eth.type == htons(0)))
597 return -ENOMEM; 598 return -ENOMEM;
598 599
600 /* Multiple tagged packets need to retain TPID to satisfy
601 * skb_vlan_pop(), which will later shift the ethertype into
602 * skb->protocol.
603 */
604 if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
605 skb->protocol = key->eth.cvlan.tpid;
606 else
607 skb->protocol = key->eth.type;
608
599 skb_reset_network_header(skb); 609 skb_reset_network_header(skb);
600 __skb_push(skb, skb->data - skb_mac_header(skb)); 610 __skb_push(skb, skb->data - skb_mac_header(skb));
601 } 611 }
602 skb_reset_mac_len(skb); 612 skb_reset_mac_len(skb);
603 key->eth.type = skb->protocol;
604 613
605 /* Network layer. */ 614 /* Network layer. */
606 if (key->eth.type == htons(ETH_P_IP)) { 615 if (key->eth.type == htons(ETH_P_IP)) {
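
The key_extract() hunks keep the parsed ethertype in key->eth.type and, for frames that still carry an inner 802.1Q tag, leave the TPID in skb->protocol so a later skb_vlan_pop() can restore the right ethertype. A rough host-order sketch of that selection; VLAN_TAG_PRESENT, the field names and the constants here are illustrative, not the kernel's byte-order-correct definitions:

/* Pick the "protocol" to advertise for a frame after VLAN parsing. */
#include <stdint.h>
#include <stdio.h>

#define VLAN_TAG_PRESENT 0x1000u   /* illustrative flag bit */

struct vlan_key { uint16_t tpid, tci; };

static uint16_t pick_protocol(uint16_t eth_type, const struct vlan_key *cvlan)
{
	if (cvlan->tci & VLAN_TAG_PRESENT)
		return cvlan->tpid;   /* an inner tag remains on the frame */
	return eth_type;              /* no remaining tag: use the ethertype */
}

int main(void)
{
	struct vlan_key cvlan = { .tpid = 0x8100, .tci = 0x1000 | 5 };

	printf("double-tagged -> 0x%04x\n", pick_protocol(0x0800, &cvlan));
	cvlan.tci = 0;
	printf("untagged      -> 0x%04x\n", pick_protocol(0x0800, &cvlan));
	return 0;
}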
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index dc424798ba6f..f143908b651d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,7 +49,6 @@
49#include <net/mpls.h> 49#include <net/mpls.h>
50#include <net/vxlan.h> 50#include <net/vxlan.h>
51#include <net/tun_proto.h> 51#include <net/tun_proto.h>
52#include <net/erspan.h>
53 52
54#include "flow_netlink.h" 53#include "flow_netlink.h"
55 54
@@ -334,8 +333,7 @@ size_t ovs_tun_key_attr_size(void)
334 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 333 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
335 */ 334 */
336 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 335 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
337 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 336 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
338 + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
339} 337}
340 338
341static size_t ovs_nsh_key_attr_size(void) 339static size_t ovs_nsh_key_attr_size(void)
@@ -402,7 +400,6 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
402 .next = ovs_vxlan_ext_key_lens }, 400 .next = ovs_vxlan_ext_key_lens },
403 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 401 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
404 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 402 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
405 [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
406}; 403};
407 404
408static const struct ovs_len_tbl 405static const struct ovs_len_tbl
@@ -634,33 +631,6 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
634 return 0; 631 return 0;
635} 632}
636 633
637static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
638 struct sw_flow_match *match, bool is_mask,
639 bool log)
640{
641 unsigned long opt_key_offset;
642 struct erspan_metadata opts;
643
644 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
645
646 memset(&opts, 0, sizeof(opts));
647 opts.index = nla_get_be32(attr);
648
649 /* Index has only 20-bit */
650 if (ntohl(opts.index) & ~INDEX_MASK) {
651 OVS_NLERR(log, "ERSPAN index number %x too large.",
652 ntohl(opts.index));
653 return -EINVAL;
654 }
655
656 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
657 opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
658 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
659 is_mask);
660
661 return 0;
662}
663
664static int ip_tun_from_nlattr(const struct nlattr *attr, 634static int ip_tun_from_nlattr(const struct nlattr *attr,
665 struct sw_flow_match *match, bool is_mask, 635 struct sw_flow_match *match, bool is_mask,
666 bool log) 636 bool log)
@@ -768,19 +738,6 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
768 break; 738 break;
769 case OVS_TUNNEL_KEY_ATTR_PAD: 739 case OVS_TUNNEL_KEY_ATTR_PAD:
770 break; 740 break;
771 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
772 if (opts_type) {
773 OVS_NLERR(log, "Multiple metadata blocks provided");
774 return -EINVAL;
775 }
776
777 err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
778 if (err)
779 return err;
780
781 tun_flags |= TUNNEL_ERSPAN_OPT;
782 opts_type = type;
783 break;
784 default: 741 default:
785 OVS_NLERR(log, "Unknown IP tunnel attribute %d", 742 OVS_NLERR(log, "Unknown IP tunnel attribute %d",
786 type); 743 type);
@@ -905,10 +862,6 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
905 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 862 else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
906 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 863 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
907 return -EMSGSIZE; 864 return -EMSGSIZE;
908 else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
909 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
910 ((struct erspan_metadata *)tun_opts)->index))
911 return -EMSGSIZE;
912 } 865 }
913 866
914 return 0; 867 return 0;
@@ -2241,14 +2194,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2241 2194
2242#define MAX_ACTIONS_BUFSIZE (32 * 1024) 2195#define MAX_ACTIONS_BUFSIZE (32 * 1024)
2243 2196
2244static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) 2197static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2245{ 2198{
2246 struct sw_flow_actions *sfa; 2199 struct sw_flow_actions *sfa;
2247 2200
2248 if (size > MAX_ACTIONS_BUFSIZE) { 2201 WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2249 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
2250 return ERR_PTR(-EINVAL);
2251 }
2252 2202
2253 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 2203 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2254 if (!sfa) 2204 if (!sfa)
@@ -2321,12 +2271,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2321 new_acts_size = ksize(*sfa) * 2; 2271 new_acts_size = ksize(*sfa) * 2;
2322 2272
2323 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 2273 if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2324 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 2274 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
2275 OVS_NLERR(log, "Flow action size exceeds max %u",
2276 MAX_ACTIONS_BUFSIZE);
2325 return ERR_PTR(-EMSGSIZE); 2277 return ERR_PTR(-EMSGSIZE);
2278 }
2326 new_acts_size = MAX_ACTIONS_BUFSIZE; 2279 new_acts_size = MAX_ACTIONS_BUFSIZE;
2327 } 2280 }
2328 2281
2329 acts = nla_alloc_flow_actions(new_acts_size, log); 2282 acts = nla_alloc_flow_actions(new_acts_size);
2330 if (IS_ERR(acts)) 2283 if (IS_ERR(acts))
2331 return (void *)acts; 2284 return (void *)acts;
2332 2285
@@ -2533,8 +2486,6 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
2533 break; 2486 break;
2534 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 2487 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2535 break; 2488 break;
2536 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2537 break;
2538 } 2489 }
2539 }; 2490 };
2540 2491
@@ -3059,7 +3010,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3059{ 3010{
3060 int err; 3011 int err;
3061 3012
3062 *sfa = nla_alloc_flow_actions(nla_len(attr), log); 3013 *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3063 if (IS_ERR(*sfa)) 3014 if (IS_ERR(*sfa))
3064 return PTR_ERR(*sfa); 3015 return PTR_ERR(*sfa);
3065 3016
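
The flow_netlink.c hunks make nla_alloc_flow_actions() take an already-bounded size (the min() of the attribute length and MAX_ACTIONS_BUFSIZE) and move the user-visible size error into reserve_sfa_size(). A tiny userspace sketch of clamping an untrusted length before allocating:

/* Cap an attribute-derived size before it reaches the allocator. */
#include <stdio.h>
#include <stdlib.h>

#define MAX_ACTIONS_BUFSIZE (32 * 1024)

static void *alloc_actions(size_t requested)
{
	/* Never trust a netlink-supplied length directly; clamp it. */
	size_t size = requested < MAX_ACTIONS_BUFSIZE ? requested : MAX_ACTIONS_BUFSIZE;

	return malloc(size);
}

int main(void)
{
	void *p = alloc_actions(1 << 20);  /* oversized request gets capped */

	printf("%s\n", p ? "allocated (capped)" : "allocation failed");
	free(p);
	return 0;
}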
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 737092ca9b4e..3b4d6a3cf190 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1687,7 +1687,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1687 atomic_long_set(&rollover->num, 0); 1687 atomic_long_set(&rollover->num, 0);
1688 atomic_long_set(&rollover->num_huge, 0); 1688 atomic_long_set(&rollover->num_huge, 0);
1689 atomic_long_set(&rollover->num_failed, 0); 1689 atomic_long_set(&rollover->num_failed, 0);
1690 po->rollover = rollover;
1691 } 1690 }
1692 1691
1693 if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { 1692 if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
@@ -1745,6 +1744,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1745 if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { 1744 if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1746 __dev_remove_pack(&po->prot_hook); 1745 __dev_remove_pack(&po->prot_hook);
1747 po->fanout = match; 1746 po->fanout = match;
1747 po->rollover = rollover;
1748 rollover = NULL;
1748 refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); 1749 refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
1749 __fanout_link(sk, po); 1750 __fanout_link(sk, po);
1750 err = 0; 1751 err = 0;
@@ -1758,10 +1759,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1758 } 1759 }
1759 1760
1760out: 1761out:
1761 if (err && rollover) { 1762 kfree(rollover);
1762 kfree_rcu(rollover, rcu);
1763 po->rollover = NULL;
1764 }
1765 mutex_unlock(&fanout_mutex); 1763 mutex_unlock(&fanout_mutex);
1766 return err; 1764 return err;
1767} 1765}
@@ -1785,11 +1783,6 @@ static struct packet_fanout *fanout_release(struct sock *sk)
1785 list_del(&f->list); 1783 list_del(&f->list);
1786 else 1784 else
1787 f = NULL; 1785 f = NULL;
1788
1789 if (po->rollover) {
1790 kfree_rcu(po->rollover, rcu);
1791 po->rollover = NULL;
1792 }
1793 } 1786 }
1794 mutex_unlock(&fanout_mutex); 1787 mutex_unlock(&fanout_mutex);
1795 1788
@@ -3029,6 +3022,7 @@ static int packet_release(struct socket *sock)
3029 synchronize_net(); 3022 synchronize_net();
3030 3023
3031 if (f) { 3024 if (f) {
3025 kfree(po->rollover);
3032 fanout_release_data(f); 3026 fanout_release_data(f);
3033 kfree(f); 3027 kfree(f);
3034 } 3028 }
@@ -3097,6 +3091,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3097 if (need_rehook) { 3091 if (need_rehook) {
3098 if (po->running) { 3092 if (po->running) {
3099 rcu_read_unlock(); 3093 rcu_read_unlock();
3094 /* prevents packet_notifier() from calling
3095 * register_prot_hook()
3096 */
3097 po->num = 0;
3100 __unregister_prot_hook(sk, true); 3098 __unregister_prot_hook(sk, true);
3101 rcu_read_lock(); 3099 rcu_read_lock();
3102 dev_curr = po->prot_hook.dev; 3100 dev_curr = po->prot_hook.dev;
@@ -3105,6 +3103,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3105 dev->ifindex); 3103 dev->ifindex);
3106 } 3104 }
3107 3105
3106 BUG_ON(po->running);
3108 po->num = proto; 3107 po->num = proto;
3109 po->prot_hook.type = proto; 3108 po->prot_hook.type = proto;
3110 3109
@@ -3843,7 +3842,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3843 void *data = &val; 3842 void *data = &val;
3844 union tpacket_stats_u st; 3843 union tpacket_stats_u st;
3845 struct tpacket_rollover_stats rstats; 3844 struct tpacket_rollover_stats rstats;
3846 struct packet_rollover *rollover;
3847 3845
3848 if (level != SOL_PACKET) 3846 if (level != SOL_PACKET)
3849 return -ENOPROTOOPT; 3847 return -ENOPROTOOPT;
@@ -3922,18 +3920,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3922 0); 3920 0);
3923 break; 3921 break;
3924 case PACKET_ROLLOVER_STATS: 3922 case PACKET_ROLLOVER_STATS:
3925 rcu_read_lock(); 3923 if (!po->rollover)
3926 rollover = rcu_dereference(po->rollover);
3927 if (rollover) {
3928 rstats.tp_all = atomic_long_read(&rollover->num);
3929 rstats.tp_huge = atomic_long_read(&rollover->num_huge);
3930 rstats.tp_failed = atomic_long_read(&rollover->num_failed);
3931 data = &rstats;
3932 lv = sizeof(rstats);
3933 }
3934 rcu_read_unlock();
3935 if (!rollover)
3936 return -EINVAL; 3924 return -EINVAL;
3925 rstats.tp_all = atomic_long_read(&po->rollover->num);
3926 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
3927 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
3928 data = &rstats;
3929 lv = sizeof(rstats);
3937 break; 3930 break;
3938 case PACKET_TX_HAS_OFF: 3931 case PACKET_TX_HAS_OFF:
3939 val = po->tp_tx_has_off; 3932 val = po->tp_tx_has_off;
@@ -4080,12 +4073,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
4080 return 0; 4073 return 0;
4081} 4074}
4082 4075
4083static unsigned int packet_poll(struct file *file, struct socket *sock, 4076static __poll_t packet_poll(struct file *file, struct socket *sock,
4084 poll_table *wait) 4077 poll_table *wait)
4085{ 4078{
4086 struct sock *sk = sock->sk; 4079 struct sock *sk = sock->sk;
4087 struct packet_sock *po = pkt_sk(sk); 4080 struct packet_sock *po = pkt_sk(sk);
4088 unsigned int mask = datagram_poll(file, sock, wait); 4081 __poll_t mask = datagram_poll(file, sock, wait);
4089 4082
4090 spin_lock_bh(&sk->sk_receive_queue.lock); 4083 spin_lock_bh(&sk->sk_receive_queue.lock);
4091 if (po->rx_ring.pg_vec) { 4084 if (po->rx_ring.pg_vec) {
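
The af_packet changes attach the freshly allocated rollover state to the socket only once the fanout join has succeeded, clear the local pointer on that path, and free it unconditionally otherwise, which removes the need for kfree_rcu() and the po->rollover reset in the error path. A generic userspace sketch of that hand-off pattern; the names are illustrative:

/* Transfer ownership only on success, then NULL the temporary so a single
 * cleanup site can free it unconditionally. */
#include <stdio.h>
#include <stdlib.h>

struct rollover_stats { long num; };
struct sock_state { struct rollover_stats *rollover; };

static int join_group(struct sock_state *st, int should_succeed)
{
	struct rollover_stats *tmp = calloc(1, sizeof(*tmp));
	int err = -1;

	if (!tmp)
		return -1;

	if (should_succeed) {
		st->rollover = tmp;  /* ownership moves to the socket */
		tmp = NULL;          /* so the cleanup below becomes a no-op */
		err = 0;
	}

	free(tmp);                   /* only frees on the failure path */
	return err;
}

int main(void)
{
	struct sock_state st = { NULL };
	int err = join_group(&st, 1);

	printf("join: %d, rollover %sattached\n", err, st.rollover ? "" : "not ");
	free(st.rollover);
	return 0;
}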
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 562fbc155006..a1d2b2319ae9 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -95,7 +95,6 @@ struct packet_fanout {
95 95
96struct packet_rollover { 96struct packet_rollover {
97 int sock; 97 int sock;
98 struct rcu_head rcu;
99 atomic_long_t num; 98 atomic_long_t num;
100 atomic_long_t num_huge; 99 atomic_long_t num_huge;
101 atomic_long_t num_failed; 100 atomic_long_t num_failed;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1b050dd17393..44417480dab7 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -341,12 +341,12 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
341 return 0; 341 return 0;
342} 342}
343 343
344static unsigned int pn_socket_poll(struct file *file, struct socket *sock, 344static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
345 poll_table *wait) 345 poll_table *wait)
346{ 346{
347 struct sock *sk = sock->sk; 347 struct sock *sk = sock->sk;
348 struct pep_sock *pn = pep_sk(sk); 348 struct pep_sock *pn = pep_sk(sk);
349 unsigned int mask = 0; 349 __poll_t mask = 0;
350 350
351 poll_wait(file, sk_sleep(sk), wait); 351 poll_wait(file, sk_sleep(sk), wait);
352 352
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b405f77d664c..88aa8ad0f5b6 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -152,12 +152,12 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
152 * to send to a congested destination, the system call may still fail (and 152 * to send to a congested destination, the system call may still fail (and
153 * return ENOBUFS). 153 * return ENOBUFS).
154 */ 154 */
155static unsigned int rds_poll(struct file *file, struct socket *sock, 155static __poll_t rds_poll(struct file *file, struct socket *sock,
156 poll_table *wait) 156 poll_table *wait)
157{ 157{
158 struct sock *sk = sock->sk; 158 struct sock *sk = sock->sk;
159 struct rds_sock *rs = rds_sk_to_rs(sk); 159 struct rds_sock *rs = rds_sk_to_rs(sk);
160 unsigned int mask = 0; 160 __poll_t mask = 0;
161 unsigned long flags; 161 unsigned long flags;
162 162
163 poll_wait(file, sk_sleep(sk), wait); 163 poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 36dd2099048a..b2a5067b4afe 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -301,13 +301,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
301 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); 301 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
302 if (rds_conn_state(conn) == RDS_CONN_UP) { 302 if (rds_conn_state(conn) == RDS_CONN_UP) {
303 struct rds_ib_device *rds_ibdev; 303 struct rds_ib_device *rds_ibdev;
304 struct rdma_dev_addr *dev_addr;
305 304
306 ic = conn->c_transport_data; 305 ic = conn->c_transport_data;
307 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
308 306
309 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 307 rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
310 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 308 (union ib_gid *)&iinfo->dst_gid);
311 309
312 rds_ibdev = ic->rds_ibdev; 310 rds_ibdev = ic->rds_ibdev;
313 iinfo->max_send_wr = ic->i_send_ring.w_nr; 311 iinfo->max_send_wr = ic->i_send_ring.w_nr;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8886f15abe90..634cfcb7bba6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
183 long i; 183 long i;
184 int ret; 184 int ret;
185 185
186 if (rs->rs_bound_addr == 0) { 186 if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
187 ret = -ENOTCONN; /* XXX not a great errno */ 187 ret = -ENOTCONN; /* XXX not a great errno */
188 goto out; 188 goto out;
189 } 189 }
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
525 525
526 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; 526 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
527 527
528 if (args->nr_local == 0)
529 return -EINVAL;
530
528 /* figure out the number of pages in the vector */ 531 /* figure out the number of pages in the vector */
529 for (i = 0; i < args->nr_local; i++) { 532 for (i = 0; i < args->nr_local; i++) {
530 if (copy_from_user(&vec, &local_vec[i], 533 if (copy_from_user(&vec, &local_vec[i],
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
874err: 877err:
875 if (page) 878 if (page)
876 put_page(page); 879 put_page(page);
880 rm->atomic.op_active = 0;
877 kfree(rm->atomic.op_notifier); 881 kfree(rm->atomic.op_notifier);
878 882
879 return ret; 883 return ret;
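
The rds_rdma_extra_size() hunk rejects a zero-length local vector up front, and the atomic-cmsg error path clears op_active before freeing the notifier. A toy version of the empty-vector guard; the types and page arithmetic are simplified stand-ins:

/* Refuse an empty iovec array before computing how many pages it needs. */
#include <stdio.h>

struct iovec_desc { unsigned long addr, bytes; };

static int extra_size(const struct iovec_desc *vec, unsigned int nr)
{
	unsigned int pages = 0;

	if (nr == 0)
		return -1;   /* nothing to map: treat as invalid input */

	for (unsigned int i = 0; i < nr; i++)
		pages += (vec[i].bytes + 4095) / 4096;  /* rough page-count estimate */
	return (int)pages;
}

int main(void)
{
	struct iovec_desc v[2] = { { 0, 8192 }, { 0, 100 } };

	printf("pages: %d\n", extra_size(v, 2));
	printf("empty: %d\n", extra_size(v, 0));
	return 0;
}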
diff --git a/net/rds/send.c b/net/rds/send.c
index b52cdc8ae428..f72466c63f0c 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
1009 continue; 1009 continue;
1010 1010
1011 if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { 1011 if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
1012 if (cmsg->cmsg_len <
1013 CMSG_LEN(sizeof(struct rds_rdma_args)))
1014 return -EINVAL;
1012 args = CMSG_DATA(cmsg); 1015 args = CMSG_DATA(cmsg);
1013 *rdma_bytes += args->remote_vec.bytes; 1016 *rdma_bytes += args->remote_vec.bytes;
1014 } 1017 }
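
The rds_rdma_bytes() hunk checks cmsg_len against CMSG_LEN(sizeof(struct rds_rdma_args)) before reading CMSG_DATA(), so a truncated control message cannot be interpreted as RDMA arguments. A self-contained userspace sketch of the same check; struct fake_args stands in for the real payload type:

/* Validate cmsg_len before dereferencing CMSG_DATA(). */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

struct fake_args { unsigned long bytes; };

static int scan_cmsgs(struct msghdr *msg)
{
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
		if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct fake_args)))
			return -1;   /* truncated payload: never touch CMSG_DATA() */
		/* cmsg_len is big enough, so CMSG_DATA() is safe to read here */
	}
	return 0;
}

int main(void)
{
	union {
		char buf[CMSG_SPACE(sizeof(struct fake_args))];
		struct cmsghdr align;   /* keeps the buffer suitably aligned */
	} u;
	struct msghdr msg;
	struct cmsghdr *cmsg;

	memset(&msg, 0, sizeof(msg));
	memset(&u, 0, sizeof(u));
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = 1;
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct fake_args));  /* try CMSG_LEN(0) to see the check trip */

	printf("scan: %d\n", scan_cmsgs(&msg));
	return 0;
}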
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 6b7ee71f40c6..ab7356e0ba83 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock)
90 sizeof(val)); 90 sizeof(val));
91} 91}
92 92
93u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) 93u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
94{ 94{
95 return tcp_sk(tc->t_sock->sk)->snd_nxt; 95 /* seq# of the last byte of data in tcp send buffer */
96 return tcp_sk(tc->t_sock->sk)->write_seq;
96} 97}
97 98
98u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) 99u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 1aafbf7c3011..864ca7d8f019 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -54,7 +54,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
54void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); 54void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
55void rds_tcp_restore_callbacks(struct socket *sock, 55void rds_tcp_restore_callbacks(struct socket *sock,
56 struct rds_tcp_connection *tc); 56 struct rds_tcp_connection *tc);
57u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); 57u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
58u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); 58u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
59u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); 59u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
60extern struct rds_transport rds_tcp_transport; 60extern struct rds_transport rds_tcp_transport;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index dc860d1bb608..9b76e0fa1722 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
86 * m_ack_seq is set to the sequence number of the last byte of 86 * m_ack_seq is set to the sequence number of the last byte of
87 * header and data. see rds_tcp_is_acked(). 87 * header and data. see rds_tcp_is_acked().
88 */ 88 */
89 tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc); 89 tc->t_last_sent_nxt = rds_tcp_write_seq(tc);
90 rm->m_ack_seq = tc->t_last_sent_nxt + 90 rm->m_ack_seq = tc->t_last_sent_nxt +
91 sizeof(struct rds_header) + 91 sizeof(struct rds_header) +
92 be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; 92 be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
@@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
98 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; 98 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
99 99
100 rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", 100 rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
101 rm, rds_tcp_snd_nxt(tc), 101 rm, rds_tcp_write_seq(tc),
102 (unsigned long long)rm->m_ack_seq); 102 (unsigned long long)rm->m_ack_seq);
103 } 103 }
104 104
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 2064c3a35ef8..124c77e9d058 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1139,10 +1139,10 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
1139 return -ENOMEM; 1139 return -ENOMEM;
1140} 1140}
1141 1141
1142static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait) 1142static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait)
1143{ 1143{
1144 struct rfkill_data *data = file->private_data; 1144 struct rfkill_data *data = file->private_data;
1145 unsigned int res = POLLOUT | POLLWRNORM; 1145 __poll_t res = POLLOUT | POLLWRNORM;
1146 1146
1147 poll_wait(file, &data->read_wait, wait); 1147 poll_wait(file, &data->read_wait, wait);
1148 1148
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 9b5c46b052fd..21ad6a3a465c 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -285,6 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
285 bool upgrade) 285 bool upgrade)
286{ 286{
287 struct rxrpc_conn_parameters cp; 287 struct rxrpc_conn_parameters cp;
288 struct rxrpc_call_params p;
288 struct rxrpc_call *call; 289 struct rxrpc_call *call;
289 struct rxrpc_sock *rx = rxrpc_sk(sock->sk); 290 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
290 int ret; 291 int ret;
@@ -302,6 +303,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
302 if (key && !key->payload.data[0]) 303 if (key && !key->payload.data[0])
303 key = NULL; /* a no-security key */ 304 key = NULL; /* a no-security key */
304 305
306 memset(&p, 0, sizeof(p));
307 p.user_call_ID = user_call_ID;
308 p.tx_total_len = tx_total_len;
309
305 memset(&cp, 0, sizeof(cp)); 310 memset(&cp, 0, sizeof(cp));
306 cp.local = rx->local; 311 cp.local = rx->local;
307 cp.key = key; 312 cp.key = key;
@@ -309,8 +314,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
309 cp.exclusive = false; 314 cp.exclusive = false;
310 cp.upgrade = upgrade; 315 cp.upgrade = upgrade;
311 cp.service_id = srx->srx_service; 316 cp.service_id = srx->srx_service;
312 call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, 317 call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp);
313 gfp);
314 /* The socket has been unlocked. */ 318 /* The socket has been unlocked. */
315 if (!IS_ERR(call)) { 319 if (!IS_ERR(call)) {
316 call->notify_rx = notify_rx; 320 call->notify_rx = notify_rx;
@@ -725,12 +729,12 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
725/* 729/*
726 * permit an RxRPC socket to be polled 730 * permit an RxRPC socket to be polled
727 */ 731 */
728static unsigned int rxrpc_poll(struct file *file, struct socket *sock, 732static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
729 poll_table *wait) 733 poll_table *wait)
730{ 734{
731 struct sock *sk = sock->sk; 735 struct sock *sk = sock->sk;
732 struct rxrpc_sock *rx = rxrpc_sk(sk); 736 struct rxrpc_sock *rx = rxrpc_sk(sk);
733 unsigned int mask; 737 __poll_t mask;
734 738
735 sock_poll_wait(file, sk_sleep(sk), wait); 739 sock_poll_wait(file, sk_sleep(sk), wait);
736 mask = 0; 740 mask = 0;
@@ -856,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
856static int rxrpc_release_sock(struct sock *sk) 860static int rxrpc_release_sock(struct sock *sk)
857{ 861{
858 struct rxrpc_sock *rx = rxrpc_sk(sk); 862 struct rxrpc_sock *rx = rxrpc_sk(sk);
863 struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
859 864
860 _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); 865 _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
861 866
@@ -863,6 +868,19 @@ static int rxrpc_release_sock(struct sock *sk)
863 sock_orphan(sk); 868 sock_orphan(sk);
864 sk->sk_shutdown = SHUTDOWN_MASK; 869 sk->sk_shutdown = SHUTDOWN_MASK;
865 870
871 /* We want to kill off all connections from a service socket
872 * as fast as possible because we can't share these; client
873 * sockets, on the other hand, can share an endpoint.
874 */
875 switch (sk->sk_state) {
876 case RXRPC_SERVER_BOUND:
877 case RXRPC_SERVER_BOUND2:
878 case RXRPC_SERVER_LISTENING:
879 case RXRPC_SERVER_LISTEN_DISABLED:
880 rx->local->service_closed = true;
881 break;
882 }
883
866 spin_lock_bh(&sk->sk_receive_queue.lock); 884 spin_lock_bh(&sk->sk_receive_queue.lock);
867 sk->sk_state = RXRPC_CLOSE; 885 sk->sk_state = RXRPC_CLOSE;
868 spin_unlock_bh(&sk->sk_receive_queue.lock); 886 spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -878,6 +896,8 @@ static int rxrpc_release_sock(struct sock *sk)
878 rxrpc_release_calls_on_socket(rx); 896 rxrpc_release_calls_on_socket(rx);
879 flush_workqueue(rxrpc_workqueue); 897 flush_workqueue(rxrpc_workqueue);
880 rxrpc_purge_queue(&sk->sk_receive_queue); 898 rxrpc_purge_queue(&sk->sk_receive_queue);
899 rxrpc_queue_work(&rxnet->service_conn_reaper);
900 rxrpc_queue_work(&rxnet->client_conn_reaper);
881 901
882 rxrpc_put_local(rx->local); 902 rxrpc_put_local(rx->local);
883 rx->local = NULL; 903 rx->local = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index b2151993d384..416688381eb7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -79,17 +79,20 @@ struct rxrpc_net {
79 struct list_head conn_proc_list; /* List of conns in this namespace for proc */ 79 struct list_head conn_proc_list; /* List of conns in this namespace for proc */
80 struct list_head service_conns; /* Service conns in this namespace */ 80 struct list_head service_conns; /* Service conns in this namespace */
81 rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ 81 rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
82 struct delayed_work service_conn_reaper; 82 struct work_struct service_conn_reaper;
83 struct timer_list service_conn_reap_timer;
83 84
84 unsigned int nr_client_conns; 85 unsigned int nr_client_conns;
85 unsigned int nr_active_client_conns; 86 unsigned int nr_active_client_conns;
86 bool kill_all_client_conns; 87 bool kill_all_client_conns;
88 bool live;
87 spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ 89 spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
88 spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ 90 spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */
89 struct list_head waiting_client_conns; 91 struct list_head waiting_client_conns;
90 struct list_head active_client_conns; 92 struct list_head active_client_conns;
91 struct list_head idle_client_conns; 93 struct list_head idle_client_conns;
92 struct delayed_work client_conn_reaper; 94 struct work_struct client_conn_reaper;
95 struct timer_list client_conn_reap_timer;
93 96
94 struct list_head local_endpoints; 97 struct list_head local_endpoints;
95 struct mutex local_mutex; /* Lock for ->local_endpoints */ 98 struct mutex local_mutex; /* Lock for ->local_endpoints */
@@ -265,6 +268,7 @@ struct rxrpc_local {
265 rwlock_t services_lock; /* lock for services list */ 268 rwlock_t services_lock; /* lock for services list */
266 int debug_id; /* debug ID for printks */ 269 int debug_id; /* debug ID for printks */
267 bool dead; 270 bool dead;
271 bool service_closed; /* Service socket closed */
268 struct sockaddr_rxrpc srx; /* local address */ 272 struct sockaddr_rxrpc srx; /* local address */
269}; 273};
270 274
@@ -338,8 +342,17 @@ enum rxrpc_conn_flag {
338 RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ 342 RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
339 RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ 343 RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */
340 RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ 344 RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */
345 RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */
346 RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */
347 RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */
348 RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */
341}; 349};
342 350
351#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \
352 (1UL << RXRPC_CONN_FINAL_ACK_1) | \
353 (1UL << RXRPC_CONN_FINAL_ACK_2) | \
354 (1UL << RXRPC_CONN_FINAL_ACK_3))
355
343/* 356/*
344 * Events that can be raised upon a connection. 357 * Events that can be raised upon a connection.
345 */ 358 */
@@ -393,6 +406,7 @@ struct rxrpc_connection {
393#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) 406#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1)
394 struct list_head waiting_calls; /* Calls waiting for channels */ 407 struct list_head waiting_calls; /* Calls waiting for channels */
395 struct rxrpc_channel { 408 struct rxrpc_channel {
409 unsigned long final_ack_at; /* Time at which to issue final ACK */
396 struct rxrpc_call __rcu *call; /* Active call */ 410 struct rxrpc_call __rcu *call; /* Active call */
397 u32 call_id; /* ID of current call */ 411 u32 call_id; /* ID of current call */
398 u32 call_counter; /* Call ID counter */ 412 u32 call_counter; /* Call ID counter */
@@ -404,6 +418,7 @@ struct rxrpc_connection {
404 }; 418 };
405 } channels[RXRPC_MAXCALLS]; 419 } channels[RXRPC_MAXCALLS];
406 420
421 struct timer_list timer; /* Conn event timer */
407 struct work_struct processor; /* connection event processor */ 422 struct work_struct processor; /* connection event processor */
408 union { 423 union {
409 struct rb_node client_node; /* Node in local->client_conns */ 424 struct rb_node client_node; /* Node in local->client_conns */
@@ -457,9 +472,10 @@ enum rxrpc_call_flag {
457enum rxrpc_call_event { 472enum rxrpc_call_event {
458 RXRPC_CALL_EV_ACK, /* need to generate ACK */ 473 RXRPC_CALL_EV_ACK, /* need to generate ACK */
459 RXRPC_CALL_EV_ABORT, /* need to generate abort */ 474 RXRPC_CALL_EV_ABORT, /* need to generate abort */
460 RXRPC_CALL_EV_TIMER, /* Timer expired */
461 RXRPC_CALL_EV_RESEND, /* Tx resend required */ 475 RXRPC_CALL_EV_RESEND, /* Tx resend required */
462 RXRPC_CALL_EV_PING, /* Ping send required */ 476 RXRPC_CALL_EV_PING, /* Ping send required */
477 RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */
478 RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */
463}; 479};
464 480
465/* 481/*
@@ -503,10 +519,16 @@ struct rxrpc_call {
503 struct rxrpc_peer *peer; /* Peer record for remote address */ 519 struct rxrpc_peer *peer; /* Peer record for remote address */
504 struct rxrpc_sock __rcu *socket; /* socket responsible */ 520 struct rxrpc_sock __rcu *socket; /* socket responsible */
505 struct mutex user_mutex; /* User access mutex */ 521 struct mutex user_mutex; /* User access mutex */
506 ktime_t ack_at; /* When deferred ACK needs to happen */ 522 unsigned long ack_at; /* When deferred ACK needs to happen */
507 ktime_t resend_at; /* When next resend needs to happen */ 523 unsigned long ack_lost_at; /* When ACK is figured as lost */
508 ktime_t ping_at; /* When next to send a ping */ 524 unsigned long resend_at; /* When next resend needs to happen */
509 ktime_t expire_at; /* When the call times out */ 525 unsigned long ping_at; /* When next to send a ping */
526 unsigned long keepalive_at; /* When next to send a keepalive ping */
527 unsigned long expect_rx_by; /* When we expect to get a packet by */
528 unsigned long expect_req_by; /* When we expect to get a request DATA packet by */
529 unsigned long expect_term_by; /* When we expect call termination by */
530 u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
531 u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
510 struct timer_list timer; /* Combined event timer */ 532 struct timer_list timer; /* Combined event timer */
511 struct work_struct processor; /* Event processor */ 533 struct work_struct processor; /* Event processor */
512 rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ 534 rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
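
With the per-call deadlines switched from ktime_t to jiffies values, every expiry test becomes a wrap-safe jiffies comparison. Here is a small userspace model of the signed-difference trick behind the kernel's time_after_eq(), which is how the new ack_at, expect_rx_by and expect_term_by fields get checked; the helper name is invented for the sketch.

    #include <stdio.h>

    /* Wrap-safe "a is at or after b" for a free-running counter, the same
     * signed-difference idea as time_after_eq(). */
    static int after_eq(unsigned long a, unsigned long b)
    {
        return (long)(a - b) >= 0;
    }

    int main(void)
    {
        unsigned long now = (unsigned long)-10; /* counter about to wrap */
        unsigned long expect_rx_by = now + 20;  /* deadline lands past the wrap */

        printf("expired at now?      %d\n", after_eq(now, expect_rx_by));      /* 0 */
        printf("expired 25 ticks on? %d\n", after_eq(now + 25, expect_rx_by)); /* 1 */
        return 0;
    }
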
@@ -609,6 +631,8 @@ struct rxrpc_call {
609 ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ 631 ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
610 rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ 632 rxrpc_serial_t acks_latest; /* serial number of latest ACK received */
611 rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ 633 rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
634 rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
635 rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
612}; 636};
613 637
614/* 638/*
@@ -632,6 +656,35 @@ struct rxrpc_ack_summary {
632 u8 cumulative_acks; 656 u8 cumulative_acks;
633}; 657};
634 658
659/*
660 * sendmsg() cmsg-specified parameters.
661 */
662enum rxrpc_command {
663 RXRPC_CMD_SEND_DATA, /* send data message */
664 RXRPC_CMD_SEND_ABORT, /* request abort generation */
665 RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
666 RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
667};
668
669struct rxrpc_call_params {
670 s64 tx_total_len; /* Total Tx data length (if send data) */
671 unsigned long user_call_ID; /* User's call ID */
672 struct {
673 u32 hard; /* Maximum lifetime (sec) */
674 u32 idle; /* Max time since last data packet (msec) */
675 u32 normal; /* Max time since last call packet (msec) */
676 } timeouts;
677 u8 nr_timeouts; /* Number of timeouts specified */
678};
679
680struct rxrpc_send_params {
681 struct rxrpc_call_params call;
682 u32 abort_code; /* Abort code to Tx (if abort) */
683 enum rxrpc_command command : 8; /* The command to implement */
684 bool exclusive; /* Shared or exclusive call */
685 bool upgrade; /* If the connection is upgradeable */
686};
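
rxrpc_call_params collects what the caller specified via sendmsg() control messages before the call is created, including the new hard/idle/normal timeouts. A hedged sketch of filling one in follows; the struct is a userspace mirror of the layout above and every value is just example data, not a documented default.

    #include <stdio.h>

    struct call_timeouts { unsigned int hard, idle, normal; };

    struct call_params {
        long long tx_total_len;          /* -1 = not specified */
        unsigned long user_call_id;
        struct call_timeouts timeouts;
        unsigned char nr_timeouts;
    };

    int main(void)
    {
        struct call_params p = { .tx_total_len = -1, .user_call_id = 0x1234 };

        /* Example request: 60 s hard limit, 5000 ms idle, 10000 ms normal. */
        p.timeouts.hard = 60;
        p.timeouts.idle = 5000;
        p.timeouts.normal = 10000;
        p.nr_timeouts = 3;

        printf("call %#lx: hard=%us idle=%ums normal=%ums (%u timeouts)\n",
               p.user_call_id, p.timeouts.hard, p.timeouts.idle,
               p.timeouts.normal, p.nr_timeouts);
        return 0;
    }
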
687
635#include <trace/events/rxrpc.h> 688#include <trace/events/rxrpc.h>
636 689
637/* 690/*
@@ -657,12 +710,19 @@ int rxrpc_reject_call(struct rxrpc_sock *);
657/* 710/*
658 * call_event.c 711 * call_event.c
659 */ 712 */
660void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
661void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
662void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, 713void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
663 enum rxrpc_propose_ack_trace); 714 enum rxrpc_propose_ack_trace);
664void rxrpc_process_call(struct work_struct *); 715void rxrpc_process_call(struct work_struct *);
665 716
717static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
718 unsigned long expire_at,
719 unsigned long now,
720 enum rxrpc_timer_trace why)
721{
722 trace_rxrpc_timer(call, why, now);
723 timer_reduce(&call->timer, expire_at);
724}
725
666/* 726/*
667 * call_object.c 727 * call_object.c
668 */ 728 */
@@ -672,11 +732,11 @@ extern unsigned int rxrpc_max_call_lifetime;
672extern struct kmem_cache *rxrpc_call_jar; 732extern struct kmem_cache *rxrpc_call_jar;
673 733
674struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); 734struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
675struct rxrpc_call *rxrpc_alloc_call(gfp_t); 735struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t);
676struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, 736struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
677 struct rxrpc_conn_parameters *, 737 struct rxrpc_conn_parameters *,
678 struct sockaddr_rxrpc *, 738 struct sockaddr_rxrpc *,
679 unsigned long, s64, gfp_t); 739 struct rxrpc_call_params *, gfp_t);
680int rxrpc_retry_client_call(struct rxrpc_sock *, 740int rxrpc_retry_client_call(struct rxrpc_sock *,
681 struct rxrpc_call *, 741 struct rxrpc_call *,
682 struct rxrpc_conn_parameters *, 742 struct rxrpc_conn_parameters *,
@@ -803,8 +863,8 @@ static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
803 */ 863 */
804extern unsigned int rxrpc_max_client_connections; 864extern unsigned int rxrpc_max_client_connections;
805extern unsigned int rxrpc_reap_client_connections; 865extern unsigned int rxrpc_reap_client_connections;
806extern unsigned int rxrpc_conn_idle_client_expiry; 866extern unsigned long rxrpc_conn_idle_client_expiry;
807extern unsigned int rxrpc_conn_idle_client_fast_expiry; 867extern unsigned long rxrpc_conn_idle_client_fast_expiry;
808extern struct idr rxrpc_client_conn_ids; 868extern struct idr rxrpc_client_conn_ids;
809 869
810void rxrpc_destroy_client_conn_ids(void); 870void rxrpc_destroy_client_conn_ids(void);
@@ -825,6 +885,7 @@ void rxrpc_process_connection(struct work_struct *);
825 * conn_object.c 885 * conn_object.c
826 */ 886 */
827extern unsigned int rxrpc_connection_expiry; 887extern unsigned int rxrpc_connection_expiry;
888extern unsigned int rxrpc_closed_conn_expiry;
828 889
829struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); 890struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
830struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, 891struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
@@ -861,6 +922,12 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
861 rxrpc_put_service_conn(conn); 922 rxrpc_put_service_conn(conn);
862} 923}
863 924
925static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
926 unsigned long expire_at)
927{
928 timer_reduce(&conn->timer, expire_at);
929}
930
864/* 931/*
865 * conn_service.c 932 * conn_service.c
866 */ 933 */
@@ -930,13 +997,13 @@ static inline void rxrpc_queue_local(struct rxrpc_local *local)
930 * misc.c 997 * misc.c
931 */ 998 */
932extern unsigned int rxrpc_max_backlog __read_mostly; 999extern unsigned int rxrpc_max_backlog __read_mostly;
933extern unsigned int rxrpc_requested_ack_delay; 1000extern unsigned long rxrpc_requested_ack_delay;
934extern unsigned int rxrpc_soft_ack_delay; 1001extern unsigned long rxrpc_soft_ack_delay;
935extern unsigned int rxrpc_idle_ack_delay; 1002extern unsigned long rxrpc_idle_ack_delay;
936extern unsigned int rxrpc_rx_window_size; 1003extern unsigned int rxrpc_rx_window_size;
937extern unsigned int rxrpc_rx_mtu; 1004extern unsigned int rxrpc_rx_mtu;
938extern unsigned int rxrpc_rx_jumbo_max; 1005extern unsigned int rxrpc_rx_jumbo_max;
939extern unsigned int rxrpc_resend_timeout; 1006extern unsigned long rxrpc_resend_timeout;
940 1007
941extern const s8 rxrpc_ack_priority[]; 1008extern const s8 rxrpc_ack_priority[];
942 1009
@@ -954,7 +1021,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
954/* 1021/*
955 * output.c 1022 * output.c
956 */ 1023 */
957int rxrpc_send_ack_packet(struct rxrpc_call *, bool); 1024int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *);
958int rxrpc_send_abort_packet(struct rxrpc_call *); 1025int rxrpc_send_abort_packet(struct rxrpc_call *);
959int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); 1026int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
960void rxrpc_reject_packets(struct rxrpc_local *); 1027void rxrpc_reject_packets(struct rxrpc_local *);
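
Both rxrpc_reduce_call_timer() and rxrpc_reduce_conn_timer() lean on timer_reduce(), which only ever brings a pending timer's expiry forward, so any path that wants an earlier wake-up can call it without pushing someone else's deadline back. A userspace model of that one-way adjustment, with an invented helper name:

    #include <stdio.h>

    /* Model of timer_reduce(): adopt the new expiry only if it is earlier
     * than what is already programmed (wrap-safe comparison). */
    static void reduce_expiry(unsigned long *expires, unsigned long new_expiry)
    {
        if ((long)(new_expiry - *expires) < 0)
            *expires = new_expiry;
    }

    int main(void)
    {
        unsigned long expires = 1000;

        reduce_expiry(&expires, 1200); /* later deadline: ignored */
        reduce_expiry(&expires, 900);  /* earlier deadline: adopted */
        printf("timer now fires at %lu\n", expires); /* 900 */
        return 0;
    }
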
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index cbd1701e813a..3028298ca561 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
94 /* Now it gets complicated, because calls get registered with the 94 /* Now it gets complicated, because calls get registered with the
95 * socket here, particularly if a user ID is preassigned by the user. 95 * socket here, particularly if a user ID is preassigned by the user.
96 */ 96 */
97 call = rxrpc_alloc_call(gfp); 97 call = rxrpc_alloc_call(rx, gfp);
98 if (!call) 98 if (!call)
99 return -ENOMEM; 99 return -ENOMEM;
100 call->flags |= (1 << RXRPC_CALL_IS_SERVICE); 100 call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 3574508baf9a..ad2ab1103189 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -22,80 +22,6 @@
22#include "ar-internal.h" 22#include "ar-internal.h"
23 23
24/* 24/*
25 * Set the timer
26 */
27void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
28 ktime_t now)
29{
30 unsigned long t_j, now_j = jiffies;
31 ktime_t t;
32 bool queue = false;
33
34 if (call->state < RXRPC_CALL_COMPLETE) {
35 t = call->expire_at;
36 if (!ktime_after(t, now)) {
37 trace_rxrpc_timer(call, why, now, now_j);
38 queue = true;
39 goto out;
40 }
41
42 if (!ktime_after(call->resend_at, now)) {
43 call->resend_at = call->expire_at;
44 if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
45 queue = true;
46 } else if (ktime_before(call->resend_at, t)) {
47 t = call->resend_at;
48 }
49
50 if (!ktime_after(call->ack_at, now)) {
51 call->ack_at = call->expire_at;
52 if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
53 queue = true;
54 } else if (ktime_before(call->ack_at, t)) {
55 t = call->ack_at;
56 }
57
58 if (!ktime_after(call->ping_at, now)) {
59 call->ping_at = call->expire_at;
60 if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
61 queue = true;
62 } else if (ktime_before(call->ping_at, t)) {
63 t = call->ping_at;
64 }
65
66 t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now)));
67 t_j += jiffies;
68
69 /* We have to make sure that the calculated jiffies value falls
70 * at or after the nsec value, or we may loop ceaselessly
71 * because the timer times out, but we haven't reached the nsec
72 * timeout yet.
73 */
74 t_j++;
75
76 if (call->timer.expires != t_j || !timer_pending(&call->timer)) {
77 mod_timer(&call->timer, t_j);
78 trace_rxrpc_timer(call, why, now, now_j);
79 }
80 }
81
82out:
83 if (queue)
84 rxrpc_queue_call(call);
85}
86
87/*
88 * Set the timer
89 */
90void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
91 ktime_t now)
92{
93 read_lock_bh(&call->state_lock);
94 __rxrpc_set_timer(call, why, now);
95 read_unlock_bh(&call->state_lock);
96}
97
98/*
99 * Propose a PING ACK be sent. 25 * Propose a PING ACK be sent.
100 */ 26 */
101static void rxrpc_propose_ping(struct rxrpc_call *call, 27static void rxrpc_propose_ping(struct rxrpc_call *call,
@@ -106,12 +32,13 @@ static void rxrpc_propose_ping(struct rxrpc_call *call,
106 !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) 32 !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
107 rxrpc_queue_call(call); 33 rxrpc_queue_call(call);
108 } else { 34 } else {
109 ktime_t now = ktime_get_real(); 35 unsigned long now = jiffies;
110 ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay); 36 unsigned long ping_at = now + rxrpc_idle_ack_delay;
111 37
112 if (ktime_before(ping_at, call->ping_at)) { 38 if (time_before(ping_at, call->ping_at)) {
113 call->ping_at = ping_at; 39 WRITE_ONCE(call->ping_at, ping_at);
114 rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now); 40 rxrpc_reduce_call_timer(call, ping_at, now,
41 rxrpc_timer_set_for_ping);
115 } 42 }
116 } 43 }
117} 44}
@@ -125,8 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
125 enum rxrpc_propose_ack_trace why) 52 enum rxrpc_propose_ack_trace why)
126{ 53{
127 enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; 54 enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
128 unsigned int expiry = rxrpc_soft_ack_delay; 55 unsigned long expiry = rxrpc_soft_ack_delay;
129 ktime_t now, ack_at;
130 s8 prior = rxrpc_ack_priority[ack_reason]; 56 s8 prior = rxrpc_ack_priority[ack_reason];
131 57
132 /* Pings are handled specially because we don't want to accidentally 58 /* Pings are handled specially because we don't want to accidentally
@@ -190,11 +116,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
190 background) 116 background)
191 rxrpc_queue_call(call); 117 rxrpc_queue_call(call);
192 } else { 118 } else {
193 now = ktime_get_real(); 119 unsigned long now = jiffies, ack_at;
194 ack_at = ktime_add_ms(now, expiry); 120
195 if (ktime_before(ack_at, call->ack_at)) { 121 if (call->peer->rtt_usage > 0)
196 call->ack_at = ack_at; 122 ack_at = nsecs_to_jiffies(call->peer->rtt);
197 rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); 123 else
124 ack_at = expiry;
125
126 ack_at += now;
127 if (time_before(ack_at, call->ack_at)) {
128 WRITE_ONCE(call->ack_at, ack_at);
129 rxrpc_reduce_call_timer(call, ack_at, now,
130 rxrpc_timer_set_for_ack);
198 } 131 }
199 } 132 }
200 133
@@ -227,18 +160,28 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
227/* 160/*
228 * Perform retransmission of NAK'd and unack'd packets. 161 * Perform retransmission of NAK'd and unack'd packets.
229 */ 162 */
230static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) 163static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
231{ 164{
232 struct rxrpc_skb_priv *sp; 165 struct rxrpc_skb_priv *sp;
233 struct sk_buff *skb; 166 struct sk_buff *skb;
167 unsigned long resend_at;
234 rxrpc_seq_t cursor, seq, top; 168 rxrpc_seq_t cursor, seq, top;
235 ktime_t max_age, oldest, ack_ts; 169 ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo;
236 int ix; 170 int ix;
237 u8 annotation, anno_type, retrans = 0, unacked = 0; 171 u8 annotation, anno_type, retrans = 0, unacked = 0;
238 172
239 _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); 173 _enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
240 174
241 max_age = ktime_sub_ms(now, rxrpc_resend_timeout); 175 if (call->peer->rtt_usage > 1)
176 timeout = ns_to_ktime(call->peer->rtt * 3 / 2);
177 else
178 timeout = ms_to_ktime(rxrpc_resend_timeout);
179 min_timeo = ns_to_ktime((1000000000 / HZ) * 4);
180 if (ktime_before(timeout, min_timeo))
181 timeout = min_timeo;
182
183 now = ktime_get_real();
184 max_age = ktime_sub(now, timeout);
242 185
243 spin_lock_bh(&call->lock); 186 spin_lock_bh(&call->lock);
244 187
@@ -282,7 +225,9 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
282 ktime_to_ns(ktime_sub(skb->tstamp, max_age))); 225 ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
283 } 226 }
284 227
285 call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); 228 resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now)));
229 resend_at += jiffies + rxrpc_resend_timeout;
230 WRITE_ONCE(call->resend_at, resend_at);
286 231
287 if (unacked) 232 if (unacked)
288 rxrpc_congestion_timeout(call); 233 rxrpc_congestion_timeout(call);
@@ -292,14 +237,15 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
292 * retransmitting data. 237 * retransmitting data.
293 */ 238 */
294 if (!retrans) { 239 if (!retrans) {
295 rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); 240 rxrpc_reduce_call_timer(call, resend_at, now,
241 rxrpc_timer_set_for_resend);
296 spin_unlock_bh(&call->lock); 242 spin_unlock_bh(&call->lock);
297 ack_ts = ktime_sub(now, call->acks_latest_ts); 243 ack_ts = ktime_sub(now, call->acks_latest_ts);
298 if (ktime_to_ns(ack_ts) < call->peer->rtt) 244 if (ktime_to_ns(ack_ts) < call->peer->rtt)
299 goto out; 245 goto out;
300 rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, 246 rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
301 rxrpc_propose_ack_ping_for_lost_ack); 247 rxrpc_propose_ack_ping_for_lost_ack);
302 rxrpc_send_ack_packet(call, true); 248 rxrpc_send_ack_packet(call, true, NULL);
303 goto out; 249 goto out;
304 } 250 }
305 251
@@ -364,7 +310,8 @@ void rxrpc_process_call(struct work_struct *work)
364{ 310{
365 struct rxrpc_call *call = 311 struct rxrpc_call *call =
366 container_of(work, struct rxrpc_call, processor); 312 container_of(work, struct rxrpc_call, processor);
367 ktime_t now; 313 rxrpc_serial_t *send_ack;
314 unsigned long now, next, t;
368 315
369 rxrpc_see_call(call); 316 rxrpc_see_call(call);
370 317
@@ -384,22 +331,89 @@ recheck_state:
384 goto out_put; 331 goto out_put;
385 } 332 }
386 333
387 now = ktime_get_real(); 334 /* Work out if any timeouts tripped */
388 if (ktime_before(call->expire_at, now)) { 335 now = jiffies;
336 t = READ_ONCE(call->expect_rx_by);
337 if (time_after_eq(now, t)) {
338 trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
339 set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
340 }
341
342 t = READ_ONCE(call->expect_req_by);
343 if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
344 time_after_eq(now, t)) {
345 trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
346 set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
347 }
348
349 t = READ_ONCE(call->expect_term_by);
350 if (time_after_eq(now, t)) {
351 trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
352 set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
353 }
354
355 t = READ_ONCE(call->ack_at);
356 if (time_after_eq(now, t)) {
357 trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
358 cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET);
359 set_bit(RXRPC_CALL_EV_ACK, &call->events);
360 }
361
362 t = READ_ONCE(call->ack_lost_at);
363 if (time_after_eq(now, t)) {
364 trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now);
365 cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET);
366 set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
367 }
368
369 t = READ_ONCE(call->keepalive_at);
370 if (time_after_eq(now, t)) {
371 trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
372 cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
373 rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true,
374 rxrpc_propose_ack_ping_for_keepalive);
375 set_bit(RXRPC_CALL_EV_PING, &call->events);
376 }
377
378 t = READ_ONCE(call->ping_at);
379 if (time_after_eq(now, t)) {
380 trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
381 cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
382 set_bit(RXRPC_CALL_EV_PING, &call->events);
383 }
384
385 t = READ_ONCE(call->resend_at);
386 if (time_after_eq(now, t)) {
387 trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
388 cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
389 set_bit(RXRPC_CALL_EV_RESEND, &call->events);
390 }
391
392 /* Process events */
393 if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) {
389 rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); 394 rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
390 set_bit(RXRPC_CALL_EV_ABORT, &call->events); 395 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
391 goto recheck_state; 396 goto recheck_state;
392 } 397 }
393 398
394 if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { 399 send_ack = NULL;
400 if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
401 call->acks_lost_top = call->tx_top;
402 rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
403 rxrpc_propose_ack_ping_for_lost_ack);
404 send_ack = &call->acks_lost_ping;
405 }
406
407 if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
408 send_ack) {
395 if (call->ackr_reason) { 409 if (call->ackr_reason) {
396 rxrpc_send_ack_packet(call, false); 410 rxrpc_send_ack_packet(call, false, send_ack);
397 goto recheck_state; 411 goto recheck_state;
398 } 412 }
399 } 413 }
400 414
401 if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { 415 if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) {
402 rxrpc_send_ack_packet(call, true); 416 rxrpc_send_ack_packet(call, true, NULL);
403 goto recheck_state; 417 goto recheck_state;
404 } 418 }
405 419
@@ -408,7 +422,24 @@ recheck_state:
408 goto recheck_state; 422 goto recheck_state;
409 } 423 }
410 424
411 rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); 425 /* Make sure the timer is restarted */
426 next = call->expect_rx_by;
427
428#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
429
430 set(call->expect_req_by);
431 set(call->expect_term_by);
432 set(call->ack_at);
433 set(call->ack_lost_at);
434 set(call->resend_at);
435 set(call->keepalive_at);
436 set(call->ping_at);
437
438 now = jiffies;
439 if (time_after_eq(now, next))
440 goto recheck_state;
441
442 rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
412 443
413 /* other events may have been raised since we started checking */ 444 /* other events may have been raised since we started checking */
414 if (call->events && call->state < RXRPC_CALL_COMPLETE) { 445 if (call->events && call->state < RXRPC_CALL_COMPLETE) {
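
After the events have been handled, rxrpc_process_call() re-arms the single call timer with the earliest of the remaining deadlines, which is what the set() macro in the hunk above computes. A standalone model of that minimum-of-deadlines step, assuming illustrative jiffies values:

    #include <stdio.h>

    static int before(unsigned long a, unsigned long b)
    {
        return (long)(a - b) < 0;   /* wrap-safe, like time_before() */
    }

    int main(void)
    {
        /* Stand-ins for expect_rx_by, ack_at, resend_at, keepalive_at, ... */
        unsigned long deadlines[] = { 5000, 4200, 9999, 4700 };
        unsigned long next = deadlines[0];
        unsigned int i;

        for (i = 1; i < sizeof(deadlines) / sizeof(deadlines[0]); i++)
            if (before(deadlines[i], next))
                next = deadlines[i];

        printf("re-arm the call timer for %lu\n", next); /* 4200 */
        return 0;
    }
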
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 994dc2df57e4..0b2db38dd32d 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -51,10 +51,14 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
51 51
52 _enter("%d", call->debug_id); 52 _enter("%d", call->debug_id);
53 53
54 if (call->state < RXRPC_CALL_COMPLETE) 54 if (call->state < RXRPC_CALL_COMPLETE) {
55 rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); 55 trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
56 rxrpc_queue_call(call);
57 }
56} 58}
57 59
60static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
61
58/* 62/*
59 * find an extant server call 63 * find an extant server call
60 * - called in process context with IRQs enabled 64 * - called in process context with IRQs enabled
@@ -95,7 +99,7 @@ found_extant_call:
95/* 99/*
96 * allocate a new call 100 * allocate a new call
97 */ 101 */
98struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) 102struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
99{ 103{
100 struct rxrpc_call *call; 104 struct rxrpc_call *call;
101 105
@@ -114,6 +118,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
114 goto nomem_2; 118 goto nomem_2;
115 119
116 mutex_init(&call->user_mutex); 120 mutex_init(&call->user_mutex);
121
122 /* Prevent lockdep reporting a deadlock false positive between the afs
123 * filesystem and sys_sendmsg() via the mmap sem.
124 */
125 if (rx->sk.sk_kern_sock)
126 lockdep_set_class(&call->user_mutex,
127 &rxrpc_call_user_mutex_lock_class_key);
128
117 timer_setup(&call->timer, rxrpc_call_timer_expired, 0); 129 timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
118 INIT_WORK(&call->processor, &rxrpc_process_call); 130 INIT_WORK(&call->processor, &rxrpc_process_call);
119 INIT_LIST_HEAD(&call->link); 131 INIT_LIST_HEAD(&call->link);
@@ -128,6 +140,8 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
128 atomic_set(&call->usage, 1); 140 atomic_set(&call->usage, 1);
129 call->debug_id = atomic_inc_return(&rxrpc_debug_id); 141 call->debug_id = atomic_inc_return(&rxrpc_debug_id);
130 call->tx_total_len = -1; 142 call->tx_total_len = -1;
143 call->next_rx_timo = 20 * HZ;
144 call->next_req_timo = 1 * HZ;
131 145
132 memset(&call->sock_node, 0xed, sizeof(call->sock_node)); 146 memset(&call->sock_node, 0xed, sizeof(call->sock_node));
133 147
@@ -150,7 +164,8 @@ nomem:
150/* 164/*
151 * Allocate a new client call. 165 * Allocate a new client call.
152 */ 166 */
153static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, 167static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
168 struct sockaddr_rxrpc *srx,
154 gfp_t gfp) 169 gfp_t gfp)
155{ 170{
156 struct rxrpc_call *call; 171 struct rxrpc_call *call;
@@ -158,7 +173,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
158 173
159 _enter(""); 174 _enter("");
160 175
161 call = rxrpc_alloc_call(gfp); 176 call = rxrpc_alloc_call(rx, gfp);
162 if (!call) 177 if (!call)
163 return ERR_PTR(-ENOMEM); 178 return ERR_PTR(-ENOMEM);
164 call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; 179 call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
@@ -177,15 +192,17 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
177 */ 192 */
178static void rxrpc_start_call_timer(struct rxrpc_call *call) 193static void rxrpc_start_call_timer(struct rxrpc_call *call)
179{ 194{
180 ktime_t now = ktime_get_real(), expire_at; 195 unsigned long now = jiffies;
181 196 unsigned long j = now + MAX_JIFFY_OFFSET;
182 expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); 197
183 call->expire_at = expire_at; 198 call->ack_at = j;
184 call->ack_at = expire_at; 199 call->ack_lost_at = j;
185 call->ping_at = expire_at; 200 call->resend_at = j;
186 call->resend_at = expire_at; 201 call->ping_at = j;
187 call->timer.expires = jiffies + LONG_MAX / 2; 202 call->expect_rx_by = j;
188 rxrpc_set_timer(call, rxrpc_timer_begin, now); 203 call->expect_req_by = j;
204 call->expect_term_by = j;
205 call->timer.expires = now;
189} 206}
190 207
191/* 208/*
@@ -196,8 +213,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
196struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, 213struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
197 struct rxrpc_conn_parameters *cp, 214 struct rxrpc_conn_parameters *cp,
198 struct sockaddr_rxrpc *srx, 215 struct sockaddr_rxrpc *srx,
199 unsigned long user_call_ID, 216 struct rxrpc_call_params *p,
200 s64 tx_total_len,
201 gfp_t gfp) 217 gfp_t gfp)
202 __releases(&rx->sk.sk_lock.slock) 218 __releases(&rx->sk.sk_lock.slock)
203{ 219{
@@ -207,18 +223,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
207 const void *here = __builtin_return_address(0); 223 const void *here = __builtin_return_address(0);
208 int ret; 224 int ret;
209 225
210 _enter("%p,%lx", rx, user_call_ID); 226 _enter("%p,%lx", rx, p->user_call_ID);
211 227
212 call = rxrpc_alloc_client_call(srx, gfp); 228 call = rxrpc_alloc_client_call(rx, srx, gfp);
213 if (IS_ERR(call)) { 229 if (IS_ERR(call)) {
214 release_sock(&rx->sk); 230 release_sock(&rx->sk);
215 _leave(" = %ld", PTR_ERR(call)); 231 _leave(" = %ld", PTR_ERR(call));
216 return call; 232 return call;
217 } 233 }
218 234
219 call->tx_total_len = tx_total_len; 235 call->tx_total_len = p->tx_total_len;
220 trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), 236 trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
221 here, (const void *)user_call_ID); 237 here, (const void *)p->user_call_ID);
222 238
223 /* We need to protect a partially set up call against the user as we 239 /* We need to protect a partially set up call against the user as we
224 * will be acting outside the socket lock. 240 * will be acting outside the socket lock.
@@ -234,16 +250,16 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
234 parent = *pp; 250 parent = *pp;
235 xcall = rb_entry(parent, struct rxrpc_call, sock_node); 251 xcall = rb_entry(parent, struct rxrpc_call, sock_node);
236 252
237 if (user_call_ID < xcall->user_call_ID) 253 if (p->user_call_ID < xcall->user_call_ID)
238 pp = &(*pp)->rb_left; 254 pp = &(*pp)->rb_left;
239 else if (user_call_ID > xcall->user_call_ID) 255 else if (p->user_call_ID > xcall->user_call_ID)
240 pp = &(*pp)->rb_right; 256 pp = &(*pp)->rb_right;
241 else 257 else
242 goto error_dup_user_ID; 258 goto error_dup_user_ID;
243 } 259 }
244 260
245 rcu_assign_pointer(call->socket, rx); 261 rcu_assign_pointer(call->socket, rx);
246 call->user_call_ID = user_call_ID; 262 call->user_call_ID = p->user_call_ID;
247 __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); 263 __set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
248 rxrpc_get_call(call, rxrpc_call_got_userid); 264 rxrpc_get_call(call, rxrpc_call_got_userid);
249 rb_link_node(&call->sock_node, parent, pp); 265 rb_link_node(&call->sock_node, parent, pp);
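
rxrpc_start_call_timer() now parks every deadline at now + MAX_JIFFY_OFFSET, about as far in the future as a jiffies deadline can safely point, and individual deadlines are only pulled earlier when something actually needs them. A sketch of why that value behaves as "never"; the offset constant below is an approximation of the kernel's, not its exact definition.

    #include <limits.h>
    #include <stdio.h>

    /* Roughly MAX_JIFFY_OFFSET: about half the counter range, so that
     * now + offset still compares as being after now. */
    #define MAX_OFFSET ((LONG_MAX >> 1) - 1)

    static int after_eq(unsigned long a, unsigned long b)
    {
        return (long)(a - b) >= 0;
    }

    int main(void)
    {
        unsigned long now = 123456;
        unsigned long ack_at = now + MAX_OFFSET;    /* effectively disarmed */

        printf("parked deadline tripped? %d\n", after_eq(now, ack_at));     /* 0 */
        ack_at = now + 2;                           /* armed for real */
        printf("armed deadline tripped?  %d\n", after_eq(now + 2, ack_at)); /* 1 */
        return 0;
    }
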
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 5f9624bd311c..7f74ca3059f8 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -85,8 +85,8 @@
85 85
86__read_mostly unsigned int rxrpc_max_client_connections = 1000; 86__read_mostly unsigned int rxrpc_max_client_connections = 1000;
87__read_mostly unsigned int rxrpc_reap_client_connections = 900; 87__read_mostly unsigned int rxrpc_reap_client_connections = 900;
88__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; 88__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
89__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; 89__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
90 90
91/* 91/*
92 * We use machine-unique IDs for our client connections. 92 * We use machine-unique IDs for our client connections.
@@ -554,6 +554,11 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
554 554
555 trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); 555 trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
556 556
557 /* Cancel the final ACK on the previous call if it hasn't been sent yet
558 * as the DATA packet will implicitly ACK it.
559 */
560 clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
561
557 write_lock_bh(&call->state_lock); 562 write_lock_bh(&call->state_lock);
558 if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) 563 if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
559 call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; 564 call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
@@ -686,7 +691,7 @@ int rxrpc_connect_call(struct rxrpc_call *call,
686 691
687 _enter("{%d,%lx},", call->debug_id, call->user_call_ID); 692 _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
688 693
689 rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work); 694 rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
690 rxrpc_cull_active_client_conns(rxnet); 695 rxrpc_cull_active_client_conns(rxnet);
691 696
692 ret = rxrpc_get_client_conn(call, cp, srx, gfp); 697 ret = rxrpc_get_client_conn(call, cp, srx, gfp);
@@ -752,6 +757,18 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
752} 757}
753 758
754/* 759/*
760 * Set the reap timer.
761 */
762static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
763{
764 unsigned long now = jiffies;
765 unsigned long reap_at = now + rxrpc_conn_idle_client_expiry;
766
767 if (rxnet->live)
768 timer_reduce(&rxnet->client_conn_reap_timer, reap_at);
769}
770
771/*
755 * Disconnect a client call. 772 * Disconnect a client call.
756 */ 773 */
757void rxrpc_disconnect_client_call(struct rxrpc_call *call) 774void rxrpc_disconnect_client_call(struct rxrpc_call *call)
@@ -813,6 +830,19 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
813 goto out_2; 830 goto out_2;
814 } 831 }
815 832
833 /* Schedule the final ACK to be transmitted in a short while so that it
834 * can be skipped if we find a follow-on call. The first DATA packet
835 * of the follow on call will implicitly ACK this call.
836 */
837 if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
838 unsigned long final_ack_at = jiffies + 2;
839
840 WRITE_ONCE(chan->final_ack_at, final_ack_at);
841 smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */
842 set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
843 rxrpc_reduce_conn_timer(conn, final_ack_at);
844 }
845
816 /* Things are more complex and we need the cache lock. We might be 846 /* Things are more complex and we need the cache lock. We might be
817 * able to simply idle the conn or it might now be lurking on the wait 847 * able to simply idle the conn or it might now be lurking on the wait
818 * list. It might even get moved back to the active list whilst we're 848 * list. It might even get moved back to the active list whilst we're
@@ -878,9 +908,7 @@ idle_connection:
878 list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); 908 list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
879 if (rxnet->idle_client_conns.next == &conn->cache_link && 909 if (rxnet->idle_client_conns.next == &conn->cache_link &&
880 !rxnet->kill_all_client_conns) 910 !rxnet->kill_all_client_conns)
881 queue_delayed_work(rxrpc_workqueue, 911 rxrpc_set_client_reap_timer(rxnet);
882 &rxnet->client_conn_reaper,
883 rxrpc_conn_idle_client_expiry);
884 } else { 912 } else {
885 trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); 913 trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
886 conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; 914 conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
@@ -1018,8 +1046,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work)
1018{ 1046{
1019 struct rxrpc_connection *conn; 1047 struct rxrpc_connection *conn;
1020 struct rxrpc_net *rxnet = 1048 struct rxrpc_net *rxnet =
1021 container_of(to_delayed_work(work), 1049 container_of(work, struct rxrpc_net, client_conn_reaper);
1022 struct rxrpc_net, client_conn_reaper);
1023 unsigned long expiry, conn_expires_at, now; 1050 unsigned long expiry, conn_expires_at, now;
1024 unsigned int nr_conns; 1051 unsigned int nr_conns;
1025 bool did_discard = false; 1052 bool did_discard = false;
@@ -1061,6 +1088,8 @@ next:
1061 expiry = rxrpc_conn_idle_client_expiry; 1088 expiry = rxrpc_conn_idle_client_expiry;
1062 if (nr_conns > rxrpc_reap_client_connections) 1089 if (nr_conns > rxrpc_reap_client_connections)
1063 expiry = rxrpc_conn_idle_client_fast_expiry; 1090 expiry = rxrpc_conn_idle_client_fast_expiry;
1091 if (conn->params.local->service_closed)
1092 expiry = rxrpc_closed_conn_expiry * HZ;
1064 1093
1065 conn_expires_at = conn->idle_timestamp + expiry; 1094 conn_expires_at = conn->idle_timestamp + expiry;
1066 1095
@@ -1096,9 +1125,8 @@ not_yet_expired:
1096 */ 1125 */
1097 _debug("not yet"); 1126 _debug("not yet");
1098 if (!rxnet->kill_all_client_conns) 1127 if (!rxnet->kill_all_client_conns)
1099 queue_delayed_work(rxrpc_workqueue, 1128 timer_reduce(&rxnet->client_conn_reap_timer,
1100 &rxnet->client_conn_reaper, 1129 conn_expires_at);
1101 conn_expires_at - now);
1102 1130
1103out: 1131out:
1104 spin_unlock(&rxnet->client_conn_cache_lock); 1132 spin_unlock(&rxnet->client_conn_cache_lock);
@@ -1118,9 +1146,9 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
1118 rxnet->kill_all_client_conns = true; 1146 rxnet->kill_all_client_conns = true;
1119 spin_unlock(&rxnet->client_conn_cache_lock); 1147 spin_unlock(&rxnet->client_conn_cache_lock);
1120 1148
1121 cancel_delayed_work(&rxnet->client_conn_reaper); 1149 del_timer_sync(&rxnet->client_conn_reap_timer);
1122 1150
1123 if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0)) 1151 if (!rxrpc_queue_work(&rxnet->client_conn_reaper))
1124 _debug("destroy: queue failed"); 1152 _debug("destroy: queue failed");
1125 1153
1126 _leave(""); 1154 _leave("");
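
The disconnect path defers the final ACK by a couple of jiffies precisely so that a follow-on call picking up the same channel can cancel it, since that call's first DATA packet implicitly acknowledges the old one. A toy model of the defer-then-maybe-cancel sequence, with invented names standing in for the conn flag and channel fields:

    #include <stdio.h>

    struct channel_model {
        unsigned long final_ack_at;     /* when to emit the deferred final ACK */
        int final_ack_pending;          /* models the per-channel conn flag */
    };

    static void disconnect_call(struct channel_model *ch, unsigned long now)
    {
        ch->final_ack_at = now + 2;     /* short grace period, as in the patch */
        ch->final_ack_pending = 1;
    }

    static void activate_follow_on_call(struct channel_model *ch)
    {
        ch->final_ack_pending = 0;      /* new call's DATA ACKs the old call */
    }

    int main(void)
    {
        struct channel_model ch = { 0, 0 };

        disconnect_call(&ch, 1000);
        activate_follow_on_call(&ch);
        printf("final ACK still needed: %d\n", ch.final_ack_pending); /* 0 */
        return 0;
    }
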
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 59a51a56e7c8..4ca11be6be3c 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -24,31 +24,28 @@
24 * Retransmit terminal ACK or ABORT of the previous call. 24 * Retransmit terminal ACK or ABORT of the previous call.
25 */ 25 */
26static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, 26static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
27 struct sk_buff *skb) 27 struct sk_buff *skb,
28 unsigned int channel)
28{ 29{
29 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 30 struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
30 struct rxrpc_channel *chan; 31 struct rxrpc_channel *chan;
31 struct msghdr msg; 32 struct msghdr msg;
32 struct kvec iov; 33 struct kvec iov[3];
33 struct { 34 struct {
34 struct rxrpc_wire_header whdr; 35 struct rxrpc_wire_header whdr;
35 union { 36 union {
36 struct { 37 __be32 abort_code;
37 __be32 code; 38 struct rxrpc_ackpacket ack;
38 } abort;
39 struct {
40 struct rxrpc_ackpacket ack;
41 u8 padding[3];
42 struct rxrpc_ackinfo info;
43 };
44 }; 39 };
45 } __attribute__((packed)) pkt; 40 } __attribute__((packed)) pkt;
41 struct rxrpc_ackinfo ack_info;
46 size_t len; 42 size_t len;
47 u32 serial, mtu, call_id; 43 int ioc;
44 u32 serial, mtu, call_id, padding;
48 45
49 _enter("%d", conn->debug_id); 46 _enter("%d", conn->debug_id);
50 47
51 chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; 48 chan = &conn->channels[channel];
52 49
53 /* If the last call got moved on whilst we were waiting to run, just 50 /* If the last call got moved on whilst we were waiting to run, just
54 * ignore this packet. 51 * ignore this packet.
@@ -56,7 +53,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
56 call_id = READ_ONCE(chan->last_call); 53 call_id = READ_ONCE(chan->last_call);
57 /* Sync with __rxrpc_disconnect_call() */ 54 /* Sync with __rxrpc_disconnect_call() */
58 smp_rmb(); 55 smp_rmb();
59 if (call_id != sp->hdr.callNumber) 56 if (skb && call_id != sp->hdr.callNumber)
60 return; 57 return;
61 58
62 msg.msg_name = &conn->params.peer->srx.transport; 59 msg.msg_name = &conn->params.peer->srx.transport;
@@ -65,9 +62,16 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
65 msg.msg_controllen = 0; 62 msg.msg_controllen = 0;
66 msg.msg_flags = 0; 63 msg.msg_flags = 0;
67 64
68 pkt.whdr.epoch = htonl(sp->hdr.epoch); 65 iov[0].iov_base = &pkt;
69 pkt.whdr.cid = htonl(sp->hdr.cid); 66 iov[0].iov_len = sizeof(pkt.whdr);
70 pkt.whdr.callNumber = htonl(sp->hdr.callNumber); 67 iov[1].iov_base = &padding;
68 iov[1].iov_len = 3;
69 iov[2].iov_base = &ack_info;
70 iov[2].iov_len = sizeof(ack_info);
71
72 pkt.whdr.epoch = htonl(conn->proto.epoch);
73 pkt.whdr.cid = htonl(conn->proto.cid);
74 pkt.whdr.callNumber = htonl(call_id);
71 pkt.whdr.seq = 0; 75 pkt.whdr.seq = 0;
72 pkt.whdr.type = chan->last_type; 76 pkt.whdr.type = chan->last_type;
73 pkt.whdr.flags = conn->out_clientflag; 77 pkt.whdr.flags = conn->out_clientflag;
@@ -79,27 +83,35 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
79 len = sizeof(pkt.whdr); 83 len = sizeof(pkt.whdr);
80 switch (chan->last_type) { 84 switch (chan->last_type) {
81 case RXRPC_PACKET_TYPE_ABORT: 85 case RXRPC_PACKET_TYPE_ABORT:
82 pkt.abort.code = htonl(chan->last_abort); 86 pkt.abort_code = htonl(chan->last_abort);
83 len += sizeof(pkt.abort); 87 iov[0].iov_len += sizeof(pkt.abort_code);
88 len += sizeof(pkt.abort_code);
89 ioc = 1;
84 break; 90 break;
85 91
86 case RXRPC_PACKET_TYPE_ACK: 92 case RXRPC_PACKET_TYPE_ACK:
87 mtu = conn->params.peer->if_mtu; 93 mtu = conn->params.peer->if_mtu;
88 mtu -= conn->params.peer->hdrsize; 94 mtu -= conn->params.peer->hdrsize;
89 pkt.ack.bufferSpace = 0; 95 pkt.ack.bufferSpace = 0;
90 pkt.ack.maxSkew = htons(skb->priority); 96 pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
91 pkt.ack.firstPacket = htonl(chan->last_seq); 97 pkt.ack.firstPacket = htonl(chan->last_seq + 1);
92 pkt.ack.previousPacket = htonl(chan->last_seq - 1); 98 pkt.ack.previousPacket = htonl(chan->last_seq);
93 pkt.ack.serial = htonl(sp->hdr.serial); 99 pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
94 pkt.ack.reason = RXRPC_ACK_DUPLICATE; 100 pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
95 pkt.ack.nAcks = 0; 101 pkt.ack.nAcks = 0;
96 pkt.info.rxMTU = htonl(rxrpc_rx_mtu); 102 ack_info.rxMTU = htonl(rxrpc_rx_mtu);
97 pkt.info.maxMTU = htonl(mtu); 103 ack_info.maxMTU = htonl(mtu);
98 pkt.info.rwind = htonl(rxrpc_rx_window_size); 104 ack_info.rwind = htonl(rxrpc_rx_window_size);
99 pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); 105 ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
100 pkt.whdr.flags |= RXRPC_SLOW_START_OK; 106 pkt.whdr.flags |= RXRPC_SLOW_START_OK;
101 len += sizeof(pkt.ack) + sizeof(pkt.info); 107 padding = 0;
108 iov[0].iov_len += sizeof(pkt.ack);
109 len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
110 ioc = 3;
102 break; 111 break;
112
113 default:
114 return;
103 } 115 }
104 116
105 /* Resync with __rxrpc_disconnect_call() and check that the last call 117 /* Resync with __rxrpc_disconnect_call() and check that the last call
@@ -109,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
109 if (READ_ONCE(chan->last_call) != call_id) 121 if (READ_ONCE(chan->last_call) != call_id)
110 return; 122 return;
111 123
112 iov.iov_base = &pkt;
113 iov.iov_len = len;
114
115 serial = atomic_inc_return(&conn->serial); 124 serial = atomic_inc_return(&conn->serial);
116 pkt.whdr.serial = htonl(serial); 125 pkt.whdr.serial = htonl(serial);
117 126
@@ -126,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
126 break; 135 break;
127 } 136 }
128 137
129 kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); 138 kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
130 _leave(""); 139 _leave("");
131 return; 140 return;
132} 141}
@@ -272,7 +281,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
272 switch (sp->hdr.type) { 281 switch (sp->hdr.type) {
273 case RXRPC_PACKET_TYPE_DATA: 282 case RXRPC_PACKET_TYPE_DATA:
274 case RXRPC_PACKET_TYPE_ACK: 283 case RXRPC_PACKET_TYPE_ACK:
275 rxrpc_conn_retransmit_call(conn, skb); 284 rxrpc_conn_retransmit_call(conn, skb,
285 sp->hdr.cid & RXRPC_CHANNELMASK);
276 return 0; 286 return 0;
277 287
278 case RXRPC_PACKET_TYPE_BUSY: 288 case RXRPC_PACKET_TYPE_BUSY:
@@ -379,6 +389,48 @@ abort:
379} 389}
380 390
381/* 391/*
392 * Process delayed final ACKs that we haven't subsumed into a subsequent call.
393 */
394static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn)
395{
396 unsigned long j = jiffies, next_j;
397 unsigned int channel;
398 bool set;
399
400again:
401 next_j = j + LONG_MAX;
402 set = false;
403 for (channel = 0; channel < RXRPC_MAXCALLS; channel++) {
404 struct rxrpc_channel *chan = &conn->channels[channel];
405 unsigned long ack_at;
406
407 if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
408 continue;
409
410 smp_rmb(); /* vs rxrpc_disconnect_client_call */
411 ack_at = READ_ONCE(chan->final_ack_at);
412
413 if (time_before(j, ack_at)) {
414 if (time_before(ack_at, next_j)) {
415 next_j = ack_at;
416 set = true;
417 }
418 continue;
419 }
420
421 if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel,
422 &conn->flags))
423 rxrpc_conn_retransmit_call(conn, NULL, channel);
424 }
425
426 j = jiffies;
427 if (time_before_eq(next_j, j))
428 goto again;
429 if (set)
430 rxrpc_reduce_conn_timer(conn, next_j);
431}
432
433/*
382 * connection-level event processor 434 * connection-level event processor
383 */ 435 */
384void rxrpc_process_connection(struct work_struct *work) 436void rxrpc_process_connection(struct work_struct *work)
@@ -394,6 +446,10 @@ void rxrpc_process_connection(struct work_struct *work)
394 if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) 446 if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
395 rxrpc_secure_connection(conn); 447 rxrpc_secure_connection(conn);
396 448
449 /* Process delayed ACKs whose time has come. */
450 if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
451 rxrpc_process_delayed_final_acks(conn);
452
397 /* go through the conn-level event packets, releasing the ref on this 453 /* go through the conn-level event packets, releasing the ref on this
398 * connection that each one has when we've finished with it */ 454 * connection that each one has when we've finished with it */
399 while ((skb = skb_dequeue(&conn->rx_queue))) { 455 while ((skb = skb_dequeue(&conn->rx_queue))) {
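
The retransmitted ACK is now assembled from three kvecs: the wire header plus ack body, three bytes of padding, then the ackinfo trailer, so the same path can serve both the duplicate-packet case and the skb-less delayed final ACK. The sketch below models how such a gathered write adds up using plain POSIX iovecs; the struct layouts are invented stand-ins, not the on-the-wire rxrpc formats.

    #include <stdio.h>
    #include <sys/uio.h>

    struct wire_hdr { unsigned int cid, call_id; unsigned char type, flags; };
    struct ack_body { unsigned char reason, n_acks; unsigned int first, prev; };
    struct ack_info { unsigned int rx_mtu, max_mtu, rwind, jumbo_max; };

    int main(void)
    {
        struct { struct wire_hdr whdr; struct ack_body ack; } pkt = { { 0 } };
        struct ack_info info = { 0 };
        unsigned int padding = 0;
        struct iovec iov[3];
        size_t len, i;

        /* Same three-piece shape as the rxrpc retransmit path. */
        iov[0].iov_base = &pkt;     iov[0].iov_len = sizeof(pkt);
        iov[1].iov_base = &padding; iov[1].iov_len = 3;
        iov[2].iov_base = &info;    iov[2].iov_len = sizeof(info);

        for (len = 0, i = 0; i < 3; i++)
            len += iov[i].iov_len;
        printf("total ACK length: %zu bytes over 3 iovecs\n", len);
        return 0;
    }
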
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index fe575798592f..c628351eb900 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -20,10 +20,19 @@
20/* 20/*
21 * Time till a connection expires after last use (in seconds). 21 * Time till a connection expires after last use (in seconds).
22 */ 22 */
23unsigned int rxrpc_connection_expiry = 10 * 60; 23unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60;
24unsigned int __read_mostly rxrpc_closed_conn_expiry = 10;
24 25
25static void rxrpc_destroy_connection(struct rcu_head *); 26static void rxrpc_destroy_connection(struct rcu_head *);
26 27
28static void rxrpc_connection_timer(struct timer_list *timer)
29{
30 struct rxrpc_connection *conn =
31 container_of(timer, struct rxrpc_connection, timer);
32
33 rxrpc_queue_conn(conn);
34}
35
27/* 36/*
28 * allocate a new connection 37 * allocate a new connection
29 */ 38 */
@@ -38,6 +47,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
38 INIT_LIST_HEAD(&conn->cache_link); 47 INIT_LIST_HEAD(&conn->cache_link);
39 spin_lock_init(&conn->channel_lock); 48 spin_lock_init(&conn->channel_lock);
40 INIT_LIST_HEAD(&conn->waiting_calls); 49 INIT_LIST_HEAD(&conn->waiting_calls);
50 timer_setup(&conn->timer, &rxrpc_connection_timer, 0);
41 INIT_WORK(&conn->processor, &rxrpc_process_connection); 51 INIT_WORK(&conn->processor, &rxrpc_process_connection);
42 INIT_LIST_HEAD(&conn->proc_link); 52 INIT_LIST_HEAD(&conn->proc_link);
43 INIT_LIST_HEAD(&conn->link); 53 INIT_LIST_HEAD(&conn->link);
@@ -301,21 +311,29 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
301} 311}
302 312
303/* 313/*
314 * Set the service connection reap timer.
315 */
316static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
317 unsigned long reap_at)
318{
319 if (rxnet->live)
320 timer_reduce(&rxnet->service_conn_reap_timer, reap_at);
321}
322
323/*
304 * Release a service connection 324 * Release a service connection
305 */ 325 */
306void rxrpc_put_service_conn(struct rxrpc_connection *conn) 326void rxrpc_put_service_conn(struct rxrpc_connection *conn)
307{ 327{
308 struct rxrpc_net *rxnet;
309 const void *here = __builtin_return_address(0); 328 const void *here = __builtin_return_address(0);
310 int n; 329 int n;
311 330
312 n = atomic_dec_return(&conn->usage); 331 n = atomic_dec_return(&conn->usage);
313 trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); 332 trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
314 ASSERTCMP(n, >=, 0); 333 ASSERTCMP(n, >=, 0);
315 if (n == 0) { 334 if (n == 1)
316 rxnet = conn->params.local->rxnet; 335 rxrpc_set_service_reap_timer(conn->params.local->rxnet,
317 rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); 336 jiffies + rxrpc_connection_expiry);
318 }
319} 337}
320 338
321/* 339/*
@@ -332,6 +350,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
332 350
333 _net("DESTROY CONN %d", conn->debug_id); 351 _net("DESTROY CONN %d", conn->debug_id);
334 352
353 del_timer_sync(&conn->timer);
335 rxrpc_purge_queue(&conn->rx_queue); 354 rxrpc_purge_queue(&conn->rx_queue);
336 355
337 conn->security->clear(conn); 356 conn->security->clear(conn);
@@ -351,17 +370,15 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
351{ 370{
352 struct rxrpc_connection *conn, *_p; 371 struct rxrpc_connection *conn, *_p;
353 struct rxrpc_net *rxnet = 372 struct rxrpc_net *rxnet =
354 container_of(to_delayed_work(work), 373 container_of(work, struct rxrpc_net, service_conn_reaper);
355 struct rxrpc_net, service_conn_reaper); 374 unsigned long expire_at, earliest, idle_timestamp, now;
356 unsigned long reap_older_than, earliest, idle_timestamp, now;
357 375
358 LIST_HEAD(graveyard); 376 LIST_HEAD(graveyard);
359 377
360 _enter(""); 378 _enter("");
361 379
362 now = jiffies; 380 now = jiffies;
363 reap_older_than = now - rxrpc_connection_expiry * HZ; 381 earliest = now + MAX_JIFFY_OFFSET;
364 earliest = ULONG_MAX;
365 382
366 write_lock(&rxnet->conn_lock); 383 write_lock(&rxnet->conn_lock);
367 list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { 384 list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
@@ -371,15 +388,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
371 if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) 388 if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
372 continue; 389 continue;
373 390
374 idle_timestamp = READ_ONCE(conn->idle_timestamp); 391 if (rxnet->live) {
375 _debug("reap CONN %d { u=%d,t=%ld }", 392 idle_timestamp = READ_ONCE(conn->idle_timestamp);
376 conn->debug_id, atomic_read(&conn->usage), 393 expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
377 (long)reap_older_than - (long)idle_timestamp); 394 if (conn->params.local->service_closed)
378 395 expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
379 if (time_after(idle_timestamp, reap_older_than)) { 396
380 if (time_before(idle_timestamp, earliest)) 397 _debug("reap CONN %d { u=%d,t=%ld }",
381 earliest = idle_timestamp; 398 conn->debug_id, atomic_read(&conn->usage),
382 continue; 399 (long)expire_at - (long)now);
400
401 if (time_before(now, expire_at)) {
402 if (time_before(expire_at, earliest))
403 earliest = expire_at;
404 continue;
405 }
383 } 406 }
384 407
385 /* The usage count sits at 1 whilst the object is unused on the 408 /* The usage count sits at 1 whilst the object is unused on the
@@ -387,6 +410,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
387 */ 410 */
388 if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) 411 if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
389 continue; 412 continue;
413 trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0);
390 414
391 if (rxrpc_conn_is_client(conn)) 415 if (rxrpc_conn_is_client(conn))
392 BUG(); 416 BUG();
@@ -397,11 +421,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
397 } 421 }
398 write_unlock(&rxnet->conn_lock); 422 write_unlock(&rxnet->conn_lock);
399 423
400 if (earliest != ULONG_MAX) { 424 if (earliest != now + MAX_JIFFY_OFFSET) {
401 _debug("reschedule reaper %ld", (long) earliest - now); 425 _debug("reschedule reaper %ld", (long)earliest - (long)now);
402 ASSERT(time_after(earliest, now)); 426 ASSERT(time_after(earliest, now));
403 rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 427 rxrpc_set_service_reap_timer(rxnet, earliest);
404 earliest - now);
405 } 428 }
406 429
407 while (!list_empty(&graveyard)) { 430 while (!list_empty(&graveyard)) {
@@ -429,9 +452,8 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
429 452
430 rxrpc_destroy_all_client_connections(rxnet); 453 rxrpc_destroy_all_client_connections(rxnet);
431 454
432 rxrpc_connection_expiry = 0; 455 del_timer_sync(&rxnet->service_conn_reap_timer);
433 cancel_delayed_work(&rxnet->client_conn_reaper); 456 rxrpc_queue_work(&rxnet->service_conn_reaper);
434 rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0);
435 flush_workqueue(rxrpc_workqueue); 457 flush_workqueue(rxrpc_workqueue);
436 458
437 write_lock(&rxnet->conn_lock); 459 write_lock(&rxnet->conn_lock);
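
The reaper claims an idle connection by atomically swapping its usage count from 1, meaning only the conn list still references it, down to 0; if anything else took a reference in the meantime the exchange fails and the connection is left alone. A small standalone model of that claim step:

    #include <stdio.h>
    #include <stdatomic.h>

    /* Try to claim an object for destruction: succeeds only if we hold the
     * sole remaining reference (usage == 1), as the rxrpc reaper does. */
    static int claim_for_reap(atomic_int *usage)
    {
        int expected = 1;

        return atomic_compare_exchange_strong(usage, &expected, 0);
    }

    int main(void)
    {
        atomic_int idle_conn = 1;   /* unused: only the conn list refs it */
        atomic_int busy_conn = 2;   /* someone else still holds a ref */

        printf("reap idle conn: %d\n", claim_for_reap(&idle_conn)); /* 1 */
        printf("reap busy conn: %d\n", claim_for_reap(&busy_conn)); /* 0 */
        return 0;
    }
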
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1b592073ec96..6fc61400337f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -318,16 +318,18 @@ bad_state:
318static bool rxrpc_receiving_reply(struct rxrpc_call *call) 318static bool rxrpc_receiving_reply(struct rxrpc_call *call)
319{ 319{
320 struct rxrpc_ack_summary summary = { 0 }; 320 struct rxrpc_ack_summary summary = { 0 };
321 unsigned long now, timo;
321 rxrpc_seq_t top = READ_ONCE(call->tx_top); 322 rxrpc_seq_t top = READ_ONCE(call->tx_top);
322 323
323 if (call->ackr_reason) { 324 if (call->ackr_reason) {
324 spin_lock_bh(&call->lock); 325 spin_lock_bh(&call->lock);
325 call->ackr_reason = 0; 326 call->ackr_reason = 0;
326 call->resend_at = call->expire_at;
327 call->ack_at = call->expire_at;
328 spin_unlock_bh(&call->lock); 327 spin_unlock_bh(&call->lock);
329 rxrpc_set_timer(call, rxrpc_timer_init_for_reply, 328 now = jiffies;
330 ktime_get_real()); 329 timo = now + MAX_JIFFY_OFFSET;
330 WRITE_ONCE(call->resend_at, timo);
331 WRITE_ONCE(call->ack_at, timo);
332 trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
331 } 333 }
332 334
333 if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) 335 if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
@@ -437,6 +439,19 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
437 if (state >= RXRPC_CALL_COMPLETE) 439 if (state >= RXRPC_CALL_COMPLETE)
438 return; 440 return;
439 441
442 if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
443 unsigned long timo = READ_ONCE(call->next_req_timo);
444 unsigned long now, expect_req_by;
445
446 if (timo) {
447 now = jiffies;
448 expect_req_by = now + timo;
449 WRITE_ONCE(call->expect_req_by, expect_req_by);
450 rxrpc_reduce_call_timer(call, expect_req_by, now,
451 rxrpc_timer_set_for_idle);
452 }
453 }
454
440 /* Received data implicitly ACKs all of the request packets we sent 455 /* Received data implicitly ACKs all of the request packets we sent
441 * when we're acting as a client. 456 * when we're acting as a client.
442 */ 457 */
@@ -616,6 +631,43 @@ found:
616} 631}
617 632
618/* 633/*
634 * Process the response to a ping that we sent to find out if we lost an ACK.
635 *
636 * If we got back a ping response that indicates a lower tx_top than what we
637 * had at the time of the ping transmission, we adjudge all the DATA packets
638 * sent between the response tx_top and the ping-time tx_top to have been lost.
639 */
640static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call)
641{
642 rxrpc_seq_t top, bottom, seq;
643 bool resend = false;
644
645 spin_lock_bh(&call->lock);
646
647 bottom = call->tx_hard_ack + 1;
648 top = call->acks_lost_top;
649 if (before(bottom, top)) {
650 for (seq = bottom; before_eq(seq, top); seq++) {
651 int ix = seq & RXRPC_RXTX_BUFF_MASK;
652 u8 annotation = call->rxtx_annotations[ix];
653 u8 anno_type = annotation & RXRPC_TX_ANNO_MASK;
654
655 if (anno_type != RXRPC_TX_ANNO_UNACK)
656 continue;
657 annotation &= ~RXRPC_TX_ANNO_MASK;
658 annotation |= RXRPC_TX_ANNO_RETRANS;
659 call->rxtx_annotations[ix] = annotation;
660 resend = true;
661 }
662 }
663
664 spin_unlock_bh(&call->lock);
665
666 if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
667 rxrpc_queue_call(call);
668}
669
670/*
619 * Process a ping response. 671 * Process a ping response.
620 */ 672 */
621static void rxrpc_input_ping_response(struct rxrpc_call *call, 673static void rxrpc_input_ping_response(struct rxrpc_call *call,
@@ -630,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call,
630 smp_rmb(); 682 smp_rmb();
631 ping_serial = call->ping_serial; 683 ping_serial = call->ping_serial;
632 684
685 if (orig_serial == call->acks_lost_ping)
686 rxrpc_input_check_for_lost_ack(call);
687
633 if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || 688 if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
634 before(orig_serial, ping_serial)) 689 before(orig_serial, ping_serial))
635 return; 690 return;
@@ -908,9 +963,20 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
908 struct sk_buff *skb, u16 skew) 963 struct sk_buff *skb, u16 skew)
909{ 964{
910 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 965 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
966 unsigned long timo;
911 967
912 _enter("%p,%p", call, skb); 968 _enter("%p,%p", call, skb);
913 969
970 timo = READ_ONCE(call->next_rx_timo);
971 if (timo) {
972 unsigned long now = jiffies, expect_rx_by;
973
974 expect_rx_by = jiffies + timo;
975 WRITE_ONCE(call->expect_rx_by, expect_rx_by);
976 rxrpc_reduce_call_timer(call, expect_rx_by, now,
977 rxrpc_timer_set_for_normal);
978 }
979
914 switch (sp->hdr.type) { 980 switch (sp->hdr.type) {
915 case RXRPC_PACKET_TYPE_DATA: 981 case RXRPC_PACKET_TYPE_DATA:
916 rxrpc_input_data(call, skb, skew); 982 rxrpc_input_data(call, skb, skew);
@@ -1147,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
1147 goto reupgrade; 1213 goto reupgrade;
1148 conn->service_id = sp->hdr.serviceId; 1214 conn->service_id = sp->hdr.serviceId;
1149 } 1215 }
1150 1216
1151 if (sp->hdr.callNumber == 0) { 1217 if (sp->hdr.callNumber == 0) {
1152 /* Connection-level packet */ 1218 /* Connection-level packet */
1153 _debug("CONN %p {%d}", conn, conn->debug_id); 1219 _debug("CONN %p {%d}", conn, conn->debug_id);
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 1a2d4b112064..c1d9e7fd7448 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -21,33 +21,28 @@
21unsigned int rxrpc_max_backlog __read_mostly = 10; 21unsigned int rxrpc_max_backlog __read_mostly = 10;
22 22
23/* 23/*
24 * Maximum lifetime of a call (in mx).
25 */
26unsigned int rxrpc_max_call_lifetime = 60 * 1000;
27
28/*
29 * How long to wait before scheduling ACK generation after seeing a 24 * How long to wait before scheduling ACK generation after seeing a
30 * packet with RXRPC_REQUEST_ACK set (in ms). 25 * packet with RXRPC_REQUEST_ACK set (in jiffies).
31 */ 26 */
32unsigned int rxrpc_requested_ack_delay = 1; 27unsigned long rxrpc_requested_ack_delay = 1;
33 28
34/* 29/*
35 * How long to wait before scheduling an ACK with subtype DELAY (in ms). 30 * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
36 * 31 *
37 * We use this when we've received new data packets. If those packets aren't 32 * We use this when we've received new data packets. If those packets aren't
38 * all consumed within this time we will send a DELAY ACK if an ACK was not 33 * all consumed within this time we will send a DELAY ACK if an ACK was not
39 * requested to let the sender know it doesn't need to resend. 34 * requested to let the sender know it doesn't need to resend.
40 */ 35 */
41unsigned int rxrpc_soft_ack_delay = 1 * 1000; 36unsigned long rxrpc_soft_ack_delay = HZ;
42 37
43/* 38/*
44 * How long to wait before scheduling an ACK with subtype IDLE (in ms). 39 * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
45 * 40 *
46 * We use this when we've consumed some previously soft-ACK'd packets when 41 * We use this when we've consumed some previously soft-ACK'd packets when
47 * further packets aren't immediately received to decide when to send an IDLE 42 * further packets aren't immediately received to decide when to send an IDLE
48 * ACK let the other end know that it can free up its Tx buffer space. 43 * ACK let the other end know that it can free up its Tx buffer space.
49 */ 44 */
50unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; 45unsigned long rxrpc_idle_ack_delay = HZ / 2;
51 46
52/* 47/*
53 * Receive window size in packets. This indicates the maximum number of 48 * Receive window size in packets. This indicates the maximum number of
@@ -75,7 +70,7 @@ unsigned int rxrpc_rx_jumbo_max = 4;
75/* 70/*
76 * Time till packet resend (in milliseconds). 71 * Time till packet resend (in milliseconds).
77 */ 72 */
78unsigned int rxrpc_resend_timeout = 4 * 1000; 73unsigned long rxrpc_resend_timeout = 4 * HZ;
79 74
80const s8 rxrpc_ack_priority[] = { 75const s8 rxrpc_ack_priority[] = {
81 [0] = 0, 76 [0] = 0,
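These defaults change representation rather than meaning: the tunables are now stored in jiffies so the call timer can use them directly, and the millisecond conversion moves out to the sysctl handlers. A small illustration of how the old millisecond values map onto the new constants:

/* Illustration only: msecs_to_jiffies() rounds up, so a non-zero delay
 * never collapses to zero ticks even at HZ=100. */
static void example_defaults(void)
{
        unsigned long soft_ack  = msecs_to_jiffies(1000);       /* == HZ     */
        unsigned long idle_ack  = msecs_to_jiffies(500);        /* == HZ / 2 */
        unsigned long resend_to = msecs_to_jiffies(4000);       /* == 4 * HZ */

        (void)soft_ack; (void)idle_ack; (void)resend_to;
}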
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 7edceb8522f5..f18c9248e0d4 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -14,6 +14,24 @@
14 14
15unsigned int rxrpc_net_id; 15unsigned int rxrpc_net_id;
16 16
17static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
18{
19 struct rxrpc_net *rxnet =
20 container_of(timer, struct rxrpc_net, client_conn_reap_timer);
21
22 if (rxnet->live)
23 rxrpc_queue_work(&rxnet->client_conn_reaper);
24}
25
26static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
27{
28 struct rxrpc_net *rxnet =
29 container_of(timer, struct rxrpc_net, service_conn_reap_timer);
30
31 if (rxnet->live)
32 rxrpc_queue_work(&rxnet->service_conn_reaper);
33}
34
17/* 35/*
18 * Initialise a per-network namespace record. 36 * Initialise a per-network namespace record.
19 */ 37 */
@@ -22,6 +40,7 @@ static __net_init int rxrpc_init_net(struct net *net)
22 struct rxrpc_net *rxnet = rxrpc_net(net); 40 struct rxrpc_net *rxnet = rxrpc_net(net);
23 int ret; 41 int ret;
24 42
43 rxnet->live = true;
25 get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); 44 get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
26 rxnet->epoch |= RXRPC_RANDOM_EPOCH; 45 rxnet->epoch |= RXRPC_RANDOM_EPOCH;
27 46
@@ -31,8 +50,10 @@ static __net_init int rxrpc_init_net(struct net *net)
31 INIT_LIST_HEAD(&rxnet->conn_proc_list); 50 INIT_LIST_HEAD(&rxnet->conn_proc_list);
32 INIT_LIST_HEAD(&rxnet->service_conns); 51 INIT_LIST_HEAD(&rxnet->service_conns);
33 rwlock_init(&rxnet->conn_lock); 52 rwlock_init(&rxnet->conn_lock);
34 INIT_DELAYED_WORK(&rxnet->service_conn_reaper, 53 INIT_WORK(&rxnet->service_conn_reaper,
35 rxrpc_service_connection_reaper); 54 rxrpc_service_connection_reaper);
55 timer_setup(&rxnet->service_conn_reap_timer,
56 rxrpc_service_conn_reap_timeout, 0);
36 57
37 rxnet->nr_client_conns = 0; 58 rxnet->nr_client_conns = 0;
38 rxnet->nr_active_client_conns = 0; 59 rxnet->nr_active_client_conns = 0;
@@ -42,8 +63,10 @@ static __net_init int rxrpc_init_net(struct net *net)
42 INIT_LIST_HEAD(&rxnet->waiting_client_conns); 63 INIT_LIST_HEAD(&rxnet->waiting_client_conns);
43 INIT_LIST_HEAD(&rxnet->active_client_conns); 64 INIT_LIST_HEAD(&rxnet->active_client_conns);
44 INIT_LIST_HEAD(&rxnet->idle_client_conns); 65 INIT_LIST_HEAD(&rxnet->idle_client_conns);
45 INIT_DELAYED_WORK(&rxnet->client_conn_reaper, 66 INIT_WORK(&rxnet->client_conn_reaper,
46 rxrpc_discard_expired_client_conns); 67 rxrpc_discard_expired_client_conns);
68 timer_setup(&rxnet->client_conn_reap_timer,
69 rxrpc_client_conn_reap_timeout, 0);
47 70
48 INIT_LIST_HEAD(&rxnet->local_endpoints); 71 INIT_LIST_HEAD(&rxnet->local_endpoints);
49 mutex_init(&rxnet->local_mutex); 72 mutex_init(&rxnet->local_mutex);
@@ -60,6 +83,7 @@ static __net_init int rxrpc_init_net(struct net *net)
60 return 0; 83 return 0;
61 84
62err_proc: 85err_proc:
86 rxnet->live = false;
63 return ret; 87 return ret;
64} 88}
65 89
@@ -70,6 +94,7 @@ static __net_exit void rxrpc_exit_net(struct net *net)
70{ 94{
71 struct rxrpc_net *rxnet = rxrpc_net(net); 95 struct rxrpc_net *rxnet = rxrpc_net(net);
72 96
97 rxnet->live = false;
73 rxrpc_destroy_all_calls(rxnet); 98 rxrpc_destroy_all_calls(rxnet);
74 rxrpc_destroy_all_connections(rxnet); 99 rxrpc_destroy_all_connections(rxnet);
75 rxrpc_destroy_all_locals(rxnet); 100 rxrpc_destroy_all_locals(rxnet);
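The two new timers replace the delayed_work items, so the reapers themselves are expected to re-arm them for the next earliest expiry; that re-arm site is outside this diff. A hedged sketch of the idea, with the helper name invented for illustration; the rxnet->live flag set and cleared above is what stops a late-firing timer from queueing work into a namespace that is being torn down:

/* Hypothetical helper: called by the client-conn reaper once it has worked
 * out when the next idle connection will expire (a jiffies value). */
static void example_rearm_client_reaper(struct rxrpc_net *rxnet,
                                        unsigned long next_expiry)
{
        if (rxnet->live)
                timer_reduce(&rxnet->client_conn_reap_timer, next_expiry);
}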
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f47659c7b224..42410e910aff 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -33,6 +33,24 @@ struct rxrpc_abort_buffer {
33}; 33};
34 34
35/* 35/*
36 * Arrange for a keepalive ping a certain time after we last transmitted. This
37 * lets the far side know we're still interested in this call and helps keep
38 * the route through any intervening firewall open.
39 *
40 * Receiving a response to the ping will prevent the ->expect_rx_by timer from
41 * expiring.
42 */
43static void rxrpc_set_keepalive(struct rxrpc_call *call)
44{
45 unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
46
47 keepalive_at += now;
48 WRITE_ONCE(call->keepalive_at, keepalive_at);
49 rxrpc_reduce_call_timer(call, keepalive_at, now,
50 rxrpc_timer_set_for_keepalive);
51}
52
53/*
36 * Fill out an ACK packet. 54 * Fill out an ACK packet.
37 */ 55 */
38static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, 56static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
@@ -95,7 +113,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
95/* 113/*
96 * Send an ACK call packet. 114 * Send an ACK call packet.
97 */ 115 */
98int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) 116int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
117 rxrpc_serial_t *_serial)
99{ 118{
100 struct rxrpc_connection *conn = NULL; 119 struct rxrpc_connection *conn = NULL;
101 struct rxrpc_ack_buffer *pkt; 120 struct rxrpc_ack_buffer *pkt;
@@ -165,6 +184,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
165 ntohl(pkt->ack.firstPacket), 184 ntohl(pkt->ack.firstPacket),
166 ntohl(pkt->ack.serial), 185 ntohl(pkt->ack.serial),
167 pkt->ack.reason, pkt->ack.nAcks); 186 pkt->ack.reason, pkt->ack.nAcks);
187 if (_serial)
188 *_serial = serial;
168 189
169 if (ping) { 190 if (ping) {
170 call->ping_serial = serial; 191 call->ping_serial = serial;
@@ -202,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
202 call->ackr_seen = top; 223 call->ackr_seen = top;
203 spin_unlock_bh(&call->lock); 224 spin_unlock_bh(&call->lock);
204 } 225 }
226
227 rxrpc_set_keepalive(call);
205 } 228 }
206 229
207out: 230out:
@@ -323,7 +346,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
323 * ACKs if a DATA packet appears to have been lost. 346 * ACKs if a DATA packet appears to have been lost.
324 */ 347 */
325 if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && 348 if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
326 (retrans || 349 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
350 retrans ||
327 call->cong_mode == RXRPC_CALL_SLOW_START || 351 call->cong_mode == RXRPC_CALL_SLOW_START ||
328 (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || 352 (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
329 ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), 353 ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
@@ -370,8 +394,23 @@ done:
370 if (whdr.flags & RXRPC_REQUEST_ACK) { 394 if (whdr.flags & RXRPC_REQUEST_ACK) {
371 call->peer->rtt_last_req = now; 395 call->peer->rtt_last_req = now;
372 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); 396 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
397 if (call->peer->rtt_usage > 1) {
398 unsigned long nowj = jiffies, ack_lost_at;
399
400 ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
401 if (ack_lost_at < 1)
402 ack_lost_at = 1;
403
404 ack_lost_at += nowj;
405 WRITE_ONCE(call->ack_lost_at, ack_lost_at);
406 rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
407 rxrpc_timer_set_for_lost_ack);
408 }
373 } 409 }
374 } 410 }
411
412 rxrpc_set_keepalive(call);
413
375 _leave(" = %d [%u]", ret, call->peer->maxdata); 414 _leave(" = %d [%u]", ret, call->peer->maxdata);
376 return ret; 415 return ret;
377 416
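A worked example of the keepalive arithmetic introduced above (the value of next_rx_timo is illustrative, not a default from this patch):

/* With a 2 minute receive timeout, rxrpc_set_keepalive() arms the call
 * timer about 20 seconds after the last transmission, so an idle but
 * still-live call refreshes firewall state long before the peer's
 * expect_rx_by deadline could fire. */
static unsigned long example_keepalive_at(void)
{
        unsigned long next_rx_timo = 120 * HZ;          /* assumed 2 min rx timeout */

        return jiffies + next_rx_timo / 6;              /* keepalive due in ~20 s */
}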
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 8510a98b87e1..cc21e8db25b0 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -144,11 +144,13 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
144 trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); 144 trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top);
145 ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); 145 ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
146 146
147#if 0 // TODO: May want to transmit final ACK under some circumstances anyway
147 if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { 148 if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
148 rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, 149 rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
149 rxrpc_propose_ack_terminal_ack); 150 rxrpc_propose_ack_terminal_ack);
150 rxrpc_send_ack_packet(call, false); 151 rxrpc_send_ack_packet(call, false, NULL);
151 } 152 }
153#endif
152 154
153 write_lock_bh(&call->state_lock); 155 write_lock_bh(&call->state_lock);
154 156
@@ -161,7 +163,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
161 case RXRPC_CALL_SERVER_RECV_REQUEST: 163 case RXRPC_CALL_SERVER_RECV_REQUEST:
162 call->tx_phase = true; 164 call->tx_phase = true;
163 call->state = RXRPC_CALL_SERVER_ACK_REQUEST; 165 call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
164 call->ack_at = call->expire_at; 166 call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
165 write_unlock_bh(&call->state_lock); 167 write_unlock_bh(&call->state_lock);
166 rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, 168 rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true,
167 rxrpc_propose_ack_processing_op); 169 rxrpc_propose_ack_processing_op);
@@ -217,10 +219,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
217 after_eq(top, call->ackr_seen + 2) || 219 after_eq(top, call->ackr_seen + 2) ||
218 (hard_ack == top && after(hard_ack, call->ackr_consumed))) 220 (hard_ack == top && after(hard_ack, call->ackr_consumed)))
219 rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, 221 rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
220 true, false, 222 true, true,
221 rxrpc_propose_ack_rotate_rx); 223 rxrpc_propose_ack_rotate_rx);
222 if (call->ackr_reason) 224 if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
223 rxrpc_send_ack_packet(call, false); 225 rxrpc_send_ack_packet(call, false, NULL);
224 } 226 }
225} 227}
226 228
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 7d2595582c09..09f2a3e05221 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -21,22 +21,6 @@
21#include <net/af_rxrpc.h> 21#include <net/af_rxrpc.h>
22#include "ar-internal.h" 22#include "ar-internal.h"
23 23
24enum rxrpc_command {
25 RXRPC_CMD_SEND_DATA, /* send data message */
26 RXRPC_CMD_SEND_ABORT, /* request abort generation */
27 RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
28 RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
29};
30
31struct rxrpc_send_params {
32 s64 tx_total_len; /* Total Tx data length (if send data) */
33 unsigned long user_call_ID; /* User's call ID */
34 u32 abort_code; /* Abort code to Tx (if abort) */
35 enum rxrpc_command command : 8; /* The command to implement */
36 bool exclusive; /* Shared or exclusive call */
37 bool upgrade; /* If the connection is upgradeable */
38};
39
40/* 24/*
41 * Wait for space to appear in the Tx queue or a signal to occur. 25 * Wait for space to appear in the Tx queue or a signal to occur.
42 */ 26 */
@@ -174,6 +158,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
174 rxrpc_notify_end_tx_t notify_end_tx) 158 rxrpc_notify_end_tx_t notify_end_tx)
175{ 159{
176 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 160 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
161 unsigned long now;
177 rxrpc_seq_t seq = sp->hdr.seq; 162 rxrpc_seq_t seq = sp->hdr.seq;
178 int ret, ix; 163 int ret, ix;
179 u8 annotation = RXRPC_TX_ANNO_UNACK; 164 u8 annotation = RXRPC_TX_ANNO_UNACK;
@@ -213,11 +198,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
213 break; 198 break;
214 case RXRPC_CALL_SERVER_ACK_REQUEST: 199 case RXRPC_CALL_SERVER_ACK_REQUEST:
215 call->state = RXRPC_CALL_SERVER_SEND_REPLY; 200 call->state = RXRPC_CALL_SERVER_SEND_REPLY;
216 call->ack_at = call->expire_at; 201 now = jiffies;
202 WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET);
217 if (call->ackr_reason == RXRPC_ACK_DELAY) 203 if (call->ackr_reason == RXRPC_ACK_DELAY)
218 call->ackr_reason = 0; 204 call->ackr_reason = 0;
219 __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply, 205 trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
220 ktime_get_real());
221 if (!last) 206 if (!last)
222 break; 207 break;
223 /* Fall through */ 208 /* Fall through */
@@ -239,14 +224,19 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
239 _debug("need instant resend %d", ret); 224 _debug("need instant resend %d", ret);
240 rxrpc_instant_resend(call, ix); 225 rxrpc_instant_resend(call, ix);
241 } else { 226 } else {
242 ktime_t now = ktime_get_real(), resend_at; 227 unsigned long now = jiffies, resend_at;
243 228
244 resend_at = ktime_add_ms(now, rxrpc_resend_timeout); 229 if (call->peer->rtt_usage > 1)
245 230 resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2);
246 if (ktime_before(resend_at, call->resend_at)) { 231 else
247 call->resend_at = resend_at; 232 resend_at = rxrpc_resend_timeout;
248 rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); 233 if (resend_at < 1)
249 } 234 resend_at = 1;
235
236 resend_at += now;
237 WRITE_ONCE(call->resend_at, resend_at);
238 rxrpc_reduce_call_timer(call, resend_at, now,
239 rxrpc_timer_set_for_send);
250 } 240 }
251 241
252 rxrpc_free_skb(skb, rxrpc_skb_tx_freed); 242 rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
@@ -295,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
295 do { 285 do {
296 /* Check to see if there's a ping ACK to reply to. */ 286 /* Check to see if there's a ping ACK to reply to. */
297 if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) 287 if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
298 rxrpc_send_ack_packet(call, false); 288 rxrpc_send_ack_packet(call, false, NULL);
299 289
300 if (!skb) { 290 if (!skb) {
301 size_t size, chunk, max, space; 291 size_t size, chunk, max, space;
@@ -480,11 +470,11 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
480 if (msg->msg_flags & MSG_CMSG_COMPAT) { 470 if (msg->msg_flags & MSG_CMSG_COMPAT) {
481 if (len != sizeof(u32)) 471 if (len != sizeof(u32))
482 return -EINVAL; 472 return -EINVAL;
483 p->user_call_ID = *(u32 *)CMSG_DATA(cmsg); 473 p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg);
484 } else { 474 } else {
485 if (len != sizeof(unsigned long)) 475 if (len != sizeof(unsigned long))
486 return -EINVAL; 476 return -EINVAL;
487 p->user_call_ID = *(unsigned long *) 477 p->call.user_call_ID = *(unsigned long *)
488 CMSG_DATA(cmsg); 478 CMSG_DATA(cmsg);
489 } 479 }
490 got_user_ID = true; 480 got_user_ID = true;
@@ -522,11 +512,24 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
522 break; 512 break;
523 513
524 case RXRPC_TX_LENGTH: 514 case RXRPC_TX_LENGTH:
525 if (p->tx_total_len != -1 || len != sizeof(__s64)) 515 if (p->call.tx_total_len != -1 || len != sizeof(__s64))
516 return -EINVAL;
517 p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
518 if (p->call.tx_total_len < 0)
526 return -EINVAL; 519 return -EINVAL;
527 p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg); 520 break;
528 if (p->tx_total_len < 0) 521
522 case RXRPC_SET_CALL_TIMEOUT:
523 if (len & 3 || len < 4 || len > 12)
529 return -EINVAL; 524 return -EINVAL;
525 memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len);
526 p->call.nr_timeouts = len / 4;
527 if (p->call.timeouts.hard > INT_MAX / HZ)
528 return -ERANGE;
529 if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000)
530 return -ERANGE;
531 if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000)
532 return -ERANGE;
530 break; 533 break;
531 534
532 default: 535 default:
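From the validation above, RXRPC_SET_CALL_TIMEOUT carries one to three 32-bit millisecond values which apply, in order, as the hard call timeout, the idle (next request) timeout and the normal (next expected packet) timeout. A hedged userspace sketch of attaching it to a sendmsg() call; the field ordering is inferred from the parsing above, and SOL_RXRPC may need defining locally if the installed headers do not expose it:

#include <string.h>
#include <stdint.h>
#include <sys/socket.h>
#include <linux/rxrpc.h>

/* Set a 30 s hard timeout, 5 s idle timeout and 2 s expected-packet timeout
 * on a call.  Sending 4, 8 or 12 bytes selects how many of the three values
 * apply.  The caller must size ctrl with CMSG_SPACE() and still add the
 * usual RXRPC_USER_CALL_ID control message and data iov before sendmsg(). */
static void example_set_call_timeouts(struct msghdr *msg, void *ctrl, size_t ctrl_len)
{
        uint32_t timeouts[3] = { 30000, 5000, 2000 };   /* hard, idle, normal (ms) */
        struct cmsghdr *cmsg;

        msg->msg_control = ctrl;
        msg->msg_controllen = ctrl_len;
        cmsg = CMSG_FIRSTHDR(msg);
        cmsg->cmsg_level = SOL_RXRPC;
        cmsg->cmsg_type  = RXRPC_SET_CALL_TIMEOUT;
        cmsg->cmsg_len   = CMSG_LEN(sizeof(timeouts));
        memcpy(CMSG_DATA(cmsg), timeouts, sizeof(timeouts));
}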
@@ -536,7 +539,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
536 539
537 if (!got_user_ID) 540 if (!got_user_ID)
538 return -EINVAL; 541 return -EINVAL;
539 if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) 542 if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
540 return -EINVAL; 543 return -EINVAL;
541 _leave(" = 0"); 544 _leave(" = 0");
542 return 0; 545 return 0;
@@ -576,8 +579,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
576 cp.exclusive = rx->exclusive | p->exclusive; 579 cp.exclusive = rx->exclusive | p->exclusive;
577 cp.upgrade = p->upgrade; 580 cp.upgrade = p->upgrade;
578 cp.service_id = srx->srx_service; 581 cp.service_id = srx->srx_service;
579 call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID, 582 call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL);
580 p->tx_total_len, GFP_KERNEL);
581 /* The socket is now unlocked */ 583 /* The socket is now unlocked */
582 584
583 _leave(" = %p\n", call); 585 _leave(" = %p\n", call);
@@ -594,15 +596,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
594{ 596{
595 enum rxrpc_call_state state; 597 enum rxrpc_call_state state;
596 struct rxrpc_call *call; 598 struct rxrpc_call *call;
599 unsigned long now, j;
597 int ret; 600 int ret;
598 601
599 struct rxrpc_send_params p = { 602 struct rxrpc_send_params p = {
600 .tx_total_len = -1, 603 .call.tx_total_len = -1,
601 .user_call_ID = 0, 604 .call.user_call_ID = 0,
602 .abort_code = 0, 605 .call.nr_timeouts = 0,
603 .command = RXRPC_CMD_SEND_DATA, 606 .abort_code = 0,
604 .exclusive = false, 607 .command = RXRPC_CMD_SEND_DATA,
605 .upgrade = true, 608 .exclusive = false,
609 .upgrade = false,
606 }; 610 };
607 611
608 _enter(""); 612 _enter("");
@@ -615,15 +619,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
615 ret = -EINVAL; 619 ret = -EINVAL;
616 if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) 620 if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
617 goto error_release_sock; 621 goto error_release_sock;
618 call = rxrpc_accept_call(rx, p.user_call_ID, NULL); 622 call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL);
619 /* The socket is now unlocked. */ 623 /* The socket is now unlocked. */
620 if (IS_ERR(call)) 624 if (IS_ERR(call))
621 return PTR_ERR(call); 625 return PTR_ERR(call);
622 rxrpc_put_call(call, rxrpc_call_put); 626 ret = 0;
623 return 0; 627 goto out_put_unlock;
624 } 628 }
625 629
626 call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); 630 call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID);
627 if (!call) { 631 if (!call) {
628 ret = -EBADSLT; 632 ret = -EBADSLT;
629 if (p.command != RXRPC_CMD_SEND_DATA) 633 if (p.command != RXRPC_CMD_SEND_DATA)
@@ -653,14 +657,39 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
653 goto error_put; 657 goto error_put;
654 } 658 }
655 659
656 if (p.tx_total_len != -1) { 660 if (p.call.tx_total_len != -1) {
657 ret = -EINVAL; 661 ret = -EINVAL;
658 if (call->tx_total_len != -1 || 662 if (call->tx_total_len != -1 ||
659 call->tx_pending || 663 call->tx_pending ||
660 call->tx_top != 0) 664 call->tx_top != 0)
661 goto error_put; 665 goto error_put;
662 call->tx_total_len = p.tx_total_len; 666 call->tx_total_len = p.call.tx_total_len;
667 }
668 }
669
670 switch (p.call.nr_timeouts) {
671 case 3:
672 j = msecs_to_jiffies(p.call.timeouts.normal);
673 if (p.call.timeouts.normal > 0 && j == 0)
674 j = 1;
675 WRITE_ONCE(call->next_rx_timo, j);
676 /* Fall through */
677 case 2:
678 j = msecs_to_jiffies(p.call.timeouts.idle);
679 if (p.call.timeouts.idle > 0 && j == 0)
680 j = 1;
681 WRITE_ONCE(call->next_req_timo, j);
682 /* Fall through */
683 case 1:
684 if (p.call.timeouts.hard > 0) {
685 j = msecs_to_jiffies(p.call.timeouts.hard);
686 now = jiffies;
687 j += now;
688 WRITE_ONCE(call->expect_term_by, j);
689 rxrpc_reduce_call_timer(call, j, now,
690 rxrpc_timer_set_for_hard);
663 } 691 }
692 break;
664 } 693 }
665 694
666 state = READ_ONCE(call->state); 695 state = READ_ONCE(call->state);
@@ -689,6 +718,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
689 ret = rxrpc_send_data(rx, call, msg, len, NULL); 718 ret = rxrpc_send_data(rx, call, msg, len, NULL);
690 } 719 }
691 720
721out_put_unlock:
692 mutex_unlock(&call->user_mutex); 722 mutex_unlock(&call->user_mutex);
693error_put: 723error_put:
694 rxrpc_put_call(call, rxrpc_call_put); 724 rxrpc_put_call(call, rxrpc_call_put);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 34c706d2f79c..4a7af7aff37d 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -21,6 +21,8 @@ static const unsigned int four = 4;
21static const unsigned int thirtytwo = 32; 21static const unsigned int thirtytwo = 32;
22static const unsigned int n_65535 = 65535; 22static const unsigned int n_65535 = 65535;
23static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; 23static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
24static const unsigned long one_jiffy = 1;
25static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
24 26
25/* 27/*
26 * RxRPC operating parameters. 28 * RxRPC operating parameters.
@@ -29,64 +31,60 @@ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
29 * information on the individual parameters. 31 * information on the individual parameters.
30 */ 32 */
31static struct ctl_table rxrpc_sysctl_table[] = { 33static struct ctl_table rxrpc_sysctl_table[] = {
32 /* Values measured in milliseconds */ 34 /* Values measured in milliseconds but used in jiffies */
33 { 35 {
34 .procname = "req_ack_delay", 36 .procname = "req_ack_delay",
35 .data = &rxrpc_requested_ack_delay, 37 .data = &rxrpc_requested_ack_delay,
36 .maxlen = sizeof(unsigned int), 38 .maxlen = sizeof(unsigned long),
37 .mode = 0644, 39 .mode = 0644,
38 .proc_handler = proc_dointvec, 40 .proc_handler = proc_doulongvec_ms_jiffies_minmax,
39 .extra1 = (void *)&zero, 41 .extra1 = (void *)&one_jiffy,
42 .extra2 = (void *)&max_jiffies,
40 }, 43 },
41 { 44 {
42 .procname = "soft_ack_delay", 45 .procname = "soft_ack_delay",
43 .data = &rxrpc_soft_ack_delay, 46 .data = &rxrpc_soft_ack_delay,
44 .maxlen = sizeof(unsigned int), 47 .maxlen = sizeof(unsigned long),
45 .mode = 0644, 48 .mode = 0644,
46 .proc_handler = proc_dointvec, 49 .proc_handler = proc_doulongvec_ms_jiffies_minmax,
47 .extra1 = (void *)&one, 50 .extra1 = (void *)&one_jiffy,
51 .extra2 = (void *)&max_jiffies,
48 }, 52 },
49 { 53 {
50 .procname = "idle_ack_delay", 54 .procname = "idle_ack_delay",
51 .data = &rxrpc_idle_ack_delay, 55 .data = &rxrpc_idle_ack_delay,
52 .maxlen = sizeof(unsigned int), 56 .maxlen = sizeof(unsigned long),
53 .mode = 0644, 57 .mode = 0644,
54 .proc_handler = proc_dointvec, 58 .proc_handler = proc_doulongvec_ms_jiffies_minmax,
55 .extra1 = (void *)&one, 59 .extra1 = (void *)&one_jiffy,
56 }, 60 .extra2 = (void *)&max_jiffies,
57 {
58 .procname = "resend_timeout",
59 .data = &rxrpc_resend_timeout,
60 .maxlen = sizeof(unsigned int),
61 .mode = 0644,
62 .proc_handler = proc_dointvec,
63 .extra1 = (void *)&one,
64 }, 61 },
65 { 62 {
66 .procname = "idle_conn_expiry", 63 .procname = "idle_conn_expiry",
67 .data = &rxrpc_conn_idle_client_expiry, 64 .data = &rxrpc_conn_idle_client_expiry,
68 .maxlen = sizeof(unsigned int), 65 .maxlen = sizeof(unsigned long),
69 .mode = 0644, 66 .mode = 0644,
70 .proc_handler = proc_dointvec_ms_jiffies, 67 .proc_handler = proc_doulongvec_ms_jiffies_minmax,
71 .extra1 = (void *)&one, 68 .extra1 = (void *)&one_jiffy,
69 .extra2 = (void *)&max_jiffies,
72 }, 70 },
73 { 71 {
74 .procname = "idle_conn_fast_expiry", 72 .procname = "idle_conn_fast_expiry",
75 .data = &rxrpc_conn_idle_client_fast_expiry, 73 .data = &rxrpc_conn_idle_client_fast_expiry,
76 .maxlen = sizeof(unsigned int), 74 .maxlen = sizeof(unsigned long),
77 .mode = 0644, 75 .mode = 0644,
78 .proc_handler = proc_dointvec_ms_jiffies, 76 .proc_handler = proc_doulongvec_ms_jiffies_minmax,
79 .extra1 = (void *)&one, 77 .extra1 = (void *)&one_jiffy,
78 .extra2 = (void *)&max_jiffies,
80 }, 79 },
81
82 /* Values measured in seconds but used in jiffies */
83 { 80 {
84 .procname = "max_call_lifetime", 81 .procname = "resend_timeout",
85 .data = &rxrpc_max_call_lifetime, 82 .data = &rxrpc_resend_timeout,
86 .maxlen = sizeof(unsigned int), 83 .maxlen = sizeof(unsigned long),
87 .mode = 0644, 84 .mode = 0644,
88 .proc_handler = proc_dointvec, 85 .proc_handler = proc_doulongvec_ms_jiffies_minmax,
89 .extra1 = (void *)&one, 86 .extra1 = (void *)&one_jiffy,
87 .extra2 = (void *)&max_jiffies,
90 }, 88 },
91 89
92 /* Non-time values */ 90 /* Non-time values */
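The handler switch means every one of these tunables is still read and written in milliseconds from userspace while the kernel-side variable now holds jiffies, and out-of-range writes are clamped instead of accepted. A sketch of the shape one entry takes after the conversion, using a hypothetical tunable name; one_jiffy and max_jiffies are the bounds declared earlier in this file:

static unsigned long example_delay_jiffies = HZ;        /* hypothetical tunable */

static struct ctl_table example_entry = {
        .procname       = "example_delay",
        .data           = &example_delay_jiffies,       /* stored as jiffies */
        .maxlen         = sizeof(unsigned long),
        .mode           = 0644,
        .proc_handler   = proc_doulongvec_ms_jiffies_minmax,   /* ms on the proc side */
        .extra1         = (void *)&one_jiffy,
        .extra2         = (void *)&max_jiffies,
};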
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e29a48ef7fc3..a0ac42b3ed06 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
159 if (action == TC_ACT_SHOT) 159 if (action == TC_ACT_SHOT)
160 this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; 160 this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
161 161
162 tm->lastuse = lastuse; 162 tm->lastuse = max_t(u64, tm->lastuse, lastuse);
163} 163}
164 164
165static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, 165static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 1e3f10e5da99..6445184b2759 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -22,7 +22,6 @@
22#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
23#include <uapi/linux/tc_act/tc_ife.h> 23#include <uapi/linux/tc_act/tc_ife.h>
24#include <net/tc_act/tc_ife.h> 24#include <net/tc_act/tc_ife.h>
25#include <linux/rtnetlink.h>
26 25
27static int skbmark_encode(struct sk_buff *skb, void *skbdata, 26static int skbmark_encode(struct sk_buff *skb, void *skbdata,
28 struct tcf_meta_info *e) 27 struct tcf_meta_info *e)
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 2ea1f26c9e96..7221437ca3a6 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -22,7 +22,6 @@
22#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
23#include <uapi/linux/tc_act/tc_ife.h> 23#include <uapi/linux/tc_act/tc_ife.h>
24#include <net/tc_act/tc_ife.h> 24#include <net/tc_act/tc_ife.h>
25#include <linux/rtnetlink.h>
26 25
27static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, 26static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
28 struct tcf_meta_info *e) 27 struct tcf_meta_info *e)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8b3e59388480..08b61849c2a2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
239 struct tcf_t *tm = &m->tcf_tm; 239 struct tcf_t *tm = &m->tcf_tm;
240 240
241 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); 241 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
242 tm->lastuse = lastuse; 242 tm->lastuse = max_t(u64, tm->lastuse, lastuse);
243} 243}
244 244
245static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, 245static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 8b5abcd2f32f..9438969290a6 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
96 return ret; 96 return ret;
97} 97}
98 98
99static void tcf_sample_cleanup_rcu(struct rcu_head *rcu) 99static void tcf_sample_cleanup(struct tc_action *a, int bind)
100{ 100{
101 struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu); 101 struct tcf_sample *s = to_sample(a);
102 struct psample_group *psample_group; 102 struct psample_group *psample_group;
103 103
104 psample_group = rcu_dereference_protected(s->psample_group, 1); 104 psample_group = rtnl_dereference(s->psample_group);
105 RCU_INIT_POINTER(s->psample_group, NULL); 105 RCU_INIT_POINTER(s->psample_group, NULL);
106 psample_group_put(psample_group); 106 psample_group_put(psample_group);
107} 107}
108 108
109static void tcf_sample_cleanup(struct tc_action *a, int bind)
110{
111 struct tcf_sample *s = to_sample(a);
112
113 call_rcu(&s->rcu, tcf_sample_cleanup_rcu);
114}
115
116static bool tcf_sample_dev_ok_push(struct net_device *dev) 109static bool tcf_sample_dev_ok_push(struct net_device *dev)
117{ 110{
118 switch (dev->type) { 111 switch (dev->type) {
@@ -264,7 +257,6 @@ static int __init sample_init_module(void)
264 257
265static void __exit sample_cleanup_module(void) 258static void __exit sample_cleanup_module(void)
266{ 259{
267 rcu_barrier();
268 tcf_unregister_action(&act_sample_ops, &sample_net_ops); 260 tcf_unregister_action(&act_sample_ops, &sample_net_ops);
269} 261}
270 262
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 7d97f612c9b9..b9d63d2246e6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,7 +23,6 @@
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/err.h>
27#include <linux/slab.h> 26#include <linux/slab.h>
28#include <net/net_namespace.h> 27#include <net/net_namespace.h>
29#include <net/sock.h> 28#include <net/sock.h>
@@ -336,7 +335,8 @@ static void tcf_block_put_final(struct work_struct *work)
336 struct tcf_chain *chain, *tmp; 335 struct tcf_chain *chain, *tmp;
337 336
338 rtnl_lock(); 337 rtnl_lock();
339 /* Only chain 0 should be still here. */ 338
339 /* At this point, all the chains should have refcnt == 1. */
340 list_for_each_entry_safe(chain, tmp, &block->chain_list, list) 340 list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
341 tcf_chain_put(chain); 341 tcf_chain_put(chain);
342 rtnl_unlock(); 342 rtnl_unlock();
@@ -344,15 +344,23 @@ static void tcf_block_put_final(struct work_struct *work)
344} 344}
345 345
346/* XXX: Standalone actions are not allowed to jump to any chain, and bound 346/* XXX: Standalone actions are not allowed to jump to any chain, and bound
347 * actions should be all removed after flushing. However, filters are now 347 * actions should be all removed after flushing.
348 * destroyed in tc filter workqueue with RTNL lock, they can not race here.
349 */ 348 */
350void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, 349void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
351 struct tcf_block_ext_info *ei) 350 struct tcf_block_ext_info *ei)
352{ 351{
353 struct tcf_chain *chain, *tmp; 352 struct tcf_chain *chain;
354 353
355 list_for_each_entry_safe(chain, tmp, &block->chain_list, list) 354 if (!block)
355 return;
356 /* Hold a refcnt for all chains, except 0, so that they don't disappear
357 * while we are iterating.
358 */
359 list_for_each_entry(chain, &block->chain_list, list)
360 if (chain->index)
361 tcf_chain_hold(chain);
362
363 list_for_each_entry(chain, &block->chain_list, list)
356 tcf_chain_flush(chain); 364 tcf_chain_flush(chain);
357 365
358 tcf_block_offload_unbind(block, q, ei); 366 tcf_block_offload_unbind(block, q, ei);
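Flushing a chain can drop references that bound actions hold on other chains, so iterating the list while flushing could otherwise free an entry out from under the loop; taking a temporary reference on every non-zero-index chain first keeps the list stable until tcf_block_put_final() drops the last references under RTNL. For reference, a sketch of the hold helper relied on here, which is defined elsewhere in cls_api.c and assumed to be a plain RTNL-protected counter increment:

static void tcf_chain_hold(struct tcf_chain *chain)
{
        ++chain->refcnt;        /* balanced by tcf_chain_put() from the put-final work */
}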
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a9f3e317055c..a62586e2dbdb 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
42 struct list_head link; 42 struct list_head link;
43 struct tcf_result res; 43 struct tcf_result res;
44 bool exts_integrated; 44 bool exts_integrated;
45 bool offloaded;
46 u32 gen_flags; 45 u32 gen_flags;
47 struct tcf_exts exts; 46 struct tcf_exts exts;
48 u32 handle; 47 u32 handle;
@@ -148,73 +147,63 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
148} 147}
149 148
150static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, 149static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
151 enum tc_clsbpf_command cmd) 150 struct cls_bpf_prog *oldprog)
152{ 151{
153 bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
154 struct tcf_block *block = tp->chain->block; 152 struct tcf_block *block = tp->chain->block;
155 bool skip_sw = tc_skip_sw(prog->gen_flags);
156 struct tc_cls_bpf_offload cls_bpf = {}; 153 struct tc_cls_bpf_offload cls_bpf = {};
154 struct cls_bpf_prog *obj;
155 bool skip_sw;
157 int err; 156 int err;
158 157
158 skip_sw = prog && tc_skip_sw(prog->gen_flags);
159 obj = prog ?: oldprog;
160
159 tc_cls_common_offload_init(&cls_bpf.common, tp); 161 tc_cls_common_offload_init(&cls_bpf.common, tp);
160 cls_bpf.command = cmd; 162 cls_bpf.command = TC_CLSBPF_OFFLOAD;
161 cls_bpf.exts = &prog->exts; 163 cls_bpf.exts = &obj->exts;
162 cls_bpf.prog = prog->filter; 164 cls_bpf.prog = prog ? prog->filter : NULL;
163 cls_bpf.name = prog->bpf_name; 165 cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
164 cls_bpf.exts_integrated = prog->exts_integrated; 166 cls_bpf.name = obj->bpf_name;
165 cls_bpf.gen_flags = prog->gen_flags; 167 cls_bpf.exts_integrated = obj->exts_integrated;
168 cls_bpf.gen_flags = obj->gen_flags;
166 169
167 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); 170 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
168 if (addorrep) { 171 if (prog) {
169 if (err < 0) { 172 if (err < 0) {
170 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); 173 cls_bpf_offload_cmd(tp, oldprog, prog);
171 return err; 174 return err;
172 } else if (err > 0) { 175 } else if (err > 0) {
173 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; 176 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
174 } 177 }
175 } 178 }
176 179
177 if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) 180 if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
178 return -EINVAL; 181 return -EINVAL;
179 182
180 return 0; 183 return 0;
181} 184}
182 185
186static u32 cls_bpf_flags(u32 flags)
187{
188 return flags & CLS_BPF_SUPPORTED_GEN_FLAGS;
189}
190
183static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, 191static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
184 struct cls_bpf_prog *oldprog) 192 struct cls_bpf_prog *oldprog)
185{ 193{
186 struct cls_bpf_prog *obj = prog; 194 if (prog && oldprog &&
187 enum tc_clsbpf_command cmd; 195 cls_bpf_flags(prog->gen_flags) !=
188 bool skip_sw; 196 cls_bpf_flags(oldprog->gen_flags))
189 int ret; 197 return -EINVAL;
190
191 skip_sw = tc_skip_sw(prog->gen_flags) ||
192 (oldprog && tc_skip_sw(oldprog->gen_flags));
193
194 if (oldprog && oldprog->offloaded) {
195 if (!tc_skip_hw(prog->gen_flags)) {
196 cmd = TC_CLSBPF_REPLACE;
197 } else if (!tc_skip_sw(prog->gen_flags)) {
198 obj = oldprog;
199 cmd = TC_CLSBPF_DESTROY;
200 } else {
201 return -EINVAL;
202 }
203 } else {
204 if (tc_skip_hw(prog->gen_flags))
205 return skip_sw ? -EINVAL : 0;
206 cmd = TC_CLSBPF_ADD;
207 }
208
209 ret = cls_bpf_offload_cmd(tp, obj, cmd);
210 if (ret)
211 return ret;
212 198
213 obj->offloaded = true; 199 if (prog && tc_skip_hw(prog->gen_flags))
214 if (oldprog) 200 prog = NULL;
215 oldprog->offloaded = false; 201 if (oldprog && tc_skip_hw(oldprog->gen_flags))
202 oldprog = NULL;
203 if (!prog && !oldprog)
204 return 0;
216 205
217 return 0; 206 return cls_bpf_offload_cmd(tp, prog, oldprog);
218} 207}
219 208
220static void cls_bpf_stop_offload(struct tcf_proto *tp, 209static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +211,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
222{ 211{
223 int err; 212 int err;
224 213
225 if (!prog->offloaded) 214 err = cls_bpf_offload_cmd(tp, NULL, prog);
226 return; 215 if (err)
227
228 err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
229 if (err) {
230 pr_err("Stopping hardware offload failed: %d\n", err); 216 pr_err("Stopping hardware offload failed: %d\n", err);
231 return;
232 }
233
234 prog->offloaded = false;
235} 217}
236 218
237static void cls_bpf_offload_update_stats(struct tcf_proto *tp, 219static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
238 struct cls_bpf_prog *prog) 220 struct cls_bpf_prog *prog)
239{ 221{
240 if (!prog->offloaded) 222 struct tcf_block *block = tp->chain->block;
241 return; 223 struct tc_cls_bpf_offload cls_bpf = {};
242 224
243 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); 225 tc_cls_common_offload_init(&cls_bpf.common, tp);
226 cls_bpf.command = TC_CLSBPF_STATS;
227 cls_bpf.exts = &prog->exts;
228 cls_bpf.prog = prog->filter;
229 cls_bpf.name = prog->bpf_name;
230 cls_bpf.exts_integrated = prog->exts_integrated;
231 cls_bpf.gen_flags = prog->gen_flags;
232
233 tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
244} 234}
245 235
246static int cls_bpf_init(struct tcf_proto *tp) 236static int cls_bpf_init(struct tcf_proto *tp)
@@ -258,11 +248,8 @@ static int cls_bpf_init(struct tcf_proto *tp)
258 return 0; 248 return 0;
259} 249}
260 250
261static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) 251static void cls_bpf_free_parms(struct cls_bpf_prog *prog)
262{ 252{
263 tcf_exts_destroy(&prog->exts);
264 tcf_exts_put_net(&prog->exts);
265
266 if (cls_bpf_is_ebpf(prog)) 253 if (cls_bpf_is_ebpf(prog))
267 bpf_prog_put(prog->filter); 254 bpf_prog_put(prog->filter);
268 else 255 else
@@ -270,6 +257,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
270 257
271 kfree(prog->bpf_name); 258 kfree(prog->bpf_name);
272 kfree(prog->bpf_ops); 259 kfree(prog->bpf_ops);
260}
261
262static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
263{
264 tcf_exts_destroy(&prog->exts);
265 tcf_exts_put_net(&prog->exts);
266
267 cls_bpf_free_parms(prog);
273 kfree(prog); 268 kfree(prog);
274} 269}
275 270
@@ -514,12 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
514 goto errout_idr; 509 goto errout_idr;
515 510
516 ret = cls_bpf_offload(tp, prog, oldprog); 511 ret = cls_bpf_offload(tp, prog, oldprog);
517 if (ret) { 512 if (ret)
518 if (!oldprog) 513 goto errout_parms;
519 idr_remove_ext(&head->handle_idr, prog->handle);
520 __cls_bpf_delete_prog(prog);
521 return ret;
522 }
523 514
524 if (!tc_in_hw(prog->gen_flags)) 515 if (!tc_in_hw(prog->gen_flags))
525 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; 516 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
@@ -537,6 +528,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
537 *arg = prog; 528 *arg = prog;
538 return 0; 529 return 0;
539 530
531errout_parms:
532 cls_bpf_free_parms(prog);
540errout_idr: 533errout_idr:
541 if (!oldprog) 534 if (!oldprog)
542 idr_remove_ext(&head->handle_idr, prog->handle); 535 idr_remove_ext(&head->handle_idr, prog->handle);
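With the single TC_CLSBPF_OFFLOAD command, the add/replace/destroy distinction is now encoded in which of the two program pointers are set, and drivers are expected to dispatch on that. A hedged sketch of how a driver callback might interpret the pair; the example_hw_*() helpers are placeholders, only the prog/oldprog fields come from this diff:

static int example_setup_clsbpf(struct tc_cls_bpf_offload *cls_bpf)
{
        if (cls_bpf->prog && cls_bpf->oldprog)
                return example_hw_replace(cls_bpf->oldprog, cls_bpf->prog);
        if (cls_bpf->prog)
                return example_hw_add(cls_bpf->prog);
        if (cls_bpf->oldprog)
                return example_hw_remove(cls_bpf->oldprog);
        return 0;       /* both skipped for hardware: nothing to do */
}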
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ac152b4f4247..507859cdd1cb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -45,7 +45,6 @@
45#include <net/netlink.h> 45#include <net/netlink.h>
46#include <net/act_api.h> 46#include <net/act_api.h>
47#include <net/pkt_cls.h> 47#include <net/pkt_cls.h>
48#include <linux/netdevice.h>
49#include <linux/idr.h> 48#include <linux/idr.h>
50 49
51struct tc_u_knode { 50struct tc_u_knode {
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index df3110d69585..07c10bac06a0 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
51 if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) 51 if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
52 return 0; 52 return 0;
53 53
54 return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len); 54 return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len);
55} 55}
56 56
57static struct tcf_ematch_ops em_nbyte_ops = { 57static struct tcf_ematch_ops em_nbyte_ops = {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b6c4f536876b..52529b7f8d96 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
795 tcm->tcm_info = refcount_read(&q->refcnt); 795 tcm->tcm_info = refcount_read(&q->refcnt);
796 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 796 if (nla_put_string(skb, TCA_KIND, q->ops->id))
797 goto nla_put_failure; 797 goto nla_put_failure;
798 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
799 goto nla_put_failure;
798 if (q->ops->dump && q->ops->dump(q, skb) < 0) 800 if (q->ops->dump && q->ops->dump(q, skb) < 0)
799 goto nla_put_failure; 801 goto nla_put_failure;
800 qlen = q->q.qlen; 802 qlen = q->q.qlen;
@@ -1061,17 +1063,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
1061 } 1063 }
1062 1064
1063 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { 1065 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
1064 if (qdisc_is_percpu_stats(sch)) {
1065 sch->cpu_bstats =
1066 netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
1067 if (!sch->cpu_bstats)
1068 goto err_out4;
1069
1070 sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
1071 if (!sch->cpu_qstats)
1072 goto err_out4;
1073 }
1074
1075 if (tca[TCA_STAB]) { 1066 if (tca[TCA_STAB]) {
1076 stab = qdisc_get_stab(tca[TCA_STAB]); 1067 stab = qdisc_get_stab(tca[TCA_STAB]);
1077 if (IS_ERR(stab)) { 1068 if (IS_ERR(stab)) {
@@ -1113,7 +1104,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
1113 ops->destroy(sch); 1104 ops->destroy(sch);
1114err_out3: 1105err_out3:
1115 dev_put(dev); 1106 dev_put(dev);
1116 kfree((char *) sch - sch->padded); 1107 qdisc_free(sch);
1117err_out2: 1108err_out2:
1118 module_put(ops->owner); 1109 module_put(ops->owner);
1119err_out: 1110err_out:
@@ -1121,8 +1112,6 @@ err_out:
1121 return NULL; 1112 return NULL;
1122 1113
1123err_out4: 1114err_out4:
1124 free_percpu(sch->cpu_bstats);
1125 free_percpu(sch->cpu_qstats);
1126 /* 1115 /*
1127 * Any broken qdiscs that would require a ops->reset() here? 1116 * Any broken qdiscs that would require a ops->reset() here?
1128 * The qdisc was never in action so it shouldn't be necessary. 1117 * The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 6361be7881f1..525eb3a6d625 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1158,9 +1158,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1158 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) 1158 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1159 return -EINVAL; 1159 return -EINVAL;
1160 1160
1161 err = tcf_block_get(&q->link.block, &q->link.filter_list, sch);
1162 if (err)
1163 goto put_rtab;
1164
1161 err = qdisc_class_hash_init(&q->clhash); 1165 err = qdisc_class_hash_init(&q->clhash);
1162 if (err < 0) 1166 if (err < 0)
1163 goto put_rtab; 1167 goto put_block;
1164 1168
1165 q->link.sibling = &q->link; 1169 q->link.sibling = &q->link;
1166 q->link.common.classid = sch->handle; 1170 q->link.common.classid = sch->handle;
@@ -1194,6 +1198,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1194 cbq_addprio(q, &q->link); 1198 cbq_addprio(q, &q->link);
1195 return 0; 1199 return 0;
1196 1200
1201put_block:
1202 tcf_block_put(q->link.block);
1203
1197put_rtab: 1204put_rtab:
1198 qdisc_put_rtab(q->link.R_tab); 1205 qdisc_put_rtab(q->link.R_tab);
1199 return err; 1206 return err;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index b30a2c70bd48..531250fceb9e 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -369,6 +369,9 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
369 369
370 ctl = nla_data(tb[TCA_CHOKE_PARMS]); 370 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
371 371
372 if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
373 return -EINVAL;
374
372 if (ctl->limit > CHOKE_MAX_QUEUE) 375 if (ctl->limit > CHOKE_MAX_QUEUE)
373 return -EINVAL; 376 return -EINVAL;
374 377
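The same red_check_params() guard is added to gred, red and sfq below; the helper itself lives in include/net/red.h and is outside this diff. A hedged sketch of the kind of check it is expected to perform, since the thresholds are later shifted left by Wlog and must not overflow 32 bits; the body here is an assumption, not a quotation:

static bool example_red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
{
        if (fls(qth_min) + Wlog > 32)   /* qth_min << Wlog must fit in 32 bits */
                return false;
        if (fls(qth_max) + Wlog > 32)   /* likewise for qth_max */
                return false;
        return true;
}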
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3839cbbdc32b..cac003fddf3e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
26#include <linux/list.h> 26#include <linux/list.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/if_vlan.h> 28#include <linux/if_vlan.h>
29#include <linux/if_macvlan.h>
29#include <net/sch_generic.h> 30#include <net/sch_generic.h>
30#include <net/pkt_sched.h> 31#include <net/pkt_sched.h>
31#include <net/dst.h> 32#include <net/dst.h>
@@ -277,6 +278,8 @@ unsigned long dev_trans_start(struct net_device *dev)
277 278
278 if (is_vlan_dev(dev)) 279 if (is_vlan_dev(dev))
279 dev = vlan_dev_real_dev(dev); 280 dev = vlan_dev_real_dev(dev);
281 else if (netif_is_macvlan(dev))
282 dev = macvlan_dev_real_dev(dev);
280 res = netdev_get_tx_queue(dev, 0)->trans_start; 283 res = netdev_get_tx_queue(dev, 0)->trans_start;
281 for (i = 1; i < dev->num_tx_queues; i++) { 284 for (i = 1; i < dev->num_tx_queues; i++) {
282 val = netdev_get_tx_queue(dev, i)->trans_start; 285 val = netdev_get_tx_queue(dev, i)->trans_start;
@@ -630,6 +633,19 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
630 qdisc_skb_head_init(&sch->q); 633 qdisc_skb_head_init(&sch->q);
631 spin_lock_init(&sch->q.lock); 634 spin_lock_init(&sch->q.lock);
632 635
636 if (ops->static_flags & TCQ_F_CPUSTATS) {
637 sch->cpu_bstats =
638 netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
639 if (!sch->cpu_bstats)
640 goto errout1;
641
642 sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
643 if (!sch->cpu_qstats) {
644 free_percpu(sch->cpu_bstats);
645 goto errout1;
646 }
647 }
648
633 spin_lock_init(&sch->busylock); 649 spin_lock_init(&sch->busylock);
634 lockdep_set_class(&sch->busylock, 650 lockdep_set_class(&sch->busylock,
635 dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); 651 dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
@@ -639,6 +655,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
639 dev->qdisc_running_key ?: &qdisc_running_key); 655 dev->qdisc_running_key ?: &qdisc_running_key);
640 656
641 sch->ops = ops; 657 sch->ops = ops;
658 sch->flags = ops->static_flags;
642 sch->enqueue = ops->enqueue; 659 sch->enqueue = ops->enqueue;
643 sch->dequeue = ops->dequeue; 660 sch->dequeue = ops->dequeue;
644 sch->dev_queue = dev_queue; 661 sch->dev_queue = dev_queue;
@@ -646,6 +663,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
646 refcount_set(&sch->refcnt, 1); 663 refcount_set(&sch->refcnt, 1);
647 664
648 return sch; 665 return sch;
666errout1:
667 kfree(p);
649errout: 668errout:
650 return ERR_PTR(err); 669 return ERR_PTR(err);
651} 670}
@@ -695,7 +714,7 @@ void qdisc_reset(struct Qdisc *qdisc)
695} 714}
696EXPORT_SYMBOL(qdisc_reset); 715EXPORT_SYMBOL(qdisc_reset);
697 716
698static void qdisc_free(struct Qdisc *qdisc) 717void qdisc_free(struct Qdisc *qdisc)
699{ 718{
700 if (qdisc_is_percpu_stats(qdisc)) { 719 if (qdisc_is_percpu_stats(qdisc)) {
701 free_percpu(qdisc->cpu_bstats); 720 free_percpu(qdisc->cpu_bstats);
@@ -1037,6 +1056,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1037 1056
1038 if (!tp_head) { 1057 if (!tp_head) {
1039 RCU_INIT_POINTER(*miniqp->p_miniq, NULL); 1058 RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
1059 /* Wait for flying RCU callback before it is freed. */
1060 rcu_barrier_bh();
1040 return; 1061 return;
1041 } 1062 }
1042 1063
@@ -1052,7 +1073,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1052 rcu_assign_pointer(*miniqp->p_miniq, miniq); 1073 rcu_assign_pointer(*miniqp->p_miniq, miniq);
1053 1074
1054 if (miniq_old) 1075 if (miniq_old)
1055 /* This is counterpart of the rcu barrier above. We need to 1076 /* This is counterpart of the rcu barriers above. We need to
1056 * block potential new user of miniq_old until all readers 1077 * block potential new user of miniq_old until all readers
1057 * are not seeing it. 1078 * are not seeing it.
1058 */ 1079 */
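Moving the per-CPU stats allocation into qdisc_alloc() means a qdisc that wants them simply declares the fact in its ops; qdisc_alloc() allocates cpu_bstats/cpu_qstats and copies static_flags into sch->flags, which is what lets the ingress and clsact init paths below shed their manual TCQ_F_CPUSTATS handling and unwind code. A minimal sketch of the declaration side, with hypothetical init/destroy callbacks:

static struct Qdisc_ops example_qdisc_ops __read_mostly = {
        .id             = "example",            /* hypothetical qdisc */
        .priv_size      = 0,
        .static_flags   = TCQ_F_CPUSTATS,       /* per-CPU stats allocated by qdisc_alloc() */
        .init           = example_init,         /* placeholder callbacks */
        .destroy        = example_destroy,
};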
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 17c7130454bd..bc30f9186ac6 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -356,6 +356,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
356 struct gred_sched *table = qdisc_priv(sch); 356 struct gred_sched *table = qdisc_priv(sch);
357 struct gred_sched_data *q = table->tab[dp]; 357 struct gred_sched_data *q = table->tab[dp];
358 358
359 if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
360 return -EINVAL;
361
359 if (!q) { 362 if (!q) {
360 table->tab[dp] = q = *prealloc; 363 table->tab[dp] = q = *prealloc;
361 *prealloc = NULL; 364 *prealloc = NULL;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5ecc38f35d47..003e1b063447 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -66,7 +66,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
66{ 66{
67 struct ingress_sched_data *q = qdisc_priv(sch); 67 struct ingress_sched_data *q = qdisc_priv(sch);
68 struct net_device *dev = qdisc_dev(sch); 68 struct net_device *dev = qdisc_dev(sch);
69 int err; 69
70 net_inc_ingress_queue();
70 71
71 mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); 72 mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
72 73
@@ -74,14 +75,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
74 q->block_info.chain_head_change = clsact_chain_head_change; 75 q->block_info.chain_head_change = clsact_chain_head_change;
75 q->block_info.chain_head_change_priv = &q->miniqp; 76 q->block_info.chain_head_change_priv = &q->miniqp;
76 77
77 err = tcf_block_get_ext(&q->block, sch, &q->block_info); 78 return tcf_block_get_ext(&q->block, sch, &q->block_info);
78 if (err)
79 return err;
80
81 net_inc_ingress_queue();
82 sch->flags |= TCQ_F_CPUSTATS;
83
84 return 0;
85} 79}
86 80
87static void ingress_destroy(struct Qdisc *sch) 81static void ingress_destroy(struct Qdisc *sch)
@@ -120,6 +114,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
120 .cl_ops = &ingress_class_ops, 114 .cl_ops = &ingress_class_ops,
121 .id = "ingress", 115 .id = "ingress",
122 .priv_size = sizeof(struct ingress_sched_data), 116 .priv_size = sizeof(struct ingress_sched_data),
117 .static_flags = TCQ_F_CPUSTATS,
123 .init = ingress_init, 118 .init = ingress_init,
124 .destroy = ingress_destroy, 119 .destroy = ingress_destroy,
125 .dump = ingress_dump, 120 .dump = ingress_dump,
@@ -172,6 +167,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
172 struct net_device *dev = qdisc_dev(sch); 167 struct net_device *dev = qdisc_dev(sch);
173 int err; 168 int err;
174 169
170 net_inc_ingress_queue();
171 net_inc_egress_queue();
172
175 mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); 173 mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
176 174
177 q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; 175 q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -188,20 +186,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
188 q->egress_block_info.chain_head_change = clsact_chain_head_change; 186 q->egress_block_info.chain_head_change = clsact_chain_head_change;
189 q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; 187 q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
190 188
191 err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); 189 return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
192 if (err)
193 goto err_egress_block_get;
194
195 net_inc_ingress_queue();
196 net_inc_egress_queue();
197
198 sch->flags |= TCQ_F_CPUSTATS;
199
200 return 0;
201
202err_egress_block_get:
203 tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
204 return err;
205} 190}
206 191
207static void clsact_destroy(struct Qdisc *sch) 192static void clsact_destroy(struct Qdisc *sch)
@@ -228,6 +213,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
228 .cl_ops = &clsact_class_ops, 213 .cl_ops = &clsact_class_ops,
229 .id = "clsact", 214 .id = "clsact",
230 .priv_size = sizeof(struct clsact_sched_data), 215 .priv_size = sizeof(struct clsact_sched_data),
216 .static_flags = TCQ_F_CPUSTATS,
231 .init = clsact_init, 217 .init = clsact_init,
232 .destroy = clsact_destroy, 218 .destroy = clsact_destroy,
233 .dump = ingress_dump, 219 .dump = ingress_dump,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7f8ea9e297c3..f0747eb87dc4 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
157 .handle = sch->handle, 157 .handle = sch->handle,
158 .parent = sch->parent, 158 .parent = sch->parent,
159 }; 159 };
160 int err;
160 161
161 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 162 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
162 return -EOPNOTSUPP; 163 return -EOPNOTSUPP;
@@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable)
171 opt.command = TC_RED_DESTROY; 172 opt.command = TC_RED_DESTROY;
172 } 173 }
173 174
174 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); 175 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
176
177 if (!err && enable)
178 sch->flags |= TCQ_F_OFFLOADED;
179 else
180 sch->flags &= ~TCQ_F_OFFLOADED;
181
182 return err;
175} 183}
176 184
177static void red_destroy(struct Qdisc *sch) 185static void red_destroy(struct Qdisc *sch)
@@ -212,6 +220,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
212 max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; 220 max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
213 221
214 ctl = nla_data(tb[TCA_RED_PARMS]); 222 ctl = nla_data(tb[TCA_RED_PARMS]);
223 if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
224 return -EINVAL;
215 225
216 if (ctl->limit > 0) { 226 if (ctl->limit > 0) {
217 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); 227 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
@@ -272,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
272 return red_change(sch, opt); 282 return red_change(sch, opt);
273} 283}
274 284
275static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) 285static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
276{ 286{
277 struct net_device *dev = qdisc_dev(sch); 287 struct net_device *dev = qdisc_dev(sch);
278 struct tc_red_qopt_offload hw_stats = { 288 struct tc_red_qopt_offload hw_stats = {
@@ -284,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
284 .stats.qstats = &sch->qstats, 294 .stats.qstats = &sch->qstats,
285 }, 295 },
286 }; 296 };
287 int err;
288 297
289 opt->flags &= ~TC_RED_OFFLOADED; 298 if (!(sch->flags & TCQ_F_OFFLOADED))
290 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
291 return 0;
292
293 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
294 &hw_stats);
295 if (err == -EOPNOTSUPP)
296 return 0; 299 return 0;
297 300
298 if (!err) 301 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
299 opt->flags |= TC_RED_OFFLOADED; 302 &hw_stats);
300
301 return err;
302} 303}
303 304
304static int red_dump(struct Qdisc *sch, struct sk_buff *skb) 305static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -317,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
317 int err; 318 int err;
318 319
319 sch->qstats.backlog = q->qdisc->qstats.backlog; 320 sch->qstats.backlog = q->qdisc->qstats.backlog;
320 err = red_dump_offload(sch, &opt); 321 err = red_dump_offload_stats(sch, &opt);
321 if (err) 322 if (err)
322 goto nla_put_failure; 323 goto nla_put_failure;
323 324
@@ -345,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
345 .marked = q->stats.prob_mark + q->stats.forced_mark, 346 .marked = q->stats.prob_mark + q->stats.forced_mark,
346 }; 347 };
347 348
348 if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) { 349 if (sch->flags & TCQ_F_OFFLOADED) {
349 struct red_stats hw_stats = {0}; 350 struct red_stats hw_stats = {0};
350 struct tc_red_qopt_offload hw_stats_request = { 351 struct tc_red_qopt_offload hw_stats_request = {
351 .command = TC_RED_XSTATS, 352 .command = TC_RED_XSTATS,
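The sch_red changes above cache the offload state in TCQ_F_OFFLOADED when ndo_setup_tc() succeeds, so the dump and stats paths only talk to the driver when an offload is actually active. A hedged sketch of that pattern with illustrative names (TOY_* and the callback parameters are not the real API):

#include <stdbool.h>

#define TOY_F_OFFLOADED	0x1

struct toy_sch {
	unsigned int flags;
};

static int toy_offload(struct toy_sch *sch, bool enable,
		       int (*setup)(bool enable))
{
	int err = setup(enable);		/* stand-in for ndo_setup_tc() */

	if (!err && enable)
		sch->flags |= TOY_F_OFFLOADED;	/* hardware accepted this configuration */
	else
		sch->flags &= ~TOY_F_OFFLOADED;	/* destroy, or enable that failed */
	return err;
}

static int toy_dump_offload_stats(struct toy_sch *sch, int (*query_hw)(void))
{
	if (!(sch->flags & TOY_F_OFFLOADED))	/* nothing to ask the driver for */
		return 0;
	return query_hw();
}

The flag records whether this particular qdisc's configuration was accepted, which a tc_can_offload() capability check alone cannot tell.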
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 890f4a4564e7..930e5bd26d3d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -639,6 +639,9 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
639 if (ctl->divisor && 639 if (ctl->divisor &&
640 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) 640 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
641 return -EINVAL; 641 return -EINVAL;
642 if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
643 ctl_v1->Wlog))
644 return -EINVAL;
642 if (ctl_v1 && ctl_v1->qth_min) { 645 if (ctl_v1 && ctl_v1->qth_min) {
643 p = kmalloc(sizeof(*p), GFP_KERNEL); 646 p = kmalloc(sizeof(*p), GFP_KERNEL);
644 if (!p) 647 if (!p)
@@ -724,6 +727,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
724 int i; 727 int i;
725 int err; 728 int err;
726 729
730 q->sch = sch;
727 timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); 731 timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
728 732
729 err = tcf_block_get(&q->block, &q->filter_list, sch); 733 err = tcf_block_get(&q->block, &q->filter_list, sch);
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7b261afc47b9..7f8baa48e7c2 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
53 msg->send_failed = 0; 53 msg->send_failed = 0;
54 msg->send_error = 0; 54 msg->send_error = 0;
55 msg->can_delay = 1; 55 msg->can_delay = 1;
56 msg->abandoned = 0;
56 msg->expires_at = 0; 57 msg->expires_at = 0;
57 INIT_LIST_HEAD(&msg->chunks); 58 INIT_LIST_HEAD(&msg->chunks);
58} 59}
@@ -304,6 +305,13 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
304 if (!chunk->asoc->peer.prsctp_capable) 305 if (!chunk->asoc->peer.prsctp_capable)
305 return 0; 306 return 0;
306 307
308 if (chunk->msg->abandoned)
309 return 1;
310
311 if (!chunk->has_tsn &&
312 !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG))
313 return 0;
314
307 if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && 315 if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
308 time_after(jiffies, chunk->msg->expires_at)) { 316 time_after(jiffies, chunk->msg->expires_at)) {
309 struct sctp_stream_out *streamout = 317 struct sctp_stream_out *streamout =
@@ -316,6 +324,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
316 chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; 324 chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
317 streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; 325 streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
318 } 326 }
327 chunk->msg->abandoned = 1;
319 return 1; 328 return 1;
320 } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && 329 } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
321 chunk->sent_count > chunk->sinfo.sinfo_timetolive) { 330 chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
@@ -324,10 +333,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
324 333
325 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 334 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
326 streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 335 streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
336 chunk->msg->abandoned = 1;
327 return 1; 337 return 1;
328 } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && 338 } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
329 chunk->msg->expires_at && 339 chunk->msg->expires_at &&
330 time_after(jiffies, chunk->msg->expires_at)) { 340 time_after(jiffies, chunk->msg->expires_at)) {
341 chunk->msg->abandoned = 1;
331 return 1; 342 return 1;
332 } 343 }
333 /* PRIO policy is processed by sendmsg, not here */ 344 /* PRIO policy is processed by sendmsg, not here */
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a..291c97b07058 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
78 case SCTP_CID_AUTH: 78 case SCTP_CID_AUTH:
79 return "AUTH"; 79 return "AUTH";
80 80
81 case SCTP_CID_RECONF:
82 return "RECONF";
83
81 default: 84 default:
82 break; 85 break;
83 } 86 }
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 621b5ca3fd1c..141c9c466ec1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
399 return; 399 return;
400 } 400 }
401 401
402 if (t->param_flags & SPP_PMTUD_ENABLE) { 402 if (!(t->param_flags & SPP_PMTUD_ENABLE))
403 /* Update transports view of the MTU */ 403 /* We can't allow retransmitting in such case, as the
404 sctp_transport_update_pmtu(t, pmtu); 404 * retransmission would be sized just as before, and thus we
405 405 * would get another icmp, and retransmit again.
406 /* Update association pmtu. */ 406 */
407 sctp_assoc_sync_pmtu(asoc); 407 return;
408 }
409 408
410 /* Retransmit with the new pmtu setting. 409 /* Update transports view of the MTU. Return if no update was needed.
411 * Normally, if PMTU discovery is disabled, an ICMP Fragmentation 410 * If an update wasn't needed/possible, it also doesn't make sense to
412 * Needed will never be sent, but if a message was sent before 411 * try to retransmit now.
413 * PMTU discovery was disabled that was larger than the PMTU, it
414 * would not be fragmented, so it must be re-transmitted fragmented.
415 */ 412 */
413 if (!sctp_transport_update_pmtu(t, pmtu))
414 return;
415
416 /* Update association pmtu. */
417 sctp_assoc_sync_pmtu(asoc);
418
419 /* Retransmit with the new pmtu setting. */
416 sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); 420 sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
417} 421}
418 422
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 3b18085e3b10..5d4c15bf66d2 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -826,6 +826,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
826 case AF_INET: 826 case AF_INET:
827 if (!__ipv6_only_sock(sctp_opt2sk(sp))) 827 if (!__ipv6_only_sock(sctp_opt2sk(sp)))
828 return 1; 828 return 1;
829 /* fallthru */
829 default: 830 default:
830 return 0; 831 return 0;
831 } 832 }
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 275925b93b29..35bc7106d182 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
45 struct sk_buff *segs = ERR_PTR(-EINVAL); 45 struct sk_buff *segs = ERR_PTR(-EINVAL);
46 struct sctphdr *sh; 46 struct sctphdr *sh;
47 47
48 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
49 goto out;
50
48 sh = sctp_hdr(skb); 51 sh = sctp_hdr(skb);
49 if (!pskb_may_pull(skb, sizeof(*sh))) 52 if (!pskb_may_pull(skb, sizeof(*sh)))
50 goto out; 53 goto out;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4db012aa25f7..c4ec99b20150 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
364 list_for_each_entry_safe(chk, temp, queue, transmitted_list) { 364 list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
365 struct sctp_stream_out *streamout; 365 struct sctp_stream_out *streamout;
366 366
367 if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || 367 if (!chk->msg->abandoned &&
368 chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) 368 (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
369 chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
369 continue; 370 continue;
370 371
372 chk->msg->abandoned = 1;
371 list_del_init(&chk->transmitted_list); 373 list_del_init(&chk->transmitted_list);
372 sctp_insert_list(&asoc->outqueue.abandoned, 374 sctp_insert_list(&asoc->outqueue.abandoned,
373 &chk->transmitted_list); 375 &chk->transmitted_list);
@@ -377,7 +379,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
377 asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; 379 asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
378 streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; 380 streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
379 381
380 if (!chk->tsn_gap_acked) { 382 if (queue != &asoc->outqueue.retransmit &&
383 !chk->tsn_gap_acked) {
381 if (chk->transport) 384 if (chk->transport)
382 chk->transport->flight_size -= 385 chk->transport->flight_size -=
383 sctp_data_size(chk); 386 sctp_data_size(chk);
@@ -403,10 +406,13 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
403 q->sched->unsched_all(&asoc->stream); 406 q->sched->unsched_all(&asoc->stream);
404 407
405 list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { 408 list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
406 if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || 409 if (!chk->msg->abandoned &&
407 chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) 410 (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) ||
411 !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
412 chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
408 continue; 413 continue;
409 414
415 chk->msg->abandoned = 1;
410 sctp_sched_dequeue_common(q, chk); 416 sctp_sched_dequeue_common(q, chk);
411 asoc->sent_cnt_removable--; 417 asoc->sent_cnt_removable--;
412 asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; 418 asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
@@ -912,9 +918,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
912 break; 918 break;
913 919
914 case SCTP_CID_ABORT: 920 case SCTP_CID_ABORT:
915 if (sctp_test_T_bit(chunk)) { 921 if (sctp_test_T_bit(chunk))
916 packet->vtag = asoc->c.my_vtag; 922 packet->vtag = asoc->c.my_vtag;
917 } 923 /* fallthru */
918 /* The following chunks are "response" chunks, i.e. 924 /* The following chunks are "response" chunks, i.e.
919 * they are generated in response to something we 925 * they are generated in response to something we
920 * received. If we are sending these, then we can 926 * received. If we are sending these, then we can
@@ -1434,7 +1440,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1434 /* If this chunk has not been acked, stop 1440 /* If this chunk has not been acked, stop
1435 * considering it as 'outstanding'. 1441 * considering it as 'outstanding'.
1436 */ 1442 */
1437 if (!tchunk->tsn_gap_acked) { 1443 if (transmitted_queue != &q->retransmit &&
1444 !tchunk->tsn_gap_acked) {
1438 if (tchunk->transport) 1445 if (tchunk->transport)
1439 tchunk->transport->flight_size -= 1446 tchunk->transport->flight_size -=
1440 sctp_data_size(tchunk); 1447 sctp_data_size(tchunk);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f5172c21349b..6a38c2503649 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1499,6 +1499,7 @@ static __init int sctp_init(void)
1499 INIT_LIST_HEAD(&sctp_address_families); 1499 INIT_LIST_HEAD(&sctp_address_families);
1500 sctp_v4_pf_init(); 1500 sctp_v4_pf_init();
1501 sctp_v6_pf_init(); 1501 sctp_v6_pf_init();
1502 sctp_sched_ops_init();
1502 1503
1503 status = register_pernet_subsys(&sctp_defaults_ops); 1504 status = register_pernet_subsys(&sctp_defaults_ops);
1504 if (status) 1505 if (status)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3204a9b29407..737e551fbf67 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -85,7 +85,7 @@
85static int sctp_writeable(struct sock *sk); 85static int sctp_writeable(struct sock *sk);
86static void sctp_wfree(struct sk_buff *skb); 86static void sctp_wfree(struct sk_buff *skb);
87static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, 87static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
88 size_t msg_len, struct sock **orig_sk); 88 size_t msg_len);
89static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); 89static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
90static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); 90static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
91static int sctp_wait_for_accept(struct sock *sk, long timeo); 91static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -188,13 +188,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
188 list_for_each_entry(chunk, &t->transmitted, transmitted_list) 188 list_for_each_entry(chunk, &t->transmitted, transmitted_list)
189 cb(chunk); 189 cb(chunk);
190 190
191 list_for_each_entry(chunk, &q->retransmit, list) 191 list_for_each_entry(chunk, &q->retransmit, transmitted_list)
192 cb(chunk); 192 cb(chunk);
193 193
194 list_for_each_entry(chunk, &q->sacked, list) 194 list_for_each_entry(chunk, &q->sacked, transmitted_list)
195 cb(chunk); 195 cb(chunk);
196 196
197 list_for_each_entry(chunk, &q->abandoned, list) 197 list_for_each_entry(chunk, &q->abandoned, transmitted_list)
198 cb(chunk); 198 cb(chunk);
199 199
200 list_for_each_entry(chunk, &q->out_chunk_list, list) 200 list_for_each_entry(chunk, &q->out_chunk_list, list)
@@ -335,16 +335,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
335 if (len < sizeof (struct sockaddr)) 335 if (len < sizeof (struct sockaddr))
336 return NULL; 336 return NULL;
337 337
338 if (!opt->pf->af_supported(addr->sa.sa_family, opt))
339 return NULL;
340
338 /* V4 mapped address are really of AF_INET family */ 341 /* V4 mapped address are really of AF_INET family */
339 if (addr->sa.sa_family == AF_INET6 && 342 if (addr->sa.sa_family == AF_INET6 &&
340 ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { 343 ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
341 if (!opt->pf->af_supported(AF_INET, opt)) 344 !opt->pf->af_supported(AF_INET, opt))
342 return NULL; 345 return NULL;
343 } else {
344 /* Does this PF support this AF? */
345 if (!opt->pf->af_supported(addr->sa.sa_family, opt))
346 return NULL;
347 }
348 346
349 /* If we get this far, af is valid. */ 347 /* If we get this far, af is valid. */
350 af = sctp_get_af_specific(addr->sa.sa_family); 348 af = sctp_get_af_specific(addr->sa.sa_family);
@@ -970,13 +968,6 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
970 * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() 968 * This is used for tunneling the sctp_bindx() request through sctp_setsockopt()
971 * from userspace. 969 * from userspace.
972 * 970 *
973 * We don't use copy_from_user() for optimization: we first do the
974 * sanity checks (buffer size -fast- and access check-healthy
975 * pointer); if all of those succeed, then we can alloc the memory
976 * (expensive operation) needed to copy the data to kernel. Then we do
977 * the copying without checking the user space area
978 * (__copy_from_user()).
979 *
980 * On exit there is no need to do sockfd_put(), sys_setsockopt() does 971 * On exit there is no need to do sockfd_put(), sys_setsockopt() does
981 * it. 972 * it.
982 * 973 *
@@ -1006,25 +997,15 @@ static int sctp_setsockopt_bindx(struct sock *sk,
1006 if (unlikely(addrs_size <= 0)) 997 if (unlikely(addrs_size <= 0))
1007 return -EINVAL; 998 return -EINVAL;
1008 999
1009 /* Check the user passed a healthy pointer. */ 1000 kaddrs = vmemdup_user(addrs, addrs_size);
1010 if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size))) 1001 if (unlikely(IS_ERR(kaddrs)))
1011 return -EFAULT; 1002 return PTR_ERR(kaddrs);
1012
1013 /* Alloc space for the address array in kernel memory. */
1014 kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
1015 if (unlikely(!kaddrs))
1016 return -ENOMEM;
1017
1018 if (__copy_from_user(kaddrs, addrs, addrs_size)) {
1019 kfree(kaddrs);
1020 return -EFAULT;
1021 }
1022 1003
1023 /* Walk through the addrs buffer and count the number of addresses. */ 1004 /* Walk through the addrs buffer and count the number of addresses. */
1024 addr_buf = kaddrs; 1005 addr_buf = kaddrs;
1025 while (walk_size < addrs_size) { 1006 while (walk_size < addrs_size) {
1026 if (walk_size + sizeof(sa_family_t) > addrs_size) { 1007 if (walk_size + sizeof(sa_family_t) > addrs_size) {
1027 kfree(kaddrs); 1008 kvfree(kaddrs);
1028 return -EINVAL; 1009 return -EINVAL;
1029 } 1010 }
1030 1011
@@ -1035,7 +1016,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
1035 * causes the address buffer to overflow return EINVAL. 1016 * causes the address buffer to overflow return EINVAL.
1036 */ 1017 */
1037 if (!af || (walk_size + af->sockaddr_len) > addrs_size) { 1018 if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
1038 kfree(kaddrs); 1019 kvfree(kaddrs);
1039 return -EINVAL; 1020 return -EINVAL;
1040 } 1021 }
1041 addrcnt++; 1022 addrcnt++;
@@ -1065,7 +1046,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
1065 } 1046 }
1066 1047
1067out: 1048out:
1068 kfree(kaddrs); 1049 kvfree(kaddrs);
1069 1050
1070 return err; 1051 return err;
1071} 1052}
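The bindx conversion above (and the connectx one below) replaces the open-coded access_ok()/kmalloc()/__copy_from_user() sequence with vmemdup_user(), which does the bounds-checked copy in one call and falls back to vmalloc for large buffers, paired with kvfree(). A minimal sketch of that pattern, assuming a kernel that provides vmemdup_user(); the handler name and the address-walk placeholder are illustrative:

#include <linux/socket.h>	/* struct sockaddr */
#include <linux/string.h>	/* vmemdup_user() */
#include <linux/mm.h>		/* kvfree() */
#include <linux/err.h>

static int toy_copy_addrs(struct sockaddr __user *addrs, int addrs_size)
{
	struct sockaddr *kaddrs;
	int err = 0;

	if (addrs_size <= 0)
		return -EINVAL;

	/* one call does the allocation and the checked copy from userspace */
	kaddrs = vmemdup_user(addrs, addrs_size);
	if (IS_ERR(kaddrs))
		return PTR_ERR(kaddrs);

	/* ... walk and validate kaddrs here ... */

	kvfree(kaddrs);		/* correct for both kmalloc and vmalloc backing */
	return err;
}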
@@ -1323,13 +1304,6 @@ out_free:
1323 * land and invoking either sctp_connectx(). This is used for tunneling 1304 * land and invoking either sctp_connectx(). This is used for tunneling
1324 * the sctp_connectx() request through sctp_setsockopt() from userspace. 1305 * the sctp_connectx() request through sctp_setsockopt() from userspace.
1325 * 1306 *
1326 * We don't use copy_from_user() for optimization: we first do the
1327 * sanity checks (buffer size -fast- and access check-healthy
1328 * pointer); if all of those succeed, then we can alloc the memory
1329 * (expensive operation) needed to copy the data to kernel. Then we do
1330 * the copying without checking the user space area
1331 * (__copy_from_user()).
1332 *
1333 * On exit there is no need to do sockfd_put(), sys_setsockopt() does 1307 * On exit there is no need to do sockfd_put(), sys_setsockopt() does
1334 * it. 1308 * it.
1335 * 1309 *
@@ -1345,7 +1319,6 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
1345 sctp_assoc_t *assoc_id) 1319 sctp_assoc_t *assoc_id)
1346{ 1320{
1347 struct sockaddr *kaddrs; 1321 struct sockaddr *kaddrs;
1348 gfp_t gfp = GFP_KERNEL;
1349 int err = 0; 1322 int err = 0;
1350 1323
1351 pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", 1324 pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
@@ -1354,24 +1327,12 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
1354 if (unlikely(addrs_size <= 0)) 1327 if (unlikely(addrs_size <= 0))
1355 return -EINVAL; 1328 return -EINVAL;
1356 1329
1357 /* Check the user passed a healthy pointer. */ 1330 kaddrs = vmemdup_user(addrs, addrs_size);
1358 if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size))) 1331 if (unlikely(IS_ERR(kaddrs)))
1359 return -EFAULT; 1332 return PTR_ERR(kaddrs);
1360
1361 /* Alloc space for the address array in kernel memory. */
1362 if (sk->sk_socket->file)
1363 gfp = GFP_USER | __GFP_NOWARN;
1364 kaddrs = kmalloc(addrs_size, gfp);
1365 if (unlikely(!kaddrs))
1366 return -ENOMEM;
1367
1368 if (__copy_from_user(kaddrs, addrs, addrs_size)) {
1369 err = -EFAULT;
1370 } else {
1371 err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
1372 }
1373 1333
1374 kfree(kaddrs); 1334 err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
1335 kvfree(kaddrs);
1375 1336
1376 return err; 1337 return err;
1377} 1338}
@@ -1883,8 +1844,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
1883 */ 1844 */
1884 if (sinit) { 1845 if (sinit) {
1885 if (sinit->sinit_num_ostreams) { 1846 if (sinit->sinit_num_ostreams) {
1886 asoc->c.sinit_num_ostreams = 1847 __u16 outcnt = sinit->sinit_num_ostreams;
1887 sinit->sinit_num_ostreams; 1848
1849 asoc->c.sinit_num_ostreams = outcnt;
1850 /* outcnt has been changed, so re-init stream */
1851 err = sctp_stream_init(&asoc->stream, outcnt, 0,
1852 GFP_KERNEL);
1853 if (err)
1854 goto out_free;
1888 } 1855 }
1889 if (sinit->sinit_max_instreams) { 1856 if (sinit->sinit_max_instreams) {
1890 asoc->c.sinit_max_instreams = 1857 asoc->c.sinit_max_instreams =
@@ -1971,7 +1938,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
1971 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1938 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1972 if (!sctp_wspace(asoc)) { 1939 if (!sctp_wspace(asoc)) {
1973 /* sk can be changed by peel off when waiting for buf. */ 1940 /* sk can be changed by peel off when waiting for buf. */
1974 err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); 1941 err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
1975 if (err) { 1942 if (err) {
1976 if (err == -ESRCH) { 1943 if (err == -ESRCH) {
1977 /* asoc is already dead. */ 1944 /* asoc is already dead. */
@@ -2277,7 +2244,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
2277 2244
2278 if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { 2245 if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
2279 event = sctp_ulpevent_make_sender_dry_event(asoc, 2246 event = sctp_ulpevent_make_sender_dry_event(asoc,
2280 GFP_ATOMIC); 2247 GFP_USER | __GFP_NOWARN);
2281 if (!event) 2248 if (!event)
2282 return -ENOMEM; 2249 return -ENOMEM;
2283 2250
@@ -3498,6 +3465,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
3498 3465
3499 if (optlen < sizeof(struct sctp_hmacalgo)) 3466 if (optlen < sizeof(struct sctp_hmacalgo))
3500 return -EINVAL; 3467 return -EINVAL;
3468 optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
3469 SCTP_AUTH_NUM_HMACS * sizeof(u16));
3501 3470
3502 hmacs = memdup_user(optval, optlen); 3471 hmacs = memdup_user(optval, optlen);
3503 if (IS_ERR(hmacs)) 3472 if (IS_ERR(hmacs))
@@ -3536,6 +3505,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
3536 3505
3537 if (optlen <= sizeof(struct sctp_authkey)) 3506 if (optlen <= sizeof(struct sctp_authkey))
3538 return -EINVAL; 3507 return -EINVAL;
3508 /* authkey->sca_keylength is u16, so optlen can't be bigger than
3509 * this.
3510 */
3511 optlen = min_t(unsigned int, optlen, USHRT_MAX +
3512 sizeof(struct sctp_authkey));
3539 3513
3540 authkey = memdup_user(optval, optlen); 3514 authkey = memdup_user(optval, optlen);
3541 if (IS_ERR(authkey)) 3515 if (IS_ERR(authkey))
@@ -3891,13 +3865,20 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
3891 struct sctp_association *asoc; 3865 struct sctp_association *asoc;
3892 int retval = -EINVAL; 3866 int retval = -EINVAL;
3893 3867
3894 if (optlen < sizeof(struct sctp_reset_streams)) 3868 if (optlen < sizeof(*params))
3895 return -EINVAL; 3869 return -EINVAL;
3870 /* srs_number_streams is u16, so optlen can't be bigger than this. */
3871 optlen = min_t(unsigned int, optlen, USHRT_MAX +
3872 sizeof(__u16) * sizeof(*params));
3896 3873
3897 params = memdup_user(optval, optlen); 3874 params = memdup_user(optval, optlen);
3898 if (IS_ERR(params)) 3875 if (IS_ERR(params))
3899 return PTR_ERR(params); 3876 return PTR_ERR(params);
3900 3877
3878 if (params->srs_number_streams * sizeof(__u16) >
3879 optlen - sizeof(*params))
3880 goto out;
3881
3901 asoc = sctp_id2assoc(sk, params->srs_assoc_id); 3882 asoc = sctp_id2assoc(sk, params->srs_assoc_id);
3902 if (!asoc) 3883 if (!asoc)
3903 goto out; 3884 goto out;
@@ -4494,7 +4475,7 @@ static int sctp_init_sock(struct sock *sk)
4494 SCTP_DBG_OBJCNT_INC(sock); 4475 SCTP_DBG_OBJCNT_INC(sock);
4495 4476
4496 local_bh_disable(); 4477 local_bh_disable();
4497 percpu_counter_inc(&sctp_sockets_allocated); 4478 sk_sockets_allocated_inc(sk);
4498 sock_prot_inuse_add(net, sk->sk_prot, 1); 4479 sock_prot_inuse_add(net, sk->sk_prot, 1);
4499 4480
4500 /* Nothing can fail after this block, otherwise 4481 /* Nothing can fail after this block, otherwise
@@ -4538,7 +4519,7 @@ static void sctp_destroy_sock(struct sock *sk)
4538 } 4519 }
4539 sctp_endpoint_free(sp->ep); 4520 sctp_endpoint_free(sp->ep);
4540 local_bh_disable(); 4521 local_bh_disable();
4541 percpu_counter_dec(&sctp_sockets_allocated); 4522 sk_sockets_allocated_dec(sk);
4542 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 4523 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
4543 local_bh_enable(); 4524 local_bh_enable();
4544} 4525}
@@ -5011,7 +4992,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
5011 len = sizeof(int); 4992 len = sizeof(int);
5012 if (put_user(len, optlen)) 4993 if (put_user(len, optlen))
5013 return -EFAULT; 4994 return -EFAULT;
5014 if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int))) 4995 if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
5015 return -EFAULT; 4996 return -EFAULT;
5016 return 0; 4997 return 0;
5017} 4998}
@@ -5080,7 +5061,6 @@ static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *p
5080 *newfile = sock_alloc_file(newsock, 0, NULL); 5061 *newfile = sock_alloc_file(newsock, 0, NULL);
5081 if (IS_ERR(*newfile)) { 5062 if (IS_ERR(*newfile)) {
5082 put_unused_fd(retval); 5063 put_unused_fd(retval);
5083 sock_release(newsock);
5084 retval = PTR_ERR(*newfile); 5064 retval = PTR_ERR(*newfile);
5085 *newfile = NULL; 5065 *newfile = NULL;
5086 return retval; 5066 return retval;
@@ -5642,6 +5622,9 @@ copy_getaddrs:
5642 err = -EFAULT; 5622 err = -EFAULT;
5643 goto out; 5623 goto out;
5644 } 5624 }
5625 /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
5626 * but we can't change it anymore.
5627 */
5645 if (put_user(bytes_copied, optlen)) 5628 if (put_user(bytes_copied, optlen))
5646 err = -EFAULT; 5629 err = -EFAULT;
5647out: 5630out:
@@ -6078,7 +6061,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
6078 params.assoc_id = 0; 6061 params.assoc_id = 0;
6079 } else if (len >= sizeof(struct sctp_assoc_value)) { 6062 } else if (len >= sizeof(struct sctp_assoc_value)) {
6080 len = sizeof(struct sctp_assoc_value); 6063 len = sizeof(struct sctp_assoc_value);
6081 if (copy_from_user(&params, optval, sizeof(params))) 6064 if (copy_from_user(&params, optval, len))
6082 return -EFAULT; 6065 return -EFAULT;
6083 } else 6066 } else
6084 return -EINVAL; 6067 return -EINVAL;
@@ -6248,7 +6231,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
6248 6231
6249 if (len < sizeof(struct sctp_authkeyid)) 6232 if (len < sizeof(struct sctp_authkeyid))
6250 return -EINVAL; 6233 return -EINVAL;
6251 if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid))) 6234
6235 len = sizeof(struct sctp_authkeyid);
6236 if (copy_from_user(&val, optval, len))
6252 return -EFAULT; 6237 return -EFAULT;
6253 6238
6254 asoc = sctp_id2assoc(sk, val.scact_assoc_id); 6239 asoc = sctp_id2assoc(sk, val.scact_assoc_id);
@@ -6260,7 +6245,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
6260 else 6245 else
6261 val.scact_keynumber = ep->active_key_id; 6246 val.scact_keynumber = ep->active_key_id;
6262 6247
6263 len = sizeof(struct sctp_authkeyid);
6264 if (put_user(len, optlen)) 6248 if (put_user(len, optlen))
6265 return -EFAULT; 6249 return -EFAULT;
6266 if (copy_to_user(optval, &val, len)) 6250 if (copy_to_user(optval, &val, len))
@@ -6286,7 +6270,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
6286 if (len < sizeof(struct sctp_authchunks)) 6270 if (len < sizeof(struct sctp_authchunks))
6287 return -EINVAL; 6271 return -EINVAL;
6288 6272
6289 if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) 6273 if (copy_from_user(&val, optval, sizeof(val)))
6290 return -EFAULT; 6274 return -EFAULT;
6291 6275
6292 to = p->gauth_chunks; 6276 to = p->gauth_chunks;
@@ -6331,7 +6315,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
6331 if (len < sizeof(struct sctp_authchunks)) 6315 if (len < sizeof(struct sctp_authchunks))
6332 return -EINVAL; 6316 return -EINVAL;
6333 6317
6334 if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) 6318 if (copy_from_user(&val, optval, sizeof(val)))
6335 return -EFAULT; 6319 return -EFAULT;
6336 6320
6337 to = p->gauth_chunks; 6321 to = p->gauth_chunks;
@@ -7497,11 +7481,11 @@ out:
7497 * here, again, by modeling the current TCP/UDP code. We don't have 7481 * here, again, by modeling the current TCP/UDP code. We don't have
7498 * a good way to test with it yet. 7482 * a good way to test with it yet.
7499 */ 7483 */
7500unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) 7484__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
7501{ 7485{
7502 struct sock *sk = sock->sk; 7486 struct sock *sk = sock->sk;
7503 struct sctp_sock *sp = sctp_sk(sk); 7487 struct sctp_sock *sp = sctp_sk(sk);
7504 unsigned int mask; 7488 __poll_t mask;
7505 7489
7506 poll_wait(file, sk_sleep(sk), wait); 7490 poll_wait(file, sk_sleep(sk), wait);
7507 7491
@@ -7999,12 +7983,12 @@ void sctp_sock_rfree(struct sk_buff *skb)
7999 7983
8000/* Helper function to wait for space in the sndbuf. */ 7984/* Helper function to wait for space in the sndbuf. */
8001static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, 7985static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
8002 size_t msg_len, struct sock **orig_sk) 7986 size_t msg_len)
8003{ 7987{
8004 struct sock *sk = asoc->base.sk; 7988 struct sock *sk = asoc->base.sk;
8005 int err = 0;
8006 long current_timeo = *timeo_p; 7989 long current_timeo = *timeo_p;
8007 DEFINE_WAIT(wait); 7990 DEFINE_WAIT(wait);
7991 int err = 0;
8008 7992
8009 pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, 7993 pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
8010 *timeo_p, msg_len); 7994 *timeo_p, msg_len);
@@ -8033,17 +8017,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
8033 release_sock(sk); 8017 release_sock(sk);
8034 current_timeo = schedule_timeout(current_timeo); 8018 current_timeo = schedule_timeout(current_timeo);
8035 lock_sock(sk); 8019 lock_sock(sk);
8036 if (sk != asoc->base.sk) { 8020 if (sk != asoc->base.sk)
8037 release_sock(sk); 8021 goto do_error;
8038 sk = asoc->base.sk;
8039 lock_sock(sk);
8040 }
8041 8022
8042 *timeo_p = current_timeo; 8023 *timeo_p = current_timeo;
8043 } 8024 }
8044 8025
8045out: 8026out:
8046 *orig_sk = sk;
8047 finish_wait(&asoc->wait, &wait); 8027 finish_wait(&asoc->wait, &wait);
8048 8028
8049 /* Release the association's refcnt. */ 8029 /* Release the association's refcnt. */
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index a11db21dc8a0..524dfeb94c41 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -64,7 +64,7 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
64 */ 64 */
65 65
66 /* Mark as failed send. */ 66 /* Mark as failed send. */
67 sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); 67 sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM);
68 if (asoc->peer.prsctp_capable && 68 if (asoc->peer.prsctp_capable &&
69 SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) 69 SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
70 asoc->sent_cnt_removable--; 70 asoc->sent_cnt_removable--;
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
156 sctp_stream_outq_migrate(stream, NULL, outcnt); 156 sctp_stream_outq_migrate(stream, NULL, outcnt);
157 sched->sched_all(stream); 157 sched->sched_all(stream);
158 158
159 i = sctp_stream_alloc_out(stream, outcnt, gfp); 159 ret = sctp_stream_alloc_out(stream, outcnt, gfp);
160 if (i) 160 if (ret)
161 return i; 161 goto out;
162 162
163 stream->outcnt = outcnt; 163 stream->outcnt = outcnt;
164 for (i = 0; i < stream->outcnt; i++) 164 for (i = 0; i < stream->outcnt; i++)
@@ -170,19 +170,17 @@ in:
170 if (!incnt) 170 if (!incnt)
171 goto out; 171 goto out;
172 172
173 i = sctp_stream_alloc_in(stream, incnt, gfp); 173 ret = sctp_stream_alloc_in(stream, incnt, gfp);
174 if (i) { 174 if (ret) {
175 ret = -ENOMEM; 175 sched->free(stream);
176 goto free; 176 kfree(stream->out);
177 stream->out = NULL;
178 stream->outcnt = 0;
179 goto out;
177 } 180 }
178 181
179 stream->incnt = incnt; 182 stream->incnt = incnt;
180 goto out;
181 183
182free:
183 sched->free(stream);
184 kfree(stream->out);
185 stream->out = NULL;
186out: 184out:
187 return ret; 185 return ret;
188} 186}
@@ -254,6 +252,30 @@ static int sctp_send_reconf(struct sctp_association *asoc,
254 return retval; 252 return retval;
255} 253}
256 254
255static bool sctp_stream_outq_is_empty(struct sctp_stream *stream,
256 __u16 str_nums, __be16 *str_list)
257{
258 struct sctp_association *asoc;
259 __u16 i;
260
261 asoc = container_of(stream, struct sctp_association, stream);
262 if (!asoc->outqueue.out_qlen)
263 return true;
264
265 if (!str_nums)
266 return false;
267
268 for (i = 0; i < str_nums; i++) {
269 __u16 sid = ntohs(str_list[i]);
270
271 if (stream->out[sid].ext &&
272 !list_empty(&stream->out[sid].ext->outq))
273 return false;
274 }
275
276 return true;
277}
278
257int sctp_send_reset_streams(struct sctp_association *asoc, 279int sctp_send_reset_streams(struct sctp_association *asoc,
258 struct sctp_reset_streams *params) 280 struct sctp_reset_streams *params)
259{ 281{
@@ -317,6 +339,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
317 for (i = 0; i < str_nums; i++) 339 for (i = 0; i < str_nums; i++)
318 nstr_list[i] = htons(str_list[i]); 340 nstr_list[i] = htons(str_list[i]);
319 341
342 if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) {
343 retval = -EAGAIN;
344 goto out;
345 }
346
320 chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); 347 chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in);
321 348
322 kfree(nstr_list); 349 kfree(nstr_list);
@@ -377,6 +404,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
377 if (asoc->strreset_outstanding) 404 if (asoc->strreset_outstanding)
378 return -EINPROGRESS; 405 return -EINPROGRESS;
379 406
407 if (!sctp_outq_is_empty(&asoc->outqueue))
408 return -EAGAIN;
409
380 chunk = sctp_make_strreset_tsnreq(asoc); 410 chunk = sctp_make_strreset_tsnreq(asoc);
381 if (!chunk) 411 if (!chunk)
382 return -ENOMEM; 412 return -ENOMEM;
@@ -563,7 +593,7 @@ struct sctp_chunk *sctp_process_strreset_outreq(
563 flags = SCTP_STREAM_RESET_INCOMING_SSN; 593 flags = SCTP_STREAM_RESET_INCOMING_SSN;
564 } 594 }
565 595
566 nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2; 596 nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
567 if (nums) { 597 if (nums) {
568 str_p = outreq->list_of_streams; 598 str_p = outreq->list_of_streams;
569 for (i = 0; i < nums; i++) { 599 for (i = 0; i < nums; i++) {
@@ -627,7 +657,7 @@ struct sctp_chunk *sctp_process_strreset_inreq(
627 goto out; 657 goto out;
628 } 658 }
629 659
630 nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2; 660 nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16);
631 str_p = inreq->list_of_streams; 661 str_p = inreq->list_of_streams;
632 for (i = 0; i < nums; i++) { 662 for (i = 0; i < nums; i++) {
633 if (ntohs(str_p[i]) >= stream->outcnt) { 663 if (ntohs(str_p[i]) >= stream->outcnt) {
@@ -636,6 +666,12 @@ struct sctp_chunk *sctp_process_strreset_inreq(
636 } 666 }
637 } 667 }
638 668
669 if (!sctp_stream_outq_is_empty(stream, nums, str_p)) {
670 result = SCTP_STRRESET_IN_PROGRESS;
671 asoc->strreset_inseq--;
672 goto err;
673 }
674
639 chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); 675 chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0);
640 if (!chunk) 676 if (!chunk)
641 goto out; 677 goto out;
@@ -687,12 +723,18 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
687 i = asoc->strreset_inseq - request_seq - 1; 723 i = asoc->strreset_inseq - request_seq - 1;
688 result = asoc->strreset_result[i]; 724 result = asoc->strreset_result[i];
689 if (result == SCTP_STRRESET_PERFORMED) { 725 if (result == SCTP_STRRESET_PERFORMED) {
690 next_tsn = asoc->next_tsn; 726 next_tsn = asoc->ctsn_ack_point + 1;
691 init_tsn = 727 init_tsn =
692 sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; 728 sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1;
693 } 729 }
694 goto err; 730 goto err;
695 } 731 }
732
733 if (!sctp_outq_is_empty(&asoc->outqueue)) {
734 result = SCTP_STRRESET_IN_PROGRESS;
735 goto err;
736 }
737
696 asoc->strreset_inseq++; 738 asoc->strreset_inseq++;
697 739
698 if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) 740 if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
@@ -703,9 +745,10 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
703 goto out; 745 goto out;
704 } 746 }
705 747
706 /* G3: The same processing as though a SACK chunk with no gap report 748 /* G4: The same processing as though a FWD-TSN chunk (as defined in
707 * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were 749 * [RFC3758]) with all streams affected and a new cumulative TSN
708 * received MUST be performed. 750 * ACK of the Receiver's Next TSN minus 1 were received MUST be
751 * performed.
709 */ 752 */
710 max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); 753 max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
711 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); 754 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
@@ -720,10 +763,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
720 sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, 763 sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
721 init_tsn, GFP_ATOMIC); 764 init_tsn, GFP_ATOMIC);
722 765
723 /* G4: The same processing as though a FWD-TSN chunk (as defined in 766 /* G3: The same processing as though a SACK chunk with no gap report
724 * [RFC3758]) with all streams affected and a new cumulative TSN 767 * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
725 * ACK of the Receiver's Next TSN minus 1 were received MUST be 768 * received MUST be performed.
726 * performed.
727 */ 769 */
728 sctp_outq_free(&asoc->outqueue); 770 sctp_outq_free(&asoc->outqueue);
729 771
@@ -927,7 +969,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
927 969
928 outreq = (struct sctp_strreset_outreq *)req; 970 outreq = (struct sctp_strreset_outreq *)req;
929 str_p = outreq->list_of_streams; 971 str_p = outreq->list_of_streams;
930 nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2; 972 nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) /
973 sizeof(__u16);
931 974
932 if (result == SCTP_STRRESET_PERFORMED) { 975 if (result == SCTP_STRRESET_PERFORMED) {
933 if (nums) { 976 if (nums) {
@@ -956,7 +999,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
956 999
957 inreq = (struct sctp_strreset_inreq *)req; 1000 inreq = (struct sctp_strreset_inreq *)req;
958 str_p = inreq->list_of_streams; 1001 str_p = inreq->list_of_streams;
959 nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; 1002 nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) /
1003 sizeof(__u16);
960 1004
961 *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, 1005 *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
962 nums, str_p, GFP_ATOMIC); 1006 nums, str_p, GFP_ATOMIC);
@@ -975,6 +1019,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
975 if (result == SCTP_STRRESET_PERFORMED) { 1019 if (result == SCTP_STRRESET_PERFORMED) {
976 __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( 1020 __u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
977 &asoc->peer.tsn_map); 1021 &asoc->peer.tsn_map);
1022 LIST_HEAD(temp);
978 1023
979 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); 1024 sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
980 sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); 1025 sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
@@ -983,7 +1028,13 @@ struct sctp_chunk *sctp_process_strreset_resp(
983 SCTP_TSN_MAP_INITIAL, 1028 SCTP_TSN_MAP_INITIAL,
984 stsn, GFP_ATOMIC); 1029 stsn, GFP_ATOMIC);
985 1030
1031 /* Clean up sacked and abandoned queues only. As the
1032 * out_chunk_list may not be empty, splice it to temp,
1033 * then get it back after sctp_outq_free is done.
1034 */
1035 list_splice_init(&asoc->outqueue.out_chunk_list, &temp);
986 sctp_outq_free(&asoc->outqueue); 1036 sctp_outq_free(&asoc->outqueue);
1037 list_splice_init(&temp, &asoc->outqueue.out_chunk_list);
987 1038
988 asoc->next_tsn = rtsn; 1039 asoc->next_tsn = rtsn;
989 asoc->ctsn_ack_point = asoc->next_tsn - 1; 1040 asoc->ctsn_ack_point = asoc->next_tsn - 1;
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 0b83ec51e43b..d8c162a4089c 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -119,16 +119,27 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
119 .unsched_all = sctp_sched_fcfs_unsched_all, 119 .unsched_all = sctp_sched_fcfs_unsched_all,
120}; 120};
121 121
122static void sctp_sched_ops_fcfs_init(void)
123{
124 sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs);
125}
126
122/* API to other parts of the stack */ 127/* API to other parts of the stack */
123 128
124extern struct sctp_sched_ops sctp_sched_prio; 129static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1];
125extern struct sctp_sched_ops sctp_sched_rr;
126 130
127static struct sctp_sched_ops *sctp_sched_ops[] = { 131void sctp_sched_ops_register(enum sctp_sched_type sched,
128 &sctp_sched_fcfs, 132 struct sctp_sched_ops *sched_ops)
129 &sctp_sched_prio, 133{
130 &sctp_sched_rr, 134 sctp_sched_ops[sched] = sched_ops;
131}; 135}
136
137void sctp_sched_ops_init(void)
138{
139 sctp_sched_ops_fcfs_init();
140 sctp_sched_ops_prio_init();
141 sctp_sched_ops_rr_init();
142}
132 143
133int sctp_sched_set_sched(struct sctp_association *asoc, 144int sctp_sched_set_sched(struct sctp_association *asoc,
134 enum sctp_sched_type sched) 145 enum sctp_sched_type sched)
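The stream_sched changes above replace the extern-array wiring with explicit registration from sctp_init(), which lets each scheduler's ops table become static to its own file. A toy sketch of that registration pattern (all toy_* names are illustrative):

#include <stddef.h>

enum toy_sched_type { TOY_SS_FCFS, TOY_SS_PRIO, TOY_SS_RR, TOY_SS_MAX = TOY_SS_RR };

struct toy_sched_ops {
	const char *name;
};

static const struct toy_sched_ops *toy_sched_ops[TOY_SS_MAX + 1];

static void toy_sched_ops_register(enum toy_sched_type type,
				   const struct toy_sched_ops *ops)
{
	toy_sched_ops[type] = ops;
}

/* each scheduler file exposes only a single init hook */
static const struct toy_sched_ops toy_sched_rr = { .name = "rr" };

static void toy_sched_ops_rr_init(void)
{
	toy_sched_ops_register(TOY_SS_RR, &toy_sched_rr);
}

Registering at init time keeps the ordering explicit in one place and shrinks the symbol surface between the scheduler implementations and the core.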
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 384dbf3c8760..7997d35dd0fd 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -333,7 +333,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
333 sctp_sched_prio_unsched(soute); 333 sctp_sched_prio_unsched(soute);
334} 334}
335 335
336struct sctp_sched_ops sctp_sched_prio = { 336static struct sctp_sched_ops sctp_sched_prio = {
337 .set = sctp_sched_prio_set, 337 .set = sctp_sched_prio_set,
338 .get = sctp_sched_prio_get, 338 .get = sctp_sched_prio_get,
339 .init = sctp_sched_prio_init, 339 .init = sctp_sched_prio_init,
@@ -345,3 +345,8 @@ struct sctp_sched_ops sctp_sched_prio = {
345 .sched_all = sctp_sched_prio_sched_all, 345 .sched_all = sctp_sched_prio_sched_all,
346 .unsched_all = sctp_sched_prio_unsched_all, 346 .unsched_all = sctp_sched_prio_unsched_all,
347}; 347};
348
349void sctp_sched_ops_prio_init(void)
350{
351 sctp_sched_ops_register(SCTP_SS_PRIO, &sctp_sched_prio);
352}
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
index 7612a438c5b9..1155692448f1 100644
--- a/net/sctp/stream_sched_rr.c
+++ b/net/sctp/stream_sched_rr.c
@@ -187,7 +187,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
187 sctp_sched_rr_unsched(stream, soute); 187 sctp_sched_rr_unsched(stream, soute);
188} 188}
189 189
190struct sctp_sched_ops sctp_sched_rr = { 190static struct sctp_sched_ops sctp_sched_rr = {
191 .set = sctp_sched_rr_set, 191 .set = sctp_sched_rr_set,
192 .get = sctp_sched_rr_get, 192 .get = sctp_sched_rr_get,
193 .init = sctp_sched_rr_init, 193 .init = sctp_sched_rr_init,
@@ -199,3 +199,8 @@ struct sctp_sched_ops sctp_sched_rr = {
199 .sched_all = sctp_sched_rr_sched_all, 199 .sched_all = sctp_sched_rr_sched_all,
200 .unsched_all = sctp_sched_rr_unsched_all, 200 .unsched_all = sctp_sched_rr_unsched_all,
201}; 201};
202
203void sctp_sched_ops_rr_init(void)
204{
205 sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr);
206}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1e5a22430cf5..47f82bd794d9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
248 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; 248 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
249} 249}
250 250
251void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) 251bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
252{ 252{
253 struct dst_entry *dst = sctp_transport_dst_check(t); 253 struct dst_entry *dst = sctp_transport_dst_check(t);
254 bool change = true;
254 255
255 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { 256 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
256 pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", 257 pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
257 __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); 258 __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
258 /* Use default minimum segment size and disable 259 /* Use default minimum segment instead */
259 * pmtu discovery on this transport. 260 pmtu = SCTP_DEFAULT_MINSEGMENT;
260 */
261 t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
262 } else {
263 t->pathmtu = pmtu;
264 } 261 }
262 pmtu = SCTP_TRUNC4(pmtu);
265 263
266 if (dst) { 264 if (dst) {
267 dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); 265 dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
268 dst = sctp_transport_dst_check(t); 266 dst = sctp_transport_dst_check(t);
269 } 267 }
270 268
271 if (!dst) 269 if (!dst) {
272 t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); 270 t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
271 dst = t->dst;
272 }
273
274 if (dst) {
275 /* Re-fetch, as under layers may have a higher minimum size */
276 pmtu = SCTP_TRUNC4(dst_mtu(dst));
277 change = t->pathmtu != pmtu;
278 }
279 t->pathmtu = pmtu;
280
281 return change;
273} 282}
274 283
275/* Caches the dst entry and source address for a transport's destination 284/* Caches the dst entry and source address for a transport's destination
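sctp_transport_update_pmtu() above now clamps the reported value to the minimum segment size, truncates it to a 4-byte multiple and reports whether the stored path MTU actually changed, which is what lets the ICMP handler skip the retransmit when nothing moved. A standalone sketch of that clamp-and-compare logic (constants and names are illustrative, with 512 standing in for SCTP_DEFAULT_MINSEGMENT):

#include <stdbool.h>
#include <stdint.h>

#define TOY_MIN_PMTU	512u
#define TOY_TRUNC4(x)	((x) & ~3u)	/* SCTP chunk lengths are 4-byte aligned */

/* returns true only when the stored path MTU actually changed */
static bool toy_update_pmtu(uint32_t *pathmtu, uint32_t reported)
{
	uint32_t pmtu = reported < TOY_MIN_PMTU ? TOY_MIN_PMTU : reported;

	pmtu = TOY_TRUNC4(pmtu);
	if (*pathmtu == pmtu)
		return false;	/* no change, so no PMTUD retransmit needed */
	*pathmtu = pmtu;
	return true;
}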
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afe..e36ec5dd64c6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
1084void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, 1084void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1085 gfp_t gfp) 1085 gfp_t gfp)
1086{ 1086{
1087 struct sctp_association *asoc; 1087 struct sctp_association *asoc = ulpq->asoc;
1088 __u16 needed, freed; 1088 __u32 freed = 0;
1089 1089 __u16 needed;
1090 asoc = ulpq->asoc;
1091 1090
1092 if (chunk) { 1091 needed = ntohs(chunk->chunk_hdr->length) -
1093 needed = ntohs(chunk->chunk_hdr->length); 1092 sizeof(struct sctp_data_chunk);
1094 needed -= sizeof(struct sctp_data_chunk);
1095 } else
1096 needed = SCTP_DEFAULT_MAXWINDOW;
1097
1098 freed = 0;
1099 1093
1100 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { 1094 if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
1101 freed = sctp_ulpq_renege_order(ulpq, needed); 1095 freed = sctp_ulpq_renege_order(ulpq, needed);
1102 if (freed < needed) { 1096 if (freed < needed)
1103 freed += sctp_ulpq_renege_frags(ulpq, needed - freed); 1097 freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
1104 }
1105 } 1098 }
1106 /* If able to free enough room, accept this chunk. */ 1099 /* If able to free enough room, accept this chunk. */
1107 if (chunk && (freed >= needed)) { 1100 if (freed >= needed) {
1108 int retval; 1101 int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
1109 retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
1110 /* 1102 /*
1111 * Enter partial delivery if chunk has not been 1103 * Enter partial delivery if chunk has not been
1112 * delivered; otherwise, drain the reassembly queue. 1104 * delivered; otherwise, drain the reassembly queue.
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6451c5013e06..449f62e1e270 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1107,7 +1107,7 @@ out:
1107 return rc; 1107 return rc;
1108} 1108}
1109 1109
1110static unsigned int smc_accept_poll(struct sock *parent) 1110static __poll_t smc_accept_poll(struct sock *parent)
1111{ 1111{
1112 struct smc_sock *isk; 1112 struct smc_sock *isk;
1113 struct sock *sk; 1113 struct sock *sk;
@@ -1126,11 +1126,11 @@ static unsigned int smc_accept_poll(struct sock *parent)
1126 return 0; 1126 return 0;
1127} 1127}
1128 1128
1129static unsigned int smc_poll(struct file *file, struct socket *sock, 1129static __poll_t smc_poll(struct file *file, struct socket *sock,
1130 poll_table *wait) 1130 poll_table *wait)
1131{ 1131{
1132 struct sock *sk = sock->sk; 1132 struct sock *sk = sock->sk;
1133 unsigned int mask = 0; 1133 __poll_t mask = 0;
1134 struct smc_sock *smc; 1134 struct smc_sock *smc;
1135 int rc; 1135 int rc;
1136 1136
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 1800e16b2a02..511548085d16 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -35,7 +35,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
35 struct smc_clc_msg_hdr *clcm = buf; 35 struct smc_clc_msg_hdr *clcm = buf;
36 struct msghdr msg = {NULL, 0}; 36 struct msghdr msg = {NULL, 0};
37 int reason_code = 0; 37 int reason_code = 0;
38 struct kvec vec; 38 struct kvec vec = {buf, buflen};
39 int len, datlen; 39 int len, datlen;
40 int krflags; 40 int krflags;
41 41
@@ -43,12 +43,15 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
43 * so we don't consume any subsequent CLC message or payload data 43 * so we don't consume any subsequent CLC message or payload data
44 * in the TCP byte stream 44 * in the TCP byte stream
45 */ 45 */
46 vec.iov_base = buf; 46 /*
47 vec.iov_len = buflen; 47 * Caller must make sure that buflen is no less than
48 * sizeof(struct smc_clc_msg_hdr)
49 */
48 krflags = MSG_PEEK | MSG_WAITALL; 50 krflags = MSG_PEEK | MSG_WAITALL;
49 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 51 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
50 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, 52 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
51 sizeof(struct smc_clc_msg_hdr), krflags); 53 sizeof(struct smc_clc_msg_hdr));
54 len = sock_recvmsg(smc->clcsock, &msg, krflags);
52 if (signal_pending(current)) { 55 if (signal_pending(current)) {
53 reason_code = -EINTR; 56 reason_code = -EINTR;
54 clc_sk->sk_err = EINTR; 57 clc_sk->sk_err = EINTR;
@@ -83,12 +86,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
83 } 86 }
84 87
85 /* receive the complete CLC message */ 88 /* receive the complete CLC message */
86 vec.iov_base = buf;
87 vec.iov_len = buflen;
88 memset(&msg, 0, sizeof(struct msghdr)); 89 memset(&msg, 0, sizeof(struct msghdr));
90 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
89 krflags = MSG_WAITALL; 91 krflags = MSG_WAITALL;
90 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 92 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
91 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); 93 len = sock_recvmsg(smc->clcsock, &msg, krflags);
92 if (len < datlen) { 94 if (len < datlen) {
93 smc->sk.sk_err = EPROTO; 95 smc->sk.sk_err = EPROTO;
94 reason_code = -EPROTO; 96 reason_code = -EPROTO;
diff --git a/net/socket.c b/net/socket.c
index 42d8e9c9ccd5..2f378449bc1b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -118,7 +118,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
118static int sock_mmap(struct file *file, struct vm_area_struct *vma); 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
119 119
120static int sock_close(struct inode *inode, struct file *file); 120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file, 121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait); 122 struct poll_table_struct *wait);
123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
124#ifdef CONFIG_COMPAT 124#ifdef CONFIG_COMPAT
@@ -406,8 +406,10 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
406 name.len = strlen(name.name); 406 name.len = strlen(name.name);
407 } 407 }
408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); 408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
409 if (unlikely(!path.dentry)) 409 if (unlikely(!path.dentry)) {
410 sock_release(sock);
410 return ERR_PTR(-ENOMEM); 411 return ERR_PTR(-ENOMEM);
412 }
411 path.mnt = mntget(sock_mnt); 413 path.mnt = mntget(sock_mnt);
412 414
413 d_instantiate(path.dentry, SOCK_INODE(sock)); 415 d_instantiate(path.dentry, SOCK_INODE(sock));
@@ -415,9 +417,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
415 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, 417 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
416 &socket_file_ops); 418 &socket_file_ops);
417 if (IS_ERR(file)) { 419 if (IS_ERR(file)) {
418 /* drop dentry, keep inode */ 420 /* drop dentry, keep inode for a bit */
419 ihold(d_inode(path.dentry)); 421 ihold(d_inode(path.dentry));
420 path_put(&path); 422 path_put(&path);
423 /* ... and now kill it properly */
424 sock_release(sock);
421 return file; 425 return file;
422 } 426 }
423 427
@@ -432,8 +436,10 @@ static int sock_map_fd(struct socket *sock, int flags)
432{ 436{
433 struct file *newfile; 437 struct file *newfile;
434 int fd = get_unused_fd_flags(flags); 438 int fd = get_unused_fd_flags(flags);
435 if (unlikely(fd < 0)) 439 if (unlikely(fd < 0)) {
440 sock_release(sock);
436 return fd; 441 return fd;
442 }
437 443
438 newfile = sock_alloc_file(sock, flags, NULL); 444 newfile = sock_alloc_file(sock, flags, NULL);
439 if (likely(!IS_ERR(newfile))) { 445 if (likely(!IS_ERR(newfile))) {
@@ -1091,9 +1097,9 @@ out_release:
1091EXPORT_SYMBOL(sock_create_lite); 1097EXPORT_SYMBOL(sock_create_lite);
1092 1098
1093/* No kernel lock held - perfect */ 1099/* No kernel lock held - perfect */
1094static unsigned int sock_poll(struct file *file, poll_table *wait) 1100static __poll_t sock_poll(struct file *file, poll_table *wait)
1095{ 1101{
1096 unsigned int busy_flag = 0; 1102 __poll_t busy_flag = 0;
1097 struct socket *sock; 1103 struct socket *sock;
1098 1104
1099 /* 1105 /*
@@ -1330,19 +1336,9 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1330 1336
1331 retval = sock_create(family, type, protocol, &sock); 1337 retval = sock_create(family, type, protocol, &sock);
1332 if (retval < 0) 1338 if (retval < 0)
1333 goto out; 1339 return retval;
1334
1335 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1336 if (retval < 0)
1337 goto out_release;
1338
1339out:
1340 /* It may be already another descriptor 8) Not kernel problem. */
1341 return retval;
1342 1340
1343out_release: 1341 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1344 sock_release(sock);
1345 return retval;
1346} 1342}
1347 1343
1348/* 1344/*
@@ -1366,87 +1362,72 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1366 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1362 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1367 1363
1368 /* 1364 /*
1365 * reserve descriptors and make sure we won't fail
1366 * to return them to userland.
1367 */
1368 fd1 = get_unused_fd_flags(flags);
1369 if (unlikely(fd1 < 0))
1370 return fd1;
1371
1372 fd2 = get_unused_fd_flags(flags);
1373 if (unlikely(fd2 < 0)) {
1374 put_unused_fd(fd1);
1375 return fd2;
1376 }
1377
1378 err = put_user(fd1, &usockvec[0]);
1379 if (err)
1380 goto out;
1381
1382 err = put_user(fd2, &usockvec[1]);
1383 if (err)
1384 goto out;
1385
1386 /*
1369 * Obtain the first socket and check if the underlying protocol 1387 * Obtain the first socket and check if the underlying protocol
1370 * supports the socketpair call. 1388 * supports the socketpair call.
1371 */ 1389 */
1372 1390
1373 err = sock_create(family, type, protocol, &sock1); 1391 err = sock_create(family, type, protocol, &sock1);
1374 if (err < 0) 1392 if (unlikely(err < 0))
1375 goto out; 1393 goto out;
1376 1394
1377 err = sock_create(family, type, protocol, &sock2); 1395 err = sock_create(family, type, protocol, &sock2);
1378 if (err < 0) 1396 if (unlikely(err < 0)) {
1379 goto out_release_1; 1397 sock_release(sock1);
1380 1398 goto out;
1381 err = sock1->ops->socketpair(sock1, sock2);
1382 if (err < 0)
1383 goto out_release_both;
1384
1385 fd1 = get_unused_fd_flags(flags);
1386 if (unlikely(fd1 < 0)) {
1387 err = fd1;
1388 goto out_release_both;
1389 } 1399 }
1390 1400
1391 fd2 = get_unused_fd_flags(flags); 1401 err = sock1->ops->socketpair(sock1, sock2);
1392 if (unlikely(fd2 < 0)) { 1402 if (unlikely(err < 0)) {
1393 err = fd2; 1403 sock_release(sock2);
1394 goto out_put_unused_1; 1404 sock_release(sock1);
1405 goto out;
1395 } 1406 }
1396 1407
1397 newfile1 = sock_alloc_file(sock1, flags, NULL); 1408 newfile1 = sock_alloc_file(sock1, flags, NULL);
1398 if (IS_ERR(newfile1)) { 1409 if (IS_ERR(newfile1)) {
1399 err = PTR_ERR(newfile1); 1410 err = PTR_ERR(newfile1);
1400 goto out_put_unused_both; 1411 sock_release(sock2);
1412 goto out;
1401 } 1413 }
1402 1414
1403 newfile2 = sock_alloc_file(sock2, flags, NULL); 1415 newfile2 = sock_alloc_file(sock2, flags, NULL);
1404 if (IS_ERR(newfile2)) { 1416 if (IS_ERR(newfile2)) {
1405 err = PTR_ERR(newfile2); 1417 err = PTR_ERR(newfile2);
1406 goto out_fput_1; 1418 fput(newfile1);
1419 goto out;
1407 } 1420 }
1408 1421
1409 err = put_user(fd1, &usockvec[0]);
1410 if (err)
1411 goto out_fput_both;
1412
1413 err = put_user(fd2, &usockvec[1]);
1414 if (err)
1415 goto out_fput_both;
1416
1417 audit_fd_pair(fd1, fd2); 1422 audit_fd_pair(fd1, fd2);
1418 1423
1419 fd_install(fd1, newfile1); 1424 fd_install(fd1, newfile1);
1420 fd_install(fd2, newfile2); 1425 fd_install(fd2, newfile2);
1421 /* fd1 and fd2 may be already another descriptors.
1422 * Not kernel problem.
1423 */
1424
1425 return 0; 1426 return 0;
1426 1427
1427out_fput_both: 1428out:
1428 fput(newfile2);
1429 fput(newfile1);
1430 put_unused_fd(fd2);
1431 put_unused_fd(fd1);
1432 goto out;
1433
1434out_fput_1:
1435 fput(newfile1);
1436 put_unused_fd(fd2);
1437 put_unused_fd(fd1);
1438 sock_release(sock2);
1439 goto out;
1440
1441out_put_unused_both:
1442 put_unused_fd(fd2); 1429 put_unused_fd(fd2);
1443out_put_unused_1:
1444 put_unused_fd(fd1); 1430 put_unused_fd(fd1);
1445out_release_both:
1446 sock_release(sock2);
1447out_release_1:
1448 sock_release(sock1);
1449out:
1450 return err; 1431 return err;
1451} 1432}
1452 1433
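The socketpair() rework reserves both descriptor numbers and copies them to userspace before either socket is created, which removes the long unwind chain at the end of the function. The user-visible contract is unchanged; a small userspace check, assuming an AF_UNIX stream pair:

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char buf[16];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
		perror("socketpair");	/* on failure no fds are leaked */
		return 1;
	}
	if (write(sv[0], "ping", 4) == 4 &&
	    read(sv[1], buf, sizeof(buf)) == 4)
		printf("got: %.4s\n", buf);
	close(sv[0]);
	close(sv[1]);
	return 0;
}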
@@ -1562,7 +1543,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1562 if (IS_ERR(newfile)) { 1543 if (IS_ERR(newfile)) {
1563 err = PTR_ERR(newfile); 1544 err = PTR_ERR(newfile);
1564 put_unused_fd(newfd); 1545 put_unused_fd(newfd);
1565 sock_release(newsock);
1566 goto out_put; 1546 goto out_put;
1567 } 1547 }
1568 1548
@@ -2641,6 +2621,15 @@ out_fs:
2641 2621
2642core_initcall(sock_init); /* early initcall */ 2622core_initcall(sock_init); /* early initcall */
2643 2623
2624static int __init jit_init(void)
2625{
2626#ifdef CONFIG_BPF_JIT_ALWAYS_ON
2627 bpf_jit_enable = 1;
2628#endif
2629 return 0;
2630}
2631pure_initcall(jit_init);
2632
2644#ifdef CONFIG_PROC_FS 2633#ifdef CONFIG_PROC_FS
2645void socket_seq_show(struct seq_file *seq) 2634void socket_seq_show(struct seq_file *seq)
2646{ 2635{
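The jit_init() initcall added above forces bpf_jit_enable to 1 when CONFIG_BPF_JIT_ALWAYS_ON is set, so BPF programs are always JIT-compiled rather than run in the interpreter (part of hardening the interpreter, if memory serves). A small userspace probe of the corresponding sysctl; the exact write behaviour depends on kernel configuration, so treat the comment as an assumption:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/core/bpf_jit_enable", "r");
	int val;

	/* With CONFIG_BPF_JIT_ALWAYS_ON this is expected to read 1. */
	if (f && fscanf(f, "%d", &val) == 1)
		printf("bpf_jit_enable = %d\n", val);
	if (f)
		fclose(f);
	return 0;
}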
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index c5fda15ba319..1fdab5c4eda8 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
401 * allows a thread in BH context to safely check if the process 401 * allows a thread in BH context to safely check if the process
402 * lock is held. In this case, if the lock is held, queue work. 402 * lock is held. In this case, if the lock is held, queue work.
403 */ 403 */
404 if (sock_owned_by_user(strp->sk)) { 404 if (sock_owned_by_user_nocheck(strp->sk)) {
405 queue_work(strp_wq, &strp->work); 405 queue_work(strp_wq, &strp->work);
406 return; 406 return;
407 } 407 }
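strp_data_ready() runs from the lower socket's data-ready callback and only needs to know whether a process currently owns the socket lock; sock_owned_by_user_nocheck() returns that flag without the lockdep assertion that sock_owned_by_user() performs, which (as far as I can tell) is what this switch avoids. The check-then-defer pattern itself is unchanged; a condensed sketch:

static void example_data_ready(struct strparser *strp)
{
	/* From BH context: if process context owns the lock, defer. */
	if (sock_owned_by_user_nocheck(strp->sk)) {
		queue_work(strp_wq, &strp->work);
		return;
	}
	/* otherwise it is safe to run the parser inline */
}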
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index c4778cae58ef..444380f968f1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
231 goto out_free_groups; 231 goto out_free_groups;
232 creds->cr_group_info->gid[i] = kgid; 232 creds->cr_group_info->gid[i] = kgid;
233 } 233 }
234 groups_sort(creds->cr_group_info);
234 235
235 return 0; 236 return 0;
236out_free_groups: 237out_free_groups:
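groups_sort() is now called explicitly wherever a group_info is filled in by hand, here and in the svcauth changes below, because group membership checks (in_group_p()/groups_search()) binary-search the gid array and misbehave on unsorted input. A hedged sketch of the fill-then-sort pattern; example_build_groups() is illustrative:

static struct group_info *example_build_groups(const kgid_t *gids, int ngroups)
{
	struct group_info *gi = groups_alloc(ngroups);
	int i;

	if (!gi)
		return NULL;
	for (i = 0; i < ngroups; i++)
		gi->gid[i] = gids[i];
	groups_sort(gi);	/* required before any membership lookup */
	return gi;
}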
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 73165e9ca5bf..26531193fce4 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -264,7 +264,7 @@ out:
264 return status; 264 return status;
265} 265}
266 266
267static struct cache_detail rsi_cache_template = { 267static const struct cache_detail rsi_cache_template = {
268 .owner = THIS_MODULE, 268 .owner = THIS_MODULE,
269 .hash_size = RSI_HASHMAX, 269 .hash_size = RSI_HASHMAX,
270 .name = "auth.rpcsec.init", 270 .name = "auth.rpcsec.init",
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
481 goto out; 481 goto out;
482 rsci.cred.cr_group_info->gid[i] = kgid; 482 rsci.cred.cr_group_info->gid[i] = kgid;
483 } 483 }
484 groups_sort(rsci.cred.cr_group_info);
484 485
485 /* mech name */ 486 /* mech name */
486 len = qword_get(&mesg, buf, mlen); 487 len = qword_get(&mesg, buf, mlen);
@@ -524,7 +525,7 @@ out:
524 return status; 525 return status;
525} 526}
526 527
527static struct cache_detail rsc_cache_template = { 528static const struct cache_detail rsc_cache_template = {
528 .owner = THIS_MODULE, 529 .owner = THIS_MODULE,
529 .hash_size = RSC_HASHMAX, 530 .hash_size = RSC_HASHMAX,
530 .name = "auth.rpcsec.context", 531 .name = "auth.rpcsec.context",
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 79d55d949d9a..aa36dad32db1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -930,10 +930,10 @@ out:
930 930
931static DECLARE_WAIT_QUEUE_HEAD(queue_wait); 931static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
932 932
933static unsigned int cache_poll(struct file *filp, poll_table *wait, 933static __poll_t cache_poll(struct file *filp, poll_table *wait,
934 struct cache_detail *cd) 934 struct cache_detail *cd)
935{ 935{
936 unsigned int mask; 936 __poll_t mask;
937 struct cache_reader *rp = filp->private_data; 937 struct cache_reader *rp = filp->private_data;
938 struct cache_queue *cq; 938 struct cache_queue *cq;
939 939
@@ -1501,7 +1501,7 @@ static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
1501 return cache_write(filp, buf, count, ppos, cd); 1501 return cache_write(filp, buf, count, ppos, cd);
1502} 1502}
1503 1503
1504static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) 1504static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
1505{ 1505{
1506 struct cache_detail *cd = PDE_DATA(file_inode(filp)); 1506 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1507 1507
@@ -1674,7 +1674,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)
1674} 1674}
1675EXPORT_SYMBOL_GPL(cache_unregister_net); 1675EXPORT_SYMBOL_GPL(cache_unregister_net);
1676 1676
1677struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) 1677struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net)
1678{ 1678{
1679 struct cache_detail *cd; 1679 struct cache_detail *cd;
1680 int i; 1680 int i;
@@ -1720,7 +1720,7 @@ static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf,
1720 return cache_write(filp, buf, count, ppos, cd); 1720 return cache_write(filp, buf, count, ppos, cd);
1721} 1721}
1722 1722
1723static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) 1723static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait)
1724{ 1724{
1725 struct cache_detail *cd = RPC_I(file_inode(filp))->private; 1725 struct cache_detail *cd = RPC_I(file_inode(filp))->private;
1726 1726
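Besides the __poll_t conversions, the cache code now treats the static cache_detail templates as read-only: cache_create_net() takes a const template and duplicates it into a per-namespace, writable cache_detail, so the templates in svcauth_gss.c and svcauth_unix.c can be declared const. A trimmed sketch of registering such a template (most callback fields omitted for brevity):

static const struct cache_detail example_cache_template = {
	.owner		= THIS_MODULE,
	.hash_size	= 64,
	.name		= "example.cache",
	/* .cache_request, .cache_parse, .match, ... omitted */
};

static int example_cache_init_net(struct net *net)
{
	struct cache_detail *cd = cache_create_net(&example_cache_template, net);

	return IS_ERR(cd) ? PTR_ERR(cd) : 0;
}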
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a801da812f86..6e432ecd7f99 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1376,22 +1376,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
1376EXPORT_SYMBOL_GPL(rpc_setbufsize); 1376EXPORT_SYMBOL_GPL(rpc_setbufsize);
1377 1377
1378/** 1378/**
1379 * rpc_protocol - Get transport protocol number for an RPC client
1380 * @clnt: RPC client to query
1381 *
1382 */
1383int rpc_protocol(struct rpc_clnt *clnt)
1384{
1385 int protocol;
1386
1387 rcu_read_lock();
1388 protocol = rcu_dereference(clnt->cl_xprt)->prot;
1389 rcu_read_unlock();
1390 return protocol;
1391}
1392EXPORT_SYMBOL_GPL(rpc_protocol);
1393
1394/**
1395 * rpc_net_ns - Get the network namespace for this RPC client 1379 * rpc_net_ns - Get the network namespace for this RPC client
1396 * @clnt: RPC client to query 1380 * @clnt: RPC client to query
1397 * 1381 *
@@ -1841,6 +1825,7 @@ call_bind_status(struct rpc_task *task)
1841 case -ECONNABORTED: 1825 case -ECONNABORTED:
1842 case -ENOTCONN: 1826 case -ENOTCONN:
1843 case -EHOSTDOWN: 1827 case -EHOSTDOWN:
1828 case -ENETDOWN:
1844 case -EHOSTUNREACH: 1829 case -EHOSTUNREACH:
1845 case -ENETUNREACH: 1830 case -ENETUNREACH:
1846 case -ENOBUFS: 1831 case -ENOBUFS:
@@ -1917,6 +1902,7 @@ call_connect_status(struct rpc_task *task)
1917 /* fall through */ 1902 /* fall through */
1918 case -ECONNRESET: 1903 case -ECONNRESET:
1919 case -ECONNABORTED: 1904 case -ECONNABORTED:
1905 case -ENETDOWN:
1920 case -ENETUNREACH: 1906 case -ENETUNREACH:
1921 case -EHOSTUNREACH: 1907 case -EHOSTUNREACH:
1922 case -EADDRINUSE: 1908 case -EADDRINUSE:
@@ -2022,6 +2008,7 @@ call_transmit_status(struct rpc_task *task)
2022 */ 2008 */
2023 case -ECONNREFUSED: 2009 case -ECONNREFUSED:
2024 case -EHOSTDOWN: 2010 case -EHOSTDOWN:
2011 case -ENETDOWN:
2025 case -EHOSTUNREACH: 2012 case -EHOSTUNREACH:
2026 case -ENETUNREACH: 2013 case -ENETUNREACH:
2027 case -EPERM: 2014 case -EPERM:
@@ -2071,6 +2058,7 @@ call_bc_transmit(struct rpc_task *task)
2071 switch (task->tk_status) { 2058 switch (task->tk_status) {
2072 case 0: 2059 case 0:
2073 /* Success */ 2060 /* Success */
2061 case -ENETDOWN:
2074 case -EHOSTDOWN: 2062 case -EHOSTDOWN:
2075 case -EHOSTUNREACH: 2063 case -EHOSTUNREACH:
2076 case -ENETUNREACH: 2064 case -ENETUNREACH:
@@ -2139,6 +2127,7 @@ call_status(struct rpc_task *task)
2139 task->tk_status = 0; 2127 task->tk_status = 0;
2140 switch(status) { 2128 switch(status) {
2141 case -EHOSTDOWN: 2129 case -EHOSTDOWN:
2130 case -ENETDOWN:
2142 case -EHOSTUNREACH: 2131 case -EHOSTUNREACH:
2143 case -ENETUNREACH: 2132 case -ENETUNREACH:
2144 case -EPERM: 2133 case -EPERM:
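The clnt.c hunks add -ENETDOWN alongside -ENETUNREACH and friends in every error switch of the RPC state machine, so a locally downed interface is handled like any other transient connectivity failure (the unused rpc_protocol() helper is dropped at the same time). A hypothetical classification helper that summarizes the error class these switches share; rpc_error_is_transient() does not exist in the tree and is shown only to make the grouping explicit:

static bool rpc_error_is_transient(int err)
{
	switch (err) {
	case -ENETDOWN:		/* newly added to the switches above */
	case -ENETUNREACH:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
		return true;
	default:
		return false;
	}
}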
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7803f3b6aa53..5c4330325787 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -340,12 +340,12 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
340 return res; 340 return res;
341} 341}
342 342
343static unsigned int 343static __poll_t
344rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) 344rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
345{ 345{
346 struct inode *inode = file_inode(filp); 346 struct inode *inode = file_inode(filp);
347 struct rpc_inode *rpci = RPC_I(inode); 347 struct rpc_inode *rpci = RPC_I(inode);
348 unsigned int mask = POLLOUT | POLLWRNORM; 348 __poll_t mask = POLLOUT | POLLWRNORM;
349 349
350 poll_wait(filp, &rpci->waitq, wait); 350 poll_wait(filp, &rpci->waitq, wait);
351 351
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b1b49edd7c4d..896691afbb1a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task)
755 void (*do_action)(struct rpc_task *); 755 void (*do_action)(struct rpc_task *);
756 756
757 /* 757 /*
758 * Execute any pending callback first. 758 * Perform the next FSM step or a pending callback.
759 *
760 * tk_action may be NULL if the task has been killed.
761 * In particular, note that rpc_killall_tasks may
762 * do this at any time, so beware when dereferencing.
759 */ 763 */
760 do_action = task->tk_callback; 764 do_action = task->tk_action;
761 task->tk_callback = NULL; 765 if (task->tk_callback) {
762 if (do_action == NULL) { 766 do_action = task->tk_callback;
763 /* 767 task->tk_callback = NULL;
764 * Perform the next FSM step.
765 * tk_action may be NULL if the task has been killed.
766 * In particular, note that rpc_killall_tasks may
767 * do this at any time, so beware when dereferencing.
768 */
769 do_action = task->tk_action;
770 if (do_action == NULL)
771 break;
772 } 768 }
773 trace_rpc_task_run_action(task->tk_client, task, task->tk_action); 769 if (!do_action)
770 break;
771 trace_rpc_task_run_action(task->tk_client, task, do_action);
774 do_action(task); 772 do_action(task);
775 773
776 /* 774 /*
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index f81eaa8e0888..af7f28fb8102 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
520 ug.gi->gid[i] = kgid; 520 ug.gi->gid[i] = kgid;
521 } 521 }
522 522
523 groups_sort(ug.gi);
523 ugp = unix_gid_lookup(cd, uid); 524 ugp = unix_gid_lookup(cd, uid);
524 if (ugp) { 525 if (ugp) {
525 struct cache_head *ch; 526 struct cache_head *ch;
@@ -569,7 +570,7 @@ static int unix_gid_show(struct seq_file *m,
569 return 0; 570 return 0;
570} 571}
571 572
572static struct cache_detail unix_gid_cache_template = { 573static const struct cache_detail unix_gid_cache_template = {
573 .owner = THIS_MODULE, 574 .owner = THIS_MODULE,
574 .hash_size = GID_HASHMAX, 575 .hash_size = GID_HASHMAX,
575 .name = "auth.unix.gid", 576 .name = "auth.unix.gid",
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
819 kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); 820 kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
820 cred->cr_group_info->gid[i] = kgid; 821 cred->cr_group_info->gid[i] = kgid;
821 } 822 }
823 groups_sort(cred->cr_group_info);
822 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { 824 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
823 *authp = rpc_autherr_badverf; 825 *authp = rpc_autherr_badverf;
824 return SVC_DENIED; 826 return SVC_DENIED;
@@ -862,7 +864,7 @@ struct auth_ops svcauth_unix = {
862 .set_client = svcauth_unix_set_client, 864 .set_client = svcauth_unix_set_client,
863}; 865};
864 866
865static struct cache_detail ip_map_cache_template = { 867static const struct cache_detail ip_map_cache_template = {
866 .owner = THIS_MODULE, 868 .owner = THIS_MODULE,
867 .hash_size = IP_HASHMAX, 869 .hash_size = IP_HASHMAX,
868 .name = "auth.unix.ip", 870 .name = "auth.unix.ip",
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ff8e06cd067e..5570719e4787 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -338,8 +338,8 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
338 rqstp->rq_xprt_hlen = 0; 338 rqstp->rq_xprt_hlen = 0;
339 339
340 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 340 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
341 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, 341 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
342 msg.msg_flags); 342 len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags);
343 /* If we read a full record, then assume there may be more 343 /* If we read a full record, then assume there may be more
344 * data to read (stream based sockets only!) 344 * data to read (stream based sockets only!)
345 */ 345 */
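svc_recvfrom() drops kernel_recvmsg() in favour of describing the kvec array with iov_iter_kvec() and calling sock_recvmsg() directly, which avoids rebuilding the iterator inside the recvmsg path. A minimal sketch of the same pattern for a single buffer, assuming the pre-4.16 iov_iter_kvec() calling convention used above:

static int example_recv(struct socket *sock, void *buf, size_t len)
{
	struct kvec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };

	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
	return sock_recvmsg(sock, &msg, msg.msg_flags);
}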
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 333b9d697ae5..2436fd1125fc 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task)
940 940
941 if (task->tk_status != -ETIMEDOUT) 941 if (task->tk_status != -ETIMEDOUT)
942 return; 942 return;
943 dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
944 943
944 trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
945 if (!req->rq_reply_bytes_recvd) { 945 if (!req->rq_reply_bytes_recvd) {
946 if (xprt->ops->timer) 946 if (xprt->ops->timer)
947 xprt->ops->timer(xprt, task); 947 xprt->ops->timer(xprt, task);
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task)
1001{ 1001{
1002 struct rpc_rqst *req = task->tk_rqstp; 1002 struct rpc_rqst *req = task->tk_rqstp;
1003 struct rpc_xprt *xprt = req->rq_xprt; 1003 struct rpc_xprt *xprt = req->rq_xprt;
1004 unsigned int connect_cookie;
1004 int status, numreqs; 1005 int status, numreqs;
1005 1006
1006 dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 1007 dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task)
1024 } else if (!req->rq_bytes_sent) 1025 } else if (!req->rq_bytes_sent)
1025 return; 1026 return;
1026 1027
1028 connect_cookie = xprt->connect_cookie;
1027 req->rq_xtime = ktime_get(); 1029 req->rq_xtime = ktime_get();
1028 status = xprt->ops->send_request(task); 1030 status = xprt->ops->send_request(task);
1029 trace_xprt_transmit(xprt, req->rq_xid, status); 1031 trace_xprt_transmit(xprt, req->rq_xid, status);
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task)
1047 xprt->stat.bklog_u += xprt->backlog.qlen; 1049 xprt->stat.bklog_u += xprt->backlog.qlen;
1048 xprt->stat.sending_u += xprt->sending.qlen; 1050 xprt->stat.sending_u += xprt->sending.qlen;
1049 xprt->stat.pending_u += xprt->pending.qlen; 1051 xprt->stat.pending_u += xprt->pending.qlen;
1052 spin_unlock_bh(&xprt->transport_lock);
1050 1053
1051 /* Don't race with disconnect */ 1054 req->rq_connect_cookie = connect_cookie;
1052 if (!xprt_connected(xprt)) 1055 if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
1053 task->tk_status = -ENOTCONN;
1054 else {
1055 /* 1056 /*
1056 * Sleep on the pending queue since 1057 * Sleep on the pending queue if we're expecting a reply.
1057 * we're expecting a reply. 1058 * The spinlock ensures atomicity between the test of
1059 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
1058 */ 1060 */
1059 if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) 1061 spin_lock(&xprt->recv_lock);
1062 if (!req->rq_reply_bytes_recvd) {
1060 rpc_sleep_on(&xprt->pending, task, xprt_timer); 1063 rpc_sleep_on(&xprt->pending, task, xprt_timer);
1061 req->rq_connect_cookie = xprt->connect_cookie; 1064 /*
1065 * Send an extra queue wakeup call if the
1066 * connection was dropped in case the call to
1067 * rpc_sleep_on() raced.
1068 */
1069 if (!xprt_connected(xprt))
1070 xprt_wake_pending_tasks(xprt, -ENOTCONN);
1071 }
1072 spin_unlock(&xprt->recv_lock);
1062 } 1073 }
1063 spin_unlock_bh(&xprt->transport_lock);
1064} 1074}
1065 1075
1066static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) 1076static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
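xprt_transmit() now snapshots xprt->connect_cookie before the request hits the wire and stores it in rq_connect_cookie afterwards, and the test of rq_reply_bytes_recvd plus the rpc_sleep_on() call are moved under xprt->recv_lock, the lock the reply path takes, so a reply or a disconnect cannot slip in between the check and the sleep. Condensed, the post-send wait now looks roughly like this (taken from the hunk above, not new API):

spin_lock(&xprt->recv_lock);
if (!req->rq_reply_bytes_recvd) {
	rpc_sleep_on(&xprt->pending, task, xprt_timer);
	/* a disconnect may have raced with the sleep; wake explicitly */
	if (!xprt_connected(xprt))
		xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
spin_unlock(&xprt->recv_lock);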
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 8b818bb3518a..ed1a4a3065ee 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
43 req = rpcrdma_create_req(r_xprt); 43 req = rpcrdma_create_req(r_xprt);
44 if (IS_ERR(req)) 44 if (IS_ERR(req))
45 return PTR_ERR(req); 45 return PTR_ERR(req);
46 __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
47 46
48 rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, 47 rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
49 DMA_TO_DEVICE, GFP_KERNEL); 48 DMA_TO_DEVICE, GFP_KERNEL);
@@ -74,21 +73,13 @@ out_fail:
74static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, 73static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
75 unsigned int count) 74 unsigned int count)
76{ 75{
77 struct rpcrdma_rep *rep;
78 int rc = 0; 76 int rc = 0;
79 77
80 while (count--) { 78 while (count--) {
81 rep = rpcrdma_create_rep(r_xprt); 79 rc = rpcrdma_create_rep(r_xprt);
82 if (IS_ERR(rep)) { 80 if (rc)
83 pr_err("RPC: %s: reply buffer alloc failed\n",
84 __func__);
85 rc = PTR_ERR(rep);
86 break; 81 break;
87 }
88
89 rpcrdma_recv_buffer_put(rep);
90 } 82 }
91
92 return rc; 83 return rc;
93} 84}
94 85
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
129 rqst->rq_xprt = &r_xprt->rx_xprt; 120 rqst->rq_xprt = &r_xprt->rx_xprt;
130 INIT_LIST_HEAD(&rqst->rq_list); 121 INIT_LIST_HEAD(&rqst->rq_list);
131 INIT_LIST_HEAD(&rqst->rq_bc_list); 122 INIT_LIST_HEAD(&rqst->rq_bc_list);
123 __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
132 124
133 if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) 125 if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
134 goto out_free; 126 goto out_free;
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
148 140
149 buffer->rb_bc_srv_max_requests = reqs; 141 buffer->rb_bc_srv_max_requests = reqs;
150 request_module("svcrdma"); 142 request_module("svcrdma");
151 143 trace_xprtrdma_cb_setup(r_xprt, reqs);
152 return 0; 144 return 0;
153 145
154out_free: 146out_free:
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
196 return maxmsg - RPCRDMA_HDRLEN_MIN; 188 return maxmsg - RPCRDMA_HDRLEN_MIN;
197} 189}
198 190
199/** 191static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
200 * rpcrdma_bc_marshal_reply - Send backwards direction reply
201 * @rqst: buffer containing RPC reply data
202 *
203 * Returns zero on success.
204 */
205int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
206{ 192{
207 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 193 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
208 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 194 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
226 if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, 212 if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
227 &rqst->rq_snd_buf, rpcrdma_noch)) 213 &rqst->rq_snd_buf, rpcrdma_noch))
228 return -EIO; 214 return -EIO;
215
216 trace_xprtrdma_cb_reply(rqst);
217 return 0;
218}
219
220/**
221 * xprt_rdma_bc_send_reply - marshal and send a backchannel reply
222 * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
223 *
224 * Caller holds the transport's write lock.
225 *
226 * Returns:
227 * %0 if the RPC message has been sent
228 * %-ENOTCONN if the caller should reconnect and call again
229 * %-EIO if a permanent error occurred and the request was not
230 * sent. Do not try to send this message again.
231 */
232int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
233{
234 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
235 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
236 int rc;
237
238 if (!xprt_connected(rqst->rq_xprt))
239 goto drop_connection;
240
241 rc = rpcrdma_bc_marshal_reply(rqst);
242 if (rc < 0)
243 goto failed_marshal;
244
245 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
246 goto drop_connection;
229 return 0; 247 return 0;
248
249failed_marshal:
250 if (rc != -ENOTCONN)
251 return rc;
252drop_connection:
253 xprt_disconnect_done(rqst->rq_xprt);
254 return -ENOTCONN;
230} 255}
231 256
232/** 257/**
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
262 dprintk("RPC: %s: freeing rqst %p (req %p)\n", 287 dprintk("RPC: %s: freeing rqst %p (req %p)\n",
263 __func__, rqst, rpcr_to_rdmar(rqst)); 288 __func__, rqst, rpcr_to_rdmar(rqst));
264 289
265 smp_mb__before_atomic();
266 WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
267 clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
268 smp_mb__after_atomic();
269
270 spin_lock_bh(&xprt->bc_pa_lock); 290 spin_lock_bh(&xprt->bc_pa_lock);
271 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); 291 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
272 spin_unlock_bh(&xprt->bc_pa_lock); 292 spin_unlock_bh(&xprt->bc_pa_lock);
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
274 294
275/** 295/**
276 * rpcrdma_bc_receive_call - Handle a backward direction call 296 * rpcrdma_bc_receive_call - Handle a backward direction call
277 * @xprt: transport receiving the call 297 * @r_xprt: transport receiving the call
278 * @rep: receive buffer containing the call 298 * @rep: receive buffer containing the call
279 * 299 *
280 * Operational assumptions: 300 * Operational assumptions:
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
313 struct rpc_rqst, rq_bc_pa_list); 333 struct rpc_rqst, rq_bc_pa_list);
314 list_del(&rqst->rq_bc_pa_list); 334 list_del(&rqst->rq_bc_pa_list);
315 spin_unlock(&xprt->bc_pa_lock); 335 spin_unlock(&xprt->bc_pa_lock);
316 dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
317 336
318 /* Prepare rqst */ 337 /* Prepare rqst */
319 rqst->rq_reply_bytes_recvd = 0; 338 rqst->rq_reply_bytes_recvd = 0;
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
321 rqst->rq_xid = *p; 340 rqst->rq_xid = *p;
322 341
323 rqst->rq_private_buf.len = size; 342 rqst->rq_private_buf.len = size;
324 set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
325 343
326 buf = &rqst->rq_rcv_buf; 344 buf = &rqst->rq_rcv_buf;
327 memset(buf, 0, sizeof(*buf)); 345 memset(buf, 0, sizeof(*buf));
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
335 * the Upper Layer is done decoding it. 353 * the Upper Layer is done decoding it.
336 */ 354 */
337 req = rpcr_to_rdmar(rqst); 355 req = rpcr_to_rdmar(rqst);
338 dprintk("RPC: %s: attaching rep %p to req %p\n",
339 __func__, rep, req);
340 req->rl_reply = rep; 356 req->rl_reply = rep;
341 357 trace_xprtrdma_cb_call(rqst);
342 /* Defeat the retransmit detection logic in send_request */
343 req->rl_connect_cookie = 0;
344 358
345 /* Queue rqst for ULP's callback service */ 359 /* Queue rqst for ULP's callback service */
346 bc_serv = xprt->bc_serv; 360 bc_serv = xprt->bc_serv;
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 29fc84c7ff98..d5f95bb39300 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,6 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
5 */ 5 */
6 6
@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
47} 47}
48 48
49static int 49static int
50fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) 50fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
51{ 51{
52 static struct ib_fmr_attr fmr_attr = { 52 static struct ib_fmr_attr fmr_attr = {
53 .max_pages = RPCRDMA_MAX_FMR_SGES, 53 .max_pages = RPCRDMA_MAX_FMR_SGES,
@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
55 .page_shift = PAGE_SHIFT 55 .page_shift = PAGE_SHIFT
56 }; 56 };
57 57
58 mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, 58 mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
59 sizeof(u64), GFP_KERNEL); 59 sizeof(u64), GFP_KERNEL);
60 if (!mw->fmr.fm_physaddrs) 60 if (!mr->fmr.fm_physaddrs)
61 goto out_free; 61 goto out_free;
62 62
63 mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, 63 mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
64 sizeof(*mw->mw_sg), GFP_KERNEL); 64 sizeof(*mr->mr_sg), GFP_KERNEL);
65 if (!mw->mw_sg) 65 if (!mr->mr_sg)
66 goto out_free; 66 goto out_free;
67 67
68 sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); 68 sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
69 69
70 mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, 70 mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
71 &fmr_attr); 71 &fmr_attr);
72 if (IS_ERR(mw->fmr.fm_mr)) 72 if (IS_ERR(mr->fmr.fm_mr))
73 goto out_fmr_err; 73 goto out_fmr_err;
74 74
75 return 0; 75 return 0;
76 76
77out_fmr_err: 77out_fmr_err:
78 dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, 78 dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
79 PTR_ERR(mw->fmr.fm_mr)); 79 PTR_ERR(mr->fmr.fm_mr));
80 80
81out_free: 81out_free:
82 kfree(mw->mw_sg); 82 kfree(mr->mr_sg);
83 kfree(mw->fmr.fm_physaddrs); 83 kfree(mr->fmr.fm_physaddrs);
84 return -ENOMEM; 84 return -ENOMEM;
85} 85}
86 86
87static int 87static int
88__fmr_unmap(struct rpcrdma_mw *mw) 88__fmr_unmap(struct rpcrdma_mr *mr)
89{ 89{
90 LIST_HEAD(l); 90 LIST_HEAD(l);
91 int rc; 91 int rc;
92 92
93 list_add(&mw->fmr.fm_mr->list, &l); 93 list_add(&mr->fmr.fm_mr->list, &l);
94 rc = ib_unmap_fmr(&l); 94 rc = ib_unmap_fmr(&l);
95 list_del(&mw->fmr.fm_mr->list); 95 list_del(&mr->fmr.fm_mr->list);
96 return rc; 96 return rc;
97} 97}
98 98
99static void 99static void
100fmr_op_release_mr(struct rpcrdma_mw *r) 100fmr_op_release_mr(struct rpcrdma_mr *mr)
101{ 101{
102 LIST_HEAD(unmap_list); 102 LIST_HEAD(unmap_list);
103 int rc; 103 int rc;
104 104
105 /* Ensure MW is not on any rl_registered list */ 105 /* Ensure MW is not on any rl_registered list */
106 if (!list_empty(&r->mw_list)) 106 if (!list_empty(&mr->mr_list))
107 list_del(&r->mw_list); 107 list_del(&mr->mr_list);
108 108
109 kfree(r->fmr.fm_physaddrs); 109 kfree(mr->fmr.fm_physaddrs);
110 kfree(r->mw_sg); 110 kfree(mr->mr_sg);
111 111
112 /* In case this one was left mapped, try to unmap it 112 /* In case this one was left mapped, try to unmap it
113 * to prevent dealloc_fmr from failing with EBUSY 113 * to prevent dealloc_fmr from failing with EBUSY
114 */ 114 */
115 rc = __fmr_unmap(r); 115 rc = __fmr_unmap(mr);
116 if (rc) 116 if (rc)
117 pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", 117 pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
118 r, rc); 118 mr, rc);
119 119
120 rc = ib_dealloc_fmr(r->fmr.fm_mr); 120 rc = ib_dealloc_fmr(mr->fmr.fm_mr);
121 if (rc) 121 if (rc)
122 pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", 122 pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
123 r, rc); 123 mr, rc);
124 124
125 kfree(r); 125 kfree(mr);
126} 126}
127 127
128/* Reset of a single FMR. 128/* Reset of a single FMR.
129 */ 129 */
130static void 130static void
131fmr_op_recover_mr(struct rpcrdma_mw *mw) 131fmr_op_recover_mr(struct rpcrdma_mr *mr)
132{ 132{
133 struct rpcrdma_xprt *r_xprt = mw->mw_xprt; 133 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
134 int rc; 134 int rc;
135 135
136 /* ORDER: invalidate first */ 136 /* ORDER: invalidate first */
137 rc = __fmr_unmap(mw); 137 rc = __fmr_unmap(mr);
138
139 /* ORDER: then DMA unmap */
140 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
141 mw->mw_sg, mw->mw_nents, mw->mw_dir);
142 if (rc) 138 if (rc)
143 goto out_release; 139 goto out_release;
144 140
145 rpcrdma_put_mw(r_xprt, mw); 141 /* ORDER: then DMA unmap */
142 rpcrdma_mr_unmap_and_put(mr);
143
146 r_xprt->rx_stats.mrs_recovered++; 144 r_xprt->rx_stats.mrs_recovered++;
147 return; 145 return;
148 146
149out_release: 147out_release:
150 pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); 148 pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
151 r_xprt->rx_stats.mrs_orphaned++; 149 r_xprt->rx_stats.mrs_orphaned++;
152 150
153 spin_lock(&r_xprt->rx_buf.rb_mwlock); 151 trace_xprtrdma_dma_unmap(mr);
154 list_del(&mw->mw_all); 152 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
155 spin_unlock(&r_xprt->rx_buf.rb_mwlock); 153 mr->mr_sg, mr->mr_nents, mr->mr_dir);
154
155 spin_lock(&r_xprt->rx_buf.rb_mrlock);
156 list_del(&mr->mr_all);
157 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
156 158
157 fmr_op_release_mr(mw); 159 fmr_op_release_mr(mr);
158} 160}
159 161
160static int 162static int
@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
180 */ 182 */
181static struct rpcrdma_mr_seg * 183static struct rpcrdma_mr_seg *
182fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, 184fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
183 int nsegs, bool writing, struct rpcrdma_mw **out) 185 int nsegs, bool writing, struct rpcrdma_mr **out)
184{ 186{
185 struct rpcrdma_mr_seg *seg1 = seg; 187 struct rpcrdma_mr_seg *seg1 = seg;
186 int len, pageoff, i, rc; 188 int len, pageoff, i, rc;
187 struct rpcrdma_mw *mw; 189 struct rpcrdma_mr *mr;
188 u64 *dma_pages; 190 u64 *dma_pages;
189 191
190 mw = rpcrdma_get_mw(r_xprt); 192 mr = rpcrdma_mr_get(r_xprt);
191 if (!mw) 193 if (!mr)
192 return ERR_PTR(-ENOBUFS); 194 return ERR_PTR(-ENOBUFS);
193 195
194 pageoff = offset_in_page(seg1->mr_offset); 196 pageoff = offset_in_page(seg1->mr_offset);
@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
199 nsegs = RPCRDMA_MAX_FMR_SGES; 201 nsegs = RPCRDMA_MAX_FMR_SGES;
200 for (i = 0; i < nsegs;) { 202 for (i = 0; i < nsegs;) {
201 if (seg->mr_page) 203 if (seg->mr_page)
202 sg_set_page(&mw->mw_sg[i], 204 sg_set_page(&mr->mr_sg[i],
203 seg->mr_page, 205 seg->mr_page,
204 seg->mr_len, 206 seg->mr_len,
205 offset_in_page(seg->mr_offset)); 207 offset_in_page(seg->mr_offset));
206 else 208 else
207 sg_set_buf(&mw->mw_sg[i], seg->mr_offset, 209 sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
208 seg->mr_len); 210 seg->mr_len);
209 len += seg->mr_len; 211 len += seg->mr_len;
210 ++seg; 212 ++seg;
@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
214 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 216 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
215 break; 217 break;
216 } 218 }
217 mw->mw_dir = rpcrdma_data_dir(writing); 219 mr->mr_dir = rpcrdma_data_dir(writing);
218 220
219 mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, 221 mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
220 mw->mw_sg, i, mw->mw_dir); 222 mr->mr_sg, i, mr->mr_dir);
221 if (!mw->mw_nents) 223 if (!mr->mr_nents)
222 goto out_dmamap_err; 224 goto out_dmamap_err;
223 225
224 for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) 226 for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
225 dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); 227 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
226 rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, 228 rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
227 dma_pages[0]); 229 dma_pages[0]);
228 if (rc) 230 if (rc)
229 goto out_maperr; 231 goto out_maperr;
230 232
231 mw->mw_handle = mw->fmr.fm_mr->rkey; 233 mr->mr_handle = mr->fmr.fm_mr->rkey;
232 mw->mw_length = len; 234 mr->mr_length = len;
233 mw->mw_offset = dma_pages[0] + pageoff; 235 mr->mr_offset = dma_pages[0] + pageoff;
234 236
235 *out = mw; 237 *out = mr;
236 return seg; 238 return seg;
237 239
238out_dmamap_err: 240out_dmamap_err:
239 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", 241 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
240 mw->mw_sg, i); 242 mr->mr_sg, i);
241 rpcrdma_put_mw(r_xprt, mw); 243 rpcrdma_mr_put(mr);
242 return ERR_PTR(-EIO); 244 return ERR_PTR(-EIO);
243 245
244out_maperr: 246out_maperr:
245 pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", 247 pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
246 len, (unsigned long long)dma_pages[0], 248 len, (unsigned long long)dma_pages[0],
247 pageoff, mw->mw_nents, rc); 249 pageoff, mr->mr_nents, rc);
248 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, 250 rpcrdma_mr_unmap_and_put(mr);
249 mw->mw_sg, mw->mw_nents, mw->mw_dir);
250 rpcrdma_put_mw(r_xprt, mw);
251 return ERR_PTR(-EIO); 251 return ERR_PTR(-EIO);
252} 252}
253 253
@@ -256,13 +256,13 @@ out_maperr:
256 * Sleeps until it is safe for the host CPU to access the 256 * Sleeps until it is safe for the host CPU to access the
257 * previously mapped memory regions. 257 * previously mapped memory regions.
258 * 258 *
259 * Caller ensures that @mws is not empty before the call. This 259 * Caller ensures that @mrs is not empty before the call. This
260 * function empties the list. 260 * function empties the list.
261 */ 261 */
262static void 262static void
263fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) 263fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
264{ 264{
265 struct rpcrdma_mw *mw; 265 struct rpcrdma_mr *mr;
266 LIST_HEAD(unmap_list); 266 LIST_HEAD(unmap_list);
267 int rc; 267 int rc;
268 268
@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
271 * ib_unmap_fmr() is slow, so use a single call instead 271 * ib_unmap_fmr() is slow, so use a single call instead
272 * of one call per mapped FMR. 272 * of one call per mapped FMR.
273 */ 273 */
274 list_for_each_entry(mw, mws, mw_list) { 274 list_for_each_entry(mr, mrs, mr_list) {
275 dprintk("RPC: %s: unmapping fmr %p\n", 275 dprintk("RPC: %s: unmapping fmr %p\n",
276 __func__, &mw->fmr); 276 __func__, &mr->fmr);
277 list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); 277 trace_xprtrdma_localinv(mr);
278 list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
278 } 279 }
279 r_xprt->rx_stats.local_inv_needed++; 280 r_xprt->rx_stats.local_inv_needed++;
280 rc = ib_unmap_fmr(&unmap_list); 281 rc = ib_unmap_fmr(&unmap_list);
@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
284 /* ORDER: Now DMA unmap all of the req's MRs, and return 285 /* ORDER: Now DMA unmap all of the req's MRs, and return
285 * them to the free MW list. 286 * them to the free MW list.
286 */ 287 */
287 while (!list_empty(mws)) { 288 while (!list_empty(mrs)) {
288 mw = rpcrdma_pop_mw(mws); 289 mr = rpcrdma_mr_pop(mrs);
289 dprintk("RPC: %s: DMA unmapping fmr %p\n", 290 list_del(&mr->fmr.fm_mr->list);
290 __func__, &mw->fmr); 291 rpcrdma_mr_unmap_and_put(mr);
291 list_del(&mw->fmr.fm_mr->list);
292 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
293 mw->mw_sg, mw->mw_nents, mw->mw_dir);
294 rpcrdma_put_mw(r_xprt, mw);
295 } 292 }
296 293
297 return; 294 return;
@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
299out_reset: 296out_reset:
300 pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); 297 pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
301 298
302 while (!list_empty(mws)) { 299 while (!list_empty(mrs)) {
303 mw = rpcrdma_pop_mw(mws); 300 mr = rpcrdma_mr_pop(mrs);
304 list_del(&mw->fmr.fm_mr->list); 301 list_del(&mr->fmr.fm_mr->list);
305 fmr_op_recover_mr(mw); 302 fmr_op_recover_mr(mr);
306 } 303 }
307} 304}
308 305
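Throughout fmr_ops.c (and frwr_ops.c below) the rpcrdma_mw structure and its mw_* fields are renamed to rpcrdma_mr/mr_*, and the repeated ib_dma_unmap_sg()-then-put sequences are folded into rpcrdma_mr_unmap_and_put(). Judging only from the call sites replaced above, that helper is roughly equivalent to the following sketch (the real definition lives elsewhere in xprtrdma):

static void example_mr_unmap_and_put(struct rpcrdma_mr *mr)
{
	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;

	/* DMA-unmap the MR's scatterlist ... */
	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
			mr->mr_sg, mr->mr_nents, mr->mr_dir);
	/* ... and return the MR to the transport's free list. */
	rpcrdma_mr_put(mr);
}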
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 773e66e10a15..90f688f19783 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,11 +1,11 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
5 */ 5 */
6 6
7/* Lightweight memory registration using Fast Registration Work 7/* Lightweight memory registration using Fast Registration Work
8 * Requests (FRWR). Also referred to sometimes as FRMR mode. 8 * Requests (FRWR).
9 * 9 *
10 * FRWR features ordered asynchronous registration and deregistration 10 * FRWR features ordered asynchronous registration and deregistration
11 * of arbitrarily sized memory regions. This is the fastest and safest 11 * of arbitrarily sized memory regions. This is the fastest and safest
@@ -15,9 +15,9 @@
15/* Normal operation 15/* Normal operation
16 * 16 *
17 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG 17 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
18 * Work Request (frmr_op_map). When the RDMA operation is finished, this 18 * Work Request (frwr_op_map). When the RDMA operation is finished, this
19 * Memory Region is invalidated using a LOCAL_INV Work Request 19 * Memory Region is invalidated using a LOCAL_INV Work Request
20 * (frmr_op_unmap). 20 * (frwr_op_unmap_sync).
21 * 21 *
22 * Typically these Work Requests are not signaled, and neither are RDMA 22 * Typically these Work Requests are not signaled, and neither are RDMA
23 * SEND Work Requests (with the exception of signaling occasionally to 23 * SEND Work Requests (with the exception of signaling occasionally to
@@ -26,7 +26,7 @@
26 * 26 *
27 * As an optimization, frwr_op_unmap marks MRs INVALID before the 27 * As an optimization, frwr_op_unmap marks MRs INVALID before the
28 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on 28 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
29 * rb_mws immediately so that no work (like managing a linked list 29 * rb_mrs immediately so that no work (like managing a linked list
30 * under a spinlock) is needed in the completion upcall. 30 * under a spinlock) is needed in the completion upcall.
31 * 31 *
32 * But this means that frwr_op_map() can occasionally encounter an MR 32 * But this means that frwr_op_map() can occasionally encounter an MR
@@ -60,7 +60,7 @@
60 * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered 60 * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
61 * with ib_dereg_mr and then are re-initialized. Because MR recovery 61 * with ib_dereg_mr and then are re-initialized. Because MR recovery
62 * allocates fresh resources, it is deferred to a workqueue, and the 62 * allocates fresh resources, it is deferred to a workqueue, and the
63 * recovered MRs are placed back on the rb_mws list when recovery is 63 * recovered MRs are placed back on the rb_mrs list when recovery is
64 * complete. frwr_op_map allocates another MR for the current RPC while 64 * complete. frwr_op_map allocates another MR for the current RPC while
65 * the broken MR is reset. 65 * the broken MR is reset.
66 * 66 *
@@ -96,26 +96,26 @@ out_not_supported:
96} 96}
97 97
98static int 98static int
99frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) 99frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
100{ 100{
101 unsigned int depth = ia->ri_max_frmr_depth; 101 unsigned int depth = ia->ri_max_frwr_depth;
102 struct rpcrdma_frmr *f = &r->frmr; 102 struct rpcrdma_frwr *frwr = &mr->frwr;
103 int rc; 103 int rc;
104 104
105 f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); 105 frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
106 if (IS_ERR(f->fr_mr)) 106 if (IS_ERR(frwr->fr_mr))
107 goto out_mr_err; 107 goto out_mr_err;
108 108
109 r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); 109 mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
110 if (!r->mw_sg) 110 if (!mr->mr_sg)
111 goto out_list_err; 111 goto out_list_err;
112 112
113 sg_init_table(r->mw_sg, depth); 113 sg_init_table(mr->mr_sg, depth);
114 init_completion(&f->fr_linv_done); 114 init_completion(&frwr->fr_linv_done);
115 return 0; 115 return 0;
116 116
117out_mr_err: 117out_mr_err:
118 rc = PTR_ERR(f->fr_mr); 118 rc = PTR_ERR(frwr->fr_mr);
119 dprintk("RPC: %s: ib_alloc_mr status %i\n", 119 dprintk("RPC: %s: ib_alloc_mr status %i\n",
120 __func__, rc); 120 __func__, rc);
121 return rc; 121 return rc;
@@ -124,83 +124,85 @@ out_list_err:
124 rc = -ENOMEM; 124 rc = -ENOMEM;
125 dprintk("RPC: %s: sg allocation failure\n", 125 dprintk("RPC: %s: sg allocation failure\n",
126 __func__); 126 __func__);
127 ib_dereg_mr(f->fr_mr); 127 ib_dereg_mr(frwr->fr_mr);
128 return rc; 128 return rc;
129} 129}
130 130
131static void 131static void
132frwr_op_release_mr(struct rpcrdma_mw *r) 132frwr_op_release_mr(struct rpcrdma_mr *mr)
133{ 133{
134 int rc; 134 int rc;
135 135
136 /* Ensure MW is not on any rl_registered list */ 136 /* Ensure MR is not on any rl_registered list */
137 if (!list_empty(&r->mw_list)) 137 if (!list_empty(&mr->mr_list))
138 list_del(&r->mw_list); 138 list_del(&mr->mr_list);
139 139
140 rc = ib_dereg_mr(r->frmr.fr_mr); 140 rc = ib_dereg_mr(mr->frwr.fr_mr);
141 if (rc) 141 if (rc)
142 pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", 142 pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
143 r, rc); 143 mr, rc);
144 kfree(r->mw_sg); 144 kfree(mr->mr_sg);
145 kfree(r); 145 kfree(mr);
146} 146}
147 147
148static int 148static int
149__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) 149__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
150{ 150{
151 struct rpcrdma_frmr *f = &r->frmr; 151 struct rpcrdma_frwr *frwr = &mr->frwr;
152 int rc; 152 int rc;
153 153
154 rc = ib_dereg_mr(f->fr_mr); 154 rc = ib_dereg_mr(frwr->fr_mr);
155 if (rc) { 155 if (rc) {
156 pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", 156 pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
157 rc, r); 157 rc, mr);
158 return rc; 158 return rc;
159 } 159 }
160 160
161 f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, 161 frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
162 ia->ri_max_frmr_depth); 162 ia->ri_max_frwr_depth);
163 if (IS_ERR(f->fr_mr)) { 163 if (IS_ERR(frwr->fr_mr)) {
164 pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", 164 pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
165 PTR_ERR(f->fr_mr), r); 165 PTR_ERR(frwr->fr_mr), mr);
166 return PTR_ERR(f->fr_mr); 166 return PTR_ERR(frwr->fr_mr);
167 } 167 }
168 168
169 dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); 169 dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
170 f->fr_state = FRMR_IS_INVALID; 170 frwr->fr_state = FRWR_IS_INVALID;
171 return 0; 171 return 0;
172} 172}
173 173
174/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. 174/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
175 */ 175 */
176static void 176static void
177frwr_op_recover_mr(struct rpcrdma_mw *mw) 177frwr_op_recover_mr(struct rpcrdma_mr *mr)
178{ 178{
179 enum rpcrdma_frmr_state state = mw->frmr.fr_state; 179 enum rpcrdma_frwr_state state = mr->frwr.fr_state;
180 struct rpcrdma_xprt *r_xprt = mw->mw_xprt; 180 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
181 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 181 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
182 int rc; 182 int rc;
183 183
184 rc = __frwr_reset_mr(ia, mw); 184 rc = __frwr_mr_reset(ia, mr);
185 if (state != FRMR_FLUSHED_LI) 185 if (state != FRWR_FLUSHED_LI) {
186 trace_xprtrdma_dma_unmap(mr);
186 ib_dma_unmap_sg(ia->ri_device, 187 ib_dma_unmap_sg(ia->ri_device,
187 mw->mw_sg, mw->mw_nents, mw->mw_dir); 188 mr->mr_sg, mr->mr_nents, mr->mr_dir);
189 }
188 if (rc) 190 if (rc)
189 goto out_release; 191 goto out_release;
190 192
191 rpcrdma_put_mw(r_xprt, mw); 193 rpcrdma_mr_put(mr);
192 r_xprt->rx_stats.mrs_recovered++; 194 r_xprt->rx_stats.mrs_recovered++;
193 return; 195 return;
194 196
195out_release: 197out_release:
196 pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw); 198 pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
197 r_xprt->rx_stats.mrs_orphaned++; 199 r_xprt->rx_stats.mrs_orphaned++;
198 200
199 spin_lock(&r_xprt->rx_buf.rb_mwlock); 201 spin_lock(&r_xprt->rx_buf.rb_mrlock);
200 list_del(&mw->mw_all); 202 list_del(&mr->mr_all);
201 spin_unlock(&r_xprt->rx_buf.rb_mwlock); 203 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
202 204
203 frwr_op_release_mr(mw); 205 frwr_op_release_mr(mr);
204} 206}
205 207
206static int 208static int
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
214 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) 216 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
215 ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; 217 ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
216 218
217 ia->ri_max_frmr_depth = 219 ia->ri_max_frwr_depth =
218 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 220 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
219 attrs->max_fast_reg_page_list_len); 221 attrs->max_fast_reg_page_list_len);
220 dprintk("RPC: %s: device's max FR page list len = %u\n", 222 dprintk("RPC: %s: device's max FR page list len = %u\n",
221 __func__, ia->ri_max_frmr_depth); 223 __func__, ia->ri_max_frwr_depth);
222 224
223 /* Add room for frmr register and invalidate WRs. 225 /* Add room for frwr register and invalidate WRs.
224 * 1. FRMR reg WR for head 226 * 1. FRWR reg WR for head
225 * 2. FRMR invalidate WR for head 227 * 2. FRWR invalidate WR for head
226 * 3. N FRMR reg WRs for pagelist 228 * 3. N FRWR reg WRs for pagelist
227 * 4. N FRMR invalidate WRs for pagelist 229 * 4. N FRWR invalidate WRs for pagelist
228 * 5. FRMR reg WR for tail 230 * 5. FRWR reg WR for tail
229 * 6. FRMR invalidate WR for tail 231 * 6. FRWR invalidate WR for tail
230 * 7. The RDMA_SEND WR 232 * 7. The RDMA_SEND WR
231 */ 233 */
232 depth = 7; 234 depth = 7;
233 235
234 /* Calculate N if the device max FRMR depth is smaller than 236 /* Calculate N if the device max FRWR depth is smaller than
235 * RPCRDMA_MAX_DATA_SEGS. 237 * RPCRDMA_MAX_DATA_SEGS.
236 */ 238 */
237 if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { 239 if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
238 delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; 240 delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
239 do { 241 do {
240 depth += 2; /* FRMR reg + invalidate */ 242 depth += 2; /* FRWR reg + invalidate */
241 delta -= ia->ri_max_frmr_depth; 243 delta -= ia->ri_max_frwr_depth;
242 } while (delta > 0); 244 } while (delta > 0);
243 } 245 }
244 246
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
252 } 254 }
253 255
254 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 256 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
255 ia->ri_max_frmr_depth); 257 ia->ri_max_frwr_depth);
256 return 0; 258 return 0;
257} 259}
258 260
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
265 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 267 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
266 268
267 return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 269 return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
268 RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth); 270 RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
269} 271}
270 272
271static void 273static void
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
286static void 288static void
287frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) 289frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
288{ 290{
289 struct rpcrdma_frmr *frmr; 291 struct ib_cqe *cqe = wc->wr_cqe;
290 struct ib_cqe *cqe; 292 struct rpcrdma_frwr *frwr =
293 container_of(cqe, struct rpcrdma_frwr, fr_cqe);
291 294
292 /* WARNING: Only wr_cqe and status are reliable at this point */ 295 /* WARNING: Only wr_cqe and status are reliable at this point */
293 if (wc->status != IB_WC_SUCCESS) { 296 if (wc->status != IB_WC_SUCCESS) {
294 cqe = wc->wr_cqe; 297 frwr->fr_state = FRWR_FLUSHED_FR;
295 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
296 frmr->fr_state = FRMR_FLUSHED_FR;
297 __frwr_sendcompletion_flush(wc, "fastreg"); 298 __frwr_sendcompletion_flush(wc, "fastreg");
298 } 299 }
300 trace_xprtrdma_wc_fastreg(wc, frwr);
299} 301}
300 302
301/** 303/**
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
307static void 309static void
308frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) 310frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
309{ 311{
310 struct rpcrdma_frmr *frmr; 312 struct ib_cqe *cqe = wc->wr_cqe;
311 struct ib_cqe *cqe; 313 struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
314 fr_cqe);
312 315
313 /* WARNING: Only wr_cqe and status are reliable at this point */ 316 /* WARNING: Only wr_cqe and status are reliable at this point */
314 if (wc->status != IB_WC_SUCCESS) { 317 if (wc->status != IB_WC_SUCCESS) {
315 cqe = wc->wr_cqe; 318 frwr->fr_state = FRWR_FLUSHED_LI;
316 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
317 frmr->fr_state = FRMR_FLUSHED_LI;
318 __frwr_sendcompletion_flush(wc, "localinv"); 319 __frwr_sendcompletion_flush(wc, "localinv");
319 } 320 }
321 trace_xprtrdma_wc_li(wc, frwr);
320} 322}
321 323
322/** 324/**
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
329static void 331static void
330frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) 332frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
331{ 333{
332 struct rpcrdma_frmr *frmr; 334 struct ib_cqe *cqe = wc->wr_cqe;
333 struct ib_cqe *cqe; 335 struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
336 fr_cqe);
334 337
335 /* WARNING: Only wr_cqe and status are reliable at this point */ 338 /* WARNING: Only wr_cqe and status are reliable at this point */
336 cqe = wc->wr_cqe;
337 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
338 if (wc->status != IB_WC_SUCCESS) { 339 if (wc->status != IB_WC_SUCCESS) {
339 frmr->fr_state = FRMR_FLUSHED_LI; 340 frwr->fr_state = FRWR_FLUSHED_LI;
340 __frwr_sendcompletion_flush(wc, "localinv"); 341 __frwr_sendcompletion_flush(wc, "localinv");
341 } 342 }
342 complete(&frmr->fr_linv_done); 343 complete(&frwr->fr_linv_done);
344 trace_xprtrdma_wc_li_wake(wc, frwr);
343} 345}
344 346
345/* Post a REG_MR Work Request to register a memory region 347/* Post a REG_MR Work Request to register a memory region
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
347 */ 349 */
348static struct rpcrdma_mr_seg * 350static struct rpcrdma_mr_seg *
349frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, 351frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
350 int nsegs, bool writing, struct rpcrdma_mw **out) 352 int nsegs, bool writing, struct rpcrdma_mr **out)
351{ 353{
352 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 354 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
353 bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; 355 bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
354 struct rpcrdma_mw *mw; 356 struct rpcrdma_frwr *frwr;
355 struct rpcrdma_frmr *frmr; 357 struct rpcrdma_mr *mr;
356 struct ib_mr *mr; 358 struct ib_mr *ibmr;
357 struct ib_reg_wr *reg_wr; 359 struct ib_reg_wr *reg_wr;
358 struct ib_send_wr *bad_wr; 360 struct ib_send_wr *bad_wr;
359 int rc, i, n; 361 int rc, i, n;
360 u8 key; 362 u8 key;
361 363
362 mw = NULL; 364 mr = NULL;
363 do { 365 do {
364 if (mw) 366 if (mr)
365 rpcrdma_defer_mr_recovery(mw); 367 rpcrdma_mr_defer_recovery(mr);
366 mw = rpcrdma_get_mw(r_xprt); 368 mr = rpcrdma_mr_get(r_xprt);
367 if (!mw) 369 if (!mr)
368 return ERR_PTR(-ENOBUFS); 370 return ERR_PTR(-ENOBUFS);
369 } while (mw->frmr.fr_state != FRMR_IS_INVALID); 371 } while (mr->frwr.fr_state != FRWR_IS_INVALID);
370 frmr = &mw->frmr; 372 frwr = &mr->frwr;
371 frmr->fr_state = FRMR_IS_VALID; 373 frwr->fr_state = FRWR_IS_VALID;
372 mr = frmr->fr_mr; 374
373 reg_wr = &frmr->fr_regwr; 375 if (nsegs > ia->ri_max_frwr_depth)
374 376 nsegs = ia->ri_max_frwr_depth;
375 if (nsegs > ia->ri_max_frmr_depth)
376 nsegs = ia->ri_max_frmr_depth;
377 for (i = 0; i < nsegs;) { 377 for (i = 0; i < nsegs;) {
378 if (seg->mr_page) 378 if (seg->mr_page)
379 sg_set_page(&mw->mw_sg[i], 379 sg_set_page(&mr->mr_sg[i],
380 seg->mr_page, 380 seg->mr_page,
381 seg->mr_len, 381 seg->mr_len,
382 offset_in_page(seg->mr_offset)); 382 offset_in_page(seg->mr_offset));
383 else 383 else
384 sg_set_buf(&mw->mw_sg[i], seg->mr_offset, 384 sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
385 seg->mr_len); 385 seg->mr_len);
386 386
387 ++seg; 387 ++seg;
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
392 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 392 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
393 break; 393 break;
394 } 394 }
395 mw->mw_dir = rpcrdma_data_dir(writing); 395 mr->mr_dir = rpcrdma_data_dir(writing);
396 396
397 mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir); 397 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
398 if (!mw->mw_nents) 398 if (!mr->mr_nents)
399 goto out_dmamap_err; 399 goto out_dmamap_err;
400 400
401 n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE); 401 ibmr = frwr->fr_mr;
402 if (unlikely(n != mw->mw_nents)) 402 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
403 if (unlikely(n != mr->mr_nents))
403 goto out_mapmr_err; 404 goto out_mapmr_err;
404 405
405 dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", 406 key = (u8)(ibmr->rkey & 0x000000FF);
406 __func__, frmr, mw->mw_nents, mr->length); 407 ib_update_fast_reg_key(ibmr, ++key);
407
408 key = (u8)(mr->rkey & 0x000000FF);
409 ib_update_fast_reg_key(mr, ++key);
410 408
409 reg_wr = &frwr->fr_regwr;
411 reg_wr->wr.next = NULL; 410 reg_wr->wr.next = NULL;
412 reg_wr->wr.opcode = IB_WR_REG_MR; 411 reg_wr->wr.opcode = IB_WR_REG_MR;
413 frmr->fr_cqe.done = frwr_wc_fastreg; 412 frwr->fr_cqe.done = frwr_wc_fastreg;
414 reg_wr->wr.wr_cqe = &frmr->fr_cqe; 413 reg_wr->wr.wr_cqe = &frwr->fr_cqe;
415 reg_wr->wr.num_sge = 0; 414 reg_wr->wr.num_sge = 0;
416 reg_wr->wr.send_flags = 0; 415 reg_wr->wr.send_flags = 0;
417 reg_wr->mr = mr; 416 reg_wr->mr = ibmr;
418 reg_wr->key = mr->rkey; 417 reg_wr->key = ibmr->rkey;
419 reg_wr->access = writing ? 418 reg_wr->access = writing ?
420 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 419 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
421 IB_ACCESS_REMOTE_READ; 420 IB_ACCESS_REMOTE_READ;
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
424 if (rc) 423 if (rc)
425 goto out_senderr; 424 goto out_senderr;
426 425
427 mw->mw_handle = mr->rkey; 426 mr->mr_handle = ibmr->rkey;
428 mw->mw_length = mr->length; 427 mr->mr_length = ibmr->length;
429 mw->mw_offset = mr->iova; 428 mr->mr_offset = ibmr->iova;
430 429
431 *out = mw; 430 *out = mr;
432 return seg; 431 return seg;
433 432
434out_dmamap_err: 433out_dmamap_err:
435 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", 434 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
436 mw->mw_sg, i); 435 mr->mr_sg, i);
437 frmr->fr_state = FRMR_IS_INVALID; 436 frwr->fr_state = FRWR_IS_INVALID;
438 rpcrdma_put_mw(r_xprt, mw); 437 rpcrdma_mr_put(mr);
439 return ERR_PTR(-EIO); 438 return ERR_PTR(-EIO);
440 439
441out_mapmr_err: 440out_mapmr_err:
442 pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", 441 pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
443 frmr->fr_mr, n, mw->mw_nents); 442 frwr->fr_mr, n, mr->mr_nents);
444 rpcrdma_defer_mr_recovery(mw); 443 rpcrdma_mr_defer_recovery(mr);
445 return ERR_PTR(-EIO); 444 return ERR_PTR(-EIO);
446 445
447out_senderr: 446out_senderr:
448 pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); 447 pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
449 rpcrdma_defer_mr_recovery(mw); 448 rpcrdma_mr_defer_recovery(mr);
450 return ERR_PTR(-ENOTCONN); 449 return ERR_PTR(-ENOTCONN);
451} 450}
452 451
452/* Handle a remotely invalidated mr on the @mrs list
453 */
454static void
455frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
456{
457 struct rpcrdma_mr *mr;
458
459 list_for_each_entry(mr, mrs, mr_list)
460 if (mr->mr_handle == rep->rr_inv_rkey) {
461 list_del(&mr->mr_list);
462 trace_xprtrdma_remoteinv(mr);
463 mr->frwr.fr_state = FRWR_IS_INVALID;
464 rpcrdma_mr_unmap_and_put(mr);
465 break; /* only one invalidated MR per RPC */
466 }
467}
468
453/* Invalidate all memory regions that were registered for "req". 469/* Invalidate all memory regions that were registered for "req".
454 * 470 *
455 * Sleeps until it is safe for the host CPU to access the 471 * Sleeps until it is safe for the host CPU to access the
456 * previously mapped memory regions. 472 * previously mapped memory regions.
457 * 473 *
458 * Caller ensures that @mws is not empty before the call. This 474 * Caller ensures that @mrs is not empty before the call. This
459 * function empties the list. 475 * function empties the list.
460 */ 476 */
461static void 477static void
462frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) 478frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
463{ 479{
464 struct ib_send_wr *first, **prev, *last, *bad_wr; 480 struct ib_send_wr *first, **prev, *last, *bad_wr;
465 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 481 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
466 struct rpcrdma_frmr *f; 482 struct rpcrdma_frwr *frwr;
467 struct rpcrdma_mw *mw; 483 struct rpcrdma_mr *mr;
468 int count, rc; 484 int count, rc;
469 485
470 /* ORDER: Invalidate all of the MRs first 486 /* ORDER: Invalidate all of the MRs first
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
472 * Chain the LOCAL_INV Work Requests and post them with 488 * Chain the LOCAL_INV Work Requests and post them with
473 * a single ib_post_send() call. 489 * a single ib_post_send() call.
474 */ 490 */
475 f = NULL; 491 frwr = NULL;
476 count = 0; 492 count = 0;
477 prev = &first; 493 prev = &first;
478 list_for_each_entry(mw, mws, mw_list) { 494 list_for_each_entry(mr, mrs, mr_list) {
479 mw->frmr.fr_state = FRMR_IS_INVALID; 495 mr->frwr.fr_state = FRWR_IS_INVALID;
480 496
481 if (mw->mw_flags & RPCRDMA_MW_F_RI) 497 frwr = &mr->frwr;
482 continue; 498 trace_xprtrdma_localinv(mr);
483 499
484 f = &mw->frmr; 500 frwr->fr_cqe.done = frwr_wc_localinv;
485 dprintk("RPC: %s: invalidating frmr %p\n", 501 last = &frwr->fr_invwr;
486 __func__, f);
487
488 f->fr_cqe.done = frwr_wc_localinv;
489 last = &f->fr_invwr;
490 memset(last, 0, sizeof(*last)); 502 memset(last, 0, sizeof(*last));
491 last->wr_cqe = &f->fr_cqe; 503 last->wr_cqe = &frwr->fr_cqe;
492 last->opcode = IB_WR_LOCAL_INV; 504 last->opcode = IB_WR_LOCAL_INV;
493 last->ex.invalidate_rkey = mw->mw_handle; 505 last->ex.invalidate_rkey = mr->mr_handle;
494 count++; 506 count++;
495 507
496 *prev = last; 508 *prev = last;
497 prev = &last->next; 509 prev = &last->next;
498 } 510 }
499 if (!f) 511 if (!frwr)
500 goto unmap; 512 goto unmap;
501 513
502 /* Strong send queue ordering guarantees that when the 514 /* Strong send queue ordering guarantees that when the
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
504 * are complete. 516 * are complete.
505 */ 517 */
506 last->send_flags = IB_SEND_SIGNALED; 518 last->send_flags = IB_SEND_SIGNALED;
507 f->fr_cqe.done = frwr_wc_localinv_wake; 519 frwr->fr_cqe.done = frwr_wc_localinv_wake;
508 reinit_completion(&f->fr_linv_done); 520 reinit_completion(&frwr->fr_linv_done);
509 521
510 /* Transport disconnect drains the receive CQ before it 522 /* Transport disconnect drains the receive CQ before it
511 * replaces the QP. The RPC reply handler won't call us 523 * replaces the QP. The RPC reply handler won't call us
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
515 bad_wr = NULL; 527 bad_wr = NULL;
516 rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); 528 rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
517 if (bad_wr != first) 529 if (bad_wr != first)
518 wait_for_completion(&f->fr_linv_done); 530 wait_for_completion(&frwr->fr_linv_done);
519 if (rc) 531 if (rc)
520 goto reset_mrs; 532 goto reset_mrs;
521 533
522 /* ORDER: Now DMA unmap all of the MRs, and return 534 /* ORDER: Now DMA unmap all of the MRs, and return
523 * them to the free MW list. 535 * them to the free MR list.
524 */ 536 */
525unmap: 537unmap:
526 while (!list_empty(mws)) { 538 while (!list_empty(mrs)) {
527 mw = rpcrdma_pop_mw(mws); 539 mr = rpcrdma_mr_pop(mrs);
528 dprintk("RPC: %s: DMA unmapping frmr %p\n", 540 rpcrdma_mr_unmap_and_put(mr);
529 __func__, &mw->frmr);
530 ib_dma_unmap_sg(ia->ri_device,
531 mw->mw_sg, mw->mw_nents, mw->mw_dir);
532 rpcrdma_put_mw(r_xprt, mw);
533 } 541 }
534 return; 542 return;
535 543
536reset_mrs: 544reset_mrs:
537 pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc); 545 pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
538 546
539 /* Find and reset the MRs in the LOCAL_INV WRs that did not 547 /* Find and reset the MRs in the LOCAL_INV WRs that did not
540 * get posted. 548 * get posted.
541 */ 549 */
542 while (bad_wr) { 550 while (bad_wr) {
543 f = container_of(bad_wr, struct rpcrdma_frmr, 551 frwr = container_of(bad_wr, struct rpcrdma_frwr,
544 fr_invwr); 552 fr_invwr);
545 mw = container_of(f, struct rpcrdma_mw, frmr); 553 mr = container_of(frwr, struct rpcrdma_mr, frwr);
546 554
547 __frwr_reset_mr(ia, mw); 555 __frwr_mr_reset(ia, mr);
548 556
549 bad_wr = bad_wr->next; 557 bad_wr = bad_wr->next;
550 } 558 }
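
The unmap path above leans on send queue ordering: one LOCAL_INV work request is chained per MR, only the final WR carries IB_SEND_SIGNALED, and the caller sleeps until that WR's completion handler (frwr_wc_localinv_wake, earlier in this file) calls complete(). A stripped-down sketch of the same post-and-wait shape, with demo_* names standing in for the rpcrdma structures:

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/completion.h>
#include <rdma/ib_verbs.h>

struct demo_invalidate {
	struct ib_cqe		cqe;
	struct ib_send_wr	wr;
	struct completion	done;
};

/* Runs in completion-queue context; only wr_cqe and status are safe to use. */
static void demo_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
	struct demo_invalidate *inv =
		container_of(wc->wr_cqe, struct demo_invalidate, cqe);

	complete(&inv->done);
}

static int demo_post_and_wait(struct ib_qp *qp, struct demo_invalidate *inv,
			      u32 rkey)
{
	struct ib_send_wr *bad_wr = NULL;
	int rc;

	memset(&inv->wr, 0, sizeof(inv->wr));
	inv->cqe.done = demo_localinv_wake;
	inv->wr.wr_cqe = &inv->cqe;
	inv->wr.opcode = IB_WR_LOCAL_INV;
	inv->wr.ex.invalidate_rkey = rkey;
	inv->wr.send_flags = IB_SEND_SIGNALED;	/* signal only the last WR */

	init_completion(&inv->done);
	rc = ib_post_send(qp, &inv->wr, &bad_wr);
	if (bad_wr != &inv->wr)
		wait_for_completion(&inv->done);
	return rc;
}

With a longer chain the earlier WRs stay unsignaled; because WRs on a QP complete in order, waking on the last one implies every invalidation posted before it has also finished.
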
@@ -553,6 +561,7 @@ reset_mrs:
553 561
554const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 562const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
555 .ro_map = frwr_op_map, 563 .ro_map = frwr_op_map,
564 .ro_reminv = frwr_op_reminv,
556 .ro_unmap_sync = frwr_op_unmap_sync, 565 .ro_unmap_sync = frwr_op_unmap_sync,
557 .ro_recover_mr = frwr_op_recover_mr, 566 .ro_recover_mr = frwr_op_recover_mr,
558 .ro_open = frwr_op_open, 567 .ro_open = frwr_op_open,
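
The table update above is the whole integration point for the new handler: each memory registration strategy is selected once at IA-open time, and callers only ever dispatch through rpcrdma_memreg_ops (the reply path later in this patch calls ri_ops->ro_reminv() when a Receive completion carries IB_WC_WITH_INVALIDATE). A compressed sketch of that dispatch shape; the struct and argument types below are simplified stand-ins, not the real rpcrdma definitions:

#include <linux/types.h>

struct demo_rep;		/* placeholder for rpcrdma_rep */
struct demo_mr_list;		/* placeholder for the registered-MR list */

struct demo_memreg_ops {
	void		(*ro_reminv)(struct demo_rep *rep,
				     struct demo_mr_list *mrs);
	void		(*ro_unmap_sync)(struct demo_mr_list *mrs);
	const char	*ro_displayname;
};

/* Callers never test which strategy is active; they just dispatch: */
static void demo_handle_reply(const struct demo_memreg_ops *ops,
			      struct demo_rep *rep,
			      struct demo_mr_list *registered,
			      bool remotely_invalidated)
{
	if (remotely_invalidated)
		ops->ro_reminv(rep, registered);	/* hook added here */
}
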
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 560712bd9fa2..a762d192372b 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,18 +1,20 @@
1/* 1/*
2 * Copyright (c) 2015 Oracle. All rights reserved. 2 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
3 */ 3 */
4 4
5/* rpcrdma.ko module initialization 5/* rpcrdma.ko module initialization
6 */ 6 */
7 7
8#include <linux/types.h>
9#include <linux/compiler.h>
8#include <linux/module.h> 10#include <linux/module.h>
9#include <linux/init.h> 11#include <linux/init.h>
10#include <linux/sunrpc/svc_rdma.h> 12#include <linux/sunrpc/svc_rdma.h>
11#include "xprt_rdma.h"
12 13
13#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 14#include <asm/swab.h>
14# define RPCDBG_FACILITY RPCDBG_TRANS 15
15#endif 16#define CREATE_TRACE_POINTS
17#include "xprt_rdma.h"
16 18
17MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); 19MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
18MODULE_DESCRIPTION("RPC/RDMA Transport"); 20MODULE_DESCRIPTION("RPC/RDMA Transport");
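
The interesting change in module.c is the CREATE_TRACE_POINTS define ahead of xprt_rdma.h: that is the kernel convention for instantiating the bodies of the trace_xprtrdma_* tracepoints used throughout this patch in exactly one object file, while every other includer sees only declarations. A minimal sketch of the pattern with illustrative demo names (the real events sit behind xprt_rdma.h; a Makefile -I$(src) flag is assumed for the local header):

/* demo_trace.h -- declares one trace event */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM demo

#if !defined(_DEMO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _DEMO_TRACE_H

#include <linux/tracepoint.h>

TRACE_EVENT(demo_event,
	TP_PROTO(int value),
	TP_ARGS(value),
	TP_STRUCT__entry(
		__field(int, value)
	),
	TP_fast_assign(
		__entry->value = value;
	),
	TP_printk("value=%d", __entry->value)
);

#endif /* _DEMO_TRACE_H */

/* These must come last so define_trace.h can re-read this header. */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE demo_trace
#include <trace/define_trace.h>

/* demo_main.c -- exactly one .c file defines the tracepoint bodies */
#define CREATE_TRACE_POINTS
#include "demo_trace.h"

/* any caller then simply invokes trace_demo_event(42); */
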
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ed34dc0f144c..162e5dd82466 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
292} 292}
293 293
294static void 294static void
295xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) 295xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
296{ 296{
297 *iptr++ = cpu_to_be32(mw->mw_handle); 297 *iptr++ = cpu_to_be32(mr->mr_handle);
298 *iptr++ = cpu_to_be32(mw->mw_length); 298 *iptr++ = cpu_to_be32(mr->mr_length);
299 xdr_encode_hyper(iptr, mw->mw_offset); 299 xdr_encode_hyper(iptr, mr->mr_offset);
300} 300}
301 301
302static int 302static int
303encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) 303encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
304{ 304{
305 __be32 *p; 305 __be32 *p;
306 306
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
308 if (unlikely(!p)) 308 if (unlikely(!p))
309 return -EMSGSIZE; 309 return -EMSGSIZE;
310 310
311 xdr_encode_rdma_segment(p, mw); 311 xdr_encode_rdma_segment(p, mr);
312 return 0; 312 return 0;
313} 313}
314 314
315static int 315static int
316encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, 316encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
317 u32 position) 317 u32 position)
318{ 318{
319 __be32 *p; 319 __be32 *p;
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
324 324
325 *p++ = xdr_one; /* Item present */ 325 *p++ = xdr_one; /* Item present */
326 *p++ = cpu_to_be32(position); 326 *p++ = cpu_to_be32(position);
327 xdr_encode_rdma_segment(p, mw); 327 xdr_encode_rdma_segment(p, mr);
328 return 0; 328 return 0;
329} 329}
330 330
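
For context on the hunk above: an RPC-over-RDMA version 1 segment is three XDR fields, a 32-bit handle (rkey), a 32-bit length and a 64-bit offset, which is what xdr_encode_rdma_segment() now writes from the renamed rpcrdma_mr. A hedged sketch of that wire layout (the helper name and buffer handling are illustrative):

#include <linux/kernel.h>
#include <linux/types.h>
#include <asm/byteorder.h>

/* Pack one RDMA segment into four big-endian XDR words. */
static __be32 *demo_encode_segment(__be32 *p, u32 handle, u32 length, u64 offset)
{
	*p++ = cpu_to_be32(handle);
	*p++ = cpu_to_be32(length);
	*p++ = cpu_to_be32(upper_32_bits(offset));	/* XDR hyper, high word first */
	*p++ = cpu_to_be32(lower_32_bits(offset));
	return p;
}

In the read list each such segment is additionally preceded by an item-present discriminator and a position word, exactly as encode_read_segment() shows above.
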
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
348{ 348{
349 struct xdr_stream *xdr = &req->rl_stream; 349 struct xdr_stream *xdr = &req->rl_stream;
350 struct rpcrdma_mr_seg *seg; 350 struct rpcrdma_mr_seg *seg;
351 struct rpcrdma_mw *mw; 351 struct rpcrdma_mr *mr;
352 unsigned int pos; 352 unsigned int pos;
353 int nsegs; 353 int nsegs;
354 354
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
363 363
364 do { 364 do {
365 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 365 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
366 false, &mw); 366 false, &mr);
367 if (IS_ERR(seg)) 367 if (IS_ERR(seg))
368 return PTR_ERR(seg); 368 return PTR_ERR(seg);
369 rpcrdma_push_mw(mw, &req->rl_registered); 369 rpcrdma_mr_push(mr, &req->rl_registered);
370 370
371 if (encode_read_segment(xdr, mw, pos) < 0) 371 if (encode_read_segment(xdr, mr, pos) < 0)
372 return -EMSGSIZE; 372 return -EMSGSIZE;
373 373
374 dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", 374 trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
375 rqst->rq_task->tk_pid, __func__, pos,
376 mw->mw_length, (unsigned long long)mw->mw_offset,
377 mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
378
379 r_xprt->rx_stats.read_chunk_count++; 375 r_xprt->rx_stats.read_chunk_count++;
380 nsegs -= mw->mw_nents; 376 nsegs -= mr->mr_nents;
381 } while (nsegs); 377 } while (nsegs);
382 378
383 return 0; 379 return 0;
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
404{ 400{
405 struct xdr_stream *xdr = &req->rl_stream; 401 struct xdr_stream *xdr = &req->rl_stream;
406 struct rpcrdma_mr_seg *seg; 402 struct rpcrdma_mr_seg *seg;
407 struct rpcrdma_mw *mw; 403 struct rpcrdma_mr *mr;
408 int nsegs, nchunks; 404 int nsegs, nchunks;
409 __be32 *segcount; 405 __be32 *segcount;
410 406
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
425 nchunks = 0; 421 nchunks = 0;
426 do { 422 do {
427 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 423 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
428 true, &mw); 424 true, &mr);
429 if (IS_ERR(seg)) 425 if (IS_ERR(seg))
430 return PTR_ERR(seg); 426 return PTR_ERR(seg);
431 rpcrdma_push_mw(mw, &req->rl_registered); 427 rpcrdma_mr_push(mr, &req->rl_registered);
432 428
433 if (encode_rdma_segment(xdr, mw) < 0) 429 if (encode_rdma_segment(xdr, mr) < 0)
434 return -EMSGSIZE; 430 return -EMSGSIZE;
435 431
436 dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", 432 trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
437 rqst->rq_task->tk_pid, __func__,
438 mw->mw_length, (unsigned long long)mw->mw_offset,
439 mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
440
441 r_xprt->rx_stats.write_chunk_count++; 433 r_xprt->rx_stats.write_chunk_count++;
442 r_xprt->rx_stats.total_rdma_request += seg->mr_len; 434 r_xprt->rx_stats.total_rdma_request += mr->mr_length;
443 nchunks++; 435 nchunks++;
444 nsegs -= mw->mw_nents; 436 nsegs -= mr->mr_nents;
445 } while (nsegs); 437 } while (nsegs);
446 438
447 /* Update count of segments in this Write chunk */ 439 /* Update count of segments in this Write chunk */
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
468{ 460{
469 struct xdr_stream *xdr = &req->rl_stream; 461 struct xdr_stream *xdr = &req->rl_stream;
470 struct rpcrdma_mr_seg *seg; 462 struct rpcrdma_mr_seg *seg;
471 struct rpcrdma_mw *mw; 463 struct rpcrdma_mr *mr;
472 int nsegs, nchunks; 464 int nsegs, nchunks;
473 __be32 *segcount; 465 __be32 *segcount;
474 466
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
487 nchunks = 0; 479 nchunks = 0;
488 do { 480 do {
489 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 481 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
490 true, &mw); 482 true, &mr);
491 if (IS_ERR(seg)) 483 if (IS_ERR(seg))
492 return PTR_ERR(seg); 484 return PTR_ERR(seg);
493 rpcrdma_push_mw(mw, &req->rl_registered); 485 rpcrdma_mr_push(mr, &req->rl_registered);
494 486
495 if (encode_rdma_segment(xdr, mw) < 0) 487 if (encode_rdma_segment(xdr, mr) < 0)
496 return -EMSGSIZE; 488 return -EMSGSIZE;
497 489
498 dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", 490 trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
499 rqst->rq_task->tk_pid, __func__,
500 mw->mw_length, (unsigned long long)mw->mw_offset,
501 mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
502
503 r_xprt->rx_stats.reply_chunk_count++; 491 r_xprt->rx_stats.reply_chunk_count++;
504 r_xprt->rx_stats.total_rdma_request += seg->mr_len; 492 r_xprt->rx_stats.total_rdma_request += mr->mr_length;
505 nchunks++; 493 nchunks++;
506 nsegs -= mw->mw_nents; 494 nsegs -= mr->mr_nents;
507 } while (nsegs); 495 } while (nsegs);
508 496
509 /* Update count of segments in the Reply chunk */ 497 /* Update count of segments in the Reply chunk */
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
524 struct ib_sge *sge; 512 struct ib_sge *sge;
525 unsigned int count; 513 unsigned int count;
526 514
527 dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
528 __func__, sc->sc_unmap_count, sc);
529
530 /* The first two SGEs contain the transport header and 515 /* The first two SGEs contain the transport header and
531 * the inline buffer. These are always left mapped so 516 * the inline buffer. These are always left mapped so
532 * they can be cheaply re-used. 517 * they can be cheaply re-used.
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
754 __be32 *p; 739 __be32 *p;
755 int ret; 740 int ret;
756 741
757#if defined(CONFIG_SUNRPC_BACKCHANNEL)
758 if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
759 return rpcrdma_bc_marshal_reply(rqst);
760#endif
761
762 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); 742 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
763 xdr_init_encode(xdr, &req->rl_hdrbuf, 743 xdr_init_encode(xdr, &req->rl_hdrbuf,
764 req->rl_rdmabuf->rg_base); 744 req->rl_rdmabuf->rg_base);
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
821 rtype = rpcrdma_areadch; 801 rtype = rpcrdma_areadch;
822 } 802 }
823 803
804 /* If this is a retransmit, discard previously registered
805 * chunks. Very likely the connection has been replaced,
806 * so these registrations are invalid and unusable.
807 */
808 while (unlikely(!list_empty(&req->rl_registered))) {
809 struct rpcrdma_mr *mr;
810
811 mr = rpcrdma_mr_pop(&req->rl_registered);
812 rpcrdma_mr_defer_recovery(mr);
813 }
814
824 /* This implementation supports the following combinations 815 /* This implementation supports the following combinations
825 * of chunk lists in one RPC-over-RDMA Call message: 816 * of chunk lists in one RPC-over-RDMA Call message:
826 * 817 *
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
868 if (ret) 859 if (ret)
869 goto out_err; 860 goto out_err;
870 861
871 dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", 862 trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
872 rqst->rq_task->tk_pid, __func__,
873 transfertypes[rtype], transfertypes[wtype],
874 xdr_stream_pos(xdr));
875 863
876 ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), 864 ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
877 &rqst->rq_snd_buf, rtype); 865 &rqst->rq_snd_buf, rtype);
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
926 curlen = rqst->rq_rcv_buf.head[0].iov_len; 914 curlen = rqst->rq_rcv_buf.head[0].iov_len;
927 if (curlen > copy_len) 915 if (curlen > copy_len)
928 curlen = copy_len; 916 curlen = copy_len;
929 dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", 917 trace_xprtrdma_fixup(rqst, copy_len, curlen);
930 __func__, srcp, copy_len, curlen);
931 srcp += curlen; 918 srcp += curlen;
932 copy_len -= curlen; 919 copy_len -= curlen;
933 920
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
947 if (curlen > pagelist_len) 934 if (curlen > pagelist_len)
948 curlen = pagelist_len; 935 curlen = pagelist_len;
949 936
950 dprintk("RPC: %s: page %d" 937 trace_xprtrdma_fixup_pg(rqst, i, srcp,
951 " srcp 0x%p len %d curlen %d\n", 938 copy_len, curlen);
952 __func__, i, srcp, copy_len, curlen);
953 destp = kmap_atomic(ppages[i]); 939 destp = kmap_atomic(ppages[i]);
954 memcpy(destp + page_base, srcp, curlen); 940 memcpy(destp + page_base, srcp, curlen);
955 flush_dcache_page(ppages[i]); 941 flush_dcache_page(ppages[i]);
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
984 return fixup_copy_count; 970 return fixup_copy_count;
985} 971}
986 972
987/* Caller must guarantee @rep remains stable during this call.
988 */
989static void
990rpcrdma_mark_remote_invalidation(struct list_head *mws,
991 struct rpcrdma_rep *rep)
992{
993 struct rpcrdma_mw *mw;
994
995 if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
996 return;
997
998 list_for_each_entry(mw, mws, mw_list)
999 if (mw->mw_handle == rep->rr_inv_rkey) {
1000 mw->mw_flags = RPCRDMA_MW_F_RI;
1001 break; /* only one invalidated MR per RPC */
1002 }
1003}
1004
1005/* By convention, backchannel calls arrive via rdma_msg type 973/* By convention, backchannel calls arrive via rdma_msg type
1006 * messages, and never populate the chunk lists. This makes 974 * messages, and never populate the chunk lists. This makes
1007 * the RPC/RDMA header small and fixed in size, so it is 975 * the RPC/RDMA header small and fixed in size, so it is
@@ -1058,26 +1026,19 @@ out_short:
1058 1026
1059static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) 1027static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
1060{ 1028{
1029 u32 handle;
1030 u64 offset;
1061 __be32 *p; 1031 __be32 *p;
1062 1032
1063 p = xdr_inline_decode(xdr, 4 * sizeof(*p)); 1033 p = xdr_inline_decode(xdr, 4 * sizeof(*p));
1064 if (unlikely(!p)) 1034 if (unlikely(!p))
1065 return -EIO; 1035 return -EIO;
1066 1036
1067 ifdebug(FACILITY) { 1037 handle = be32_to_cpup(p++);
1068 u64 offset; 1038 *length = be32_to_cpup(p++);
1069 u32 handle; 1039 xdr_decode_hyper(p, &offset);
1070
1071 handle = be32_to_cpup(p++);
1072 *length = be32_to_cpup(p++);
1073 xdr_decode_hyper(p, &offset);
1074 dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
1075 __func__, *length, (unsigned long long)offset,
1076 handle);
1077 } else {
1078 *length = be32_to_cpup(p + 1);
1079 }
1080 1040
1041 trace_xprtrdma_decode_seg(handle, *length, offset);
1081 return 0; 1042 return 0;
1082} 1043}
1083 1044
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
1098 *length += seglength; 1059 *length += seglength;
1099 } 1060 }
1100 1061
1101 dprintk("RPC: %s: segcount=%u, %u bytes\n",
1102 __func__, be32_to_cpup(p), *length);
1103 return 0; 1062 return 0;
1104} 1063}
1105 1064
@@ -1296,8 +1255,7 @@ out:
1296 * being marshaled. 1255 * being marshaled.
1297 */ 1256 */
1298out_badheader: 1257out_badheader:
1299 dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", 1258 trace_xprtrdma_reply_hdr(rep);
1300 rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
1301 r_xprt->rx_stats.bad_reply_count++; 1259 r_xprt->rx_stats.bad_reply_count++;
1302 status = -EIO; 1260 status = -EIO;
1303 goto out; 1261 goto out;
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
1339 struct rpcrdma_rep *rep = 1297 struct rpcrdma_rep *rep =
1340 container_of(work, struct rpcrdma_rep, rr_work); 1298 container_of(work, struct rpcrdma_rep, rr_work);
1341 struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); 1299 struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
1300 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1342 1301
1343 rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); 1302 trace_xprtrdma_defer_cmp(rep);
1344 rpcrdma_release_rqst(rep->rr_rxprt, req); 1303 if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
1304 r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
1305 rpcrdma_release_rqst(r_xprt, req);
1345 rpcrdma_complete_rqst(rep); 1306 rpcrdma_complete_rqst(rep);
1346} 1307}
1347 1308
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1360 u32 credits; 1321 u32 credits;
1361 __be32 *p; 1322 __be32 *p;
1362 1323
1363 dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
1364
1365 if (rep->rr_hdrbuf.head[0].iov_len == 0) 1324 if (rep->rr_hdrbuf.head[0].iov_len == 0)
1366 goto out_badstatus; 1325 goto out_badstatus;
1367 1326
@@ -1405,14 +1364,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1405 rep->rr_rqst = rqst; 1364 rep->rr_rqst = rqst;
1406 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 1365 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
1407 1366
1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", 1367 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
1409 __func__, rep, req, be32_to_cpu(rep->rr_xid));
1410 1368
1411 if (list_empty(&req->rl_registered) && 1369 queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
1412 !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
1413 rpcrdma_complete_rqst(rep);
1414 else
1415 queue_work(rpcrdma_receive_wq, &rep->rr_work);
1416 return; 1370 return;
1417 1371
1418out_badstatus: 1372out_badstatus:
@@ -1424,8 +1378,7 @@ out_badstatus:
1424 return; 1378 return;
1425 1379
1426out_badversion: 1380out_badversion:
1427 dprintk("RPC: %s: invalid version %d\n", 1381 trace_xprtrdma_reply_vers(rep);
1428 __func__, be32_to_cpu(rep->rr_vers));
1429 goto repost; 1382 goto repost;
1430 1383
1431/* The RPC transaction has already been terminated, or the header 1384/* The RPC transaction has already been terminated, or the header
@@ -1433,12 +1386,11 @@ out_badversion:
1433 */ 1386 */
1434out_norqst: 1387out_norqst:
1435 spin_unlock(&xprt->recv_lock); 1388 spin_unlock(&xprt->recv_lock);
1436 dprintk("RPC: %s: no match for incoming xid 0x%08x\n", 1389 trace_xprtrdma_reply_rqst(rep);
1437 __func__, be32_to_cpu(rep->rr_xid));
1438 goto repost; 1390 goto repost;
1439 1391
1440out_shortreply: 1392out_shortreply:
1441 dprintk("RPC: %s: short/invalid reply\n", __func__); 1393 trace_xprtrdma_reply_short(rep);
1442 1394
1443/* If no pending RPC transaction was matched, post a replacement 1395/* If no pending RPC transaction was matched, post a replacement
1444 * receive buffer before returning. 1396 * receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 646c24494ea7..4b1ecfe979cf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,6 +52,7 @@
52#include <linux/slab.h> 52#include <linux/slab.h>
53#include <linux/seq_file.h> 53#include <linux/seq_file.h>
54#include <linux/sunrpc/addr.h> 54#include <linux/sunrpc/addr.h>
55#include <linux/smp.h>
55 56
56#include "xprt_rdma.h" 57#include "xprt_rdma.h"
57 58
@@ -66,8 +67,7 @@
66static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; 67static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
67unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 68unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
68static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 69static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
69static unsigned int xprt_rdma_inline_write_padding; 70unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
70unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
71int xprt_rdma_pad_optimize; 71int xprt_rdma_pad_optimize;
72 72
73#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 73#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -80,6 +80,7 @@ static unsigned int zero;
80static unsigned int max_padding = PAGE_SIZE; 80static unsigned int max_padding = PAGE_SIZE;
81static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; 81static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
82static unsigned int max_memreg = RPCRDMA_LAST - 1; 82static unsigned int max_memreg = RPCRDMA_LAST - 1;
83static unsigned int dummy;
83 84
84static struct ctl_table_header *sunrpc_table_header; 85static struct ctl_table_header *sunrpc_table_header;
85 86
@@ -113,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = {
113 }, 114 },
114 { 115 {
115 .procname = "rdma_inline_write_padding", 116 .procname = "rdma_inline_write_padding",
116 .data = &xprt_rdma_inline_write_padding, 117 .data = &dummy,
117 .maxlen = sizeof(unsigned int), 118 .maxlen = sizeof(unsigned int),
118 .mode = 0644, 119 .mode = 0644,
119 .proc_handler = proc_dointvec_minmax, 120 .proc_handler = proc_dointvec_minmax,
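
This hunk retires the rdma_inline_write_padding tunable without breaking userspace: the sysctl keeps its name, mode and handler, but its .data pointer now targets a throwaway variable, so writes are accepted and silently ignored. A minimal sketch of the same technique with illustrative names:

#include <linux/sysctl.h>

static unsigned int demo_dummy;			/* accepted, never read */
static unsigned int demo_min, demo_max = 256;

static struct ctl_table demo_tunables[] = {
	{
		.procname	= "demo_retired_knob",
		.data		= &demo_dummy,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &demo_min,
		.extra2		= &demo_max,
	},
	{ },
};
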
@@ -258,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
258 259
259 xprt_clear_connected(xprt); 260 xprt_clear_connected(xprt);
260 261
261 dprintk("RPC: %s: %sconnect\n", __func__,
262 r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
263 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); 262 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
264 if (rc) 263 if (rc)
265 xprt_wake_pending_tasks(xprt, rc); 264 xprt_wake_pending_tasks(xprt, rc);
266 265
267 dprintk("RPC: %s: exit\n", __func__);
268 xprt_clear_connecting(xprt); 266 xprt_clear_connecting(xprt);
269} 267}
270 268
@@ -274,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
274 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, 272 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
275 rx_xprt); 273 rx_xprt);
276 274
277 pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); 275 trace_xprtrdma_inject_dsc(r_xprt);
278 rdma_disconnect(r_xprt->rx_ia.ri_id); 276 rdma_disconnect(r_xprt->rx_ia.ri_id);
279} 277}
280 278
@@ -294,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
294{ 292{
295 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 293 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
296 294
297 dprintk("RPC: %s: called\n", __func__); 295 trace_xprtrdma_destroy(r_xprt);
298 296
299 cancel_delayed_work_sync(&r_xprt->rx_connect_worker); 297 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
300 298
@@ -305,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
305 rpcrdma_ia_close(&r_xprt->rx_ia); 303 rpcrdma_ia_close(&r_xprt->rx_ia);
306 304
307 xprt_rdma_free_addresses(xprt); 305 xprt_rdma_free_addresses(xprt);
308
309 xprt_free(xprt); 306 xprt_free(xprt);
310 307
311 dprintk("RPC: %s: returning\n", __func__);
312
313 module_put(THIS_MODULE); 308 module_put(THIS_MODULE);
314} 309}
315 310
@@ -360,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args)
360 /* 355 /*
361 * Set up RDMA-specific connect data. 356 * Set up RDMA-specific connect data.
362 */ 357 */
363 358 sap = args->dstaddr;
364 sap = (struct sockaddr *)&cdata.addr;
365 memcpy(sap, args->dstaddr, args->addrlen);
366 359
367 /* Ensure xprt->addr holds valid server TCP (not RDMA) 360 /* Ensure xprt->addr holds valid server TCP (not RDMA)
368 * address, for any side protocols which peek at it */ 361 * address, for any side protocols which peek at it */
@@ -372,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args)
372 365
373 if (rpc_get_port(sap)) 366 if (rpc_get_port(sap))
374 xprt_set_bound(xprt); 367 xprt_set_bound(xprt);
368 xprt_rdma_format_addresses(xprt, sap);
375 369
376 cdata.max_requests = xprt->max_reqs; 370 cdata.max_requests = xprt->max_reqs;
377 371
@@ -386,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args)
386 if (cdata.inline_rsize > cdata.rsize) 380 if (cdata.inline_rsize > cdata.rsize)
387 cdata.inline_rsize = cdata.rsize; 381 cdata.inline_rsize = cdata.rsize;
388 382
389 cdata.padding = xprt_rdma_inline_write_padding;
390
391 /* 383 /*
392 * Create new transport instance, which includes initialized 384 * Create new transport instance, which includes initialized
393 * o ia 385 * o ia
@@ -397,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args)
397 389
398 new_xprt = rpcx_to_rdmax(xprt); 390 new_xprt = rpcx_to_rdmax(xprt);
399 391
400 rc = rpcrdma_ia_open(new_xprt, sap); 392 rc = rpcrdma_ia_open(new_xprt);
401 if (rc) 393 if (rc)
402 goto out1; 394 goto out1;
403 395
@@ -406,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args)
406 */ 398 */
407 new_xprt->rx_data = cdata; 399 new_xprt->rx_data = cdata;
408 new_ep = &new_xprt->rx_ep; 400 new_ep = &new_xprt->rx_ep;
409 new_ep->rep_remote_addr = cdata.addr;
410 401
411 rc = rpcrdma_ep_create(&new_xprt->rx_ep, 402 rc = rpcrdma_ep_create(&new_xprt->rx_ep,
412 &new_xprt->rx_ia, &new_xprt->rx_data); 403 &new_xprt->rx_ia, &new_xprt->rx_data);
413 if (rc) 404 if (rc)
414 goto out2; 405 goto out2;
415 406
416 /*
417 * Allocate pre-registered send and receive buffers for headers and
418 * any inline data. Also specify any padding which will be provided
419 * from a preregistered zero buffer.
420 */
421 rc = rpcrdma_buffer_create(new_xprt); 407 rc = rpcrdma_buffer_create(new_xprt);
422 if (rc) 408 if (rc)
423 goto out3; 409 goto out3;
424 410
425 /*
426 * Register a callback for connection events. This is necessary because
427 * connection loss notification is async. We also catch connection loss
428 * when reaping receives.
429 */
430 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, 411 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
431 xprt_rdma_connect_worker); 412 xprt_rdma_connect_worker);
432 413
433 xprt_rdma_format_addresses(xprt, sap);
434 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); 414 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
435 if (xprt->max_payload == 0) 415 if (xprt->max_payload == 0)
436 goto out4; 416 goto out4;
@@ -444,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args)
444 dprintk("RPC: %s: %s:%s\n", __func__, 424 dprintk("RPC: %s: %s:%s\n", __func__,
445 xprt->address_strings[RPC_DISPLAY_ADDR], 425 xprt->address_strings[RPC_DISPLAY_ADDR],
446 xprt->address_strings[RPC_DISPLAY_PORT]); 426 xprt->address_strings[RPC_DISPLAY_PORT]);
427 trace_xprtrdma_create(new_xprt);
447 return xprt; 428 return xprt;
448 429
449out4: 430out4:
450 xprt_rdma_free_addresses(xprt); 431 rpcrdma_buffer_destroy(&new_xprt->rx_buf);
451 rc = -EINVAL; 432 rc = -ENODEV;
452out3: 433out3:
453 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); 434 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
454out2: 435out2:
455 rpcrdma_ia_close(&new_xprt->rx_ia); 436 rpcrdma_ia_close(&new_xprt->rx_ia);
456out1: 437out1:
438 trace_xprtrdma_destroy(new_xprt);
439 xprt_rdma_free_addresses(xprt);
457 xprt_free(xprt); 440 xprt_free(xprt);
458 return ERR_PTR(rc); 441 return ERR_PTR(rc);
459} 442}
@@ -487,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt)
487 rpcrdma_ep_disconnect(ep, ia); 470 rpcrdma_ep_disconnect(ep, ia);
488} 471}
489 472
473/**
474 * xprt_rdma_set_port - update server port with rpcbind result
475 * @xprt: controlling RPC transport
476 * @port: new port value
477 *
478 * Transport connect status is unchanged.
479 */
490static void 480static void
491xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) 481xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
492{ 482{
493 struct sockaddr_in *sap; 483 struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
484 char buf[8];
494 485
495 sap = (struct sockaddr_in *)&xprt->addr; 486 dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
496 sap->sin_port = htons(port); 487 __func__, xprt,
497 sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; 488 xprt->address_strings[RPC_DISPLAY_ADDR],
498 sap->sin_port = htons(port); 489 xprt->address_strings[RPC_DISPLAY_PORT],
499 dprintk("RPC: %s: %u\n", __func__, port); 490 port);
491
492 rpc_set_port(sap, port);
493
494 kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
495 snprintf(buf, sizeof(buf), "%u", port);
496 xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
497
498 kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
499 snprintf(buf, sizeof(buf), "%4hx", port);
500 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
500} 501}
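
The rewritten xprt_rdma_set_port() stops assuming IPv4: it hands the generic sockaddr to rpc_set_port() and then regenerates the cached display strings so debug output stays in step with the rpcbind result. For illustration only (not the SUNRPC implementation), a family-agnostic port setter looks roughly like this:

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <asm/byteorder.h>

static void demo_set_port(struct sockaddr *sap, unsigned short port)
{
	switch (sap->sa_family) {
	case AF_INET:
		((struct sockaddr_in *)sap)->sin_port = htons(port);
		break;
	case AF_INET6:
		((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
		break;
	}
}
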
501 502
502/** 503/**
@@ -515,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
515static void 516static void
516xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) 517xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
517{ 518{
518 dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
519
520 xprt_force_disconnect(xprt); 519 xprt_force_disconnect(xprt);
521} 520}
522 521
@@ -639,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task)
639 638
640 req = rpcrdma_buffer_get(&r_xprt->rx_buf); 639 req = rpcrdma_buffer_get(&r_xprt->rx_buf);
641 if (req == NULL) 640 if (req == NULL)
642 return -ENOMEM; 641 goto out_get;
643 642
644 flags = RPCRDMA_DEF_GFP; 643 flags = RPCRDMA_DEF_GFP;
645 if (RPC_IS_SWAPPER(task)) 644 if (RPC_IS_SWAPPER(task))
@@ -652,18 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task)
652 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) 651 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
653 goto out_fail; 652 goto out_fail;
654 653
655 dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n", 654 req->rl_cpu = smp_processor_id();
656 task->tk_pid, __func__, rqst->rq_callsize,
657 rqst->rq_rcvsize, req);
658
659 req->rl_connect_cookie = 0; /* our reserved value */ 655 req->rl_connect_cookie = 0; /* our reserved value */
660 rpcrdma_set_xprtdata(rqst, req); 656 rpcrdma_set_xprtdata(rqst, req);
661 rqst->rq_buffer = req->rl_sendbuf->rg_base; 657 rqst->rq_buffer = req->rl_sendbuf->rg_base;
662 rqst->rq_rbuffer = req->rl_recvbuf->rg_base; 658 rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
659 trace_xprtrdma_allocate(task, req);
663 return 0; 660 return 0;
664 661
665out_fail: 662out_fail:
666 rpcrdma_buffer_put(req); 663 rpcrdma_buffer_put(req);
664out_get:
665 trace_xprtrdma_allocate(task, NULL);
667 return -ENOMEM; 666 return -ENOMEM;
668} 667}
669 668
@@ -680,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task)
680 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 679 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
681 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 680 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
682 681
683 if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
684 return;
685
686 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
687
688 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) 682 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
689 rpcrdma_release_rqst(r_xprt, req); 683 rpcrdma_release_rqst(r_xprt, req);
684 trace_xprtrdma_rpc_done(task, req);
690 rpcrdma_buffer_put(req); 685 rpcrdma_buffer_put(req);
691} 686}
692 687
@@ -696,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task)
696 * 691 *
697 * Caller holds the transport's write lock. 692 * Caller holds the transport's write lock.
698 * 693 *
699 * Return values: 694 * Returns:
700 * 0: The request has been sent 695 * %0 if the RPC message has been sent
701 * ENOTCONN: Caller needs to invoke connect logic then call again 696 * %-ENOTCONN if the caller should reconnect and call again
702 * ENOBUFS: Call again later to send the request 697 * %-ENOBUFS if the caller should call again later
703 * EIO: A permanent error occurred. The request was not sent, 698 * %-EIO if a permanent error occurred and the request was not
704 * and don't try it again 699 * sent. Do not try to send this message again.
705 *
706 * send_request invokes the meat of RPC RDMA. It must do the following:
707 *
708 * 1. Marshal the RPC request into an RPC RDMA request, which means
709 * putting a header in front of data, and creating IOVs for RDMA
710 * from those in the request.
711 * 2. In marshaling, detect opportunities for RDMA, and use them.
712 * 3. Post a recv message to set up asynch completion, then send
713 * the request (rpcrdma_ep_post).
714 * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
715 */ 700 */
716static int 701static int
717xprt_rdma_send_request(struct rpc_task *task) 702xprt_rdma_send_request(struct rpc_task *task)
@@ -722,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task)
722 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 707 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
723 int rc = 0; 708 int rc = 0;
724 709
710#if defined(CONFIG_SUNRPC_BACKCHANNEL)
711 if (unlikely(!rqst->rq_buffer))
712 return xprt_rdma_bc_send_reply(rqst);
713#endif /* CONFIG_SUNRPC_BACKCHANNEL */
714
725 if (!xprt_connected(xprt)) 715 if (!xprt_connected(xprt))
726 goto drop_connection; 716 goto drop_connection;
727 717
728 /* On retransmit, remove any previously registered chunks */
729 if (unlikely(!list_empty(&req->rl_registered)))
730 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
731 &req->rl_registered);
732
733 rc = rpcrdma_marshal_req(r_xprt, rqst); 718 rc = rpcrdma_marshal_req(r_xprt, rqst);
734 if (rc < 0) 719 if (rc < 0)
735 goto failed_marshal; 720 goto failed_marshal;
@@ -742,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task)
742 goto drop_connection; 727 goto drop_connection;
743 req->rl_connect_cookie = xprt->connect_cookie; 728 req->rl_connect_cookie = xprt->connect_cookie;
744 729
745 set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 730 __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
746 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 731 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
747 goto drop_connection; 732 goto drop_connection;
748 733
@@ -902,8 +887,7 @@ int xprt_rdma_init(void)
902 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", 887 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
903 xprt_rdma_slot_table_entries, 888 xprt_rdma_slot_table_entries,
904 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); 889 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
905 dprintk("\tPadding %d\n\tMemreg %d\n", 890 dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
906 xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
907 891
908#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 892#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
909 if (!sunrpc_table_header) 893 if (!sunrpc_table_header)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 710b3f77db82..f4eb63e8e689 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -71,8 +71,8 @@
71/* 71/*
72 * internal functions 72 * internal functions
73 */ 73 */
74static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); 74static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
75static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); 75static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
77 77
78struct workqueue_struct *rpcrdma_receive_wq __read_mostly; 78struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void)
83 struct workqueue_struct *recv_wq; 83 struct workqueue_struct *recv_wq;
84 84
85 recv_wq = alloc_workqueue("xprtrdma_receive", 85 recv_wq = alloc_workqueue("xprtrdma_receive",
86 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI, 86 WQ_MEM_RECLAIM | WQ_HIGHPRI,
87 0); 87 0);
88 if (!recv_wq) 88 if (!recv_wq)
89 return -ENOMEM; 89 return -ENOMEM;
@@ -108,7 +108,10 @@ static void
108rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) 108rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
109{ 109{
110 struct rpcrdma_ep *ep = context; 110 struct rpcrdma_ep *ep = context;
111 struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
112 rx_ep);
111 113
114 trace_xprtrdma_qp_error(r_xprt, event);
112 pr_err("rpcrdma: %s on device %s ep %p\n", 115 pr_err("rpcrdma: %s on device %s ep %p\n",
113 ib_event_msg(event->event), event->device->name, context); 116 ib_event_msg(event->event), event->device->name, context);
114 117
@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
133 container_of(cqe, struct rpcrdma_sendctx, sc_cqe); 136 container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
134 137
135 /* WARNING: Only wr_cqe and status are reliable at this point */ 138 /* WARNING: Only wr_cqe and status are reliable at this point */
139 trace_xprtrdma_wc_send(sc, wc);
136 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) 140 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
137 pr_err("rpcrdma: Send: %s (%u/0x%x)\n", 141 pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
138 ib_wc_status_msg(wc->status), 142 ib_wc_status_msg(wc->status),
@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
155 rr_cqe); 159 rr_cqe);
156 160
157 /* WARNING: Only wr_id and status are reliable at this point */ 161 /* WARNING: Only wr_id and status are reliable at this point */
162 trace_xprtrdma_wc_receive(rep, wc);
158 if (wc->status != IB_WC_SUCCESS) 163 if (wc->status != IB_WC_SUCCESS)
159 goto out_fail; 164 goto out_fail;
160 165
161 /* status == SUCCESS means all fields in wc are trustworthy */ 166 /* status == SUCCESS means all fields in wc are trustworthy */
162 dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
163 __func__, rep, wc->byte_len);
164
165 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); 167 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
166 rep->rr_wc_flags = wc->wc_flags; 168 rep->rr_wc_flags = wc->wc_flags;
167 rep->rr_inv_rkey = wc->ex.invalidate_rkey; 169 rep->rr_inv_rkey = wc->ex.invalidate_rkey;
@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
192 unsigned int rsize, wsize; 194 unsigned int rsize, wsize;
193 195
194 /* Default settings for RPC-over-RDMA Version One */ 196 /* Default settings for RPC-over-RDMA Version One */
195 r_xprt->rx_ia.ri_reminv_expected = false;
196 r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; 197 r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
197 rsize = RPCRDMA_V1_DEF_INLINE_SIZE; 198 rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
198 wsize = RPCRDMA_V1_DEF_INLINE_SIZE; 199 wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
200 if (pmsg && 201 if (pmsg &&
201 pmsg->cp_magic == rpcrdma_cmp_magic && 202 pmsg->cp_magic == rpcrdma_cmp_magic &&
202 pmsg->cp_version == RPCRDMA_CMP_VERSION) { 203 pmsg->cp_version == RPCRDMA_CMP_VERSION) {
203 r_xprt->rx_ia.ri_reminv_expected = true;
204 r_xprt->rx_ia.ri_implicit_roundup = true; 204 r_xprt->rx_ia.ri_implicit_roundup = true;
205 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); 205 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
206 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 206 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
221 struct rpcrdma_xprt *xprt = id->context; 221 struct rpcrdma_xprt *xprt = id->context;
222 struct rpcrdma_ia *ia = &xprt->rx_ia; 222 struct rpcrdma_ia *ia = &xprt->rx_ia;
223 struct rpcrdma_ep *ep = &xprt->rx_ep; 223 struct rpcrdma_ep *ep = &xprt->rx_ep;
224#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
225 struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
226#endif
227 int connstate = 0; 224 int connstate = 0;
228 225
226 trace_xprtrdma_conn_upcall(xprt, event);
229 switch (event->event) { 227 switch (event->event) {
230 case RDMA_CM_EVENT_ADDR_RESOLVED: 228 case RDMA_CM_EVENT_ADDR_RESOLVED:
231 case RDMA_CM_EVENT_ROUTE_RESOLVED: 229 case RDMA_CM_EVENT_ROUTE_RESOLVED:
@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
234 break; 232 break;
235 case RDMA_CM_EVENT_ADDR_ERROR: 233 case RDMA_CM_EVENT_ADDR_ERROR:
236 ia->ri_async_rc = -EHOSTUNREACH; 234 ia->ri_async_rc = -EHOSTUNREACH;
237 dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
238 __func__, ep);
239 complete(&ia->ri_done); 235 complete(&ia->ri_done);
240 break; 236 break;
241 case RDMA_CM_EVENT_ROUTE_ERROR: 237 case RDMA_CM_EVENT_ROUTE_ERROR:
242 ia->ri_async_rc = -ENETUNREACH; 238 ia->ri_async_rc = -ENETUNREACH;
243 dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
244 __func__, ep);
245 complete(&ia->ri_done); 239 complete(&ia->ri_done);
246 break; 240 break;
247 case RDMA_CM_EVENT_DEVICE_REMOVAL: 241 case RDMA_CM_EVENT_DEVICE_REMOVAL:
248#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 242#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
249 pr_info("rpcrdma: removing device %s for %pIS:%u\n", 243 pr_info("rpcrdma: removing device %s for %s:%s\n",
250 ia->ri_device->name, 244 ia->ri_device->name,
251 sap, rpc_get_port(sap)); 245 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
252#endif 246#endif
253 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); 247 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
254 ep->rep_connected = -ENODEV; 248 ep->rep_connected = -ENODEV;
@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
271 connstate = -ENETDOWN; 265 connstate = -ENETDOWN;
272 goto connected; 266 goto connected;
273 case RDMA_CM_EVENT_REJECTED: 267 case RDMA_CM_EVENT_REJECTED:
274 dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n", 268 dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
275 sap, rpc_get_port(sap), 269 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
276 rdma_reject_msg(id, event->status)); 270 rdma_reject_msg(id, event->status));
277 connstate = -ECONNREFUSED; 271 connstate = -ECONNREFUSED;
278 if (event->status == IB_CM_REJ_STALE_CONN) 272 if (event->status == IB_CM_REJ_STALE_CONN)
@@ -287,8 +281,9 @@ connected:
287 wake_up_all(&ep->rep_connect_wait); 281 wake_up_all(&ep->rep_connect_wait);
288 /*FALLTHROUGH*/ 282 /*FALLTHROUGH*/
289 default: 283 default:
290 dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n", 284 dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
291 __func__, sap, rpc_get_port(sap), 285 __func__,
286 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
292 ia->ri_device->name, ia->ri_ops->ro_displayname, 287 ia->ri_device->name, ia->ri_ops->ro_displayname,
293 ep, rdma_event_msg(event->event)); 288 ep, rdma_event_msg(event->event));
294 break; 289 break;
@@ -298,13 +293,14 @@ connected:
298} 293}
299 294
300static struct rdma_cm_id * 295static struct rdma_cm_id *
301rpcrdma_create_id(struct rpcrdma_xprt *xprt, 296rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
302 struct rpcrdma_ia *ia, struct sockaddr *addr)
303{ 297{
304 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; 298 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
305 struct rdma_cm_id *id; 299 struct rdma_cm_id *id;
306 int rc; 300 int rc;
307 301
302 trace_xprtrdma_conn_start(xprt);
303
308 init_completion(&ia->ri_done); 304 init_completion(&ia->ri_done);
309 init_completion(&ia->ri_remove_done); 305 init_completion(&ia->ri_remove_done);
310 306
@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
318 } 314 }
319 315
320 ia->ri_async_rc = -ETIMEDOUT; 316 ia->ri_async_rc = -ETIMEDOUT;
321 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); 317 rc = rdma_resolve_addr(id, NULL,
318 (struct sockaddr *)&xprt->rx_xprt.addr,
319 RDMA_RESOLVE_TIMEOUT);
322 if (rc) { 320 if (rc) {
323 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", 321 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
324 __func__, rc); 322 __func__, rc);
@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
326 } 324 }
327 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 325 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
328 if (rc < 0) { 326 if (rc < 0) {
329 dprintk("RPC: %s: wait() exited: %i\n", 327 trace_xprtrdma_conn_tout(xprt);
330 __func__, rc);
331 goto out; 328 goto out;
332 } 329 }
333 330
@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
344 } 341 }
345 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 342 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
346 if (rc < 0) { 343 if (rc < 0) {
347 dprintk("RPC: %s: wait() exited: %i\n", 344 trace_xprtrdma_conn_tout(xprt);
348 __func__, rc);
349 goto out; 345 goto out;
350 } 346 }
351 rc = ia->ri_async_rc; 347 rc = ia->ri_async_rc;
@@ -365,19 +361,18 @@ out:
365 361
366/** 362/**
367 * rpcrdma_ia_open - Open and initialize an Interface Adapter. 363 * rpcrdma_ia_open - Open and initialize an Interface Adapter.
368 * @xprt: controlling transport 364 * @xprt: transport with IA to (re)initialize
369 * @addr: IP address of remote peer
370 * 365 *
371 * Returns 0 on success, negative errno if an appropriate 366 * Returns 0 on success, negative errno if an appropriate
372 * Interface Adapter could not be found and opened. 367 * Interface Adapter could not be found and opened.
373 */ 368 */
374int 369int
375rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) 370rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
376{ 371{
377 struct rpcrdma_ia *ia = &xprt->rx_ia; 372 struct rpcrdma_ia *ia = &xprt->rx_ia;
378 int rc; 373 int rc;
379 374
380 ia->ri_id = rpcrdma_create_id(xprt, ia, addr); 375 ia->ri_id = rpcrdma_create_id(xprt, ia);
381 if (IS_ERR(ia->ri_id)) { 376 if (IS_ERR(ia->ri_id)) {
382 rc = PTR_ERR(ia->ri_id); 377 rc = PTR_ERR(ia->ri_id);
383 goto out_err; 378 goto out_err;
@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
392 } 387 }
393 388
394 switch (xprt_rdma_memreg_strategy) { 389 switch (xprt_rdma_memreg_strategy) {
395 case RPCRDMA_FRMR: 390 case RPCRDMA_FRWR:
396 if (frwr_is_supported(ia)) { 391 if (frwr_is_supported(ia)) {
397 ia->ri_ops = &rpcrdma_frwr_memreg_ops; 392 ia->ri_ops = &rpcrdma_frwr_memreg_ops;
398 break; 393 break;
@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
462 rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); 457 rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
463 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); 458 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
464 } 459 }
465 rpcrdma_destroy_mrs(buf); 460 rpcrdma_mrs_destroy(buf);
466 461
467 /* Allow waiters to continue */ 462 /* Allow waiters to continue */
468 complete(&ia->ri_remove_done); 463 complete(&ia->ri_remove_done);
464
465 trace_xprtrdma_remove(r_xprt);
469} 466}
470 467
471/** 468/**
@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
476void 473void
477rpcrdma_ia_close(struct rpcrdma_ia *ia) 474rpcrdma_ia_close(struct rpcrdma_ia *ia)
478{ 475{
479 dprintk("RPC: %s: entering\n", __func__);
480 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { 476 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
481 if (ia->ri_id->qp) 477 if (ia->ri_id->qp)
482 rdma_destroy_qp(ia->ri_id); 478 rdma_destroy_qp(ia->ri_id);
@@ -630,9 +626,6 @@ out1:
630void 626void
631rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 627rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
632{ 628{
633 dprintk("RPC: %s: entering, connected is %d\n",
634 __func__, ep->rep_connected);
635
636 cancel_delayed_work_sync(&ep->rep_connect_worker); 629 cancel_delayed_work_sync(&ep->rep_connect_worker);
637 630
638 if (ia->ri_id->qp) { 631 if (ia->ri_id->qp) {
@@ -653,13 +646,12 @@ static int
653rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, 646rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
654 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 647 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
655{ 648{
656 struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
657 int rc, err; 649 int rc, err;
658 650
659 pr_info("%s: r_xprt = %p\n", __func__, r_xprt); 651 trace_xprtrdma_reinsert(r_xprt);
660 652
661 rc = -EHOSTUNREACH; 653 rc = -EHOSTUNREACH;
662 if (rpcrdma_ia_open(r_xprt, sap)) 654 if (rpcrdma_ia_open(r_xprt))
663 goto out1; 655 goto out1;
664 656
665 rc = -ENOMEM; 657 rc = -ENOMEM;
@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
676 goto out3; 668 goto out3;
677 } 669 }
678 670
679 rpcrdma_create_mrs(r_xprt); 671 rpcrdma_mrs_create(r_xprt);
680 return 0; 672 return 0;
681 673
682out3: 674out3:
@@ -691,16 +683,15 @@ static int
691rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, 683rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
692 struct rpcrdma_ia *ia) 684 struct rpcrdma_ia *ia)
693{ 685{
694 struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
695 struct rdma_cm_id *id, *old; 686 struct rdma_cm_id *id, *old;
696 int err, rc; 687 int err, rc;
697 688
698 dprintk("RPC: %s: reconnecting...\n", __func__); 689 trace_xprtrdma_reconnect(r_xprt);
699 690
700 rpcrdma_ep_disconnect(ep, ia); 691 rpcrdma_ep_disconnect(ep, ia);
701 692
702 rc = -EHOSTUNREACH; 693 rc = -EHOSTUNREACH;
703 id = rpcrdma_create_id(r_xprt, ia, sap); 694 id = rpcrdma_create_id(r_xprt, ia);
704 if (IS_ERR(id)) 695 if (IS_ERR(id))
705 goto out; 696 goto out;
706 697
@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
817 int rc; 808 int rc;
818 809
819 rc = rdma_disconnect(ia->ri_id); 810 rc = rdma_disconnect(ia->ri_id);
820 if (!rc) { 811 if (!rc)
821 /* returns without wait if not connected */ 812 /* returns without wait if not connected */
822 wait_event_interruptible(ep->rep_connect_wait, 813 wait_event_interruptible(ep->rep_connect_wait,
823 ep->rep_connected != 1); 814 ep->rep_connected != 1);
824 dprintk("RPC: %s: after wait, %sconnected\n", __func__, 815 else
825 (ep->rep_connected == 1) ? "still " : "dis");
826 } else {
827 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
828 ep->rep_connected = rc; 816 ep->rep_connected = rc;
829 } 817 trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
818 rx_ep), rc);
830 819
831 ib_drain_qp(ia->ri_id->qp); 820 ib_drain_qp(ia->ri_id->qp);
832} 821}
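The reworked rpcrdma_ep_disconnect() above issues rdma_disconnect() and, on success, sleeps until rep_connected leaves the connected state; on failure it records the errno in rep_connected, and a single tracepoint replaces the two dprintk calls. A minimal userspace sketch of the same request-then-wait-for-state-change shape, using a pthread condition variable where the kernel uses a wait queue (all names here are illustrative, not taken from the kernel tree):

#include <pthread.h>

/* Illustrative model of an endpoint's connection state. */
struct ep_state {
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	int             connected;	/* 1 = connected, <= 0 = down/error */
};

/* Called by the transport when the state changes (e.g. a CM event). */
static void ep_set_connected(struct ep_state *ep, int value)
{
	pthread_mutex_lock(&ep->lock);
	ep->connected = value;
	pthread_cond_broadcast(&ep->cond);
	pthread_mutex_unlock(&ep->lock);
}

/* Ask for a disconnect, then wait until the state is no longer 1. */
static int ep_disconnect(struct ep_state *ep, int (*do_disconnect)(void))
{
	int rc = do_disconnect();

	pthread_mutex_lock(&ep->lock);
	if (!rc) {
		while (ep->connected == 1)
			pthread_cond_wait(&ep->cond, &ep->lock);
	} else {
		ep->connected = rc;	/* record the errno, as the kernel code does */
	}
	pthread_mutex_unlock(&ep->lock);
	return rc;
}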
@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
998{ 987{
999 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, 988 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
1000 rb_recovery_worker.work); 989 rb_recovery_worker.work);
1001 struct rpcrdma_mw *mw; 990 struct rpcrdma_mr *mr;
1002 991
1003 spin_lock(&buf->rb_recovery_lock); 992 spin_lock(&buf->rb_recovery_lock);
1004 while (!list_empty(&buf->rb_stale_mrs)) { 993 while (!list_empty(&buf->rb_stale_mrs)) {
1005 mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); 994 mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
1006 spin_unlock(&buf->rb_recovery_lock); 995 spin_unlock(&buf->rb_recovery_lock);
1007 996
1008 dprintk("RPC: %s: recovering MR %p\n", __func__, mw); 997 trace_xprtrdma_recover_mr(mr);
1009 mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw); 998 mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
1010 999
1011 spin_lock(&buf->rb_recovery_lock); 1000 spin_lock(&buf->rb_recovery_lock);
1012 } 1001 }
@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
1014} 1003}
1015 1004
1016void 1005void
1017rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) 1006rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
1018{ 1007{
1019 struct rpcrdma_xprt *r_xprt = mw->mw_xprt; 1008 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1020 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1009 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1021 1010
1022 spin_lock(&buf->rb_recovery_lock); 1011 spin_lock(&buf->rb_recovery_lock);
1023 rpcrdma_push_mw(mw, &buf->rb_stale_mrs); 1012 rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
1024 spin_unlock(&buf->rb_recovery_lock); 1013 spin_unlock(&buf->rb_recovery_lock);
1025 1014
1026 schedule_delayed_work(&buf->rb_recovery_worker, 0); 1015 schedule_delayed_work(&buf->rb_recovery_worker, 0);
1027} 1016}
1028 1017
1029static void 1018static void
1030rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) 1019rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
1031{ 1020{
1032 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1021 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1033 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1022 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
1036 LIST_HEAD(all); 1025 LIST_HEAD(all);
1037 1026
1038 for (count = 0; count < 32; count++) { 1027 for (count = 0; count < 32; count++) {
1039 struct rpcrdma_mw *mw; 1028 struct rpcrdma_mr *mr;
1040 int rc; 1029 int rc;
1041 1030
1042 mw = kzalloc(sizeof(*mw), GFP_KERNEL); 1031 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1043 if (!mw) 1032 if (!mr)
1044 break; 1033 break;
1045 1034
1046 rc = ia->ri_ops->ro_init_mr(ia, mw); 1035 rc = ia->ri_ops->ro_init_mr(ia, mr);
1047 if (rc) { 1036 if (rc) {
1048 kfree(mw); 1037 kfree(mr);
1049 break; 1038 break;
1050 } 1039 }
1051 1040
1052 mw->mw_xprt = r_xprt; 1041 mr->mr_xprt = r_xprt;
1053 1042
1054 list_add(&mw->mw_list, &free); 1043 list_add(&mr->mr_list, &free);
1055 list_add(&mw->mw_all, &all); 1044 list_add(&mr->mr_all, &all);
1056 } 1045 }
1057 1046
1058 spin_lock(&buf->rb_mwlock); 1047 spin_lock(&buf->rb_mrlock);
1059 list_splice(&free, &buf->rb_mws); 1048 list_splice(&free, &buf->rb_mrs);
1060 list_splice(&all, &buf->rb_all); 1049 list_splice(&all, &buf->rb_all);
1061 r_xprt->rx_stats.mrs_allocated += count; 1050 r_xprt->rx_stats.mrs_allocated += count;
1062 spin_unlock(&buf->rb_mwlock); 1051 spin_unlock(&buf->rb_mrlock);
1063 1052
1064 dprintk("RPC: %s: created %u MRs\n", __func__, count); 1053 trace_xprtrdma_createmrs(r_xprt, count);
1065} 1054}
1066 1055
1067static void 1056static void
@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
1072 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1061 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
1073 rx_buf); 1062 rx_buf);
1074 1063
1075 rpcrdma_create_mrs(r_xprt); 1064 rpcrdma_mrs_create(r_xprt);
1076} 1065}
1077 1066
1078struct rpcrdma_req * 1067struct rpcrdma_req *
@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
1093 return req; 1082 return req;
1094} 1083}
1095 1084
1096struct rpcrdma_rep * 1085/**
1086 * rpcrdma_create_rep - Allocate an rpcrdma_rep object
1087 * @r_xprt: controlling transport
1088 *
1089 * Returns 0 on success or a negative errno on failure.
1090 */
1091int
1097rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) 1092rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1098{ 1093{
1099 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 1094 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1095 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1100 struct rpcrdma_rep *rep; 1096 struct rpcrdma_rep *rep;
1101 int rc; 1097 int rc;
1102 1098
@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1121 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 1117 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
1122 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1118 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1123 rep->rr_recv_wr.num_sge = 1; 1119 rep->rr_recv_wr.num_sge = 1;
1124 return rep; 1120
1121 spin_lock(&buf->rb_lock);
1122 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1123 spin_unlock(&buf->rb_lock);
1124 return 0;
1125 1125
1126out_free: 1126out_free:
1127 kfree(rep); 1127 kfree(rep);
1128out: 1128out:
1129 return ERR_PTR(rc); 1129 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1130 __func__, rc);
1131 return rc;
1130} 1132}
1131 1133
1132int 1134int
@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1137 1139
1138 buf->rb_max_requests = r_xprt->rx_data.max_requests; 1140 buf->rb_max_requests = r_xprt->rx_data.max_requests;
1139 buf->rb_bc_srv_max_requests = 0; 1141 buf->rb_bc_srv_max_requests = 0;
1140 spin_lock_init(&buf->rb_mwlock); 1142 spin_lock_init(&buf->rb_mrlock);
1141 spin_lock_init(&buf->rb_lock); 1143 spin_lock_init(&buf->rb_lock);
1142 spin_lock_init(&buf->rb_recovery_lock); 1144 spin_lock_init(&buf->rb_recovery_lock);
1143 INIT_LIST_HEAD(&buf->rb_mws); 1145 INIT_LIST_HEAD(&buf->rb_mrs);
1144 INIT_LIST_HEAD(&buf->rb_all); 1146 INIT_LIST_HEAD(&buf->rb_all);
1145 INIT_LIST_HEAD(&buf->rb_stale_mrs); 1147 INIT_LIST_HEAD(&buf->rb_stale_mrs);
1146 INIT_DELAYED_WORK(&buf->rb_refresh_worker, 1148 INIT_DELAYED_WORK(&buf->rb_refresh_worker,
@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1148 INIT_DELAYED_WORK(&buf->rb_recovery_worker, 1150 INIT_DELAYED_WORK(&buf->rb_recovery_worker,
1149 rpcrdma_mr_recovery_worker); 1151 rpcrdma_mr_recovery_worker);
1150 1152
1151 rpcrdma_create_mrs(r_xprt); 1153 rpcrdma_mrs_create(r_xprt);
1152 1154
1153 INIT_LIST_HEAD(&buf->rb_send_bufs); 1155 INIT_LIST_HEAD(&buf->rb_send_bufs);
1154 INIT_LIST_HEAD(&buf->rb_allreqs); 1156 INIT_LIST_HEAD(&buf->rb_allreqs);
@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1167 } 1169 }
1168 1170
1169 INIT_LIST_HEAD(&buf->rb_recv_bufs); 1171 INIT_LIST_HEAD(&buf->rb_recv_bufs);
1170 for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { 1172 for (i = 0; i <= buf->rb_max_requests; i++) {
1171 struct rpcrdma_rep *rep; 1173 rc = rpcrdma_create_rep(r_xprt);
1172 1174 if (rc)
1173 rep = rpcrdma_create_rep(r_xprt);
1174 if (IS_ERR(rep)) {
1175 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1176 __func__, i);
1177 rc = PTR_ERR(rep);
1178 goto out; 1175 goto out;
1179 }
1180 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1181 } 1176 }
1182 1177
1183 rc = rpcrdma_sendctxs_create(r_xprt); 1178 rc = rpcrdma_sendctxs_create(r_xprt);
@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req)
1229} 1224}
1230 1225
1231static void 1226static void
1232rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) 1227rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
1233{ 1228{
1234 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1229 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
1235 rx_buf); 1230 rx_buf);
1236 struct rpcrdma_ia *ia = rdmab_to_ia(buf); 1231 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1237 struct rpcrdma_mw *mw; 1232 struct rpcrdma_mr *mr;
1238 unsigned int count; 1233 unsigned int count;
1239 1234
1240 count = 0; 1235 count = 0;
1241 spin_lock(&buf->rb_mwlock); 1236 spin_lock(&buf->rb_mrlock);
1242 while (!list_empty(&buf->rb_all)) { 1237 while (!list_empty(&buf->rb_all)) {
1243 mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 1238 mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
1244 list_del(&mw->mw_all); 1239 list_del(&mr->mr_all);
1245 1240
1246 spin_unlock(&buf->rb_mwlock); 1241 spin_unlock(&buf->rb_mrlock);
1247 ia->ri_ops->ro_release_mr(mw); 1242 ia->ri_ops->ro_release_mr(mr);
1248 count++; 1243 count++;
1249 spin_lock(&buf->rb_mwlock); 1244 spin_lock(&buf->rb_mrlock);
1250 } 1245 }
1251 spin_unlock(&buf->rb_mwlock); 1246 spin_unlock(&buf->rb_mrlock);
1252 r_xprt->rx_stats.mrs_allocated = 0; 1247 r_xprt->rx_stats.mrs_allocated = 0;
1253 1248
1254 dprintk("RPC: %s: released %u MRs\n", __func__, count); 1249 dprintk("RPC: %s: released %u MRs\n", __func__, count);
@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1285 spin_unlock(&buf->rb_reqslock); 1280 spin_unlock(&buf->rb_reqslock);
1286 buf->rb_recv_count = 0; 1281 buf->rb_recv_count = 0;
1287 1282
1288 rpcrdma_destroy_mrs(buf); 1283 rpcrdma_mrs_destroy(buf);
1289} 1284}
1290 1285
1291struct rpcrdma_mw * 1286/**
1292rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) 1287 * rpcrdma_mr_get - Allocate an rpcrdma_mr object
1288 * @r_xprt: controlling transport
1289 *
1290 * Returns an initialized rpcrdma_mr or NULL if no free
1291 * rpcrdma_mr objects are available.
1292 */
1293struct rpcrdma_mr *
1294rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
1293{ 1295{
1294 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1296 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1295 struct rpcrdma_mw *mw = NULL; 1297 struct rpcrdma_mr *mr = NULL;
1296 1298
1297 spin_lock(&buf->rb_mwlock); 1299 spin_lock(&buf->rb_mrlock);
1298 if (!list_empty(&buf->rb_mws)) 1300 if (!list_empty(&buf->rb_mrs))
1299 mw = rpcrdma_pop_mw(&buf->rb_mws); 1301 mr = rpcrdma_mr_pop(&buf->rb_mrs);
1300 spin_unlock(&buf->rb_mwlock); 1302 spin_unlock(&buf->rb_mrlock);
1301 1303
1302 if (!mw) 1304 if (!mr)
1303 goto out_nomws; 1305 goto out_nomrs;
1304 mw->mw_flags = 0; 1306 return mr;
1305 return mw;
1306 1307
1307out_nomws: 1308out_nomrs:
1308 dprintk("RPC: %s: no MWs available\n", __func__); 1309 trace_xprtrdma_nomrs(r_xprt);
1309 if (r_xprt->rx_ep.rep_connected != -ENODEV) 1310 if (r_xprt->rx_ep.rep_connected != -ENODEV)
1310 schedule_delayed_work(&buf->rb_refresh_worker, 0); 1311 schedule_delayed_work(&buf->rb_refresh_worker, 0);
1311 1312
@@ -1315,14 +1316,39 @@ out_nomws:
1315 return NULL; 1316 return NULL;
1316} 1317}
1317 1318
1319static void
1320__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
1321{
1322 spin_lock(&buf->rb_mrlock);
1323 rpcrdma_mr_push(mr, &buf->rb_mrs);
1324 spin_unlock(&buf->rb_mrlock);
1325}
1326
1327/**
1328 * rpcrdma_mr_put - Release an rpcrdma_mr object
1329 * @mr: object to release
1330 *
1331 */
1318void 1332void
1319rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) 1333rpcrdma_mr_put(struct rpcrdma_mr *mr)
1320{ 1334{
1321 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1335 __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
1336}
1337
1338/**
1339 * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
1340 * @mr: object to release
1341 *
1342 */
1343void
1344rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
1345{
1346 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1322 1347
1323 spin_lock(&buf->rb_mwlock); 1348 trace_xprtrdma_dma_unmap(mr);
1324 rpcrdma_push_mw(mw, &buf->rb_mws); 1349 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
1325 spin_unlock(&buf->rb_mwlock); 1350 mr->mr_sg, mr->mr_nents, mr->mr_dir);
1351 __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
1326} 1352}
1327 1353
1328static struct rpcrdma_rep * 1354static struct rpcrdma_rep *
@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1359 req = rpcrdma_buffer_get_req_locked(buffers); 1385 req = rpcrdma_buffer_get_req_locked(buffers);
1360 req->rl_reply = rpcrdma_buffer_get_rep(buffers); 1386 req->rl_reply = rpcrdma_buffer_get_rep(buffers);
1361 spin_unlock(&buffers->rb_lock); 1387 spin_unlock(&buffers->rb_lock);
1388
1362 return req; 1389 return req;
1363 1390
1364out_reqbuf: 1391out_reqbuf:
1365 spin_unlock(&buffers->rb_lock); 1392 spin_unlock(&buffers->rb_lock);
1366 pr_warn("RPC: %s: out of request buffers\n", __func__);
1367 return NULL; 1393 return NULL;
1368} 1394}
1369 1395
@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1519 req->rl_reply = NULL; 1545 req->rl_reply = NULL;
1520 } 1546 }
1521 1547
1522 dprintk("RPC: %s: posting %d s/g entries\n",
1523 __func__, send_wr->num_sge);
1524
1525 if (!ep->rep_send_count || 1548 if (!ep->rep_send_count ||
1526 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { 1549 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1527 send_wr->send_flags |= IB_SEND_SIGNALED; 1550 send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1530 send_wr->send_flags &= ~IB_SEND_SIGNALED; 1553 send_wr->send_flags &= ~IB_SEND_SIGNALED;
1531 --ep->rep_send_count; 1554 --ep->rep_send_count;
1532 } 1555 }
1556
1533 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); 1557 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
1558 trace_xprtrdma_post_send(req, rc);
1534 if (rc) 1559 if (rc)
1535 goto out_postsend_err; 1560 return -ENOTCONN;
1536 return 0; 1561 return 0;
1537
1538out_postsend_err:
1539 pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
1540 return -ENOTCONN;
1541} 1562}
1542 1563
1543int 1564int
@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1550 if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) 1571 if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
1551 goto out_map; 1572 goto out_map;
1552 rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); 1573 rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
1574 trace_xprtrdma_post_recv(rep, rc);
1553 if (rc) 1575 if (rc)
1554 goto out_postrecv; 1576 return -ENOTCONN;
1555 return 0; 1577 return 0;
1556 1578
1557out_map: 1579out_map:
1558 pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); 1580 pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
1559 return -EIO; 1581 return -EIO;
1560
1561out_postrecv:
1562 pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
1563 return -ENOTCONN;
1564} 1582}
1565 1583
1566/** 1584/**
1567 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests 1585 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
1568 * @r_xprt: transport associated with these backchannel resources 1586 * @r_xprt: transport associated with these backchannel resources
1569 * @min_reqs: minimum number of incoming requests expected 1587 * @count: minimum number of incoming requests expected
1570 * 1588 *
1571 * Returns zero if all requested buffers were posted, or a negative errno. 1589 * Returns zero if all requested buffers were posted, or a negative errno.
1572 */ 1590 */
@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
1594 1612
1595out_reqbuf: 1613out_reqbuf:
1596 spin_unlock(&buffers->rb_lock); 1614 spin_unlock(&buffers->rb_lock);
1597 pr_warn("%s: no extra receive buffers\n", __func__); 1615 trace_xprtrdma_noreps(r_xprt);
1598 return -ENOMEM; 1616 return -ENOMEM;
1599 1617
1600out_rc: 1618out_rc:
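Across verbs.c the patch renames rpcrdma_mw to rpcrdma_mr and funnels allocation through rpcrdma_mr_get()/rpcrdma_mr_put(), a free list guarded by rb_mrlock, with rpcrdma_mr_unmap_and_put() undoing the DMA mapping before release. A compact userspace model of that get/put discipline (a pthread mutex standing in for the spinlock, a simple LIFO instead of the kernel list; names illustrative, ordering details simplified):

#include <pthread.h>

struct mr {
	struct mr *next;	/* free-list linkage */
	int        mapped;	/* stand-in for a DMA mapping */
};

struct mr_pool {
	pthread_mutex_t lock;	/* models rb_mrlock */
	struct mr *free;	/* models rb_mrs */
};

/* Take one MR from the pool, or NULL if the pool is empty. */
static struct mr *mr_get(struct mr_pool *pool)
{
	struct mr *mr;

	pthread_mutex_lock(&pool->lock);
	mr = pool->free;
	if (mr)
		pool->free = mr->next;
	pthread_mutex_unlock(&pool->lock);
	return mr;		/* caller refreshes the pool when this is NULL */
}

/* Return an MR to the pool. */
static void mr_put(struct mr_pool *pool, struct mr *mr)
{
	pthread_mutex_lock(&pool->lock);
	mr->next = pool->free;
	pool->free = mr;
	pthread_mutex_unlock(&pool->lock);
}

/* Undo the mapping first, then release, mirroring rpcrdma_mr_unmap_and_put(). */
static void mr_unmap_and_put(struct mr_pool *pool, struct mr *mr)
{
	mr->mapped = 0;
	mr_put(pool, mr);
}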
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 51686d9eac5f..69883a960a3f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -73,11 +73,10 @@ struct rpcrdma_ia {
73 struct completion ri_remove_done; 73 struct completion ri_remove_done;
74 int ri_async_rc; 74 int ri_async_rc;
75 unsigned int ri_max_segs; 75 unsigned int ri_max_segs;
76 unsigned int ri_max_frmr_depth; 76 unsigned int ri_max_frwr_depth;
77 unsigned int ri_max_inline_write; 77 unsigned int ri_max_inline_write;
78 unsigned int ri_max_inline_read; 78 unsigned int ri_max_inline_read;
79 unsigned int ri_max_send_sges; 79 unsigned int ri_max_send_sges;
80 bool ri_reminv_expected;
81 bool ri_implicit_roundup; 80 bool ri_implicit_roundup;
82 enum ib_mr_type ri_mrtype; 81 enum ib_mr_type ri_mrtype;
83 unsigned long ri_flags; 82 unsigned long ri_flags;
@@ -101,7 +100,6 @@ struct rpcrdma_ep {
101 wait_queue_head_t rep_connect_wait; 100 wait_queue_head_t rep_connect_wait;
102 struct rpcrdma_connect_private rep_cm_private; 101 struct rpcrdma_connect_private rep_cm_private;
103 struct rdma_conn_param rep_remote_cma; 102 struct rdma_conn_param rep_remote_cma;
104 struct sockaddr_storage rep_remote_addr;
105 struct delayed_work rep_connect_worker; 103 struct delayed_work rep_connect_worker;
106}; 104};
107 105
@@ -232,29 +230,29 @@ enum {
232}; 230};
233 231
234/* 232/*
235 * struct rpcrdma_mw - external memory region metadata 233 * struct rpcrdma_mr - external memory region metadata
236 * 234 *
237 * An external memory region is any buffer or page that is registered 235 * An external memory region is any buffer or page that is registered
238 * on the fly (ie, not pre-registered). 236 * on the fly (ie, not pre-registered).
239 * 237 *
240 * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During 238 * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
241 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in 239 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
242 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep 240 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
243 * track of registration metadata while each RPC is pending. 241 * track of registration metadata while each RPC is pending.
244 * rpcrdma_deregister_external() uses this metadata to unmap and 242 * rpcrdma_deregister_external() uses this metadata to unmap and
245 * release these resources when an RPC is complete. 243 * release these resources when an RPC is complete.
246 */ 244 */
247enum rpcrdma_frmr_state { 245enum rpcrdma_frwr_state {
248 FRMR_IS_INVALID, /* ready to be used */ 246 FRWR_IS_INVALID, /* ready to be used */
249 FRMR_IS_VALID, /* in use */ 247 FRWR_IS_VALID, /* in use */
250 FRMR_FLUSHED_FR, /* flushed FASTREG WR */ 248 FRWR_FLUSHED_FR, /* flushed FASTREG WR */
251 FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ 249 FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
252}; 250};
253 251
254struct rpcrdma_frmr { 252struct rpcrdma_frwr {
255 struct ib_mr *fr_mr; 253 struct ib_mr *fr_mr;
256 struct ib_cqe fr_cqe; 254 struct ib_cqe fr_cqe;
257 enum rpcrdma_frmr_state fr_state; 255 enum rpcrdma_frwr_state fr_state;
258 struct completion fr_linv_done; 256 struct completion fr_linv_done;
259 union { 257 union {
260 struct ib_reg_wr fr_regwr; 258 struct ib_reg_wr fr_regwr;
@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
267 u64 *fm_physaddrs; 265 u64 *fm_physaddrs;
268}; 266};
269 267
270struct rpcrdma_mw { 268struct rpcrdma_mr {
271 struct list_head mw_list; 269 struct list_head mr_list;
272 struct scatterlist *mw_sg; 270 struct scatterlist *mr_sg;
273 int mw_nents; 271 int mr_nents;
274 enum dma_data_direction mw_dir; 272 enum dma_data_direction mr_dir;
275 unsigned long mw_flags;
276 union { 273 union {
277 struct rpcrdma_fmr fmr; 274 struct rpcrdma_fmr fmr;
278 struct rpcrdma_frmr frmr; 275 struct rpcrdma_frwr frwr;
279 }; 276 };
280 struct rpcrdma_xprt *mw_xprt; 277 struct rpcrdma_xprt *mr_xprt;
281 u32 mw_handle; 278 u32 mr_handle;
282 u32 mw_length; 279 u32 mr_length;
283 u64 mw_offset; 280 u64 mr_offset;
284 struct list_head mw_all; 281 struct list_head mr_all;
285};
286
287/* mw_flags */
288enum {
289 RPCRDMA_MW_F_RI = 1,
290}; 282};
291 283
292/* 284/*
@@ -342,6 +334,7 @@ enum {
342struct rpcrdma_buffer; 334struct rpcrdma_buffer;
343struct rpcrdma_req { 335struct rpcrdma_req {
344 struct list_head rl_list; 336 struct list_head rl_list;
337 int rl_cpu;
345 unsigned int rl_connect_cookie; 338 unsigned int rl_connect_cookie;
346 struct rpcrdma_buffer *rl_buffer; 339 struct rpcrdma_buffer *rl_buffer;
347 struct rpcrdma_rep *rl_reply; 340 struct rpcrdma_rep *rl_reply;
@@ -361,8 +354,7 @@ struct rpcrdma_req {
361 354
362/* rl_flags */ 355/* rl_flags */
363enum { 356enum {
364 RPCRDMA_REQ_F_BACKCHANNEL = 0, 357 RPCRDMA_REQ_F_PENDING = 0,
365 RPCRDMA_REQ_F_PENDING,
366 RPCRDMA_REQ_F_TX_RESOURCES, 358 RPCRDMA_REQ_F_TX_RESOURCES,
367}; 359};
368 360
@@ -373,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
373} 365}
374 366
375static inline struct rpcrdma_req * 367static inline struct rpcrdma_req *
376rpcr_to_rdmar(struct rpc_rqst *rqst) 368rpcr_to_rdmar(const struct rpc_rqst *rqst)
377{ 369{
378 return rqst->rq_xprtdata; 370 return rqst->rq_xprtdata;
379} 371}
380 372
381static inline void 373static inline void
382rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) 374rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
383{ 375{
384 list_add_tail(&mw->mw_list, list); 376 list_add_tail(&mr->mr_list, list);
385} 377}
386 378
387static inline struct rpcrdma_mw * 379static inline struct rpcrdma_mr *
388rpcrdma_pop_mw(struct list_head *list) 380rpcrdma_mr_pop(struct list_head *list)
389{ 381{
390 struct rpcrdma_mw *mw; 382 struct rpcrdma_mr *mr;
391 383
392 mw = list_first_entry(list, struct rpcrdma_mw, mw_list); 384 mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
393 list_del(&mw->mw_list); 385 list_del(&mr->mr_list);
394 return mw; 386 return mr;
395} 387}
396 388
397/* 389/*
@@ -401,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
401 * One of these is associated with a transport instance 393 * One of these is associated with a transport instance
402 */ 394 */
403struct rpcrdma_buffer { 395struct rpcrdma_buffer {
404 spinlock_t rb_mwlock; /* protect rb_mws list */ 396 spinlock_t rb_mrlock; /* protect rb_mrs list */
405 struct list_head rb_mws; 397 struct list_head rb_mrs;
406 struct list_head rb_all; 398 struct list_head rb_all;
407 399
408 unsigned long rb_sc_head; 400 unsigned long rb_sc_head;
@@ -437,13 +429,11 @@ struct rpcrdma_buffer {
437 * This data should be set with mount options 429 * This data should be set with mount options
438 */ 430 */
439struct rpcrdma_create_data_internal { 431struct rpcrdma_create_data_internal {
440 struct sockaddr_storage addr; /* RDMA server address */
441 unsigned int max_requests; /* max requests (slots) in flight */ 432 unsigned int max_requests; /* max requests (slots) in flight */
442 unsigned int rsize; /* mount rsize - max read hdr+data */ 433 unsigned int rsize; /* mount rsize - max read hdr+data */
443 unsigned int wsize; /* mount wsize - max write hdr+data */ 434 unsigned int wsize; /* mount wsize - max write hdr+data */
444 unsigned int inline_rsize; /* max non-rdma read data payload */ 435 unsigned int inline_rsize; /* max non-rdma read data payload */
445 unsigned int inline_wsize; /* max non-rdma write data payload */ 436 unsigned int inline_wsize; /* max non-rdma write data payload */
446 unsigned int padding; /* non-rdma write header padding */
447}; 437};
448 438
449/* 439/*
@@ -483,17 +473,19 @@ struct rpcrdma_memreg_ops {
483 struct rpcrdma_mr_seg * 473 struct rpcrdma_mr_seg *
484 (*ro_map)(struct rpcrdma_xprt *, 474 (*ro_map)(struct rpcrdma_xprt *,
485 struct rpcrdma_mr_seg *, int, bool, 475 struct rpcrdma_mr_seg *, int, bool,
486 struct rpcrdma_mw **); 476 struct rpcrdma_mr **);
477 void (*ro_reminv)(struct rpcrdma_rep *rep,
478 struct list_head *mrs);
487 void (*ro_unmap_sync)(struct rpcrdma_xprt *, 479 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
488 struct list_head *); 480 struct list_head *);
489 void (*ro_recover_mr)(struct rpcrdma_mw *); 481 void (*ro_recover_mr)(struct rpcrdma_mr *mr);
490 int (*ro_open)(struct rpcrdma_ia *, 482 int (*ro_open)(struct rpcrdma_ia *,
491 struct rpcrdma_ep *, 483 struct rpcrdma_ep *,
492 struct rpcrdma_create_data_internal *); 484 struct rpcrdma_create_data_internal *);
493 size_t (*ro_maxpages)(struct rpcrdma_xprt *); 485 size_t (*ro_maxpages)(struct rpcrdma_xprt *);
494 int (*ro_init_mr)(struct rpcrdma_ia *, 486 int (*ro_init_mr)(struct rpcrdma_ia *,
495 struct rpcrdma_mw *); 487 struct rpcrdma_mr *);
496 void (*ro_release_mr)(struct rpcrdma_mw *); 488 void (*ro_release_mr)(struct rpcrdma_mr *mr);
497 const char *ro_displayname; 489 const char *ro_displayname;
498 const int ro_send_w_inv_ok; 490 const int ro_send_w_inv_ok;
499}; 491};
@@ -524,6 +516,18 @@ struct rpcrdma_xprt {
524#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) 516#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
525#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) 517#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
526 518
519static inline const char *
520rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
521{
522 return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
523}
524
525static inline const char *
526rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
527{
528 return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
529}
530
527/* Setting this to 0 ensures interoperability with early servers. 531/* Setting this to 0 ensures interoperability with early servers.
528 * Setting this to 1 enhances certain unaligned read/write performance. 532 * Setting this to 1 enhances certain unaligned read/write performance.
529 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ 533 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
@@ -537,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
537/* 541/*
538 * Interface Adapter calls - xprtrdma/verbs.c 542 * Interface Adapter calls - xprtrdma/verbs.c
539 */ 543 */
540int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); 544int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
541void rpcrdma_ia_remove(struct rpcrdma_ia *ia); 545void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
542void rpcrdma_ia_close(struct rpcrdma_ia *); 546void rpcrdma_ia_close(struct rpcrdma_ia *);
543bool frwr_is_supported(struct rpcrdma_ia *); 547bool frwr_is_supported(struct rpcrdma_ia *);
@@ -563,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
563 * Buffer calls - xprtrdma/verbs.c 567 * Buffer calls - xprtrdma/verbs.c
564 */ 568 */
565struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); 569struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
566struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
567void rpcrdma_destroy_req(struct rpcrdma_req *); 570void rpcrdma_destroy_req(struct rpcrdma_req *);
571int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
568int rpcrdma_buffer_create(struct rpcrdma_xprt *); 572int rpcrdma_buffer_create(struct rpcrdma_xprt *);
569void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 573void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
570struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); 574struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
571void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); 575void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
572 576
573struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); 577struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
574void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); 578void rpcrdma_mr_put(struct rpcrdma_mr *mr);
579void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
580void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
581
575struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 582struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
576void rpcrdma_buffer_put(struct rpcrdma_req *); 583void rpcrdma_buffer_put(struct rpcrdma_req *);
577void rpcrdma_recv_buffer_get(struct rpcrdma_req *); 584void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
578void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); 585void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
579 586
580void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
581
582struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, 587struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
583 gfp_t); 588 gfp_t);
584bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); 589bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
@@ -662,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
662size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); 667size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
663int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); 668int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
664void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); 669void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
665int rpcrdma_bc_marshal_reply(struct rpc_rqst *); 670int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
666void xprt_rdma_bc_free_rqst(struct rpc_rqst *); 671void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
667void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); 672void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
668#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 673#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -670,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
670extern struct xprt_class xprt_rdma_bc; 675extern struct xprt_class xprt_rdma_bc;
671 676
672#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 677#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
678
679#include <trace/events/rpcrdma.h>
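The new rpcrdma_mr_push()/rpcrdma_mr_pop() inlines are thin wrappers over the kernel's intrusive list_head: push appends to the tail, pop detaches the first entry and recovers the containing MR. A self-contained miniature of that mechanism, with a hand-rolled list and container_of (illustrative, not the kernel headers):

#include <stddef.h>

/* Miniature intrusive list, in the spirit of the kernel's list_head. */
struct list_node {
	struct list_node *prev, *next;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void list_init(struct list_node *head)
{
	head->prev = head->next = head;
}

static void list_add_tail(struct list_node *node, struct list_node *head)
{
	node->prev = head->prev;
	node->next = head;
	head->prev->next = node;
	head->prev = node;
}

static void list_del(struct list_node *node)
{
	node->prev->next = node->next;
	node->next->prev = node->prev;
}

/* An MR-like object carrying its own list linkage. */
struct mr {
	struct list_node mr_list;
	int id;
};

/* Analogues of the rpcrdma_mr_push()/rpcrdma_mr_pop() inlines. */
static void mr_push(struct mr *mr, struct list_node *list)
{
	list_add_tail(&mr->mr_list, list);
}

static struct mr *mr_pop(struct list_node *list)
{
	struct mr *mr = container_of(list->next, struct mr, mr_list);

	list_del(&mr->mr_list);
	return mr;		/* caller must ensure the list is non-empty */
}

With a head initialized by list_init(), mr_push()/mr_pop() give the same tail-in, head-out ordering the kernel helpers provide.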
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9cc850c2719e..18803021f242 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
52 52
53#include "sunrpc.h" 53#include "sunrpc.h"
54 54
55#define RPC_TCP_READ_CHUNK_SZ (3*512*1024)
56
55static void xs_close(struct rpc_xprt *xprt); 57static void xs_close(struct rpc_xprt *xprt);
56static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, 58static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
57 struct socket *sock); 59 struct socket *sock);
@@ -1003,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport)
1003 struct sock *sk; 1005 struct sock *sk;
1004 int err; 1006 int err;
1005 1007
1008restart:
1006 mutex_lock(&transport->recv_mutex); 1009 mutex_lock(&transport->recv_mutex);
1007 sk = transport->inet; 1010 sk = transport->inet;
1008 if (sk == NULL) 1011 if (sk == NULL)
@@ -1016,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport)
1016 } 1019 }
1017 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1020 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
1018 break; 1021 break;
1022 if (need_resched()) {
1023 mutex_unlock(&transport->recv_mutex);
1024 cond_resched();
1025 goto restart;
1026 }
1019 } 1027 }
1020out: 1028out:
1021 mutex_unlock(&transport->recv_mutex); 1029 mutex_unlock(&transport->recv_mutex);
@@ -1094,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
1094 struct sock *sk; 1102 struct sock *sk;
1095 int err; 1103 int err;
1096 1104
1105restart:
1097 mutex_lock(&transport->recv_mutex); 1106 mutex_lock(&transport->recv_mutex);
1098 sk = transport->inet; 1107 sk = transport->inet;
1099 if (sk == NULL) 1108 if (sk == NULL)
@@ -1107,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
1107 } 1116 }
1108 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1117 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
1109 break; 1118 break;
1119 if (need_resched()) {
1120 mutex_unlock(&transport->recv_mutex);
1121 cond_resched();
1122 goto restart;
1123 }
1110 } 1124 }
1111out: 1125out:
1112 mutex_unlock(&transport->recv_mutex); 1126 mutex_unlock(&transport->recv_mutex);
@@ -1479,6 +1493,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
1479 .offset = offset, 1493 .offset = offset,
1480 .count = len, 1494 .count = len,
1481 }; 1495 };
1496 size_t ret;
1482 1497
1483 dprintk("RPC: xs_tcp_data_recv started\n"); 1498 dprintk("RPC: xs_tcp_data_recv started\n");
1484 do { 1499 do {
@@ -1507,9 +1522,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
1507 /* Skip over any trailing bytes on short reads */ 1522 /* Skip over any trailing bytes on short reads */
1508 xs_tcp_read_discard(transport, &desc); 1523 xs_tcp_read_discard(transport, &desc);
1509 } while (desc.count); 1524 } while (desc.count);
1525 ret = len - desc.count;
1526 if (ret < rd_desc->count)
1527 rd_desc->count -= ret;
1528 else
1529 rd_desc->count = 0;
1510 trace_xs_tcp_data_recv(transport); 1530 trace_xs_tcp_data_recv(transport);
1511 dprintk("RPC: xs_tcp_data_recv done\n"); 1531 dprintk("RPC: xs_tcp_data_recv done\n");
1512 return len - desc.count; 1532 return ret;
1513} 1533}
1514 1534
1515static void xs_tcp_data_receive(struct sock_xprt *transport) 1535static void xs_tcp_data_receive(struct sock_xprt *transport)
@@ -1517,30 +1537,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
1517 struct rpc_xprt *xprt = &transport->xprt; 1537 struct rpc_xprt *xprt = &transport->xprt;
1518 struct sock *sk; 1538 struct sock *sk;
1519 read_descriptor_t rd_desc = { 1539 read_descriptor_t rd_desc = {
1520 .count = 2*1024*1024,
1521 .arg.data = xprt, 1540 .arg.data = xprt,
1522 }; 1541 };
1523 unsigned long total = 0; 1542 unsigned long total = 0;
1524 int loop;
1525 int read = 0; 1543 int read = 0;
1526 1544
1545restart:
1527 mutex_lock(&transport->recv_mutex); 1546 mutex_lock(&transport->recv_mutex);
1528 sk = transport->inet; 1547 sk = transport->inet;
1529 if (sk == NULL) 1548 if (sk == NULL)
1530 goto out; 1549 goto out;
1531 1550
1532 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ 1551 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
1533 for (loop = 0; loop < 64; loop++) { 1552 for (;;) {
1553 rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
1534 lock_sock(sk); 1554 lock_sock(sk);
1535 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); 1555 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
1536 if (read <= 0) { 1556 if (rd_desc.count != 0 || read < 0) {
1537 clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); 1557 clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
1538 release_sock(sk); 1558 release_sock(sk);
1539 break; 1559 break;
1540 } 1560 }
1541 release_sock(sk); 1561 release_sock(sk);
1542 total += read; 1562 total += read;
1543 rd_desc.count = 65536; 1563 if (need_resched()) {
1564 mutex_unlock(&transport->recv_mutex);
1565 cond_resched();
1566 goto restart;
1567 }
1544 } 1568 }
1545 if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1569 if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
1546 queue_work(xprtiod_workqueue, &transport->recv_worker); 1570 queue_work(xprtiod_workqueue, &transport->recv_worker);
@@ -2440,7 +2464,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2440 */ 2464 */
2441 case -ECONNREFUSED: 2465 case -ECONNREFUSED:
2442 case -ECONNRESET: 2466 case -ECONNRESET:
2467 case -ENETDOWN:
2443 case -ENETUNREACH: 2468 case -ENETUNREACH:
2469 case -EHOSTUNREACH:
2444 case -EADDRINUSE: 2470 case -EADDRINUSE:
2445 case -ENOBUFS: 2471 case -ENOBUFS:
2446 /* 2472 /*
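The xprtsock.c changes bound each receive pass to RPC_TCP_READ_CHUNK_SZ and, whenever need_resched() is set, drop recv_mutex, call cond_resched() and restart, so one busy socket cannot monopolize the workqueue thread. A userspace model of that drop-lock/yield/restart loop, with sched_yield() standing in for cond_resched() (names illustrative):

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

#define READ_CHUNK_SZ	(3 * 512 * 1024)

struct rx {
	pthread_mutex_t recv_mutex;
	bool data_ready;			/* models XPRT_SOCK_DATA_READY */
	/* read_chunk() consumes up to 'budget' bytes and returns how many. */
	long (*read_chunk)(struct rx *rx, long budget);
	bool (*should_yield)(void);		/* models need_resched() */
};

static void rx_receive(struct rx *rx)
{
restart:
	pthread_mutex_lock(&rx->recv_mutex);
	for (;;) {
		long n = rx->read_chunk(rx, READ_CHUNK_SZ);

		if (n < READ_CHUNK_SZ) {	/* ran dry: clear the ready flag and stop */
			rx->data_ready = false;
			break;
		}
		if (rx->should_yield()) {	/* give the CPU back, then start over */
			pthread_mutex_unlock(&rx->recv_mutex);
			sched_yield();
			goto restart;
		}
	}
	pthread_mutex_unlock(&rx->recv_mutex);
}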
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121574ce..c8001471da6c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -324,6 +324,7 @@ restart:
324 if (res) { 324 if (res) {
325 pr_warn("Bearer <%s> rejected, enable failure (%d)\n", 325 pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
326 name, -res); 326 name, -res);
327 kfree(b);
327 return -EINVAL; 328 return -EINVAL;
328 } 329 }
329 330
@@ -347,8 +348,10 @@ restart:
347 if (skb) 348 if (skb)
348 tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); 349 tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
349 350
350 if (tipc_mon_create(net, bearer_id)) 351 if (tipc_mon_create(net, bearer_id)) {
352 bearer_disable(net, b);
351 return -ENOMEM; 353 return -ENOMEM;
354 }
352 355
353 pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", 356 pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
354 name, 357 name,
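The bearer.c hunks make the enable path undo earlier setup on failure: the bearer is freed when the media enable callback rejects it, and bearer_disable() runs when tipc_mon_create() fails. A small sketch of that unwind-on-failure shape with goto labels (hypothetical helpers, not the TIPC API):

#include <errno.h>
#include <stdlib.h>

struct bearer { int enabled; };

/* Stand-ins for the media enable callback and monitor creation. */
static int media_enable(struct bearer *b) { b->enabled = 1; return 0; }
static void media_disable(struct bearer *b) { b->enabled = 0; }
static int monitor_create(void) { return -ENOMEM; /* pretend it fails */ }

static int bearer_enable(struct bearer **out)
{
	struct bearer *b = calloc(1, sizeof(*b));
	int err;

	if (!b)
		return -ENOMEM;

	err = media_enable(b);
	if (err)
		goto err_free;		/* nothing else to undo yet */

	err = monitor_create();
	if (err)
		goto err_disable;	/* undo the enable before freeing */

	*out = b;
	return 0;

err_disable:
	media_disable(b);
err_free:
	free(b);
	return err;
}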
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 12777cac638a..5f4ffae807ee 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
109static void tipc_group_decr_active(struct tipc_group *grp, 109static void tipc_group_decr_active(struct tipc_group *grp,
110 struct tipc_member *m) 110 struct tipc_member *m)
111{ 111{
112 if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING) 112 if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
113 m->state == MBR_REMITTED)
113 grp->active_cnt--; 114 grp->active_cnt--;
114} 115}
115 116
@@ -351,8 +352,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
351 if (m->window >= ADV_IDLE) 352 if (m->window >= ADV_IDLE)
352 return; 353 return;
353 354
354 if (!list_empty(&m->congested)) 355 list_del_init(&m->congested);
355 return;
356 356
357 /* Sort member into congested members' list */ 357 /* Sort member into congested members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { 358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -369,18 +369,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
369 u16 prev = grp->bc_snd_nxt - 1; 369 u16 prev = grp->bc_snd_nxt - 1;
370 struct tipc_member *m; 370 struct tipc_member *m;
371 struct rb_node *n; 371 struct rb_node *n;
372 u16 ackers = 0;
372 373
373 for (n = rb_first(&grp->members); n; n = rb_next(n)) { 374 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
374 m = container_of(n, struct tipc_member, tree_node); 375 m = container_of(n, struct tipc_member, tree_node);
375 if (tipc_group_is_enabled(m)) { 376 if (tipc_group_is_enabled(m)) {
376 tipc_group_update_member(m, len); 377 tipc_group_update_member(m, len);
377 m->bc_acked = prev; 378 m->bc_acked = prev;
379 ackers++;
378 } 380 }
379 } 381 }
380 382
381 /* Mark number of acknowledges to expect, if any */ 383 /* Mark number of acknowledges to expect, if any */
382 if (ack) 384 if (ack)
383 grp->bc_ackers = grp->member_cnt; 385 grp->bc_ackers = ackers;
384 grp->bc_snd_nxt++; 386 grp->bc_snd_nxt++;
385} 387}
386 388
@@ -497,6 +499,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
497 while ((skb = skb_peek(defq))) { 499 while ((skb = skb_peek(defq))) {
498 hdr = buf_msg(skb); 500 hdr = buf_msg(skb);
499 mtyp = msg_type(hdr); 501 mtyp = msg_type(hdr);
502 blks = msg_blocks(hdr);
500 deliver = true; 503 deliver = true;
501 ack = false; 504 ack = false;
502 update = false; 505 update = false;
@@ -546,7 +549,6 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
546 if (!update) 549 if (!update)
547 continue; 550 continue;
548 551
549 blks = msg_blocks(hdr);
550 tipc_group_update_rcv_win(grp, blks, node, port, xmitq); 552 tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
551 } 553 }
552 return; 554 return;
@@ -561,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
561 int max_active = grp->max_active; 563 int max_active = grp->max_active;
562 int reclaim_limit = max_active * 3 / 4; 564 int reclaim_limit = max_active * 3 / 4;
563 int active_cnt = grp->active_cnt; 565 int active_cnt = grp->active_cnt;
564 struct tipc_member *m, *rm; 566 struct tipc_member *m, *rm, *pm;
565 567
566 m = tipc_group_find_member(grp, node, port); 568 m = tipc_group_find_member(grp, node, port);
567 if (!m) 569 if (!m)
@@ -604,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
604 pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); 606 pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
605 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); 607 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
606 } 608 }
609 grp->active_cnt--;
610 list_del_init(&m->list);
611 if (list_empty(&grp->pending))
612 return;
613
614 /* Set oldest pending member to active and advertise */
615 pm = list_first_entry(&grp->pending, struct tipc_member, list);
616 pm->state = MBR_ACTIVE;
617 list_move_tail(&pm->list, &grp->active);
618 grp->active_cnt++;
619 tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
607 break; 620 break;
608 case MBR_RECLAIMING: 621 case MBR_RECLAIMING:
609 case MBR_DISCOVERED: 622 case MBR_DISCOVERED:
@@ -648,6 +661,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
648 } else if (mtyp == GRP_REMIT_MSG) { 661 } else if (mtyp == GRP_REMIT_MSG) {
649 msg_set_grp_remitted(hdr, m->window); 662 msg_set_grp_remitted(hdr, m->window);
650 } 663 }
664 msg_set_dest_droppable(hdr, true);
651 __skb_queue_tail(xmitq, skb); 665 __skb_queue_tail(xmitq, skb);
652} 666}
653 667
@@ -689,15 +703,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
689 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); 703 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
690 __skb_queue_tail(inputq, m->event_msg); 704 __skb_queue_tail(inputq, m->event_msg);
691 } 705 }
692 if (m->window < ADV_IDLE) 706 list_del_init(&m->congested);
693 tipc_group_update_member(m, 0); 707 tipc_group_update_member(m, 0);
694 else
695 list_del_init(&m->congested);
696 return; 708 return;
697 case GRP_LEAVE_MSG: 709 case GRP_LEAVE_MSG:
698 if (!m) 710 if (!m)
699 return; 711 return;
700 m->bc_syncpt = msg_grp_bc_syncpt(hdr); 712 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
713 list_del_init(&m->list);
714 list_del_init(&m->congested);
715 *usr_wakeup = true;
701 716
702 /* Wait until WITHDRAW event is received */ 717 /* Wait until WITHDRAW event is received */
703 if (m->state != MBR_LEAVING) { 718 if (m->state != MBR_LEAVING) {
@@ -709,8 +724,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
709 ehdr = buf_msg(m->event_msg); 724 ehdr = buf_msg(m->event_msg);
710 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); 725 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
711 __skb_queue_tail(inputq, m->event_msg); 726 __skb_queue_tail(inputq, m->event_msg);
712 *usr_wakeup = true;
713 list_del_init(&m->congested);
714 return; 727 return;
715 case GRP_ADV_MSG: 728 case GRP_ADV_MSG:
716 if (!m) 729 if (!m)
@@ -741,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
741 if (!m || m->state != MBR_RECLAIMING) 754 if (!m || m->state != MBR_RECLAIMING)
742 return; 755 return;
743 756
744 list_del_init(&m->list);
745 grp->active_cnt--;
746 remitted = msg_grp_remitted(hdr); 757 remitted = msg_grp_remitted(hdr);
747 758
748 /* Messages preceding the REMIT still in receive queue */ 759 /* Messages preceding the REMIT still in receive queue */
749 if (m->advertised > remitted) { 760 if (m->advertised > remitted) {
750 m->state = MBR_REMITTED; 761 m->state = MBR_REMITTED;
751 in_flight = m->advertised - remitted; 762 in_flight = m->advertised - remitted;
763 m->advertised = ADV_IDLE + in_flight;
764 return;
752 } 765 }
753 /* All messages preceding the REMIT have been read */ 766 /* All messages preceding the REMIT have been read */
754 if (m->advertised <= remitted) { 767 if (m->advertised <= remitted) {
@@ -760,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
760 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); 773 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
761 774
762 m->advertised = ADV_IDLE + in_flight; 775 m->advertised = ADV_IDLE + in_flight;
776 grp->active_cnt--;
777 list_del_init(&m->list);
763 778
764 /* Set oldest pending member to active and advertise */ 779 /* Set oldest pending member to active and advertise */
765 if (list_empty(&grp->pending)) 780 if (list_empty(&grp->pending))
@@ -849,19 +864,29 @@ void tipc_group_member_evt(struct tipc_group *grp,
849 *usr_wakeup = true; 864 *usr_wakeup = true;
850 m->usr_pending = false; 865 m->usr_pending = false;
851 node_up = tipc_node_is_up(net, node); 866 node_up = tipc_node_is_up(net, node);
852 867 m->event_msg = NULL;
853 /* Hold back event if more messages might be expected */ 868
854 if (m->state != MBR_LEAVING && node_up) { 869 if (node_up) {
855 m->event_msg = skb; 870 /* Hold back event if a LEAVE msg should be expected */
856 tipc_group_decr_active(grp, m); 871 if (m->state != MBR_LEAVING) {
857 m->state = MBR_LEAVING; 872 m->event_msg = skb;
858 } else { 873 tipc_group_decr_active(grp, m);
859 if (node_up) 874 m->state = MBR_LEAVING;
875 } else {
860 msg_set_grp_bc_seqno(hdr, m->bc_syncpt); 876 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
861 else 877 __skb_queue_tail(inputq, skb);
878 }
879 } else {
880 if (m->state != MBR_LEAVING) {
881 tipc_group_decr_active(grp, m);
882 m->state = MBR_LEAVING;
862 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); 883 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
884 } else {
885 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
886 }
863 __skb_queue_tail(inputq, skb); 887 __skb_queue_tail(inputq, skb);
864 } 888 }
889 list_del_init(&m->list);
865 list_del_init(&m->congested); 890 list_del_init(&m->congested);
866 } 891 }
867 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); 892 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
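In tipc_group_update_bc_members() the expected-acknowledgement count is now taken from the members that are actually enabled during the walk, instead of the raw member_cnt, so the group never waits for acks that can never arrive. The counting pattern, sketched over a plain array rather than the rbtree (names illustrative):

#include <stdbool.h>
#include <stddef.h>

struct member {
	bool enabled;
	unsigned short bc_acked;
};

struct group {
	struct member *members;
	size_t member_cnt;
	unsigned short bc_snd_nxt;
	unsigned short bc_ackers;	/* acks still expected for the last bcast */
};

static void group_update_bc_members(struct group *grp, bool ack)
{
	unsigned short prev = grp->bc_snd_nxt - 1;
	unsigned short ackers = 0;
	size_t i;

	for (i = 0; i < grp->member_cnt; i++) {
		struct member *m = &grp->members[i];

		if (!m->enabled)
			continue;	/* disabled members never acknowledge */
		m->bc_acked = prev;
		ackers++;
	}
	if (ack)
		grp->bc_ackers = ackers;
	grp->bc_snd_nxt++;
}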
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 8e884ed06d4b..32dc33a94bc7 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
642{ 642{
643 struct tipc_net *tn = tipc_net(net); 643 struct tipc_net *tn = tipc_net(net);
644 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 644 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
645 struct tipc_peer *self = get_self(net, bearer_id); 645 struct tipc_peer *self;
646 struct tipc_peer *peer, *tmp; 646 struct tipc_peer *peer, *tmp;
647 647
648 if (!mon)
649 return;
650
651 self = get_self(net, bearer_id);
648 write_lock_bh(&mon->lock); 652 write_lock_bh(&mon->lock);
649 tn->monitors[bearer_id] = NULL; 653 tn->monitors[bearer_id] = NULL;
650 list_for_each_entry_safe(peer, tmp, &self->list, list) { 654 list_for_each_entry_safe(peer, tmp, &self->list, list) {
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 507017fe0f1b..9036d8756e73 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1880,36 +1880,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
1880 1880
1881 if (strcmp(name, tipc_bclink_name) == 0) { 1881 if (strcmp(name, tipc_bclink_name) == 0) {
1882 err = tipc_nl_add_bc_link(net, &msg); 1882 err = tipc_nl_add_bc_link(net, &msg);
1883 if (err) { 1883 if (err)
1884 nlmsg_free(msg.skb); 1884 goto err_free;
1885 return err;
1886 }
1887 } else { 1885 } else {
1888 int bearer_id; 1886 int bearer_id;
1889 struct tipc_node *node; 1887 struct tipc_node *node;
1890 struct tipc_link *link; 1888 struct tipc_link *link;
1891 1889
1892 node = tipc_node_find_by_name(net, name, &bearer_id); 1890 node = tipc_node_find_by_name(net, name, &bearer_id);
1893 if (!node) 1891 if (!node) {
1894 return -EINVAL; 1892 err = -EINVAL;
1893 goto err_free;
1894 }
1895 1895
1896 tipc_node_read_lock(node); 1896 tipc_node_read_lock(node);
1897 link = node->links[bearer_id].link; 1897 link = node->links[bearer_id].link;
1898 if (!link) { 1898 if (!link) {
1899 tipc_node_read_unlock(node); 1899 tipc_node_read_unlock(node);
1900 nlmsg_free(msg.skb); 1900 err = -EINVAL;
1901 return -EINVAL; 1901 goto err_free;
1902 } 1902 }
1903 1903
1904 err = __tipc_nl_add_link(net, &msg, link, 0); 1904 err = __tipc_nl_add_link(net, &msg, link, 0);
1905 tipc_node_read_unlock(node); 1905 tipc_node_read_unlock(node);
1906 if (err) { 1906 if (err)
1907 nlmsg_free(msg.skb); 1907 goto err_free;
1908 return err;
1909 }
1910 } 1908 }
1911 1909
1912 return genlmsg_reply(msg.skb, info); 1910 return genlmsg_reply(msg.skb, info);
1911
1912err_free:
1913 nlmsg_free(msg.skb);
1914 return err;
1913} 1915}
1914 1916
1915int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) 1917int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/tipc/server.c b/net/tipc/server.c
index acaef80fb88c..78a292a84afc 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -264,8 +264,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
264 iov.iov_base = buf; 264 iov.iov_base = buf;
265 iov.iov_len = s->max_rcvbuf_size; 265 iov.iov_len = s->max_rcvbuf_size;
266 msg.msg_name = &addr; 266 msg.msg_name = &addr;
267 ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, 267 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
268 MSG_DONTWAIT); 268 ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
269 if (ret <= 0) { 269 if (ret <= 0) {
270 kmem_cache_free(s->rcvbuf_cache, buf); 270 kmem_cache_free(s->rcvbuf_cache, buf);
271 goto out_close; 271 goto out_close;
@@ -314,6 +314,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
314 newcon->usr_data = s->tipc_conn_new(newcon->conid); 314 newcon->usr_data = s->tipc_conn_new(newcon->conid);
315 if (!newcon->usr_data) { 315 if (!newcon->usr_data) {
316 sock_release(newsock); 316 sock_release(newsock);
317 conn_put(newcon);
317 return -ENOMEM; 318 return -ENOMEM;
318 } 319 }
319 320
@@ -511,7 +512,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
511 s = con->server; 512 s = con->server;
512 scbr = s->tipc_conn_new(*conid); 513 scbr = s->tipc_conn_new(*conid);
513 if (!scbr) { 514 if (!scbr) {
514 tipc_close_conn(con); 515 conn_put(con);
515 return false; 516 return false;
516 } 517 }
517 518
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d18c0caa92b..2aa46e8cd8fe 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -710,13 +710,13 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
710 * imply that the operation will succeed, merely that it should be performed 710 * imply that the operation will succeed, merely that it should be performed
711 * and will not block. 711 * and will not block.
712 */ 712 */
713static unsigned int tipc_poll(struct file *file, struct socket *sock, 713static __poll_t tipc_poll(struct file *file, struct socket *sock,
714 poll_table *wait) 714 poll_table *wait)
715{ 715{
716 struct sock *sk = sock->sk; 716 struct sock *sk = sock->sk;
717 struct tipc_sock *tsk = tipc_sk(sk); 717 struct tipc_sock *tsk = tipc_sk(sk);
718 struct tipc_group *grp = tsk->group; 718 struct tipc_group *grp = tsk->group;
719 u32 revents = 0; 719 __poll_t revents = 0;
720 720
721 sock_poll_wait(file, sk_sleep(sk), wait); 721 sock_poll_wait(file, sk_sleep(sk), wait);
722 722
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
727 727
728 switch (sk->sk_state) { 728 switch (sk->sk_state) {
729 case TIPC_ESTABLISHED: 729 case TIPC_ESTABLISHED:
730 case TIPC_CONNECTING:
730 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) 731 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
731 revents |= POLLOUT; 732 revents |= POLLOUT;
732 /* fall thru' */ 733 /* fall thru' */
733 case TIPC_LISTEN: 734 case TIPC_LISTEN:
734 case TIPC_CONNECTING:
735 if (!skb_queue_empty(&sk->sk_receive_queue)) 735 if (!skb_queue_empty(&sk->sk_receive_queue))
736 revents |= POLLIN | POLLRDNORM; 736 revents |= POLLIN | POLLRDNORM;
737 break; 737 break;
@@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
1140 __skb_dequeue(arrvq); 1140 __skb_dequeue(arrvq);
1141 __skb_queue_tail(inputq, skb); 1141 __skb_queue_tail(inputq, skb);
1142 } 1142 }
1143 refcount_dec(&skb->users); 1143 kfree_skb(skb);
1144 spin_unlock_bh(&inputq->lock); 1144 spin_unlock_bh(&inputq->lock);
1145 continue; 1145 continue;
1146 } 1146 }
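tipc_poll() now returns __poll_t and treats TIPC_CONNECTING like TIPC_ESTABLISHED, so a connecting socket reports POLLOUT once it is not congested and still falls through to the receive-queue check. A compact userspace sketch of a state-driven poll mask in that shape (illustrative types and states):

#define _XOPEN_SOURCE 700
#include <poll.h>
#include <stdbool.h>

enum sk_state { SK_ESTABLISHED, SK_CONNECTING, SK_LISTEN, SK_OPEN };

struct psock {
	enum sk_state state;
	bool tx_congested;
	bool rx_queue_empty;
};

static unsigned int sock_poll_mask(const struct psock *sk)
{
	unsigned int revents = 0;

	switch (sk->state) {
	case SK_ESTABLISHED:
	case SK_CONNECTING:			/* writable once not congested */
		if (!sk->tx_congested)
			revents |= POLLOUT;
		/* fall through */
	case SK_LISTEN:
		if (!sk->rx_queue_empty)
			revents |= POLLIN | POLLRDNORM;
		break;
	case SK_OPEN:
		break;
	}
	return revents;
}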
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f..3deabcab4882 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
371 goto rcu_out; 371 goto rcu_out;
372 } 372 }
373 373
374 tipc_rcv(sock_net(sk), skb, b);
375 rcu_read_unlock();
376 return 0;
377
378rcu_out: 374rcu_out:
379 rcu_read_unlock(); 375 rcu_read_unlock();
380out: 376out:
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index e07ee3ae0023..736719c8314e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -367,8 +367,10 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
367 367
368 crypto_info = &ctx->crypto_send; 368 crypto_info = &ctx->crypto_send;
369 /* Currently we don't support set crypto info more than one time */ 369 /* Currently we don't support set crypto info more than one time */
370 if (TLS_CRYPTO_INFO_READY(crypto_info)) 370 if (TLS_CRYPTO_INFO_READY(crypto_info)) {
371 rc = -EBUSY;
371 goto out; 372 goto out;
373 }
372 374
373 rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); 375 rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
374 if (rc) { 376 if (rc) {
@@ -386,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
386 case TLS_CIPHER_AES_GCM_128: { 388 case TLS_CIPHER_AES_GCM_128: {
387 if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { 389 if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
388 rc = -EINVAL; 390 rc = -EINVAL;
389 goto out; 391 goto err_crypto_info;
390 } 392 }
391 rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), 393 rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
392 optlen - sizeof(*crypto_info)); 394 optlen - sizeof(*crypto_info));
@@ -398,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
398 } 400 }
399 default: 401 default:
400 rc = -EINVAL; 402 rc = -EINVAL;
401 goto out; 403 goto err_crypto_info;
402 } 404 }
403 405
404 /* currently SW is default, we will have ethtool in future */ 406 /* currently SW is default, we will have ethtool in future */
@@ -454,6 +456,15 @@ static int tls_init(struct sock *sk)
454 struct tls_context *ctx; 456 struct tls_context *ctx;
455 int rc = 0; 457 int rc = 0;
456 458
459 /* The TLS ulp is currently supported only for TCP sockets
460 * in ESTABLISHED state.
461 * Supporting sockets in LISTEN state will require us
462 * to modify the accept implementation to clone rather then
463 * share the ulp context.
464 */
465 if (sk->sk_state != TCP_ESTABLISHED)
466 return -ENOTSUPP;
467
457 /* allocate tls context */ 468 /* allocate tls context */
458 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 469 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
459 if (!ctx) { 470 if (!ctx) {
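tls_main.c now rejects TLS setup on sockets that are not in TCP_ESTABLISHED with -ENOTSUPP, returns -EBUSY when crypto info has already been configured, and routes the remaining setsockopt failures through err_crypto_info so partially-copied state is wiped. A minimal sketch of that validate-then-unwind shape (hypothetical structures and cipher id; errno values as in the diff):

#include <errno.h>
#include <stdbool.h>
#include <string.h>

struct crypto_info { int version; int cipher_type; };
struct tls_ctx { struct crypto_info send; };

static bool crypto_info_ready(const struct crypto_info *ci)
{
	return ci->version != 0;
}

static int tls_setsockopt_tx(struct tls_ctx *ctx, const struct crypto_info *user)
{
	struct crypto_info *ci = &ctx->send;
	int rc;

	if (crypto_info_ready(ci))
		return -EBUSY;			/* already configured once */

	*ci = *user;
	if (ci->cipher_type != 1 /* illustrative AES-GCM-128 id */) {
		rc = -EINVAL;
		goto err_crypto_info;		/* wipe the partial copy */
	}
	return 0;

err_crypto_info:
	memset(ci, 0, sizeof(*ci));
	return rc;
}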
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 73d19210dd49..0a9b72fbd761 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -391,7 +391,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
391 391
392 while (msg_data_left(msg)) { 392 while (msg_data_left(msg)) {
393 if (sk->sk_err) { 393 if (sk->sk_err) {
394 ret = sk->sk_err; 394 ret = -sk->sk_err;
395 goto send_end; 395 goto send_end;
396 } 396 }
397 397
@@ -544,7 +544,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
544 size_t copy, required_size; 544 size_t copy, required_size;
545 545
546 if (sk->sk_err) { 546 if (sk->sk_err) {
547 ret = sk->sk_err; 547 ret = -sk->sk_err;
548 goto sendpage_end; 548 goto sendpage_end;
549 } 549 }
550 550
@@ -577,6 +577,8 @@ alloc_payload:
577 get_page(page); 577 get_page(page);
578 sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; 578 sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
579 sg_set_page(sg, page, copy, offset); 579 sg_set_page(sg, page, copy, offset);
580 sg_unmark_end(sg);
581
580 ctx->sg_plaintext_num_elem++; 582 ctx->sg_plaintext_num_elem++;
581 583
582 sk_mem_charge(sk, copy); 584 sk_mem_charge(sk, copy);
@@ -681,18 +683,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
681 } 683 }
682 default: 684 default:
683 rc = -EINVAL; 685 rc = -EINVAL;
684 goto out; 686 goto free_priv;
685 } 687 }
686 688
687 ctx->prepend_size = TLS_HEADER_SIZE + nonce_size; 689 ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
688 ctx->tag_size = tag_size; 690 ctx->tag_size = tag_size;
689 ctx->overhead_size = ctx->prepend_size + ctx->tag_size; 691 ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
690 ctx->iv_size = iv_size; 692 ctx->iv_size = iv_size;
691 ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, 693 ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
692 GFP_KERNEL);
693 if (!ctx->iv) { 694 if (!ctx->iv) {
694 rc = -ENOMEM; 695 rc = -ENOMEM;
695 goto out; 696 goto free_priv;
696 } 697 }
697 memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); 698 memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
698 memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); 699 memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
@@ -740,7 +741,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
740 741
741 rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size); 742 rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
742 if (!rc) 743 if (!rc)
743 goto out; 744 return 0;
744 745
745free_aead: 746free_aead:
746 crypto_free_aead(sw_ctx->aead_send); 747 crypto_free_aead(sw_ctx->aead_send);
@@ -751,6 +752,9 @@ free_rec_seq:
751free_iv: 752free_iv:
752 kfree(ctx->iv); 753 kfree(ctx->iv);
753 ctx->iv = NULL; 754 ctx->iv = NULL;
755free_priv:
756 kfree(ctx->priv_ctx);
757 ctx->priv_ctx = NULL;
754out: 758out:
755 return rc; 759 return rc;
756} 760}
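Two things are going on in the tls_sw.c hunks: sk->sk_err stores a positive errno value while sendmsg/sendpage paths report failure as a negative errno, so returning sk_err unchanged looked like a byte count to callers; and the new free_priv label frees ctx->priv_ctx so a failed tls_set_sw_offload() no longer leaks the software context. A tiny sketch of the sign convention, with invented names:

#include <errno.h>
#include <stdio.h>

struct fake_sock { int sk_err; };	/* positive errno, 0 when no error is pending */

static long fake_sendmsg(struct fake_sock *sk, const char *buf, long len)
{
	(void)buf;
	if (sk->sk_err)
		return -sk->sk_err;	/* negate: callers expect -EPIPE, not EPIPE */
	return len;			/* success: bytes accepted */
}

int main(void)
{
	struct fake_sock sk = { .sk_err = EPIPE };
	printf("%ld\n", fake_sendmsg(&sk, "x", 1));	/* negative errno, e.g. -32 on Linux */
	return 0;
}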
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a9ee634f3c42..6b7678df41e5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -367,7 +367,7 @@ static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int
367 /* relaying can only happen while the wq still exists */ 367 /* relaying can only happen while the wq still exists */
368 u_sleep = sk_sleep(&u->sk); 368 u_sleep = sk_sleep(&u->sk);
369 if (u_sleep) 369 if (u_sleep)
370 wake_up_interruptible_poll(u_sleep, key); 370 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
371 371
372 return 0; 372 return 0;
373} 373}
@@ -638,8 +638,8 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
638static int unix_socketpair(struct socket *, struct socket *); 638static int unix_socketpair(struct socket *, struct socket *);
639static int unix_accept(struct socket *, struct socket *, int, bool); 639static int unix_accept(struct socket *, struct socket *, int, bool);
640static int unix_getname(struct socket *, struct sockaddr *, int *, int); 640static int unix_getname(struct socket *, struct sockaddr *, int *, int);
641static unsigned int unix_poll(struct file *, struct socket *, poll_table *); 641static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
642static unsigned int unix_dgram_poll(struct file *, struct socket *, 642static __poll_t unix_dgram_poll(struct file *, struct socket *,
643 poll_table *); 643 poll_table *);
644static int unix_ioctl(struct socket *, unsigned int, unsigned long); 644static int unix_ioctl(struct socket *, unsigned int, unsigned long);
645static int unix_shutdown(struct socket *, int); 645static int unix_shutdown(struct socket *, int);
@@ -2640,10 +2640,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2640 return err; 2640 return err;
2641} 2641}
2642 2642
2643static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) 2643static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2644{ 2644{
2645 struct sock *sk = sock->sk; 2645 struct sock *sk = sock->sk;
2646 unsigned int mask; 2646 __poll_t mask;
2647 2647
2648 sock_poll_wait(file, sk_sleep(sk), wait); 2648 sock_poll_wait(file, sk_sleep(sk), wait);
2649 mask = 0; 2649 mask = 0;
@@ -2675,11 +2675,12 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
2675 return mask; 2675 return mask;
2676} 2676}
2677 2677
2678static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, 2678static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2679 poll_table *wait) 2679 poll_table *wait)
2680{ 2680{
2681 struct sock *sk = sock->sk, *other; 2681 struct sock *sk = sock->sk, *other;
2682 unsigned int mask, writable; 2682 unsigned int writable;
2683 __poll_t mask;
2683 2684
2684 sock_poll_wait(file, sk_sleep(sk), wait); 2685 sock_poll_wait(file, sk_sleep(sk), wait);
2685 mask = 0; 2686 mask = 0;
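The af_unix.c changes (and the vsock ones below) are type-only: poll handlers switch from unsigned int to __poll_t, the kernel's sparse "__bitwise" typedef for poll event masks, so mixing a mask with ordinary integers can be flagged by static analysis while generated code is unchanged. A hedged sketch of the idea, not the real kernel headers:

#ifdef __CHECKER__
#define __bitwise __attribute__((bitwise))	/* only sparse sees this */
#else
#define __bitwise
#endif

typedef unsigned int __bitwise poll_mask_t;	/* stand-in for __poll_t */

#define MY_POLLIN  ((poll_mask_t)0x0001)
#define MY_POLLOUT ((poll_mask_t)0x0004)

static poll_mask_t example_poll(int readable, int writable)
{
	poll_mask_t mask = 0;		/* under sparse this wants a forced cast */

	if (readable)
		mask |= MY_POLLIN;
	if (writable)
		mask |= MY_POLLOUT;
	return mask;
}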
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 5d28abf87fbf..9d95e773f4c8 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,11 +850,11 @@ static int vsock_shutdown(struct socket *sock, int mode)
850 return err; 850 return err;
851} 851}
852 852
853static unsigned int vsock_poll(struct file *file, struct socket *sock, 853static __poll_t vsock_poll(struct file *file, struct socket *sock,
854 poll_table *wait) 854 poll_table *wait)
855{ 855{
856 struct sock *sk; 856 struct sock *sk;
857 unsigned int mask; 857 __poll_t mask;
858 struct vsock_sock *vsk; 858 struct vsock_sock *vsk;
859 859
860 sk = sock->sk; 860 sk = sock->sk;
@@ -951,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
951 * POLLOUT|POLLWRNORM when peer is closed and nothing to read, 951 * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
952 * but local send is not shutdown. 952 * but local send is not shutdown.
953 */ 953 */
954 if (sk->sk_state == TCP_CLOSE) { 954 if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
955 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) 955 if (!(sk->sk_shutdown & SEND_SHUTDOWN))
956 mask |= POLLOUT | POLLWRNORM; 956 mask |= POLLOUT | POLLWRNORM;
957 957
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 5583df708b8c..a827547aa102 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -487,7 +487,7 @@ static void hvs_release(struct vsock_sock *vsk)
487 487
488 lock_sock(sk); 488 lock_sock(sk);
489 489
490 sk->sk_state = SS_DISCONNECTING; 490 sk->sk_state = TCP_CLOSING;
491 vsock_remove_sock(vsk); 491 vsock_remove_sock(vsk);
492 492
493 release_sock(sk); 493 release_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 391775e3575c..a7a73ffe675b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -797,11 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk)
797 797
798 /* We should not be sending anymore since the peer won't be 798 /* We should not be sending anymore since the peer won't be
799 * there to receive, but we can still receive if there is data 799 * there to receive, but we can still receive if there is data
800 * left in our consume queue. 800 * left in our consume queue. If the local endpoint is a host,
801 * we can't call vsock_stream_has_data, since that may block,
802 * but a host endpoint can't read data once the VM has
803 * detached, so there is no available data in that case.
801 */ 804 */
802 if (vsock_stream_has_data(vsk) <= 0) { 805 if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
803 sk->sk_state = TCP_CLOSE; 806 vsock_stream_has_data(vsk) <= 0) {
804
805 if (sk->sk_state == TCP_SYN_SENT) { 807 if (sk->sk_state == TCP_SYN_SENT) {
806 /* The peer may detach from a queue pair while 808 /* The peer may detach from a queue pair while
807 * we are still in the connecting state, i.e., 809 * we are still in the connecting state, i.e.,
@@ -811,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk)
811 * event like a reset. 813 * event like a reset.
812 */ 814 */
813 815
816 sk->sk_state = TCP_CLOSE;
814 sk->sk_err = ECONNRESET; 817 sk->sk_err = ECONNRESET;
815 sk->sk_error_report(sk); 818 sk->sk_error_report(sk);
816 return; 819 return;
817 } 820 }
821 sk->sk_state = TCP_CLOSE;
818 } 822 }
819 sk->sk_state_change(sk); 823 sk->sk_state_change(sk);
820 } 824 }
@@ -2144,7 +2148,7 @@ module_exit(vmci_transport_exit);
2144 2148
2145MODULE_AUTHOR("VMware, Inc."); 2149MODULE_AUTHOR("VMware, Inc.");
2146MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); 2150MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2147MODULE_VERSION("1.0.4.0-k"); 2151MODULE_VERSION("1.0.5.0-k");
2148MODULE_LICENSE("GPL v2"); 2152MODULE_LICENSE("GPL v2");
2149MODULE_ALIAS("vmware_vsock"); 2153MODULE_ALIAS("vmware_vsock");
2150MODULE_ALIAS_NETPROTO(PF_VSOCK); 2154MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index da91bb547db3..1abcc4fc4df1 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -20,6 +20,10 @@ config CFG80211
20 tristate "cfg80211 - wireless configuration API" 20 tristate "cfg80211 - wireless configuration API"
21 depends on RFKILL || !RFKILL 21 depends on RFKILL || !RFKILL
22 select FW_LOADER 22 select FW_LOADER
23 # may need to update this when certificates are changed and are
24 # using a different algorithm, though right now they shouldn't
25 # (this is here rather than below to allow it to be a module)
26 select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS
23 ---help--- 27 ---help---
24 cfg80211 is the Linux wireless LAN (802.11) configuration API. 28 cfg80211 is the Linux wireless LAN (802.11) configuration API.
25 Enable this if you have a wireless device. 29 Enable this if you have a wireless device.
@@ -113,6 +117,9 @@ config CFG80211_EXTRA_REGDB_KEYDIR
113 certificates like in the kernel sources (net/wireless/certs/) 117 certificates like in the kernel sources (net/wireless/certs/)
114 that shall be accepted for a signed regulatory database. 118 that shall be accepted for a signed regulatory database.
115 119
120 Note that you need to also select the correct CRYPTO_<hash> modules
121 for your certificates, and if cfg80211 is built-in they also must be.
122
116config CFG80211_REG_CELLULAR_HINTS 123config CFG80211_REG_CELLULAR_HINTS
117 bool "cfg80211 regulatory support for cellular base station hints" 124 bool "cfg80211 regulatory support for cellular base station hints"
118 depends on CFG80211_CERTIFICATION_ONUS 125 depends on CFG80211_CERTIFICATION_ONUS
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 278d979c211a..1d84f91bbfb0 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,19 +23,36 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
23cfg80211-y += extra-certs.o 23cfg80211-y += extra-certs.o
24endif 24endif
25 25
26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) 26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
27 @$(kecho) " GEN $@" 27 @$(kecho) " GEN $@"
28 @echo '#include "reg.h"' > $@ 28 @(echo '#include "reg.h"'; \
29 @echo 'const u8 shipped_regdb_certs[] = {' >> $@ 29 echo 'const u8 shipped_regdb_certs[] = {'; \
30 @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done 30 cat $^ ; \
31 @echo '};' >> $@ 31 echo '};'; \
32 @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ 32 echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
33 ) > $@
33 34
34$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ 35$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
35 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) 36 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
36 @$(kecho) " GEN $@" 37 @$(kecho) " GEN $@"
37 @echo '#include "reg.h"' > $@ 38 @(set -e; \
38 @echo 'const u8 extra_regdb_certs[] = {' >> $@ 39 allf=""; \
39 @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done 40 for f in $^ ; do \
40 @echo '};' >> $@ 41 # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
41 @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ 42 thisf=$$(od -An -v -tx1 < $$f | \
43 sed -e 's/ /\n/g' | \
44 sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
45 sed -e 's/^/0x/;s/$$/,/'); \
46 # file should not be empty - maybe command substitution failed? \
47 test ! -z "$$thisf";\
48 allf=$$allf$$thisf;\
49 done; \
50 ( \
51 echo '#include "reg.h"'; \
52 echo 'const u8 extra_regdb_certs[] = {'; \
53 echo "$$allf"; \
54 echo '};'; \
55 echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
56 ) > $@)
57
58clean-files += shipped-certs.c extra-certs.c
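With this Makefile change the build no longer hexdumps binary .x509 blobs; it concatenates pre-generated .hex fragments (such as the sforshee.hex file added below) into shipped-certs.c. From the echo/cat pipeline in the rule, the generated file has roughly the shape sketched here; the byte values are taken from the start of sforshee.hex for illustration, and the file only compiles inside the kernel tree since u8 comes in via reg.h.

#include "reg.h"
const u8 shipped_regdb_certs[] = {
/* Seth Forshee's regdb certificate */
0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
/* ... remaining bytes concatenated from certs/*.hex ... */
};
unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);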
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 000000000000..14ea66643ffa
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
1/* Seth Forshee's regdb certificate */
20x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
30x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
40xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
50x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
60x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
70x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
80x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
90x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
100x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
110x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
120x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
130x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
140x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
150x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
160x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
170x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
180x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
190x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
200x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
210x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
220x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
230x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
240x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
250x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
260x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
270x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
280x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
290x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
300x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
310xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
320x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
330xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
340x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
350x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
360x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
370xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
380xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
390x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
400x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
410x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
420x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
430x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
440x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
450x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
460x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
470x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
480x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
490xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
500xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
510x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
520x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
530x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
540x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
550x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
560x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
570x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
580x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
590x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
600x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
610xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
620x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
630x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
640x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
650xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
660x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
670x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
680x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
690xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
700xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
710x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
720xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
730xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
740x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
750xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
760xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
770x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
780xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
790x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
800xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
810xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
820x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
830x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
840x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
850x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
860x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b988..000000000000
--- a/net/wireless/certs/sforshee.x509
+++ /dev/null
Binary files differ
diff --git a/net/wireless/core.c b/net/wireless/core.c
index fdde0d98fde1..a6f3cac8c640 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -439,6 +439,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
439 if (rv) 439 if (rv)
440 goto use_default_name; 440 goto use_default_name;
441 } else { 441 } else {
442 int rv;
443
442use_default_name: 444use_default_name:
443 /* NOTE: This is *probably* safe w/out holding rtnl because of 445 /* NOTE: This is *probably* safe w/out holding rtnl because of
444 * the restrictions on phy names. Probably this call could 446 * the restrictions on phy names. Probably this call could
@@ -446,7 +448,11 @@ use_default_name:
446 * phyX. But, might should add some locking and check return 448 * phyX. But, might should add some locking and check return
447 * value, and use a different name if this one exists? 449 * value, and use a different name if this one exists?
448 */ 450 */
449 dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); 451 rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
452 if (rv < 0) {
453 kfree(rdev);
454 return NULL;
455 }
450 } 456 }
451 457
452 INIT_LIST_HEAD(&rdev->wiphy.wdev_list); 458 INIT_LIST_HEAD(&rdev->wiphy.wdev_list);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d2f7e8b8a097..eaff636169c2 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -507,8 +507,6 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
507void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, 507void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
508 struct wireless_dev *wdev); 508 struct wireless_dev *wdev);
509 509
510#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
511
512#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS 510#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
513#define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) 511#define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond)
514#else 512#else
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c8..542a4fc0a8d7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2610 case NL80211_IFTYPE_AP: 2610 case NL80211_IFTYPE_AP:
2611 if (wdev->ssid_len && 2611 if (wdev->ssid_len &&
2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) 2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
2613 goto nla_put_failure; 2613 goto nla_put_failure_locked;
2614 break; 2614 break;
2615 case NL80211_IFTYPE_STATION: 2615 case NL80211_IFTYPE_STATION:
2616 case NL80211_IFTYPE_P2P_CLIENT: 2616 case NL80211_IFTYPE_P2P_CLIENT:
@@ -2618,12 +2618,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2618 const u8 *ssid_ie; 2618 const u8 *ssid_ie;
2619 if (!wdev->current_bss) 2619 if (!wdev->current_bss)
2620 break; 2620 break;
2621 rcu_read_lock();
2621 ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, 2622 ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub,
2622 WLAN_EID_SSID); 2623 WLAN_EID_SSID);
2623 if (!ssid_ie) 2624 if (ssid_ie &&
2624 break; 2625 nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
2625 if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) 2626 goto nla_put_failure_rcu_locked;
2626 goto nla_put_failure; 2627 rcu_read_unlock();
2627 break; 2628 break;
2628 } 2629 }
2629 default: 2630 default:
@@ -2635,6 +2636,10 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2635 genlmsg_end(msg, hdr); 2636 genlmsg_end(msg, hdr);
2636 return 0; 2637 return 0;
2637 2638
2639 nla_put_failure_rcu_locked:
2640 rcu_read_unlock();
2641 nla_put_failure_locked:
2642 wdev_unlock(wdev);
2638 nla_put_failure: 2643 nla_put_failure:
2639 genlmsg_cancel(msg, hdr); 2644 genlmsg_cancel(msg, hdr);
2640 return -EMSGSIZE; 2645 return -EMSGSIZE;
@@ -9804,7 +9809,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
9804 */ 9809 */
9805 if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && 9810 if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss &&
9806 rdev->ops->get_station) { 9811 rdev->ops->get_station) {
9807 struct station_info sinfo; 9812 struct station_info sinfo = {};
9808 u8 *mac_addr; 9813 u8 *mac_addr;
9809 9814
9810 mac_addr = wdev->current_bss->pub.bssid; 9815 mac_addr = wdev->current_bss->pub.bssid;
@@ -11359,7 +11364,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
11359 break; 11364 break;
11360 case NL80211_NAN_FUNC_FOLLOW_UP: 11365 case NL80211_NAN_FUNC_FOLLOW_UP:
11361 if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || 11366 if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
11362 !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { 11367 !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
11368 !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
11363 err = -EINVAL; 11369 err = -EINVAL;
11364 goto out; 11370 goto out;
11365 } 11371 }
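The nl80211_send_iface() hunk wraps ieee80211_bss_get_ie() in rcu_read_lock()/rcu_read_unlock() and adds error labels that unwind in reverse acquisition order: drop the RCU read lock, then the wdev lock, then cancel the partially built message. A small sketch of that layered-label pattern with stand-in lock functions (not the real nl80211/genetlink API):

#include <errno.h>

static void lock_wdev(void)   { /* placeholder for wdev_lock() */ }
static void unlock_wdev(void) { /* placeholder for wdev_unlock() */ }
static void rcu_lock(void)    { /* placeholder for rcu_read_lock() */ }
static void rcu_unlock(void)  { /* placeholder for rcu_read_unlock() */ }
static void cancel_msg(void)  { /* placeholder for genlmsg_cancel() */ }

static int build_iface_msg(int fail_under_wdev_lock, int fail_under_rcu)
{
	lock_wdev();
	if (fail_under_wdev_lock)
		goto fail_locked;

	rcu_lock();
	if (fail_under_rcu)
		goto fail_rcu_locked;
	rcu_unlock();

	unlock_wdev();
	return 0;

fail_rcu_locked:
	rcu_unlock();			/* release in reverse order of acquisition */
fail_locked:
	unlock_wdev();
	cancel_msg();
	return -EMSGSIZE;
}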
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 78e71b0390be..7b42f0bacfd8 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1769,8 +1769,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
1769 if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS) 1769 if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS)
1770 return; 1770 return;
1771 1771
1772 chan_before.center_freq = chan->center_freq; 1772 chan_before = *chan;
1773 chan_before.flags = chan->flags;
1774 1773
1775 if (chan->flags & IEEE80211_CHAN_NO_IR) { 1774 if (chan->flags & IEEE80211_CHAN_NO_IR) {
1776 chan->flags &= ~IEEE80211_CHAN_NO_IR; 1775 chan->flags &= ~IEEE80211_CHAN_NO_IR;
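handle_reg_beacon() previously copied only center_freq and flags into chan_before; the fix assigns the whole structure, so every field consulted later (or added to the struct in the future) is captured as well. A tiny sketch of why whole-struct assignment is the safer snapshot; the struct layout here is invented:

struct chan_state {
	unsigned int center_freq;
	unsigned int flags;
	int max_power;		/* a member the old two-line copy would have missed */
};

static struct chan_state snapshot(const struct chan_state *chan)
{
	struct chan_state before = *chan;	/* copies all members at once */
	return before;
}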
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 7ca04a7de85a..05186a47878f 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1254,8 +1254,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
1254{ 1254{
1255 struct wireless_dev *wdev = dev->ieee80211_ptr; 1255 struct wireless_dev *wdev = dev->ieee80211_ptr;
1256 struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); 1256 struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
1257 /* we are under RTNL - globally locked - so can use a static struct */ 1257 struct station_info sinfo = {};
1258 static struct station_info sinfo;
1259 u8 addr[ETH_ALEN]; 1258 u8 addr[ETH_ALEN];
1260 int err; 1259 int err;
1261 1260
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 30e5746085b8..ac9477189d1c 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -102,6 +102,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
102 102
103 err = dev->xfrmdev_ops->xdo_dev_state_add(x); 103 err = dev->xfrmdev_ops->xdo_dev_state_add(x);
104 if (err) { 104 if (err) {
105 xso->dev = NULL;
105 dev_put(dev); 106 dev_put(dev);
106 return err; 107 return err;
107 } 108 }
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31574d5..5b2409746ae0 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/bottom_half.h>
12#include <linux/interrupt.h>
11#include <linux/slab.h> 13#include <linux/slab.h>
12#include <linux/module.h> 14#include <linux/module.h>
13#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/percpu.h>
14#include <net/dst.h> 17#include <net/dst.h>
15#include <net/ip.h> 18#include <net/ip.h>
16#include <net/xfrm.h> 19#include <net/xfrm.h>
17#include <net/ip_tunnels.h> 20#include <net/ip_tunnels.h>
18#include <net/ip6_tunnel.h> 21#include <net/ip6_tunnel.h>
19 22
23struct xfrm_trans_tasklet {
24 struct tasklet_struct tasklet;
25 struct sk_buff_head queue;
26};
27
28struct xfrm_trans_cb {
29 int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
30};
31
32#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
33
20static struct kmem_cache *secpath_cachep __read_mostly; 34static struct kmem_cache *secpath_cachep __read_mostly;
21 35
22static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); 36static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
25static struct gro_cells gro_cells; 39static struct gro_cells gro_cells;
26static struct net_device xfrm_napi_dev; 40static struct net_device xfrm_napi_dev;
27 41
42static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
43
28int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) 44int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
29{ 45{
30 int err = 0; 46 int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
207 xfrm_address_t *daddr; 223 xfrm_address_t *daddr;
208 struct xfrm_mode *inner_mode; 224 struct xfrm_mode *inner_mode;
209 u32 mark = skb->mark; 225 u32 mark = skb->mark;
210 unsigned int family; 226 unsigned int family = AF_UNSPEC;
211 int decaps = 0; 227 int decaps = 0;
212 int async = 0; 228 int async = 0;
213 bool xfrm_gro = false; 229 bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
216 232
217 if (encap_type < 0) { 233 if (encap_type < 0) {
218 x = xfrm_input_state(skb); 234 x = xfrm_input_state(skb);
235
236 if (unlikely(x->km.state != XFRM_STATE_VALID)) {
237 if (x->km.state == XFRM_STATE_ACQ)
238 XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
239 else
240 XFRM_INC_STATS(net,
241 LINUX_MIB_XFRMINSTATEINVALID);
242 goto drop;
243 }
244
219 family = x->outer_mode->afinfo->family; 245 family = x->outer_mode->afinfo->family;
220 246
221 /* An encap_type of -1 indicates async resumption. */ 247 /* An encap_type of -1 indicates async resumption. */
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
467} 493}
468EXPORT_SYMBOL(xfrm_input_resume); 494EXPORT_SYMBOL(xfrm_input_resume);
469 495
496static void xfrm_trans_reinject(unsigned long data)
497{
498 struct xfrm_trans_tasklet *trans = (void *)data;
499 struct sk_buff_head queue;
500 struct sk_buff *skb;
501
502 __skb_queue_head_init(&queue);
503 skb_queue_splice_init(&trans->queue, &queue);
504
505 while ((skb = __skb_dequeue(&queue)))
506 XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
507}
508
509int xfrm_trans_queue(struct sk_buff *skb,
510 int (*finish)(struct net *, struct sock *,
511 struct sk_buff *))
512{
513 struct xfrm_trans_tasklet *trans;
514
515 trans = this_cpu_ptr(&xfrm_trans_tasklet);
516
517 if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
518 return -ENOBUFS;
519
520 XFRM_TRANS_SKB_CB(skb)->finish = finish;
521 __skb_queue_tail(&trans->queue, skb);
522 tasklet_schedule(&trans->tasklet);
523 return 0;
524}
525EXPORT_SYMBOL(xfrm_trans_queue);
526
470void __init xfrm_input_init(void) 527void __init xfrm_input_init(void)
471{ 528{
472 int err; 529 int err;
530 int i;
473 531
474 init_dummy_netdev(&xfrm_napi_dev); 532 init_dummy_netdev(&xfrm_napi_dev);
475 err = gro_cells_init(&gro_cells, &xfrm_napi_dev); 533 err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void)
480 sizeof(struct sec_path), 538 sizeof(struct sec_path),
481 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 539 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
482 NULL); 540 NULL);
541
542 for_each_possible_cpu(i) {
543 struct xfrm_trans_tasklet *trans;
544
545 trans = &per_cpu(xfrm_trans_tasklet, i);
546 __skb_queue_head_init(&trans->queue);
547 tasklet_init(&trans->tasklet, xfrm_trans_reinject,
548 (unsigned long)trans);
549 }
483} 550}
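xfrm_trans_queue() defers packets to a per-CPU tasklet: the skb's control block is tagged with a finish callback, the skb is appended to that CPU's queue unless the queue already holds netdev_max_backlog entries (in which case -ENOBUFS is returned), and the tasklet later splices the queue out and runs each callback. A userspace-compilable sketch of the same bounded "queue now, run callback later" pattern, with invented names and no locking or per-CPU handling:

#include <errno.h>
#include <stddef.h>

#define MAX_BACKLOG 4	/* stand-in for netdev_max_backlog */

struct work_item {
	struct work_item *next;
	int (*finish)(struct work_item *item);	/* like XFRM_TRANS_SKB_CB(skb)->finish */
};

struct trans_queue {
	struct work_item *head, *tail;
	unsigned int len;
};

static int trans_enqueue(struct trans_queue *q, struct work_item *item,
			 int (*finish)(struct work_item *))
{
	if (q->len >= MAX_BACKLOG)
		return -ENOBUFS;	/* refuse work once the backlog is full */
	item->finish = finish;
	item->next = NULL;
	if (q->tail)
		q->tail->next = item;
	else
		q->head = item;
	q->tail = item;
	q->len++;
	return 0;
}

static void trans_reinject(struct trans_queue *q)
{
	struct work_item *item = q->head;

	q->head = q->tail = NULL;	/* splice the whole queue out first */
	q->len = 0;
	while (item) {
		struct work_item *next = item->next;
		item->finish(item);	/* resume processing for each deferred item */
		item = next;
	}
}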
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9542975eb2f9..bd6b0e7a0ee4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -609,7 +609,8 @@ static void xfrm_hash_rebuild(struct work_struct *work)
609 609
610 /* re-insert all policies by order of creation */ 610 /* re-insert all policies by order of creation */
611 list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { 611 list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
612 if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { 612 if (policy->walk.dead ||
613 xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
613 /* skip socket policies */ 614 /* skip socket policies */
614 continue; 615 continue;
615 } 616 }
@@ -974,8 +975,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
974 } 975 }
975 if (!cnt) 976 if (!cnt)
976 err = -ESRCH; 977 err = -ESRCH;
977 else
978 xfrm_policy_cache_flush();
979out: 978out:
980 spin_unlock_bh(&net->xfrm.xfrm_policy_lock); 979 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
981 return err; 980 return err;
@@ -1168,9 +1167,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
1168 again: 1167 again:
1169 pol = rcu_dereference(sk->sk_policy[dir]); 1168 pol = rcu_dereference(sk->sk_policy[dir]);
1170 if (pol != NULL) { 1169 if (pol != NULL) {
1171 bool match = xfrm_selector_match(&pol->selector, fl, family); 1170 bool match;
1172 int err = 0; 1171 int err = 0;
1173 1172
1173 if (pol->family != family) {
1174 pol = NULL;
1175 goto out;
1176 }
1177
1178 match = xfrm_selector_match(&pol->selector, fl, family);
1174 if (match) { 1179 if (match) {
1175 if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { 1180 if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
1176 pol = NULL; 1181 pol = NULL;
@@ -1737,6 +1742,8 @@ void xfrm_policy_cache_flush(void)
1737 bool found = 0; 1742 bool found = 0;
1738 int cpu; 1743 int cpu;
1739 1744
1745 might_sleep();
1746
1740 local_bh_disable(); 1747 local_bh_disable();
1741 rcu_read_lock(); 1748 rcu_read_lock();
1742 for_each_possible_cpu(cpu) { 1749 for_each_possible_cpu(cpu) {
@@ -1833,6 +1840,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1833 sizeof(struct xfrm_policy *) * num_pols) == 0 && 1840 sizeof(struct xfrm_policy *) * num_pols) == 0 &&
1834 xfrm_xdst_can_reuse(xdst, xfrm, err)) { 1841 xfrm_xdst_can_reuse(xdst, xfrm, err)) {
1835 dst_hold(&xdst->u.dst); 1842 dst_hold(&xdst->u.dst);
1843 xfrm_pols_put(pols, num_pols);
1836 while (err > 0) 1844 while (err > 0)
1837 xfrm_state_put(xfrm[--err]); 1845 xfrm_state_put(xfrm[--err]);
1838 return xdst; 1846 return xdst;
@@ -2055,8 +2063,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
2055 if (num_xfrms <= 0) 2063 if (num_xfrms <= 0)
2056 goto make_dummy_bundle; 2064 goto make_dummy_bundle;
2057 2065
2066 local_bh_disable();
2058 xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, 2067 xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
2059 xflo->dst_orig); 2068 xflo->dst_orig);
2069 local_bh_enable();
2070
2060 if (IS_ERR(xdst)) { 2071 if (IS_ERR(xdst)) {
2061 err = PTR_ERR(xdst); 2072 err = PTR_ERR(xdst);
2062 if (err != -EAGAIN) 2073 if (err != -EAGAIN)
@@ -2143,9 +2154,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
2143 goto no_transform; 2154 goto no_transform;
2144 } 2155 }
2145 2156
2157 local_bh_disable();
2146 xdst = xfrm_resolve_and_create_bundle( 2158 xdst = xfrm_resolve_and_create_bundle(
2147 pols, num_pols, fl, 2159 pols, num_pols, fl,
2148 family, dst_orig); 2160 family, dst_orig);
2161 local_bh_enable();
2162
2149 if (IS_ERR(xdst)) { 2163 if (IS_ERR(xdst)) {
2150 xfrm_pols_put(pols, num_pols); 2164 xfrm_pols_put(pols, num_pols);
2151 err = PTR_ERR(xdst); 2165 err = PTR_ERR(xdst);
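Among the xfrm_policy.c hunks, xfrm_sk_policy_lookup() now bails out when the cached socket policy's address family differs from the flow's family, before xfrm_selector_match() tries to interpret the selector addresses; the other hunks skip dead policies during the hash rebuild and wrap bundle creation in local_bh_disable()/local_bh_enable(). A tiny sketch of the "check family before comparing selectors" guard, with invented types:

#include <stddef.h>

struct flow   { unsigned short family; unsigned int daddr; };
struct policy { unsigned short family; unsigned int sel_daddr; };

static const struct policy *sk_policy_lookup(const struct policy *pol,
					     const struct flow *fl)
{
	if (!pol)
		return NULL;
	if (pol->family != fl->family)
		return NULL;		/* a selector from another family is meaningless here */
	if (pol->sel_daddr != fl->daddr)
		return NULL;		/* stand-in for xfrm_selector_match() */
	return pol;
}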
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 065d89606888..a3785f538018 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -313,13 +313,14 @@ retry:
313 if ((type && !try_module_get(type->owner))) 313 if ((type && !try_module_get(type->owner)))
314 type = NULL; 314 type = NULL;
315 315
316 rcu_read_unlock();
317
316 if (!type && try_load) { 318 if (!type && try_load) {
317 request_module("xfrm-offload-%d-%d", family, proto); 319 request_module("xfrm-offload-%d-%d", family, proto);
318 try_load = 0; 320 try_load = false;
319 goto retry; 321 goto retry;
320 } 322 }
321 323
322 rcu_read_unlock();
323 return type; 324 return type;
324} 325}
325 326
@@ -1343,6 +1344,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
1343 1344
1344 if (orig->aead) { 1345 if (orig->aead) {
1345 x->aead = xfrm_algo_aead_clone(orig->aead); 1346 x->aead = xfrm_algo_aead_clone(orig->aead);
1347 x->geniv = orig->geniv;
1346 if (!x->aead) 1348 if (!x->aead)
1347 goto error; 1349 goto error;
1348 } 1350 }
@@ -1533,8 +1535,12 @@ out:
1533 err = -EINVAL; 1535 err = -EINVAL;
1534 spin_lock_bh(&x1->lock); 1536 spin_lock_bh(&x1->lock);
1535 if (likely(x1->km.state == XFRM_STATE_VALID)) { 1537 if (likely(x1->km.state == XFRM_STATE_VALID)) {
1536 if (x->encap && x1->encap) 1538 if (x->encap && x1->encap &&
1539 x->encap->encap_type == x1->encap->encap_type)
1537 memcpy(x1->encap, x->encap, sizeof(*x1->encap)); 1540 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1541 else if (x->encap || x1->encap)
1542 goto fail;
1543
1538 if (x->coaddr && x1->coaddr) { 1544 if (x->coaddr && x1->coaddr) {
1539 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); 1545 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1540 } 1546 }
@@ -1551,6 +1557,8 @@ out:
1551 x->km.state = XFRM_STATE_DEAD; 1557 x->km.state = XFRM_STATE_DEAD;
1552 __xfrm_state_put(x); 1558 __xfrm_state_put(x);
1553 } 1559 }
1560
1561fail:
1554 spin_unlock_bh(&x1->lock); 1562 spin_unlock_bh(&x1->lock);
1555 1563
1556 xfrm_state_put(x1); 1564 xfrm_state_put(x1);
@@ -2264,8 +2272,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
2264 goto error; 2272 goto error;
2265 } 2273 }
2266 2274
2267 x->km.state = XFRM_STATE_VALID;
2268
2269error: 2275error:
2270 return err; 2276 return err;
2271} 2277}
@@ -2274,7 +2280,13 @@ EXPORT_SYMBOL(__xfrm_init_state);
2274 2280
2275int xfrm_init_state(struct xfrm_state *x) 2281int xfrm_init_state(struct xfrm_state *x)
2276{ 2282{
2277 return __xfrm_init_state(x, true, false); 2283 int err;
2284
2285 err = __xfrm_init_state(x, true, false);
2286 if (!err)
2287 x->km.state = XFRM_STATE_VALID;
2288
2289 return err;
2278} 2290}
2279 2291
2280EXPORT_SYMBOL(xfrm_init_state); 2292EXPORT_SYMBOL(xfrm_init_state);
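__xfrm_init_state() no longer marks the state XFRM_STATE_VALID itself; xfrm_init_state() promotes it only when initialisation succeeded, and the xfrm_user.c hunk that follows flips a still-VOID state to VALID only after the optional hardware offload setup has run. A short sketch of that two-phase pattern; the enum and field names are invented:

#include <errno.h>

enum state { STATE_VOID, STATE_VALID, STATE_DEAD };

struct sa_state { enum state km_state; int have_algo; };

static int init_state(struct sa_state *x)
{
	if (!x->have_algo)
		return -EINVAL;	/* leave km_state untouched on failure */
	return 0;		/* note: does not set STATE_VALID itself */
}

static int create_state(struct sa_state *x)
{
	int err = init_state(x);

	if (!err && x->km_state == STATE_VOID)
		x->km_state = STATE_VALID;	/* promote only after everything succeeded */
	return err;
}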
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 983b0233767b..7f52b8eb177d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
598 goto error; 598 goto error;
599 } 599 }
600 600
601 if (attrs[XFRMA_OFFLOAD_DEV]) {
602 err = xfrm_dev_state_add(net, x,
603 nla_data(attrs[XFRMA_OFFLOAD_DEV]));
604 if (err)
605 goto error;
606 }
607
608 if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, 601 if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
609 attrs[XFRMA_REPLAY_ESN_VAL]))) 602 attrs[XFRMA_REPLAY_ESN_VAL])))
610 goto error; 603 goto error;
@@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
620 /* override default values from above */ 613 /* override default values from above */
621 xfrm_update_ae_params(x, attrs, 0); 614 xfrm_update_ae_params(x, attrs, 0);
622 615
616 /* configure the hardware if offload is requested */
617 if (attrs[XFRMA_OFFLOAD_DEV]) {
618 err = xfrm_dev_state_add(net, x,
619 nla_data(attrs[XFRMA_OFFLOAD_DEV]));
620 if (err)
621 goto error;
622 }
623
623 return x; 624 return x;
624 625
625error: 626error:
@@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
662 goto out; 663 goto out;
663 } 664 }
664 665
666 if (x->km.state == XFRM_STATE_VOID)
667 x->km.state = XFRM_STATE_VALID;
668
665 c.seq = nlh->nlmsg_seq; 669 c.seq = nlh->nlmsg_seq;
666 c.portid = nlh->nlmsg_pid; 670 c.portid = nlh->nlmsg_pid;
667 c.event = nlh->nlmsg_type; 671 c.event = nlh->nlmsg_type;
@@ -1419,11 +1423,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
1419 1423
1420static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) 1424static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
1421{ 1425{
1426 u16 prev_family;
1422 int i; 1427 int i;
1423 1428
1424 if (nr > XFRM_MAX_DEPTH) 1429 if (nr > XFRM_MAX_DEPTH)
1425 return -EINVAL; 1430 return -EINVAL;
1426 1431
1432 prev_family = family;
1433
1427 for (i = 0; i < nr; i++) { 1434 for (i = 0; i < nr; i++) {
1428 /* We never validated the ut->family value, so many 1435 /* We never validated the ut->family value, so many
1429 * applications simply leave it at zero. The check was 1436 * applications simply leave it at zero. The check was
@@ -1435,6 +1442,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
1435 if (!ut[i].family) 1442 if (!ut[i].family)
1436 ut[i].family = family; 1443 ut[i].family = family;
1437 1444
1445 if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
1446 (ut[i].family != prev_family))
1447 return -EINVAL;
1448
1449 prev_family = ut[i].family;
1450
1438 switch (ut[i].family) { 1451 switch (ut[i].family) {
1439 case AF_INET: 1452 case AF_INET:
1440 break; 1453 break;
@@ -1445,6 +1458,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
1445 default: 1458 default:
1446 return -EINVAL; 1459 return -EINVAL;
1447 } 1460 }
1461
1462 switch (ut[i].id.proto) {
1463 case IPPROTO_AH:
1464 case IPPROTO_ESP:
1465 case IPPROTO_COMP:
1466#if IS_ENABLED(CONFIG_IPV6)
1467 case IPPROTO_ROUTING:
1468 case IPPROTO_DSTOPTS:
1469#endif
1470 case IPSEC_PROTO_ANY:
1471 break;
1472 default:
1473 return -EINVAL;
1474 }
1475
1448 } 1476 }
1449 1477
1450 return 0; 1478 return 0;
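The validate_tmpl() hunk adds two per-entry checks: transport-mode templates may not switch address family mid-chain, and id.proto must be one of the IPsec protocols the stack actually handles (AH, ESP, IPcomp, the IPv6 routing/destination-options headers when IPv6 is enabled, or IPSEC_PROTO_ANY); anything else fails with -EINVAL before a policy is built. A compact userspace sketch of those checks; the constants are illustrative stand-ins and the IPv6-only cases are omitted for brevity:

#include <errno.h>

#define MODE_TRANSPORT 0
enum { PROTO_ESP = 50, PROTO_AH = 51, PROTO_COMP = 108, PROTO_ANY = 255 };

struct tmpl { unsigned char mode; unsigned short family; unsigned char proto; };

static int validate_tmpl_sketch(const struct tmpl *ut, int nr, unsigned short family)
{
	unsigned short prev_family = family;
	int i;

	for (i = 0; i < nr; i++) {
		unsigned short fam = ut[i].family ? ut[i].family : family;

		if (ut[i].mode == MODE_TRANSPORT && fam != prev_family)
			return -EINVAL;	/* transport mode can't hop address families */
		prev_family = fam;

		switch (ut[i].proto) {
		case PROTO_ESP:
		case PROTO_AH:
		case PROTO_COMP:
		case PROTO_ANY:
			break;		/* known IPsec protocols only */
		default:
			return -EINVAL;
		}
	}
	return 0;
}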
@@ -2470,7 +2498,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
2470 [XFRMA_PROTO] = { .type = NLA_U8 }, 2498 [XFRMA_PROTO] = { .type = NLA_U8 },
2471 [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, 2499 [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
2472 [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, 2500 [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
2473 [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 }, 2501 [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 },
2474}; 2502};
2475 2503
2476static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { 2504static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {