aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/8021q/vlan_core.c60
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/9p/Kconfig8
-rw-r--r--net/9p/client.c30
-rw-r--r--net/9p/mod.c4
-rw-r--r--net/9p/trans_fd.c7
-rw-r--r--net/9p/trans_rdma.c3
-rw-r--r--net/9p/util.c2
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/atm/mpc.c2
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_netfilter.c6
-rw-r--r--net/caif/cfmuxl.c2
-rw-r--r--net/caif/chnl_net.c9
-rw-r--r--net/ceph/messenger.c82
-rw-r--r--net/ceph/osd_client.c34
-rw-r--r--net/ceph/osdmap.c13
-rw-r--r--net/core/dev.c30
-rw-r--r--net/core/net-sysfs.c23
-rw-r--r--net/core/net_namespace.c75
-rw-r--r--net/core/netpoll.c7
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/dns_resolver/dns_key.c10
-rw-r--r--net/ieee802154/nl-phy.c3
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c14
-rw-r--r--net/ipv4/ip_options.c15
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/netfilter/ip_queue.c6
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c6
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c7
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c4
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/route.c82
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/netfilter/ip6_queue.c6
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c2
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/udp.c5
-rw-r--r--net/irda/iriap.c5
-rw-r--r--net/iucv/iucv.c2
-rw-r--r--net/l2tp/l2tp_debugfs.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c17
-rw-r--r--net/netfilter/nf_conntrack_core.c7
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c10
-rw-r--r--net/netfilter/nf_conntrack_irc.c3
-rw-r--r--net/netfilter/nf_conntrack_pptp.c3
-rw-r--r--net/netfilter/nf_conntrack_sane.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c2
-rw-r--r--net/netfilter/nfnetlink_log.c3
-rw-r--r--net/netfilter/nfnetlink_queue.c3
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/packet/af_packet.c17
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/ib_cm.c2
-rw-r--r--net/rds/iw.c2
-rw-r--r--net/rds/iw_cm.c2
-rw-r--r--net/rds/rdma_transport.c3
-rw-r--r--net/sched/sch_generic.c3
-rw-r--r--net/sctp/associola.c23
-rw-r--r--net/sctp/sm_sideeffect.c3
-rw-r--r--net/sctp/sm_statefuns.c14
-rw-r--r--net/sunrpc/auth.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c3
-rw-r--r--net/sunrpc/clnt.c29
-rw-r--r--net/sunrpc/rpcb_clnt.c97
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svcsock.c336
-rw-r--r--net/sunrpc/xdr.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c3
-rw-r--r--net/sunrpc/xprtrdma/verbs.c2
-rw-r--r--net/sunrpc/xprtsock.c435
-rw-r--r--net/xfrm/xfrm_replay.c4
92 files changed, 1251 insertions, 470 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index c7a581a96894..917ecb93ea28 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -205,7 +205,7 @@ int register_vlan_dev(struct net_device *dev)
205 grp->nr_vlans++; 205 grp->nr_vlans++;
206 206
207 if (ngrp) { 207 if (ngrp) {
208 if (ops->ndo_vlan_rx_register) 208 if (ops->ndo_vlan_rx_register && (real_dev->features & NETIF_F_HW_VLAN_RX))
209 ops->ndo_vlan_rx_register(real_dev, ngrp); 209 ops->ndo_vlan_rx_register(real_dev, ngrp);
210 rcu_assign_pointer(real_dev->vlgrp, ngrp); 210 rcu_assign_pointer(real_dev->vlgrp, ngrp);
211 } 211 }
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 41495dc2a4c9..fcc684678af6 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp)
23 return false; 23 return false;
24 24
25 skb->dev = vlan_dev; 25 skb->dev = vlan_dev;
26 if (skb->pkt_type == PACKET_OTHERHOST) {
27 /* Our lower layer thinks this is not local, let's make sure.
28 * This allows the VLAN to have a different MAC than the
29 * underlying device, and still route correctly. */
30 if (!compare_ether_addr(eth_hdr(skb)->h_dest,
31 vlan_dev->dev_addr))
32 skb->pkt_type = PACKET_HOST;
33 }
34
35 if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
36 unsigned int offset = skb->data - skb_mac_header(skb);
37
38 /*
39 * vlan_insert_tag expect skb->data pointing to mac header.
40 * So change skb->data before calling it and change back to
41 * original position later
42 */
43 skb_push(skb, offset);
44 skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
45 if (!skb)
46 return false;
47 skb_pull(skb, offset + VLAN_HLEN);
48 skb_reset_mac_len(skb);
49 }
50
26 skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); 51 skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
27 skb->vlan_tci = 0; 52 skb->vlan_tci = 0;
28 53
@@ -31,22 +56,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
31 u64_stats_update_begin(&rx_stats->syncp); 56 u64_stats_update_begin(&rx_stats->syncp);
32 rx_stats->rx_packets++; 57 rx_stats->rx_packets++;
33 rx_stats->rx_bytes += skb->len; 58 rx_stats->rx_bytes += skb->len;
34 59 if (skb->pkt_type == PACKET_MULTICAST)
35 switch (skb->pkt_type) {
36 case PACKET_BROADCAST:
37 break;
38 case PACKET_MULTICAST:
39 rx_stats->rx_multicast++; 60 rx_stats->rx_multicast++;
40 break;
41 case PACKET_OTHERHOST:
42 /* Our lower layer thinks this is not local, let's make sure.
43 * This allows the VLAN to have a different MAC than the
44 * underlying device, and still route correctly. */
45 if (!compare_ether_addr(eth_hdr(skb)->h_dest,
46 vlan_dev->dev_addr))
47 skb->pkt_type = PACKET_HOST;
48 break;
49 }
50 u64_stats_update_end(&rx_stats->syncp); 61 u64_stats_update_end(&rx_stats->syncp);
51 62
52 return true; 63 return true;
@@ -89,18 +100,13 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
89} 100}
90EXPORT_SYMBOL(vlan_gro_frags); 101EXPORT_SYMBOL(vlan_gro_frags);
91 102
92static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) 103static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
93{ 104{
94 if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { 105 if (skb_cow(skb, skb_headroom(skb)) < 0)
95 if (skb_cow(skb, skb_headroom(skb)) < 0) 106 return NULL;
96 skb = NULL; 107 memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
97 if (skb) { 108 skb->mac_header += VLAN_HLEN;
98 /* Lifted from Gleb's VLAN code... */ 109 skb_reset_mac_len(skb);
99 memmove(skb->data - ETH_HLEN,
100 skb->data - VLAN_ETH_HLEN, 12);
101 skb->mac_header += VLAN_HLEN;
102 }
103 }
104 return skb; 110 return skb;
105} 111}
106 112
@@ -161,7 +167,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
161 skb_pull_rcsum(skb, VLAN_HLEN); 167 skb_pull_rcsum(skb, VLAN_HLEN);
162 vlan_set_encap_proto(skb, vhdr); 168 vlan_set_encap_proto(skb, vhdr);
163 169
164 skb = vlan_check_reorder_header(skb); 170 skb = vlan_reorder_header(skb);
165 if (unlikely(!skb)) 171 if (unlikely(!skb))
166 goto err_free; 172 goto err_free;
167 173
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f247f5bff88d..7ea5cf9ea08a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -165,7 +165,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
165 u64_stats_update_begin(&stats->syncp); 165 u64_stats_update_begin(&stats->syncp);
166 stats->tx_packets++; 166 stats->tx_packets++;
167 stats->tx_bytes += len; 167 stats->tx_bytes += len;
168 u64_stats_update_begin(&stats->syncp); 168 u64_stats_update_end(&stats->syncp);
169 } else { 169 } else {
170 this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped); 170 this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
171 } 171 }
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 7ed75c7bd5d1..d9ea09b11cf8 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -3,8 +3,8 @@
3# 3#
4 4
5menuconfig NET_9P 5menuconfig NET_9P
6 depends on NET && EXPERIMENTAL 6 depends on NET
7 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" 7 tristate "Plan 9 Resource Sharing Support (9P2000)"
8 help 8 help
9 If you say Y here, you will get experimental support for 9 If you say Y here, you will get experimental support for
10 Plan 9 resource sharing via the 9P2000 protocol. 10 Plan 9 resource sharing via the 9P2000 protocol.
@@ -16,8 +16,8 @@ menuconfig NET_9P
16if NET_9P 16if NET_9P
17 17
18config NET_9P_VIRTIO 18config NET_9P_VIRTIO
19 depends on EXPERIMENTAL && VIRTIO 19 depends on VIRTIO
20 tristate "9P Virtio Transport (Experimental)" 20 tristate "9P Virtio Transport"
21 help 21 help
22 This builds support for a transports between 22 This builds support for a transports between
23 guest partitions and a host partition. 23 guest partitions and a host partition.
diff --git a/net/9p/client.c b/net/9p/client.c
index ceab943dfc49..9e3b0e640da1 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -92,9 +92,6 @@ static int get_protocol_version(const substring_t *name)
92 return version; 92 return version;
93} 93}
94 94
95static struct p9_req_t *
96p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
97
98/** 95/**
99 * parse_options - parse mount options into client structure 96 * parse_options - parse mount options into client structure
100 * @opts: options string passed from mount 97 * @opts: options string passed from mount
@@ -307,12 +304,13 @@ static int p9_tag_init(struct p9_client *c)
307 c->tagpool = p9_idpool_create(); 304 c->tagpool = p9_idpool_create();
308 if (IS_ERR(c->tagpool)) { 305 if (IS_ERR(c->tagpool)) {
309 err = PTR_ERR(c->tagpool); 306 err = PTR_ERR(c->tagpool);
310 c->tagpool = NULL;
311 goto error; 307 goto error;
312 } 308 }
313 309 err = p9_idpool_get(c->tagpool); /* reserve tag 0 */
314 p9_idpool_get(c->tagpool); /* reserve tag 0 */ 310 if (err < 0) {
315 311 p9_idpool_destroy(c->tagpool);
312 goto error;
313 }
316 c->max_tag = 0; 314 c->max_tag = 0;
317error: 315error:
318 return err; 316 return err;
@@ -518,12 +516,15 @@ out_err:
518 return err; 516 return err;
519} 517}
520 518
519static struct p9_req_t *
520p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
521
521/** 522/**
522 * p9_client_flush - flush (cancel) a request 523 * p9_client_flush - flush (cancel) a request
523 * @c: client state 524 * @c: client state
524 * @oldreq: request to cancel 525 * @oldreq: request to cancel
525 * 526 *
526 * This sents a flush for a particular requests and links 527 * This sents a flush for a particular request and links
527 * the flush request to the original request. The current 528 * the flush request to the original request. The current
528 * code only supports a single flush request although the protocol 529 * code only supports a single flush request although the protocol
529 * allows for multiple flush requests to be sent for a single request. 530 * allows for multiple flush requests to be sent for a single request.
@@ -789,11 +790,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
789 spin_lock_init(&clnt->lock); 790 spin_lock_init(&clnt->lock);
790 INIT_LIST_HEAD(&clnt->fidlist); 791 INIT_LIST_HEAD(&clnt->fidlist);
791 792
792 p9_tag_init(clnt); 793 err = p9_tag_init(clnt);
794 if (err < 0)
795 goto free_client;
793 796
794 err = parse_opts(options, clnt); 797 err = parse_opts(options, clnt);
795 if (err < 0) 798 if (err < 0)
796 goto free_client; 799 goto destroy_tagpool;
797 800
798 if (!clnt->trans_mod) 801 if (!clnt->trans_mod)
799 clnt->trans_mod = v9fs_get_default_trans(); 802 clnt->trans_mod = v9fs_get_default_trans();
@@ -802,13 +805,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
802 err = -EPROTONOSUPPORT; 805 err = -EPROTONOSUPPORT;
803 P9_DPRINTK(P9_DEBUG_ERROR, 806 P9_DPRINTK(P9_DEBUG_ERROR,
804 "No transport defined or default transport\n"); 807 "No transport defined or default transport\n");
805 goto free_client; 808 goto destroy_tagpool;
806 } 809 }
807 810
808 clnt->fidpool = p9_idpool_create(); 811 clnt->fidpool = p9_idpool_create();
809 if (IS_ERR(clnt->fidpool)) { 812 if (IS_ERR(clnt->fidpool)) {
810 err = PTR_ERR(clnt->fidpool); 813 err = PTR_ERR(clnt->fidpool);
811 clnt->fidpool = NULL;
812 goto put_trans; 814 goto put_trans;
813 } 815 }
814 816
@@ -834,6 +836,8 @@ destroy_fidpool:
834 p9_idpool_destroy(clnt->fidpool); 836 p9_idpool_destroy(clnt->fidpool);
835put_trans: 837put_trans:
836 v9fs_put_trans(clnt->trans_mod); 838 v9fs_put_trans(clnt->trans_mod);
839destroy_tagpool:
840 p9_idpool_destroy(clnt->tagpool);
837free_client: 841free_client:
838 kfree(clnt); 842 kfree(clnt);
839 return ERR_PTR(err); 843 return ERR_PTR(err);
@@ -1298,7 +1302,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1298 if (count < rsize) 1302 if (count < rsize)
1299 rsize = count; 1303 rsize = count;
1300 1304
1301 /* Don't bother zerocopy form small IO (< 1024) */ 1305 /* Don't bother zerocopy for small IO (< 1024) */
1302 if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 1306 if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
1303 P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { 1307 P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
1304 req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, 1308 req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
diff --git a/net/9p/mod.c b/net/9p/mod.c
index cf8a4128cd5c..72c398275051 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -139,7 +139,7 @@ void v9fs_put_trans(struct p9_trans_module *m)
139} 139}
140 140
141/** 141/**
142 * v9fs_init - Initialize module 142 * init_p9 - Initialize module
143 * 143 *
144 */ 144 */
145static int __init init_p9(void) 145static int __init init_p9(void)
@@ -154,7 +154,7 @@ static int __init init_p9(void)
154} 154}
155 155
156/** 156/**
157 * v9fs_init - shutdown module 157 * exit_p9 - shutdown module
158 * 158 *
159 */ 159 */
160 160
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 4a9084395d35..fdfdb5747f63 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -916,8 +916,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
916 sin_server.sin_family = AF_INET; 916 sin_server.sin_family = AF_INET;
917 sin_server.sin_addr.s_addr = in_aton(addr); 917 sin_server.sin_addr.s_addr = in_aton(addr);
918 sin_server.sin_port = htons(opts.port); 918 sin_server.sin_port = htons(opts.port);
919 err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); 919 err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_INET,
920 920 SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
921 if (err) { 921 if (err) {
922 P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); 922 P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n");
923 return err; 923 return err;
@@ -954,7 +954,8 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
954 954
955 sun_server.sun_family = PF_UNIX; 955 sun_server.sun_family = PF_UNIX;
956 strcpy(sun_server.sun_path, addr); 956 strcpy(sun_server.sun_path, addr);
957 err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); 957 err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_UNIX,
958 SOCK_STREAM, 0, &csocket, 1);
958 if (err < 0) { 959 if (err < 0) {
959 P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); 960 P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n");
960 return err; 961 return err;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 844a7a5607e3..159c50f1c6bf 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -589,7 +589,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
589 return -ENOMEM; 589 return -ENOMEM;
590 590
591 /* Create the RDMA CM ID */ 591 /* Create the RDMA CM ID */
592 rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); 592 rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
593 IB_QPT_RC);
593 if (IS_ERR(rdma->cm_id)) 594 if (IS_ERR(rdma->cm_id))
594 goto error; 595 goto error;
595 596
diff --git a/net/9p/util.c b/net/9p/util.c
index da6af81e59d9..9c1c9348ac35 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -93,7 +93,7 @@ int p9_idpool_get(struct p9_idpool *p)
93 93
94retry: 94retry:
95 if (idr_pre_get(&p->pool, GFP_NOFS) == 0) 95 if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
96 return 0; 96 return -1;
97 97
98 spin_lock_irqsave(&p->lock, flags); 98 spin_lock_irqsave(&p->lock, flags);
99 99
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 25073b6ef474..ba48daa68c1f 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1171,7 +1171,7 @@ static int __init lane_module_init(void)
1171#endif 1171#endif
1172 1172
1173 register_atm_ioctl(&lane_ioctl_ops); 1173 register_atm_ioctl(&lane_ioctl_ops);
1174 pr_info("lec.c: " __DATE__ " " __TIME__ " initialized\n"); 1174 pr_info("lec.c: initialized\n");
1175 return 0; 1175 return 0;
1176} 1176}
1177 1177
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 644cdf071642..3ccca42e6f90 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1482,7 +1482,7 @@ static __init int atm_mpoa_init(void)
1482 if (mpc_proc_init() != 0) 1482 if (mpc_proc_init() != 0)
1483 pr_info("failed to initialize /proc/mpoa\n"); 1483 pr_info("failed to initialize /proc/mpoa\n");
1484 1484
1485 pr_info("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); 1485 pr_info("mpc.c: initialized\n");
1486 1486
1487 return 0; 1487 return 0;
1488} 1488}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index a6b2f86378c7..c188c803c09c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -243,6 +243,7 @@ int br_netpoll_enable(struct net_bridge_port *p)
243 goto out; 243 goto out;
244 244
245 np->dev = p->dev; 245 np->dev = p->dev;
246 strlcpy(np->dev_name, p->dev->name, IFNAMSIZ);
246 247
247 err = __netpoll_setup(np); 248 err = __netpoll_setup(np);
248 if (err) { 249 if (err) {
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2f14eafdeeab..29b9812c8da0 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1424,7 +1424,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1424 switch (ih->type) { 1424 switch (ih->type) {
1425 case IGMP_HOST_MEMBERSHIP_REPORT: 1425 case IGMP_HOST_MEMBERSHIP_REPORT:
1426 case IGMPV2_HOST_MEMBERSHIP_REPORT: 1426 case IGMPV2_HOST_MEMBERSHIP_REPORT:
1427 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; 1427 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1428 err = br_ip4_multicast_add_group(br, port, ih->group); 1428 err = br_ip4_multicast_add_group(br, port, ih->group);
1429 break; 1429 break;
1430 case IGMPV3_HOST_MEMBERSHIP_REPORT: 1430 case IGMPV3_HOST_MEMBERSHIP_REPORT:
@@ -1543,7 +1543,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1543 goto out; 1543 goto out;
1544 } 1544 }
1545 mld = (struct mld_msg *)skb_transport_header(skb2); 1545 mld = (struct mld_msg *)skb_transport_header(skb2);
1546 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; 1546 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1547 err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); 1547 err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
1548 break; 1548 break;
1549 } 1549 }
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 3fa123185e89..56149ec36d7f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -104,10 +104,16 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
104{ 104{
105} 105}
106 106
107static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
108{
109 return NULL;
110}
111
107static struct dst_ops fake_dst_ops = { 112static struct dst_ops fake_dst_ops = {
108 .family = AF_INET, 113 .family = AF_INET,
109 .protocol = cpu_to_be16(ETH_P_IP), 114 .protocol = cpu_to_be16(ETH_P_IP),
110 .update_pmtu = fake_update_pmtu, 115 .update_pmtu = fake_update_pmtu,
116 .cow_metrics = fake_cow_metrics,
111}; 117};
112 118
113/* 119/*
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 3a66b8c10e09..c23979e79dfa 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -255,7 +255,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
255 255
256 if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) { 256 if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) {
257 257
258 if ((ctrl == _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND || 258 if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND ||
259 ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) && 259 ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) &&
260 layer->id != 0) { 260 layer->id != 0) {
261 261
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 649ebacaf6bc..adbb424403d4 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -139,17 +139,14 @@ static void close_work(struct work_struct *work)
139 struct chnl_net *dev = NULL; 139 struct chnl_net *dev = NULL;
140 struct list_head *list_node; 140 struct list_head *list_node;
141 struct list_head *_tmp; 141 struct list_head *_tmp;
142 /* May be called with or without RTNL lock held */ 142
143 int islocked = rtnl_is_locked(); 143 rtnl_lock();
144 if (!islocked)
145 rtnl_lock();
146 list_for_each_safe(list_node, _tmp, &chnl_net_list) { 144 list_for_each_safe(list_node, _tmp, &chnl_net_list) {
147 dev = list_entry(list_node, struct chnl_net, list_field); 145 dev = list_entry(list_node, struct chnl_net, list_field);
148 if (dev->state == CAIF_SHUTDOWN) 146 if (dev->state == CAIF_SHUTDOWN)
149 dev_close(dev->netdev); 147 dev_close(dev->netdev);
150 } 148 }
151 if (!islocked) 149 rtnl_unlock();
152 rtnl_unlock();
153} 150}
154static DECLARE_WORK(close_worker, close_work); 151static DECLARE_WORK(close_worker, close_work);
155 152
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index e15a82ccc05f..78b55f49de7c 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss)
76 break; 76 break;
77 77
78 default: 78 default:
79 sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); 79 snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)",
80 (int)ss->ss_family);
80 } 81 }
81 82
82 return s; 83 return s;
@@ -598,7 +599,7 @@ static void prepare_write_keepalive(struct ceph_connection *con)
598 * Connection negotiation. 599 * Connection negotiation.
599 */ 600 */
600 601
601static void prepare_connect_authorizer(struct ceph_connection *con) 602static int prepare_connect_authorizer(struct ceph_connection *con)
602{ 603{
603 void *auth_buf; 604 void *auth_buf;
604 int auth_len = 0; 605 int auth_len = 0;
@@ -612,13 +613,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con)
612 con->auth_retry); 613 con->auth_retry);
613 mutex_lock(&con->mutex); 614 mutex_lock(&con->mutex);
614 615
616 if (test_bit(CLOSED, &con->state) ||
617 test_bit(OPENING, &con->state))
618 return -EAGAIN;
619
615 con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); 620 con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
616 con->out_connect.authorizer_len = cpu_to_le32(auth_len); 621 con->out_connect.authorizer_len = cpu_to_le32(auth_len);
617 622
618 con->out_kvec[con->out_kvec_left].iov_base = auth_buf; 623 if (auth_len) {
619 con->out_kvec[con->out_kvec_left].iov_len = auth_len; 624 con->out_kvec[con->out_kvec_left].iov_base = auth_buf;
620 con->out_kvec_left++; 625 con->out_kvec[con->out_kvec_left].iov_len = auth_len;
621 con->out_kvec_bytes += auth_len; 626 con->out_kvec_left++;
627 con->out_kvec_bytes += auth_len;
628 }
629 return 0;
622} 630}
623 631
624/* 632/*
@@ -640,9 +648,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr,
640 set_bit(WRITE_PENDING, &con->state); 648 set_bit(WRITE_PENDING, &con->state);
641} 649}
642 650
643static void prepare_write_connect(struct ceph_messenger *msgr, 651static int prepare_write_connect(struct ceph_messenger *msgr,
644 struct ceph_connection *con, 652 struct ceph_connection *con,
645 int after_banner) 653 int after_banner)
646{ 654{
647 unsigned global_seq = get_global_seq(con->msgr, 0); 655 unsigned global_seq = get_global_seq(con->msgr, 0);
648 int proto; 656 int proto;
@@ -683,7 +691,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
683 con->out_more = 0; 691 con->out_more = 0;
684 set_bit(WRITE_PENDING, &con->state); 692 set_bit(WRITE_PENDING, &con->state);
685 693
686 prepare_connect_authorizer(con); 694 return prepare_connect_authorizer(con);
687} 695}
688 696
689 697
@@ -1065,8 +1073,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p)
1065 switch (ss->ss_family) { 1073 switch (ss->ss_family) {
1066 case AF_INET: 1074 case AF_INET:
1067 ((struct sockaddr_in *)ss)->sin_port = htons(p); 1075 ((struct sockaddr_in *)ss)->sin_port = htons(p);
1076 break;
1068 case AF_INET6: 1077 case AF_INET6:
1069 ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); 1078 ((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
1079 break;
1070 } 1080 }
1071} 1081}
1072 1082
@@ -1216,6 +1226,7 @@ static int process_connect(struct ceph_connection *con)
1216 u64 sup_feat = con->msgr->supported_features; 1226 u64 sup_feat = con->msgr->supported_features;
1217 u64 req_feat = con->msgr->required_features; 1227 u64 req_feat = con->msgr->required_features;
1218 u64 server_feat = le64_to_cpu(con->in_reply.features); 1228 u64 server_feat = le64_to_cpu(con->in_reply.features);
1229 int ret;
1219 1230
1220 dout("process_connect on %p tag %d\n", con, (int)con->in_tag); 1231 dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
1221 1232
@@ -1250,7 +1261,9 @@ static int process_connect(struct ceph_connection *con)
1250 return -1; 1261 return -1;
1251 } 1262 }
1252 con->auth_retry = 1; 1263 con->auth_retry = 1;
1253 prepare_write_connect(con->msgr, con, 0); 1264 ret = prepare_write_connect(con->msgr, con, 0);
1265 if (ret < 0)
1266 return ret;
1254 prepare_read_connect(con); 1267 prepare_read_connect(con);
1255 break; 1268 break;
1256 1269
@@ -1277,6 +1290,9 @@ static int process_connect(struct ceph_connection *con)
1277 if (con->ops->peer_reset) 1290 if (con->ops->peer_reset)
1278 con->ops->peer_reset(con); 1291 con->ops->peer_reset(con);
1279 mutex_lock(&con->mutex); 1292 mutex_lock(&con->mutex);
1293 if (test_bit(CLOSED, &con->state) ||
1294 test_bit(OPENING, &con->state))
1295 return -EAGAIN;
1280 break; 1296 break;
1281 1297
1282 case CEPH_MSGR_TAG_RETRY_SESSION: 1298 case CEPH_MSGR_TAG_RETRY_SESSION:
@@ -1341,7 +1357,9 @@ static int process_connect(struct ceph_connection *con)
1341 * to WAIT. This shouldn't happen if we are the 1357 * to WAIT. This shouldn't happen if we are the
1342 * client. 1358 * client.
1343 */ 1359 */
1344 pr_err("process_connect peer connecting WAIT\n"); 1360 pr_err("process_connect got WAIT as client\n");
1361 con->error_msg = "protocol error, got WAIT as client";
1362 return -1;
1345 1363
1346 default: 1364 default:
1347 pr_err("connect protocol error, will retry\n"); 1365 pr_err("connect protocol error, will retry\n");
@@ -1810,6 +1828,17 @@ static int try_read(struct ceph_connection *con)
1810more: 1828more:
1811 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, 1829 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
1812 con->in_base_pos); 1830 con->in_base_pos);
1831
1832 /*
1833 * process_connect and process_message drop and re-take
1834 * con->mutex. make sure we handle a racing close or reopen.
1835 */
1836 if (test_bit(CLOSED, &con->state) ||
1837 test_bit(OPENING, &con->state)) {
1838 ret = -EAGAIN;
1839 goto out;
1840 }
1841
1813 if (test_bit(CONNECTING, &con->state)) { 1842 if (test_bit(CONNECTING, &con->state)) {
1814 if (!test_bit(NEGOTIATING, &con->state)) { 1843 if (!test_bit(NEGOTIATING, &con->state)) {
1815 dout("try_read connecting\n"); 1844 dout("try_read connecting\n");
@@ -1938,8 +1967,10 @@ static void con_work(struct work_struct *work)
1938{ 1967{
1939 struct ceph_connection *con = container_of(work, struct ceph_connection, 1968 struct ceph_connection *con = container_of(work, struct ceph_connection,
1940 work.work); 1969 work.work);
1970 int ret;
1941 1971
1942 mutex_lock(&con->mutex); 1972 mutex_lock(&con->mutex);
1973restart:
1943 if (test_and_clear_bit(BACKOFF, &con->state)) { 1974 if (test_and_clear_bit(BACKOFF, &con->state)) {
1944 dout("con_work %p backing off\n", con); 1975 dout("con_work %p backing off\n", con);
1945 if (queue_delayed_work(ceph_msgr_wq, &con->work, 1976 if (queue_delayed_work(ceph_msgr_wq, &con->work,
@@ -1969,18 +2000,31 @@ static void con_work(struct work_struct *work)
1969 con_close_socket(con); 2000 con_close_socket(con);
1970 } 2001 }
1971 2002
1972 if (test_and_clear_bit(SOCK_CLOSED, &con->state) || 2003 if (test_and_clear_bit(SOCK_CLOSED, &con->state))
1973 try_read(con) < 0 || 2004 goto fault;
1974 try_write(con) < 0) { 2005
1975 mutex_unlock(&con->mutex); 2006 ret = try_read(con);
1976 ceph_fault(con); /* error/fault path */ 2007 if (ret == -EAGAIN)
1977 goto done_unlocked; 2008 goto restart;
1978 } 2009 if (ret < 0)
2010 goto fault;
2011
2012 ret = try_write(con);
2013 if (ret == -EAGAIN)
2014 goto restart;
2015 if (ret < 0)
2016 goto fault;
1979 2017
1980done: 2018done:
1981 mutex_unlock(&con->mutex); 2019 mutex_unlock(&con->mutex);
1982done_unlocked: 2020done_unlocked:
1983 con->ops->put(con); 2021 con->ops->put(con);
2022 return;
2023
2024fault:
2025 mutex_unlock(&con->mutex);
2026 ceph_fault(con); /* error/fault path */
2027 goto done_unlocked;
1984} 2028}
1985 2029
1986 2030
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6b5dda1cb5df..9cb627a4073a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc,
124 ceph_calc_raw_layout(osdc, layout, vino.snap, off, 124 ceph_calc_raw_layout(osdc, layout, vino.snap, off,
125 plen, &bno, req, op); 125 plen, &bno, req, op);
126 126
127 sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); 127 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
128 req->r_oid_len = strlen(req->r_oid); 128 req->r_oid_len = strlen(req->r_oid);
129} 129}
130 130
@@ -1144,6 +1144,13 @@ static void handle_osds_timeout(struct work_struct *work)
1144 round_jiffies_relative(delay)); 1144 round_jiffies_relative(delay));
1145} 1145}
1146 1146
1147static void complete_request(struct ceph_osd_request *req)
1148{
1149 if (req->r_safe_callback)
1150 req->r_safe_callback(req, NULL);
1151 complete_all(&req->r_safe_completion); /* fsync waiter */
1152}
1153
1147/* 1154/*
1148 * handle osd op reply. either call the callback if it is specified, 1155 * handle osd op reply. either call the callback if it is specified,
1149 * or do the completion to wake up the waiting thread. 1156 * or do the completion to wake up the waiting thread.
@@ -1226,11 +1233,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1226 else 1233 else
1227 complete_all(&req->r_completion); 1234 complete_all(&req->r_completion);
1228 1235
1229 if (flags & CEPH_OSD_FLAG_ONDISK) { 1236 if (flags & CEPH_OSD_FLAG_ONDISK)
1230 if (req->r_safe_callback) 1237 complete_request(req);
1231 req->r_safe_callback(req, msg);
1232 complete_all(&req->r_safe_completion); /* fsync waiter */
1233 }
1234 1238
1235done: 1239done:
1236 dout("req=%p req->r_linger=%d\n", req, req->r_linger); 1240 dout("req=%p req->r_linger=%d\n", req, req->r_linger);
@@ -1421,6 +1425,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1421done: 1425done:
1422 downgrade_write(&osdc->map_sem); 1426 downgrade_write(&osdc->map_sem);
1423 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); 1427 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
1428
1429 /*
1430 * subscribe to subsequent osdmap updates if full to ensure
1431 * we find out when we are no longer full and stop returning
1432 * ENOSPC.
1433 */
1434 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
1435 ceph_monc_request_next_osdmap(&osdc->client->monc);
1436
1424 send_queued(osdc); 1437 send_queued(osdc);
1425 up_read(&osdc->map_sem); 1438 up_read(&osdc->map_sem);
1426 wake_up_all(&osdc->client->auth_wq); 1439 wake_up_all(&osdc->client->auth_wq);
@@ -1677,8 +1690,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1677 */ 1690 */
1678 if (req->r_sent == 0) { 1691 if (req->r_sent == 0) {
1679 rc = __map_request(osdc, req); 1692 rc = __map_request(osdc, req);
1680 if (rc < 0) 1693 if (rc < 0) {
1694 if (nofail) {
1695 dout("osdc_start_request failed map, "
1696 " will retry %lld\n", req->r_tid);
1697 rc = 0;
1698 }
1681 goto out_unlock; 1699 goto out_unlock;
1700 }
1682 if (req->r_osd == NULL) { 1701 if (req->r_osd == NULL) {
1683 dout("send_request %p no up osds in pg\n", req); 1702 dout("send_request %p no up osds in pg\n", req);
1684 ceph_monc_request_next_osdmap(&osdc->client->monc); 1703 ceph_monc_request_next_osdmap(&osdc->client->monc);
@@ -1717,6 +1736,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
1717 __cancel_request(req); 1736 __cancel_request(req);
1718 __unregister_request(osdc, req); 1737 __unregister_request(osdc, req);
1719 mutex_unlock(&osdc->request_mutex); 1738 mutex_unlock(&osdc->request_mutex);
1739 complete_request(req);
1720 dout("wait_request tid %llu canceled/timed out\n", req->r_tid); 1740 dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
1721 return rc; 1741 return rc;
1722 } 1742 }
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 71603ac3dff5..e97c3588c3ec 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
765 } 765 }
766 766
767 map->epoch++; 767 map->epoch++;
768 map->modified = map->modified; 768 map->modified = modified;
769 if (newcrush) { 769 if (newcrush) {
770 if (map->crush) 770 if (map->crush)
771 crush_destroy(map->crush); 771 crush_destroy(map->crush);
@@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
830 map->osd_addr[osd] = addr; 830 map->osd_addr[osd] = addr;
831 } 831 }
832 832
833 /* new_down */ 833 /* new_state */
834 ceph_decode_32_safe(p, end, len, bad); 834 ceph_decode_32_safe(p, end, len, bad);
835 while (len--) { 835 while (len--) {
836 u32 osd; 836 u32 osd;
837 u8 xorstate;
837 ceph_decode_32_safe(p, end, osd, bad); 838 ceph_decode_32_safe(p, end, osd, bad);
839 xorstate = **(u8 **)p;
838 (*p)++; /* clean flag */ 840 (*p)++; /* clean flag */
839 pr_info("osd%d down\n", osd); 841 if (xorstate == 0)
842 xorstate = CEPH_OSD_UP;
843 if (xorstate & CEPH_OSD_UP)
844 pr_info("osd%d down\n", osd);
840 if (osd < map->max_osd) 845 if (osd < map->max_osd)
841 map->osd_state[osd] &= ~CEPH_OSD_UP; 846 map->osd_state[osd] ^= xorstate;
842 } 847 }
843 848
844 /* new_weight */ 849 /* new_weight */
diff --git a/net/core/dev.c b/net/core/dev.c
index c7e305d13b71..9c58c1ec41a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2096,6 +2096,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2096{ 2096{
2097 const struct net_device_ops *ops = dev->netdev_ops; 2097 const struct net_device_ops *ops = dev->netdev_ops;
2098 int rc = NETDEV_TX_OK; 2098 int rc = NETDEV_TX_OK;
2099 unsigned int skb_len;
2099 2100
2100 if (likely(!skb->next)) { 2101 if (likely(!skb->next)) {
2101 u32 features; 2102 u32 features;
@@ -2146,8 +2147,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2146 } 2147 }
2147 } 2148 }
2148 2149
2150 skb_len = skb->len;
2149 rc = ops->ndo_start_xmit(skb, dev); 2151 rc = ops->ndo_start_xmit(skb, dev);
2150 trace_net_dev_xmit(skb, rc); 2152 trace_net_dev_xmit(skb, rc, dev, skb_len);
2151 if (rc == NETDEV_TX_OK) 2153 if (rc == NETDEV_TX_OK)
2152 txq_trans_update(txq); 2154 txq_trans_update(txq);
2153 return rc; 2155 return rc;
@@ -2167,8 +2169,9 @@ gso:
2167 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2169 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2168 skb_dst_drop(nskb); 2170 skb_dst_drop(nskb);
2169 2171
2172 skb_len = nskb->len;
2170 rc = ops->ndo_start_xmit(nskb, dev); 2173 rc = ops->ndo_start_xmit(nskb, dev);
2171 trace_net_dev_xmit(nskb, rc); 2174 trace_net_dev_xmit(nskb, rc, dev, skb_len);
2172 if (unlikely(rc != NETDEV_TX_OK)) { 2175 if (unlikely(rc != NETDEV_TX_OK)) {
2173 if (rc & ~NETDEV_TX_MASK) 2176 if (rc & ~NETDEV_TX_MASK)
2174 goto out_kfree_gso_skb; 2177 goto out_kfree_gso_skb;
@@ -3111,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
3111 3114
3112 skb_reset_network_header(skb); 3115 skb_reset_network_header(skb);
3113 skb_reset_transport_header(skb); 3116 skb_reset_transport_header(skb);
3114 skb->mac_len = skb->network_header - skb->mac_header; 3117 skb_reset_mac_len(skb);
3115 3118
3116 pt_prev = NULL; 3119 pt_prev = NULL;
3117 3120
@@ -6175,6 +6178,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
6175 oldsd->output_queue = NULL; 6178 oldsd->output_queue = NULL;
6176 oldsd->output_queue_tailp = &oldsd->output_queue; 6179 oldsd->output_queue_tailp = &oldsd->output_queue;
6177 } 6180 }
6181 /* Append NAPI poll list from offline CPU. */
6182 if (!list_empty(&oldsd->poll_list)) {
6183 list_splice_init(&oldsd->poll_list, &sd->poll_list);
6184 raise_softirq_irqoff(NET_RX_SOFTIRQ);
6185 }
6178 6186
6179 raise_softirq_irqoff(NET_TX_SOFTIRQ); 6187 raise_softirq_irqoff(NET_TX_SOFTIRQ);
6180 local_irq_enable(); 6188 local_irq_enable();
@@ -6261,29 +6269,23 @@ err_name:
6261/** 6269/**
6262 * netdev_drivername - network driver for the device 6270 * netdev_drivername - network driver for the device
6263 * @dev: network device 6271 * @dev: network device
6264 * @buffer: buffer for resulting name
6265 * @len: size of buffer
6266 * 6272 *
6267 * Determine network driver for device. 6273 * Determine network driver for device.
6268 */ 6274 */
6269char *netdev_drivername(const struct net_device *dev, char *buffer, int len) 6275const char *netdev_drivername(const struct net_device *dev)
6270{ 6276{
6271 const struct device_driver *driver; 6277 const struct device_driver *driver;
6272 const struct device *parent; 6278 const struct device *parent;
6273 6279 const char *empty = "";
6274 if (len <= 0 || !buffer)
6275 return buffer;
6276 buffer[0] = 0;
6277 6280
6278 parent = dev->dev.parent; 6281 parent = dev->dev.parent;
6279
6280 if (!parent) 6282 if (!parent)
6281 return buffer; 6283 return empty;
6282 6284
6283 driver = parent->driver; 6285 driver = parent->driver;
6284 if (driver && driver->name) 6286 if (driver && driver->name)
6285 strlcpy(buffer, driver->name, len); 6287 return driver->name;
6286 return buffer; 6288 return empty;
6287} 6289}
6288 6290
6289static int __netdev_printk(const char *level, const struct net_device *dev, 6291static int __netdev_printk(const char *level, const struct net_device *dev,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 11b98bc2aa8f..33d2a1fba131 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net)
1179#endif 1179#endif
1180} 1180}
1181 1181
1182static const void *net_current_ns(void) 1182static void *net_grab_current_ns(void)
1183{ 1183{
1184 return current->nsproxy->net_ns; 1184 struct net *ns = current->nsproxy->net_ns;
1185#ifdef CONFIG_NET_NS
1186 if (ns)
1187 atomic_inc(&ns->passive);
1188#endif
1189 return ns;
1185} 1190}
1186 1191
1187static const void *net_initial_ns(void) 1192static const void *net_initial_ns(void)
@@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk)
1196 1201
1197struct kobj_ns_type_operations net_ns_type_operations = { 1202struct kobj_ns_type_operations net_ns_type_operations = {
1198 .type = KOBJ_NS_TYPE_NET, 1203 .type = KOBJ_NS_TYPE_NET,
1199 .current_ns = net_current_ns, 1204 .grab_current_ns = net_grab_current_ns,
1200 .netlink_ns = net_netlink_ns, 1205 .netlink_ns = net_netlink_ns,
1201 .initial_ns = net_initial_ns, 1206 .initial_ns = net_initial_ns,
1207 .drop_ns = net_drop_ns,
1202}; 1208};
1203EXPORT_SYMBOL_GPL(net_ns_type_operations); 1209EXPORT_SYMBOL_GPL(net_ns_type_operations);
1204 1210
1205static void net_kobj_ns_exit(struct net *net)
1206{
1207 kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
1208}
1209
1210static struct pernet_operations kobj_net_ops = {
1211 .exit = net_kobj_ns_exit,
1212};
1213
1214
1215#ifdef CONFIG_HOTPLUG 1211#ifdef CONFIG_HOTPLUG
1216static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) 1212static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
1217{ 1213{
@@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file);
1339int netdev_kobject_init(void) 1335int netdev_kobject_init(void)
1340{ 1336{
1341 kobj_ns_type_register(&net_ns_type_operations); 1337 kobj_ns_type_register(&net_ns_type_operations);
1342 register_pernet_subsys(&kobj_net_ops);
1343 return class_register(&net_class); 1338 return class_register(&net_class);
1344} 1339}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2e2dce6583e1..ea489db1bc23 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
8#include <linux/idr.h> 8#include <linux/idr.h>
9#include <linux/rculist.h> 9#include <linux/rculist.h>
10#include <linux/nsproxy.h> 10#include <linux/nsproxy.h>
11#include <linux/proc_fs.h>
12#include <linux/file.h>
11#include <net/net_namespace.h> 13#include <net/net_namespace.h>
12#include <net/netns/generic.h> 14#include <net/netns/generic.h>
13 15
@@ -126,6 +128,7 @@ static __net_init int setup_net(struct net *net)
126 LIST_HEAD(net_exit_list); 128 LIST_HEAD(net_exit_list);
127 129
128 atomic_set(&net->count, 1); 130 atomic_set(&net->count, 1);
131 atomic_set(&net->passive, 1);
129 132
130#ifdef NETNS_REFCNT_DEBUG 133#ifdef NETNS_REFCNT_DEBUG
131 atomic_set(&net->use_count, 0); 134 atomic_set(&net->use_count, 0);
@@ -208,6 +211,13 @@ static void net_free(struct net *net)
208 kmem_cache_free(net_cachep, net); 211 kmem_cache_free(net_cachep, net);
209} 212}
210 213
214void net_drop_ns(void *p)
215{
216 struct net *ns = p;
217 if (ns && atomic_dec_and_test(&ns->passive))
218 net_free(ns);
219}
220
211struct net *copy_net_ns(unsigned long flags, struct net *old_net) 221struct net *copy_net_ns(unsigned long flags, struct net *old_net)
212{ 222{
213 struct net *net; 223 struct net *net;
@@ -228,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
228 } 238 }
229 mutex_unlock(&net_mutex); 239 mutex_unlock(&net_mutex);
230 if (rv < 0) { 240 if (rv < 0) {
231 net_free(net); 241 net_drop_ns(net);
232 return ERR_PTR(rv); 242 return ERR_PTR(rv);
233 } 243 }
234 return net; 244 return net;
@@ -284,7 +294,7 @@ static void cleanup_net(struct work_struct *work)
284 /* Finally it is safe to free my network namespace structure */ 294 /* Finally it is safe to free my network namespace structure */
285 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 295 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
286 list_del_init(&net->exit_list); 296 list_del_init(&net->exit_list);
287 net_free(net); 297 net_drop_ns(net);
288 } 298 }
289} 299}
290static DECLARE_WORK(net_cleanup_work, cleanup_net); 300static DECLARE_WORK(net_cleanup_work, cleanup_net);
@@ -302,6 +312,26 @@ void __put_net(struct net *net)
302} 312}
303EXPORT_SYMBOL_GPL(__put_net); 313EXPORT_SYMBOL_GPL(__put_net);
304 314
315struct net *get_net_ns_by_fd(int fd)
316{
317 struct proc_inode *ei;
318 struct file *file;
319 struct net *net;
320
321 file = proc_ns_fget(fd);
322 if (IS_ERR(file))
323 return ERR_CAST(file);
324
325 ei = PROC_I(file->f_dentry->d_inode);
326 if (ei->ns_ops == &netns_operations)
327 net = get_net(ei->ns);
328 else
329 net = ERR_PTR(-EINVAL);
330
331 fput(file);
332 return net;
333}
334
305#else 335#else
306struct net *copy_net_ns(unsigned long flags, struct net *old_net) 336struct net *copy_net_ns(unsigned long flags, struct net *old_net)
307{ 337{
@@ -309,6 +339,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
309 return ERR_PTR(-EINVAL); 339 return ERR_PTR(-EINVAL);
310 return old_net; 340 return old_net;
311} 341}
342
343struct net *get_net_ns_by_fd(int fd)
344{
345 return ERR_PTR(-EINVAL);
346}
312#endif 347#endif
313 348
314struct net *get_net_ns_by_pid(pid_t pid) 349struct net *get_net_ns_by_pid(pid_t pid)
@@ -561,3 +596,39 @@ void unregister_pernet_device(struct pernet_operations *ops)
561 mutex_unlock(&net_mutex); 596 mutex_unlock(&net_mutex);
562} 597}
563EXPORT_SYMBOL_GPL(unregister_pernet_device); 598EXPORT_SYMBOL_GPL(unregister_pernet_device);
599
600#ifdef CONFIG_NET_NS
601static void *netns_get(struct task_struct *task)
602{
603 struct net *net = NULL;
604 struct nsproxy *nsproxy;
605
606 rcu_read_lock();
607 nsproxy = task_nsproxy(task);
608 if (nsproxy)
609 net = get_net(nsproxy->net_ns);
610 rcu_read_unlock();
611
612 return net;
613}
614
615static void netns_put(void *ns)
616{
617 put_net(ns);
618}
619
620static int netns_install(struct nsproxy *nsproxy, void *ns)
621{
622 put_net(nsproxy->net_ns);
623 nsproxy->net_ns = get_net(ns);
624 return 0;
625}
626
627const struct proc_ns_operations netns_operations = {
628 .name = "net",
629 .type = CLONE_NEWNET,
630 .get = netns_get,
631 .put = netns_put,
632 .install = netns_install,
633};
634#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2d7d6d473781..18d9cbda3a39 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -792,6 +792,13 @@ int netpoll_setup(struct netpoll *np)
792 return -ENODEV; 792 return -ENODEV;
793 } 793 }
794 794
795 if (ndev->master) {
796 printk(KERN_ERR "%s: %s is a slave device, aborting.\n",
797 np->name, np->dev_name);
798 err = -EBUSY;
799 goto put;
800 }
801
795 if (!netif_running(ndev)) { 802 if (!netif_running(ndev)) {
796 unsigned long atmost, atleast; 803 unsigned long atmost, atleast;
797 804
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d56cb9b0b94..abd936d8a716 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1046,6 +1046,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1046 [IFLA_LINKMODE] = { .type = NLA_U8 }, 1046 [IFLA_LINKMODE] = { .type = NLA_U8 },
1047 [IFLA_LINKINFO] = { .type = NLA_NESTED }, 1047 [IFLA_LINKINFO] = { .type = NLA_NESTED },
1048 [IFLA_NET_NS_PID] = { .type = NLA_U32 }, 1048 [IFLA_NET_NS_PID] = { .type = NLA_U32 },
1049 [IFLA_NET_NS_FD] = { .type = NLA_U32 },
1049 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, 1050 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
1050 [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, 1051 [IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
1051 [IFLA_VF_PORTS] = { .type = NLA_NESTED }, 1052 [IFLA_VF_PORTS] = { .type = NLA_NESTED },
@@ -1094,6 +1095,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
1094 */ 1095 */
1095 if (tb[IFLA_NET_NS_PID]) 1096 if (tb[IFLA_NET_NS_PID])
1096 net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); 1097 net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
1098 else if (tb[IFLA_NET_NS_FD])
1099 net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
1097 else 1100 else
1098 net = get_net(src_net); 1101 net = get_net(src_net);
1099 return net; 1102 return net;
@@ -1224,7 +1227,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1224 int send_addr_notify = 0; 1227 int send_addr_notify = 0;
1225 int err; 1228 int err;
1226 1229
1227 if (tb[IFLA_NET_NS_PID]) { 1230 if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
1228 struct net *net = rtnl_link_get_net(dev_net(dev), tb); 1231 struct net *net = rtnl_link_get_net(dev_net(dev), tb);
1229 if (IS_ERR(net)) { 1232 if (IS_ERR(net)) {
1230 err = PTR_ERR(net); 1233 err = PTR_ERR(net);
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index cfa7a5e1c5c9..fa000d26dc60 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -212,10 +212,12 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
212 int err = key->type_data.x[0]; 212 int err = key->type_data.x[0];
213 213
214 seq_puts(m, key->description); 214 seq_puts(m, key->description);
215 if (err) 215 if (key_is_instantiated(key)) {
216 seq_printf(m, ": %d", err); 216 if (err)
217 else 217 seq_printf(m, ": %d", err);
218 seq_printf(m, ": %u", key->datalen); 218 else
219 seq_printf(m, ": %u", key->datalen);
220 }
219} 221}
220 222
221/* 223/*
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index ed0eab39f531..02548b292b53 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -44,7 +44,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid,
44 pr_debug("%s\n", __func__); 44 pr_debug("%s\n", __func__);
45 45
46 if (!buf) 46 if (!buf)
47 goto out; 47 return -EMSGSIZE;
48 48
49 hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, 49 hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags,
50 IEEE802154_LIST_PHY); 50 IEEE802154_LIST_PHY);
@@ -65,6 +65,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid,
65 pages * sizeof(uint32_t), buf); 65 pages * sizeof(uint32_t), buf);
66 66
67 mutex_unlock(&phy->pib_lock); 67 mutex_unlock(&phy->pib_lock);
68 kfree(buf);
68 return genlmsg_end(msg, hdr); 69 return genlmsg_end(msg, hdr);
69 70
70nla_put_failure: 71nla_put_failure:
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cc1463156cd0..eae1f676f870 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -465,6 +465,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
465 if (addr_len < sizeof(struct sockaddr_in)) 465 if (addr_len < sizeof(struct sockaddr_in))
466 goto out; 466 goto out;
467 467
468 if (addr->sin_family != AF_INET)
469 goto out;
470
468 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); 471 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
469 472
470 /* Not specified by any standard per-se, however it breaks too 473 /* Not specified by any standard per-se, however it breaks too
@@ -673,6 +676,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
673 676
674 lock_sock(sk2); 677 lock_sock(sk2);
675 678
679 sock_rps_record_flow(sk2);
676 WARN_ON(!((1 << sk2->sk_state) & 680 WARN_ON(!((1 << sk2->sk_state) &
677 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); 681 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
678 682
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 61fac4cabc78..c14d88ad348d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
33 * This struct holds the first and last local port number. 33 * This struct holds the first and last local port number.
34 */ 34 */
35struct local_ports sysctl_local_ports __read_mostly = { 35struct local_ports sysctl_local_ports __read_mostly = {
36 .lock = SEQLOCK_UNLOCKED, 36 .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock),
37 .range = { 32768, 61000 }, 37 .range = { 32768, 61000 },
38}; 38};
39 39
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 6ffe94ca5bc9..3267d3898437 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc)
437 return 0; 437 return 0;
438 if (cc == len) 438 if (cc == len)
439 return 1; 439 return 1;
440 if (op->yes < 4) 440 if (op->yes < 4 || op->yes & 3)
441 return 0; 441 return 0;
442 len -= op->yes; 442 len -= op->yes;
443 bc += op->yes; 443 bc += op->yes;
@@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc)
447 447
448static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) 448static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
449{ 449{
450 const unsigned char *bc = bytecode; 450 const void *bc = bytecode;
451 int len = bytecode_len; 451 int len = bytecode_len;
452 452
453 while (len > 0) { 453 while (len > 0) {
454 struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; 454 const struct inet_diag_bc_op *op = bc;
455 455
456//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); 456//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
457 switch (op->code) { 457 switch (op->code) {
@@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
462 case INET_DIAG_BC_S_LE: 462 case INET_DIAG_BC_S_LE:
463 case INET_DIAG_BC_D_GE: 463 case INET_DIAG_BC_D_GE:
464 case INET_DIAG_BC_D_LE: 464 case INET_DIAG_BC_D_LE:
465 if (op->yes < 4 || op->yes > len + 4)
466 return -EINVAL;
467 case INET_DIAG_BC_JMP: 465 case INET_DIAG_BC_JMP:
468 if (op->no < 4 || op->no > len + 4) 466 if (op->no < 4 || op->no > len + 4 || op->no & 3)
469 return -EINVAL; 467 return -EINVAL;
470 if (op->no < len && 468 if (op->no < len &&
471 !valid_cc(bytecode, bytecode_len, len - op->no)) 469 !valid_cc(bytecode, bytecode_len, len - op->no))
472 return -EINVAL; 470 return -EINVAL;
473 break; 471 break;
474 case INET_DIAG_BC_NOP: 472 case INET_DIAG_BC_NOP:
475 if (op->yes < 4 || op->yes > len + 4)
476 return -EINVAL;
477 break; 473 break;
478 default: 474 default:
479 return -EINVAL; 475 return -EINVAL;
480 } 476 }
477 if (op->yes < 4 || op->yes > len + 4 || op->yes & 3)
478 return -EINVAL;
481 bc += op->yes; 479 bc += op->yes;
482 len -= op->yes; 480 len -= op->yes;
483 } 481 }
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index c3118e1cd3bb..ec93335901dd 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -14,6 +14,7 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/types.h> 15#include <linux/types.h>
16#include <asm/uaccess.h> 16#include <asm/uaccess.h>
17#include <asm/unaligned.h>
17#include <linux/skbuff.h> 18#include <linux/skbuff.h>
18#include <linux/ip.h> 19#include <linux/ip.h>
19#include <linux/icmp.h> 20#include <linux/icmp.h>
@@ -350,7 +351,7 @@ int ip_options_compile(struct net *net,
350 goto error; 351 goto error;
351 } 352 }
352 if (optptr[2] <= optlen) { 353 if (optptr[2] <= optlen) {
353 __be32 *timeptr = NULL; 354 unsigned char *timeptr = NULL;
354 if (optptr[2]+3 > optptr[1]) { 355 if (optptr[2]+3 > optptr[1]) {
355 pp_ptr = optptr + 2; 356 pp_ptr = optptr + 2;
356 goto error; 357 goto error;
@@ -359,7 +360,7 @@ int ip_options_compile(struct net *net,
359 case IPOPT_TS_TSONLY: 360 case IPOPT_TS_TSONLY:
360 opt->ts = optptr - iph; 361 opt->ts = optptr - iph;
361 if (skb) 362 if (skb)
362 timeptr = (__be32*)&optptr[optptr[2]-1]; 363 timeptr = &optptr[optptr[2]-1];
363 opt->ts_needtime = 1; 364 opt->ts_needtime = 1;
364 optptr[2] += 4; 365 optptr[2] += 4;
365 break; 366 break;
@@ -371,7 +372,7 @@ int ip_options_compile(struct net *net,
371 opt->ts = optptr - iph; 372 opt->ts = optptr - iph;
372 if (rt) { 373 if (rt) {
373 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); 374 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
374 timeptr = (__be32*)&optptr[optptr[2]+3]; 375 timeptr = &optptr[optptr[2]+3];
375 } 376 }
376 opt->ts_needaddr = 1; 377 opt->ts_needaddr = 1;
377 opt->ts_needtime = 1; 378 opt->ts_needtime = 1;
@@ -389,7 +390,7 @@ int ip_options_compile(struct net *net,
389 if (inet_addr_type(net, addr) == RTN_UNICAST) 390 if (inet_addr_type(net, addr) == RTN_UNICAST)
390 break; 391 break;
391 if (skb) 392 if (skb)
392 timeptr = (__be32*)&optptr[optptr[2]+3]; 393 timeptr = &optptr[optptr[2]+3];
393 } 394 }
394 opt->ts_needtime = 1; 395 opt->ts_needtime = 1;
395 optptr[2] += 8; 396 optptr[2] += 8;
@@ -403,10 +404,10 @@ int ip_options_compile(struct net *net,
403 } 404 }
404 if (timeptr) { 405 if (timeptr) {
405 struct timespec tv; 406 struct timespec tv;
406 __be32 midtime; 407 u32 midtime;
407 getnstimeofday(&tv); 408 getnstimeofday(&tv);
408 midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); 409 midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC;
409 memcpy(timeptr, &midtime, sizeof(__be32)); 410 put_unaligned_be32(midtime, timeptr);
410 opt->is_changed = 1; 411 opt->is_changed = 1;
411 } 412 }
412 } else { 413 } else {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 98af3697c718..a8024eaa0e87 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -799,7 +799,9 @@ static int __ip_append_data(struct sock *sk,
799 int csummode = CHECKSUM_NONE; 799 int csummode = CHECKSUM_NONE;
800 struct rtable *rt = (struct rtable *)cork->dst; 800 struct rtable *rt = (struct rtable *)cork->dst;
801 801
802 exthdrlen = transhdrlen ? rt->dst.header_len : 0; 802 skb = skb_peek_tail(queue);
803
804 exthdrlen = !skb ? rt->dst.header_len : 0;
803 length += exthdrlen; 805 length += exthdrlen;
804 transhdrlen += exthdrlen; 806 transhdrlen += exthdrlen;
805 mtu = cork->fragsize; 807 mtu = cork->fragsize;
@@ -825,8 +827,6 @@ static int __ip_append_data(struct sock *sk,
825 !exthdrlen) 827 !exthdrlen)
826 csummode = CHECKSUM_PARTIAL; 828 csummode = CHECKSUM_PARTIAL;
827 829
828 skb = skb_peek_tail(queue);
829
830 cork->length += length; 830 cork->length += length;
831 if (((length > mtu) || (skb && skb_is_gso(skb))) && 831 if (((length > mtu) || (skb && skb_is_gso(skb))) &&
832 (sk->sk_protocol == IPPROTO_UDP) && 832 (sk->sk_protocol == IPPROTO_UDP) &&
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index d2c1311cb28d..5c9b9d963918 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -203,7 +203,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
203 else 203 else
204 pmsg->outdev_name[0] = '\0'; 204 pmsg->outdev_name[0] = '\0';
205 205
206 if (entry->indev && entry->skb->dev) { 206 if (entry->indev && entry->skb->dev &&
207 entry->skb->mac_header != entry->skb->network_header) {
207 pmsg->hw_type = entry->skb->dev->type; 208 pmsg->hw_type = entry->skb->dev->type;
208 pmsg->hw_addrlen = dev_parse_header(entry->skb, 209 pmsg->hw_addrlen = dev_parse_header(entry->skb,
209 pmsg->hw_addr); 210 pmsg->hw_addr);
@@ -402,7 +403,8 @@ ipq_dev_drop(int ifindex)
402static inline void 403static inline void
403__ipq_rcv_skb(struct sk_buff *skb) 404__ipq_rcv_skb(struct sk_buff *skb)
404{ 405{
405 int status, type, pid, flags, nlmsglen, skblen; 406 int status, type, pid, flags;
407 unsigned int nlmsglen, skblen;
406 struct nlmsghdr *nlh; 408 struct nlmsghdr *nlh;
407 409
408 skblen = skb->len; 410 skblen = skb->len;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 764743843503..24e556e83a3b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -566,7 +566,7 @@ check_entry(const struct ipt_entry *e, const char *name)
566 const struct xt_entry_target *t; 566 const struct xt_entry_target *t;
567 567
568 if (!ip_checkentry(&e->ip)) { 568 if (!ip_checkentry(&e->ip)) {
569 duprintf("ip check failed %p %s.\n", e, par->match->name); 569 duprintf("ip check failed %p %s.\n", e, name);
570 return -EINVAL; 570 return -EINVAL;
571 } 571 }
572 572
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d609ac3cb9a4..5c9e97c79017 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
307 * error messages (RELATED) and information requests (see below) */ 307 * error messages (RELATED) and information requests (see below) */
308 if (ip_hdr(skb)->protocol == IPPROTO_ICMP && 308 if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
309 (ctinfo == IP_CT_RELATED || 309 (ctinfo == IP_CT_RELATED ||
310 ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) 310 ctinfo == IP_CT_RELATED_REPLY))
311 return XT_CONTINUE; 311 return XT_CONTINUE;
312 312
313 /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, 313 /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
@@ -321,12 +321,12 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
321 ct->mark = hash; 321 ct->mark = hash;
322 break; 322 break;
323 case IP_CT_RELATED: 323 case IP_CT_RELATED:
324 case IP_CT_RELATED+IP_CT_IS_REPLY: 324 case IP_CT_RELATED_REPLY:
325 /* FIXME: we don't handle expectations at the 325 /* FIXME: we don't handle expectations at the
326 * moment. they can arrive on a different node than 326 * moment. they can arrive on a different node than
327 * the master connection (e.g. FTP passive mode) */ 327 * the master connection (e.g. FTP passive mode) */
328 case IP_CT_ESTABLISHED: 328 case IP_CT_ESTABLISHED:
329 case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: 329 case IP_CT_ESTABLISHED_REPLY:
330 break; 330 break;
331 default: 331 default:
332 break; 332 break;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d2ed9dc74ebc..9931152a78b5 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
60 nat = nfct_nat(ct); 60 nat = nfct_nat(ct);
61 61
62 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || 62 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
63 ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); 63 ctinfo == IP_CT_RELATED_REPLY));
64 64
65 /* Source address is 0.0.0.0 - locally generated packet that is 65 /* Source address is 0.0.0.0 - locally generated packet that is
66 * probably not supposed to be masqueraded. 66 * probably not supposed to be masqueraded.
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index af6e9c778345..2b57e52c746c 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -25,7 +25,8 @@ MODULE_LICENSE("GPL");
25static inline bool match_ip(const struct sk_buff *skb, 25static inline bool match_ip(const struct sk_buff *skb,
26 const struct ipt_ecn_info *einfo) 26 const struct ipt_ecn_info *einfo)
27{ 27{
28 return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; 28 return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
29 !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
29} 30}
30 31
31static inline bool match_tcp(const struct sk_buff *skb, 32static inline bool match_tcp(const struct sk_buff *skb,
@@ -76,8 +77,6 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
76 return false; 77 return false;
77 78
78 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { 79 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
79 if (ip_hdr(skb)->protocol != IPPROTO_TCP)
80 return false;
81 if (!match_tcp(skb, info, &par->hotdrop)) 80 if (!match_tcp(skb, info, &par->hotdrop))
82 return false; 81 return false;
83 } 82 }
@@ -97,7 +96,7 @@ static int ecn_mt_check(const struct xt_mtchk_param *par)
97 return -EINVAL; 96 return -EINVAL;
98 97
99 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && 98 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
100 ip->proto != IPPROTO_TCP) { 99 (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
101 pr_info("cannot match TCP bits in rule for non-tcp packets\n"); 100 pr_info("cannot match TCP bits in rule for non-tcp packets\n");
102 return -EINVAL; 101 return -EINVAL;
103 } 102 }
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 5a03c02af999..de9da21113a1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
101 101
102 /* This is where we call the helper: as the packet goes out. */ 102 /* This is where we call the helper: as the packet goes out. */
103 ct = nf_ct_get(skb, &ctinfo); 103 ct = nf_ct_get(skb, &ctinfo);
104 if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) 104 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
105 goto out; 105 goto out;
106 106
107 help = nfct_help(ct); 107 help = nfct_help(ct);
@@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
121 return ret; 121 return ret;
122 } 122 }
123 123
124 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { 124 /* adjust seqs for loopback traffic only in outgoing direction */
125 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
126 !nf_is_loopback_packet(skb)) {
125 typeof(nf_nat_seq_adjust_hook) seq_adjust; 127 typeof(nf_nat_seq_adjust_hook) seq_adjust;
126 128
127 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); 129 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7404bde95994..ab5b27a2916f 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
160 /* Update skb to refer to this connection */ 160 /* Update skb to refer to this connection */
161 skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; 161 skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
162 skb->nfctinfo = *ctinfo; 162 skb->nfctinfo = *ctinfo;
163 return -NF_ACCEPT; 163 return NF_ACCEPT;
164} 164}
165 165
166/* Small and modified version of icmp_rcv */ 166/* Small and modified version of icmp_rcv */
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 9c71b2755ce3..3346de5d94d0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
433 433
434 /* Must be RELATED */ 434 /* Must be RELATED */
435 NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || 435 NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
436 skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); 436 skb->nfctinfo == IP_CT_RELATED_REPLY);
437 437
438 /* Redirects on non-null nats must be dropped, else they'll 438 /* Redirects on non-null nats must be dropped, else they'll
439 start talking to each other without our translation, and be 439 start talking to each other without our translation, and be
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 99cfa28b6d38..ebc5f8894f99 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -160,7 +160,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data
160 160
161 if (skb->ip_summed != CHECKSUM_PARTIAL) { 161 if (skb->ip_summed != CHECKSUM_PARTIAL) {
162 if (!(rt->rt_flags & RTCF_LOCAL) && 162 if (!(rt->rt_flags & RTCF_LOCAL) &&
163 skb->dev->features & NETIF_F_V4_CSUM) { 163 (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
164 skb->ip_summed = CHECKSUM_PARTIAL; 164 skb->ip_summed = CHECKSUM_PARTIAL;
165 skb->csum_start = skb_headroom(skb) + 165 skb->csum_start = skb_headroom(skb) +
166 skb_network_offset(skb) + 166 skb_network_offset(skb) +
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 21c30426480b..733c9abc1cbd 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
53 53
54 /* Connection must be valid and new. */ 54 /* Connection must be valid and new. */
55 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || 55 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
56 ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); 56 ctinfo == IP_CT_RELATED_REPLY));
57 NF_CT_ASSERT(par->out != NULL); 57 NF_CT_ASSERT(par->out != NULL);
58 58
59 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); 59 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 7317bdf1d457..483b76d042da 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum,
116 116
117 switch (ctinfo) { 117 switch (ctinfo) {
118 case IP_CT_RELATED: 118 case IP_CT_RELATED:
119 case IP_CT_RELATED+IP_CT_IS_REPLY: 119 case IP_CT_RELATED_REPLY:
120 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { 120 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
121 if (!nf_nat_icmp_reply_translation(ct, ctinfo, 121 if (!nf_nat_icmp_reply_translation(ct, ctinfo,
122 hooknum, skb)) 122 hooknum, skb))
@@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum,
144 default: 144 default:
145 /* ESTABLISHED */ 145 /* ESTABLISHED */
146 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 146 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
147 ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); 147 ctinfo == IP_CT_ESTABLISHED_REPLY);
148 } 148 }
149 149
150 return nf_nat_packet(ct, ctinfo, hooknum, skb); 150 return nf_nat_packet(ct, ctinfo, hooknum, skb);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 9aaa67165f42..39b403f854c6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -41,7 +41,6 @@
41#include <linux/proc_fs.h> 41#include <linux/proc_fs.h>
42#include <net/sock.h> 42#include <net/sock.h>
43#include <net/ping.h> 43#include <net/ping.h>
44#include <net/icmp.h>
45#include <net/udp.h> 44#include <net/udp.h>
46#include <net/route.h> 45#include <net/route.h>
47#include <net/inet_common.h> 46#include <net/inet_common.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 52b0b956508b..aa13ef105110 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1316,6 +1316,23 @@ reject_redirect:
1316 ; 1316 ;
1317} 1317}
1318 1318
1319static bool peer_pmtu_expired(struct inet_peer *peer)
1320{
1321 unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
1322
1323 return orig &&
1324 time_after_eq(jiffies, orig) &&
1325 cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
1326}
1327
1328static bool peer_pmtu_cleaned(struct inet_peer *peer)
1329{
1330 unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
1331
1332 return orig &&
1333 cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
1334}
1335
1319static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1336static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1320{ 1337{
1321 struct rtable *rt = (struct rtable *)dst; 1338 struct rtable *rt = (struct rtable *)dst;
@@ -1331,14 +1348,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1331 rt_genid(dev_net(dst->dev))); 1348 rt_genid(dev_net(dst->dev)));
1332 rt_del(hash, rt); 1349 rt_del(hash, rt);
1333 ret = NULL; 1350 ret = NULL;
1334 } else if (rt->peer && 1351 } else if (rt->peer && peer_pmtu_expired(rt->peer)) {
1335 rt->peer->pmtu_expires && 1352 dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
1336 time_after_eq(jiffies, rt->peer->pmtu_expires)) {
1337 unsigned long orig = rt->peer->pmtu_expires;
1338
1339 if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
1340 dst_metric_set(dst, RTAX_MTU,
1341 rt->peer->pmtu_orig);
1342 } 1353 }
1343 } 1354 }
1344 return ret; 1355 return ret;
@@ -1531,8 +1542,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
1531 1542
1532static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) 1543static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
1533{ 1544{
1534 unsigned long expires = peer->pmtu_expires; 1545 unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
1535 1546
1547 if (!expires)
1548 return;
1536 if (time_before(jiffies, expires)) { 1549 if (time_before(jiffies, expires)) {
1537 u32 orig_dst_mtu = dst_mtu(dst); 1550 u32 orig_dst_mtu = dst_mtu(dst);
1538 if (peer->pmtu_learned < orig_dst_mtu) { 1551 if (peer->pmtu_learned < orig_dst_mtu) {
@@ -1555,10 +1568,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1555 rt_bind_peer(rt, rt->rt_dst, 1); 1568 rt_bind_peer(rt, rt->rt_dst, 1);
1556 peer = rt->peer; 1569 peer = rt->peer;
1557 if (peer) { 1570 if (peer) {
1571 unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
1572
1558 if (mtu < ip_rt_min_pmtu) 1573 if (mtu < ip_rt_min_pmtu)
1559 mtu = ip_rt_min_pmtu; 1574 mtu = ip_rt_min_pmtu;
1560 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { 1575 if (!pmtu_expires || mtu < peer->pmtu_learned) {
1561 unsigned long pmtu_expires;
1562 1576
1563 pmtu_expires = jiffies + ip_rt_mtu_expires; 1577 pmtu_expires = jiffies + ip_rt_mtu_expires;
1564 if (!pmtu_expires) 1578 if (!pmtu_expires)
@@ -1612,13 +1626,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1612 rt_bind_peer(rt, rt->rt_dst, 0); 1626 rt_bind_peer(rt, rt->rt_dst, 0);
1613 1627
1614 peer = rt->peer; 1628 peer = rt->peer;
1615 if (peer && peer->pmtu_expires) 1629 if (peer) {
1616 check_peer_pmtu(dst, peer); 1630 check_peer_pmtu(dst, peer);
1617 1631
1618 if (peer && peer->redirect_learned.a4 && 1632 if (peer->redirect_learned.a4 &&
1619 peer->redirect_learned.a4 != rt->rt_gateway) { 1633 peer->redirect_learned.a4 != rt->rt_gateway) {
1620 if (check_peer_redir(dst, peer)) 1634 if (check_peer_redir(dst, peer))
1621 return NULL; 1635 return NULL;
1636 }
1622 } 1637 }
1623 1638
1624 rt->rt_peer_genid = rt_peer_genid(); 1639 rt->rt_peer_genid = rt_peer_genid();
@@ -1649,14 +1664,8 @@ static void ipv4_link_failure(struct sk_buff *skb)
1649 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1664 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1650 1665
1651 rt = skb_rtable(skb); 1666 rt = skb_rtable(skb);
1652 if (rt && 1667 if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
1653 rt->peer && 1668 dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
1654 rt->peer->pmtu_expires) {
1655 unsigned long orig = rt->peer->pmtu_expires;
1656
1657 if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
1658 dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
1659 }
1660} 1669}
1661 1670
1662static int ip_rt_bug(struct sk_buff *skb) 1671static int ip_rt_bug(struct sk_buff *skb)
@@ -1770,8 +1779,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1770 sizeof(u32) * RTAX_MAX); 1779 sizeof(u32) * RTAX_MAX);
1771 dst_init_metrics(&rt->dst, peer->metrics, false); 1780 dst_init_metrics(&rt->dst, peer->metrics, false);
1772 1781
1773 if (peer->pmtu_expires) 1782 check_peer_pmtu(&rt->dst, peer);
1774 check_peer_pmtu(&rt->dst, peer);
1775 if (peer->redirect_learned.a4 && 1783 if (peer->redirect_learned.a4 &&
1776 peer->redirect_learned.a4 != rt->rt_gateway) { 1784 peer->redirect_learned.a4 != rt->rt_gateway) {
1777 rt->rt_gateway = peer->redirect_learned.a4; 1785 rt->rt_gateway = peer->redirect_learned.a4;
@@ -1894,9 +1902,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1894 1902
1895 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1903 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1896 rth = rt_intern_hash(hash, rth, skb, dev->ifindex); 1904 rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
1897 err = 0; 1905 return IS_ERR(rth) ? PTR_ERR(rth) : 0;
1898 if (IS_ERR(rth))
1899 err = PTR_ERR(rth);
1900 1906
1901e_nobufs: 1907e_nobufs:
1902 return -ENOBUFS; 1908 return -ENOBUFS;
@@ -2775,7 +2781,8 @@ static int rt_fill_info(struct net *net,
2775 struct rtable *rt = skb_rtable(skb); 2781 struct rtable *rt = skb_rtable(skb);
2776 struct rtmsg *r; 2782 struct rtmsg *r;
2777 struct nlmsghdr *nlh; 2783 struct nlmsghdr *nlh;
2778 long expires; 2784 long expires = 0;
2785 const struct inet_peer *peer = rt->peer;
2779 u32 id = 0, ts = 0, tsage = 0, error; 2786 u32 id = 0, ts = 0, tsage = 0, error;
2780 2787
2781 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); 2788 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
@@ -2823,15 +2830,16 @@ static int rt_fill_info(struct net *net,
2823 NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); 2830 NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
2824 2831
2825 error = rt->dst.error; 2832 error = rt->dst.error;
2826 expires = (rt->peer && rt->peer->pmtu_expires) ? 2833 if (peer) {
2827 rt->peer->pmtu_expires - jiffies : 0;
2828 if (rt->peer) {
2829 inet_peer_refcheck(rt->peer); 2834 inet_peer_refcheck(rt->peer);
2830 id = atomic_read(&rt->peer->ip_id_count) & 0xffff; 2835 id = atomic_read(&peer->ip_id_count) & 0xffff;
2831 if (rt->peer->tcp_ts_stamp) { 2836 if (peer->tcp_ts_stamp) {
2832 ts = rt->peer->tcp_ts; 2837 ts = peer->tcp_ts;
2833 tsage = get_seconds() - rt->peer->tcp_ts_stamp; 2838 tsage = get_seconds() - peer->tcp_ts_stamp;
2834 } 2839 }
2840 expires = ACCESS_ONCE(peer->pmtu_expires);
2841 if (expires)
2842 expires -= jiffies;
2835 } 2843 }
2836 2844
2837 if (rt_is_input_route(rt)) { 2845 if (rt_is_input_route(rt)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a7d6671e33b8..708dc203b034 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1589,6 +1589,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1589 goto discard; 1589 goto discard;
1590 1590
1591 if (nsk != sk) { 1591 if (nsk != sk) {
1592 sock_rps_save_rxhash(nsk, skb->rxhash);
1592 if (tcp_child_process(sk, nsk, skb)) { 1593 if (tcp_child_process(sk, nsk, skb)) {
1593 rsk = nsk; 1594 rsk = nsk;
1594 goto reset; 1595 goto reset;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index abca870d8ff6..48cd88e62553 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1249,6 +1249,9 @@ csum_copy_err:
1249 1249
1250 if (noblock) 1250 if (noblock)
1251 return -EAGAIN; 1251 return -EAGAIN;
1252
1253 /* starting over for a new packet */
1254 msg->msg_flags &= ~MSG_TRUNC;
1252 goto try_again; 1255 goto try_again;
1253} 1256}
1254 1257
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b7919f901fbf..d450a2f9fc06 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -272,6 +272,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
272 272
273 if (addr_len < SIN6_LEN_RFC2133) 273 if (addr_len < SIN6_LEN_RFC2133)
274 return -EINVAL; 274 return -EINVAL;
275
276 if (addr->sin6_family != AF_INET6)
277 return -EINVAL;
278
275 addr_type = ipv6_addr_type(&addr->sin6_addr); 279 addr_type = ipv6_addr_type(&addr->sin6_addr);
276 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) 280 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
277 return -EINVAL; 281 return -EINVAL;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 413ab0754e1f..249394863284 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -204,7 +204,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
204 else 204 else
205 pmsg->outdev_name[0] = '\0'; 205 pmsg->outdev_name[0] = '\0';
206 206
207 if (entry->indev && entry->skb->dev) { 207 if (entry->indev && entry->skb->dev &&
208 entry->skb->mac_header != entry->skb->network_header) {
208 pmsg->hw_type = entry->skb->dev->type; 209 pmsg->hw_type = entry->skb->dev->type;
209 pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); 210 pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
210 } 211 }
@@ -403,7 +404,8 @@ ipq_dev_drop(int ifindex)
403static inline void 404static inline void
404__ipq_rcv_skb(struct sk_buff *skb) 405__ipq_rcv_skb(struct sk_buff *skb)
405{ 406{
406 int status, type, pid, flags, nlmsglen, skblen; 407 int status, type, pid, flags;
408 unsigned int nlmsglen, skblen;
407 struct nlmsghdr *nlh; 409 struct nlmsghdr *nlh;
408 410
409 skblen = skb->len; 411 skblen = skb->len;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c8af58b22562..4111050a9fc5 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -160,7 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
160 160
161 /* This is where we call the helper: as the packet goes out. */ 161 /* This is where we call the helper: as the packet goes out. */
162 ct = nf_ct_get(skb, &ctinfo); 162 ct = nf_ct_get(skb, &ctinfo);
163 if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) 163 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
164 goto out; 164 goto out;
165 165
166 help = nfct_help(ct); 166 help = nfct_help(ct);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 1df3c8b6bf47..7c05e7eacbc6 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -177,7 +177,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
177 /* Update skb to refer to this connection */ 177 /* Update skb to refer to this connection */
178 skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; 178 skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
179 skb->nfctinfo = *ctinfo; 179 skb->nfctinfo = *ctinfo;
180 return -NF_ACCEPT; 180 return NF_ACCEPT;
181} 181}
182 182
183static int 183static int
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d1fd28711ba5..87551ca568cd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1644,6 +1644,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1644 * the new socket.. 1644 * the new socket..
1645 */ 1645 */
1646 if(nsk != sk) { 1646 if(nsk != sk) {
1647 sock_rps_save_rxhash(nsk, skb->rxhash);
1647 if (tcp_child_process(sk, nsk, skb)) 1648 if (tcp_child_process(sk, nsk, skb))
1648 goto reset; 1649 goto reset;
1649 if (opt_skb) 1650 if (opt_skb)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 41f8c9c08dba..328985c40883 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -453,8 +453,11 @@ csum_copy_err:
453 } 453 }
454 unlock_sock_fast(sk, slow); 454 unlock_sock_fast(sk, slow);
455 455
456 if (flags & MSG_DONTWAIT) 456 if (noblock)
457 return -EAGAIN; 457 return -EAGAIN;
458
459 /* starting over for a new packet */
460 msg->msg_flags &= ~MSG_TRUNC;
458 goto try_again; 461 goto try_again;
459} 462}
460 463
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 36477538cea8..f876eed7d4aa 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -87,6 +87,8 @@ static inline void iriap_start_watchdog_timer(struct iriap_cb *self,
87 iriap_watchdog_timer_expired); 87 iriap_watchdog_timer_expired);
88} 88}
89 89
90static struct lock_class_key irias_objects_key;
91
90/* 92/*
91 * Function iriap_init (void) 93 * Function iriap_init (void)
92 * 94 *
@@ -114,6 +116,9 @@ int __init iriap_init(void)
114 return -ENOMEM; 116 return -ENOMEM;
115 } 117 }
116 118
119 lockdep_set_class_and_name(&irias_objects->hb_spinlock, &irias_objects_key,
120 "irias_objects");
121
117 /* 122 /*
118 * Register some default services for IrLMP 123 * Register some default services for IrLMP
119 */ 124 */
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index a15c01524959..7f9124914b13 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -54,7 +54,7 @@
54#include <asm/atomic.h> 54#include <asm/atomic.h>
55#include <asm/ebcdic.h> 55#include <asm/ebcdic.h>
56#include <asm/io.h> 56#include <asm/io.h>
57#include <asm/s390_ext.h> 57#include <asm/irq.h>
58#include <asm/smp.h> 58#include <asm/smp.h>
59 59
60/* 60/*
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index b8dbae82fab8..76130134bfa6 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -258,7 +258,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
258 */ 258 */
259 pd->net = get_net_ns_by_pid(current->pid); 259 pd->net = get_net_ns_by_pid(current->pid);
260 if (IS_ERR(pd->net)) { 260 if (IS_ERR(pd->net)) {
261 rc = -PTR_ERR(pd->net); 261 rc = PTR_ERR(pd->net);
262 goto err_free_pd; 262 goto err_free_pd;
263 } 263 }
264 264
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 8041befc6555..42aa64b6b0b1 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -767,7 +767,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
767 if (!attr[IPSET_ATTR_SETNAME]) { 767 if (!attr[IPSET_ATTR_SETNAME]) {
768 for (i = 0; i < ip_set_max; i++) { 768 for (i = 0; i < ip_set_max; i++) {
769 if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { 769 if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
770 ret = IPSET_ERR_BUSY; 770 ret = -IPSET_ERR_BUSY;
771 goto out; 771 goto out;
772 } 772 }
773 } 773 }
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 4743e5402522..565a7c5b8818 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -146,8 +146,9 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
146{ 146{
147 const struct ip_set_hash *h = set->data; 147 const struct ip_set_hash *h = set->data;
148 ipset_adtfn adtfn = set->variant->adt[adt]; 148 ipset_adtfn adtfn = set->variant->adt[adt];
149 struct hash_ipportnet4_elem data = 149 struct hash_ipportnet4_elem data = {
150 { .cidr = h->nets[0].cidr || HOST_MASK }; 150 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
151 };
151 152
152 if (data.cidr == 0) 153 if (data.cidr == 0)
153 return -EINVAL; 154 return -EINVAL;
@@ -394,8 +395,9 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
394{ 395{
395 const struct ip_set_hash *h = set->data; 396 const struct ip_set_hash *h = set->data;
396 ipset_adtfn adtfn = set->variant->adt[adt]; 397 ipset_adtfn adtfn = set->variant->adt[adt];
397 struct hash_ipportnet6_elem data = 398 struct hash_ipportnet6_elem data = {
398 { .cidr = h->nets[0].cidr || HOST_MASK }; 399 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
400 };
399 401
400 if (data.cidr == 0) 402 if (data.cidr == 0)
401 return -EINVAL; 403 return -EINVAL;
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index c4db202b7da4..2aeeabcd5a21 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -131,7 +131,9 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
131{ 131{
132 const struct ip_set_hash *h = set->data; 132 const struct ip_set_hash *h = set->data;
133 ipset_adtfn adtfn = set->variant->adt[adt]; 133 ipset_adtfn adtfn = set->variant->adt[adt];
134 struct hash_net4_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; 134 struct hash_net4_elem data = {
135 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
136 };
135 137
136 if (data.cidr == 0) 138 if (data.cidr == 0)
137 return -EINVAL; 139 return -EINVAL;
@@ -296,7 +298,9 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
296{ 298{
297 const struct ip_set_hash *h = set->data; 299 const struct ip_set_hash *h = set->data;
298 ipset_adtfn adtfn = set->variant->adt[adt]; 300 ipset_adtfn adtfn = set->variant->adt[adt];
299 struct hash_net6_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; 301 struct hash_net6_elem data = {
302 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
303 };
300 304
301 if (data.cidr == 0) 305 if (data.cidr == 0)
302 return -EINVAL; 306 return -EINVAL;
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index d2a40362dd3a..e50d9bb8820b 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -144,7 +144,8 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
144 const struct ip_set_hash *h = set->data; 144 const struct ip_set_hash *h = set->data;
145 ipset_adtfn adtfn = set->variant->adt[adt]; 145 ipset_adtfn adtfn = set->variant->adt[adt];
146 struct hash_netport4_elem data = { 146 struct hash_netport4_elem data = {
147 .cidr = h->nets[0].cidr || HOST_MASK }; 147 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
148 };
148 149
149 if (data.cidr == 0) 150 if (data.cidr == 0)
150 return -EINVAL; 151 return -EINVAL;
@@ -357,7 +358,8 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
357 const struct ip_set_hash *h = set->data; 358 const struct ip_set_hash *h = set->data;
358 ipset_adtfn adtfn = set->variant->adt[adt]; 359 ipset_adtfn adtfn = set->variant->adt[adt];
359 struct hash_netport6_elem data = { 360 struct hash_netport6_elem data = {
360 .cidr = h->nets[0].cidr || HOST_MASK }; 361 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
362 };
361 363
362 if (data.cidr == 0) 364 if (data.cidr == 0)
363 return -EINVAL; 365 return -EINVAL;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index bf28ac2fc99b..782db275ac53 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -776,8 +776,16 @@ static void ip_vs_conn_expire(unsigned long data)
776 if (cp->control) 776 if (cp->control)
777 ip_vs_control_del(cp); 777 ip_vs_control_del(cp);
778 778
779 if (cp->flags & IP_VS_CONN_F_NFCT) 779 if (cp->flags & IP_VS_CONN_F_NFCT) {
780 ip_vs_conn_drop_conntrack(cp); 780 ip_vs_conn_drop_conntrack(cp);
781 /* Do not access conntracks during subsys cleanup
782 * because nf_conntrack_find_get can not be used after
783 * conntrack cleanup for the net.
784 */
785 smp_rmb();
786 if (ipvs->enable)
787 ip_vs_conn_drop_conntrack(cp);
788 }
781 789
782 ip_vs_pe_put(cp->pe); 790 ip_vs_pe_put(cp->pe);
783 kfree(cp->pe_data); 791 kfree(cp->pe_data);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index bfa808f4da13..24c28d238dcb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1772,7 +1772,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1772 .owner = THIS_MODULE, 1772 .owner = THIS_MODULE,
1773 .pf = PF_INET, 1773 .pf = PF_INET,
1774 .hooknum = NF_INET_LOCAL_IN, 1774 .hooknum = NF_INET_LOCAL_IN,
1775 .priority = 99, 1775 .priority = NF_IP_PRI_NAT_SRC - 2,
1776 }, 1776 },
1777 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1777 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1778 * or VS/NAT(change destination), so that filtering rules can be 1778 * or VS/NAT(change destination), so that filtering rules can be
@@ -1782,7 +1782,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1782 .owner = THIS_MODULE, 1782 .owner = THIS_MODULE,
1783 .pf = PF_INET, 1783 .pf = PF_INET,
1784 .hooknum = NF_INET_LOCAL_IN, 1784 .hooknum = NF_INET_LOCAL_IN,
1785 .priority = 101, 1785 .priority = NF_IP_PRI_NAT_SRC - 1,
1786 }, 1786 },
1787 /* Before ip_vs_in, change source only for VS/NAT */ 1787 /* Before ip_vs_in, change source only for VS/NAT */
1788 { 1788 {
@@ -1790,7 +1790,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1790 .owner = THIS_MODULE, 1790 .owner = THIS_MODULE,
1791 .pf = PF_INET, 1791 .pf = PF_INET,
1792 .hooknum = NF_INET_LOCAL_OUT, 1792 .hooknum = NF_INET_LOCAL_OUT,
1793 .priority = -99, 1793 .priority = NF_IP_PRI_NAT_DST + 1,
1794 }, 1794 },
1795 /* After mangle, schedule and forward local requests */ 1795 /* After mangle, schedule and forward local requests */
1796 { 1796 {
@@ -1798,7 +1798,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1798 .owner = THIS_MODULE, 1798 .owner = THIS_MODULE,
1799 .pf = PF_INET, 1799 .pf = PF_INET,
1800 .hooknum = NF_INET_LOCAL_OUT, 1800 .hooknum = NF_INET_LOCAL_OUT,
1801 .priority = -98, 1801 .priority = NF_IP_PRI_NAT_DST + 2,
1802 }, 1802 },
1803 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1803 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1804 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1804 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1824,7 +1824,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1824 .owner = THIS_MODULE, 1824 .owner = THIS_MODULE,
1825 .pf = PF_INET6, 1825 .pf = PF_INET6,
1826 .hooknum = NF_INET_LOCAL_IN, 1826 .hooknum = NF_INET_LOCAL_IN,
1827 .priority = 99, 1827 .priority = NF_IP6_PRI_NAT_SRC - 2,
1828 }, 1828 },
1829 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1829 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1830 * or VS/NAT(change destination), so that filtering rules can be 1830 * or VS/NAT(change destination), so that filtering rules can be
@@ -1834,7 +1834,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1834 .owner = THIS_MODULE, 1834 .owner = THIS_MODULE,
1835 .pf = PF_INET6, 1835 .pf = PF_INET6,
1836 .hooknum = NF_INET_LOCAL_IN, 1836 .hooknum = NF_INET_LOCAL_IN,
1837 .priority = 101, 1837 .priority = NF_IP6_PRI_NAT_SRC - 1,
1838 }, 1838 },
1839 /* Before ip_vs_in, change source only for VS/NAT */ 1839 /* Before ip_vs_in, change source only for VS/NAT */
1840 { 1840 {
@@ -1842,7 +1842,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1842 .owner = THIS_MODULE, 1842 .owner = THIS_MODULE,
1843 .pf = PF_INET, 1843 .pf = PF_INET,
1844 .hooknum = NF_INET_LOCAL_OUT, 1844 .hooknum = NF_INET_LOCAL_OUT,
1845 .priority = -99, 1845 .priority = NF_IP6_PRI_NAT_DST + 1,
1846 }, 1846 },
1847 /* After mangle, schedule and forward local requests */ 1847 /* After mangle, schedule and forward local requests */
1848 { 1848 {
@@ -1850,7 +1850,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1850 .owner = THIS_MODULE, 1850 .owner = THIS_MODULE,
1851 .pf = PF_INET6, 1851 .pf = PF_INET6,
1852 .hooknum = NF_INET_LOCAL_OUT, 1852 .hooknum = NF_INET_LOCAL_OUT,
1853 .priority = -98, 1853 .priority = NF_IP6_PRI_NAT_DST + 2,
1854 }, 1854 },
1855 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1855 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1856 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1856 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1945,6 +1945,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net)
1945{ 1945{
1946 EnterFunction(2); 1946 EnterFunction(2);
1947 net_ipvs(net)->enable = 0; /* Disable packet reception */ 1947 net_ipvs(net)->enable = 0; /* Disable packet reception */
1948 smp_wmb();
1948 __ip_vs_sync_cleanup(net); 1949 __ip_vs_sync_cleanup(net);
1949 LeaveFunction(2); 1950 LeaveFunction(2);
1950} 1951}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2e1c11f78419..f7af8b866017 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -850,7 +850,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
850 850
851 /* It exists; we have (non-exclusive) reference. */ 851 /* It exists; we have (non-exclusive) reference. */
852 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { 852 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
853 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; 853 *ctinfo = IP_CT_ESTABLISHED_REPLY;
854 /* Please set reply bit if this packet OK */ 854 /* Please set reply bit if this packet OK */
855 *set_reply = 1; 855 *set_reply = 1;
856 } else { 856 } else {
@@ -922,6 +922,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
922 ret = -ret; 922 ret = -ret;
923 goto out; 923 goto out;
924 } 924 }
925 /* ICMP[v6] protocol trackers may assign one conntrack. */
926 if (skb->nfct)
927 goto out;
925 } 928 }
926 929
927 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, 930 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
@@ -1143,7 +1146,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1143 /* This ICMP is in reverse direction to the packet which caused it */ 1146 /* This ICMP is in reverse direction to the packet which caused it */
1144 ct = nf_ct_get(skb, &ctinfo); 1147 ct = nf_ct_get(skb, &ctinfo);
1145 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) 1148 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1146 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; 1149 ctinfo = IP_CT_RELATED_REPLY;
1147 else 1150 else
1148 ctinfo = IP_CT_RELATED; 1151 ctinfo = IP_CT_RELATED;
1149 1152
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index e17cb7c7dd8f..6f5801eac999 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -368,7 +368,7 @@ static int help(struct sk_buff *skb,
368 368
369 /* Until there's been traffic both ways, don't look in packets. */ 369 /* Until there's been traffic both ways, don't look in packets. */
370 if (ctinfo != IP_CT_ESTABLISHED && 370 if (ctinfo != IP_CT_ESTABLISHED &&
371 ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { 371 ctinfo != IP_CT_ESTABLISHED_REPLY) {
372 pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); 372 pr_debug("ftp: Conntrackinfo = %u\n", ctinfo);
373 return NF_ACCEPT; 373 return NF_ACCEPT;
374 } 374 }
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 18b2ce5c8ced..f03c2d4539f6 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -571,10 +571,9 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff,
571 int ret; 571 int ret;
572 572
573 /* Until there's been traffic both ways, don't look in packets. */ 573 /* Until there's been traffic both ways, don't look in packets. */
574 if (ctinfo != IP_CT_ESTABLISHED && 574 if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
575 ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
576 return NF_ACCEPT; 575 return NF_ACCEPT;
577 } 576
578 pr_debug("nf_ct_h245: skblen = %u\n", skb->len); 577 pr_debug("nf_ct_h245: skblen = %u\n", skb->len);
579 578
580 spin_lock_bh(&nf_h323_lock); 579 spin_lock_bh(&nf_h323_lock);
@@ -1125,10 +1124,9 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff,
1125 int ret; 1124 int ret;
1126 1125
1127 /* Until there's been traffic both ways, don't look in packets. */ 1126 /* Until there's been traffic both ways, don't look in packets. */
1128 if (ctinfo != IP_CT_ESTABLISHED && 1127 if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
1129 ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
1130 return NF_ACCEPT; 1128 return NF_ACCEPT;
1131 } 1129
1132 pr_debug("nf_ct_q931: skblen = %u\n", skb->len); 1130 pr_debug("nf_ct_q931: skblen = %u\n", skb->len);
1133 1131
1134 spin_lock_bh(&nf_h323_lock); 1132 spin_lock_bh(&nf_h323_lock);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index b394aa318776..4f9390b98697 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -125,8 +125,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
125 return NF_ACCEPT; 125 return NF_ACCEPT;
126 126
127 /* Until there's been traffic both ways, don't look in packets. */ 127 /* Until there's been traffic both ways, don't look in packets. */
128 if (ctinfo != IP_CT_ESTABLISHED && 128 if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
129 ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
130 return NF_ACCEPT; 129 return NF_ACCEPT;
131 130
132 /* Not a full tcp header? */ 131 /* Not a full tcp header? */
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 088944824e13..2fd4565144de 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -519,8 +519,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
519 u_int16_t msg; 519 u_int16_t msg;
520 520
521 /* don't do any tracking before tcp handshake complete */ 521 /* don't do any tracking before tcp handshake complete */
522 if (ctinfo != IP_CT_ESTABLISHED && 522 if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
523 ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
524 return NF_ACCEPT; 523 return NF_ACCEPT;
525 524
526 nexthdr_off = protoff; 525 nexthdr_off = protoff;
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index d9e27734b2a2..8501823b3f9b 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -78,7 +78,7 @@ static int help(struct sk_buff *skb,
78 ct_sane_info = &nfct_help(ct)->help.ct_sane_info; 78 ct_sane_info = &nfct_help(ct)->help.ct_sane_info;
79 /* Until there's been traffic both ways, don't look in packets. */ 79 /* Until there's been traffic both ways, don't look in packets. */
80 if (ctinfo != IP_CT_ESTABLISHED && 80 if (ctinfo != IP_CT_ESTABLISHED &&
81 ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) 81 ctinfo != IP_CT_ESTABLISHED_REPLY)
82 return NF_ACCEPT; 82 return NF_ACCEPT;
83 83
84 /* Not a full tcp header? */ 84 /* Not a full tcp header? */
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index cb5a28581782..93faf6a3a637 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1423,7 +1423,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
1423 typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; 1423 typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
1424 1424
1425 if (ctinfo != IP_CT_ESTABLISHED && 1425 if (ctinfo != IP_CT_ESTABLISHED &&
1426 ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) 1426 ctinfo != IP_CT_ESTABLISHED_REPLY)
1427 return NF_ACCEPT; 1427 return NF_ACCEPT;
1428 1428
1429 /* No Data ? */ 1429 /* No Data ? */
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e0ee010935e7..2e7ccbb43ddb 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -456,7 +456,8 @@ __build_packet_message(struct nfulnl_instance *inst,
456 if (skb->mark) 456 if (skb->mark)
457 NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark)); 457 NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark));
458 458
459 if (indev && skb->dev) { 459 if (indev && skb->dev &&
460 skb->mac_header != skb->network_header) {
460 struct nfulnl_msg_packet_hw phw; 461 struct nfulnl_msg_packet_hw phw;
461 int len = dev_parse_header(skb, phw.hw_addr); 462 int len = dev_parse_header(skb, phw.hw_addr);
462 if (len > 0) { 463 if (len > 0) {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index b83123f12b42..fdd2fafe0a14 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -335,7 +335,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
335 if (entskb->mark) 335 if (entskb->mark)
336 NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); 336 NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
337 337
338 if (indev && entskb->dev) { 338 if (indev && entskb->dev &&
339 entskb->mac_header != entskb->network_header) {
339 struct nfqnl_msg_packet_hw phw; 340 struct nfqnl_msg_packet_hw phw;
340 int len = dev_parse_header(entskb, phw.hw_addr); 341 int len = dev_parse_header(entskb, phw.hw_addr);
341 if (len) { 342 if (len) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 9cc46356b577..fe39f7e913df 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -143,9 +143,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
143 ct = nf_ct_get(skb, &ctinfo); 143 ct = nf_ct_get(skb, &ctinfo);
144 if (ct && !nf_ct_is_untracked(ct) && 144 if (ct && !nf_ct_is_untracked(ct) &&
145 ((iph->protocol != IPPROTO_ICMP && 145 ((iph->protocol != IPPROTO_ICMP &&
146 ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || 146 ctinfo == IP_CT_ESTABLISHED_REPLY) ||
147 (iph->protocol == IPPROTO_ICMP && 147 (iph->protocol == IPPROTO_ICMP &&
148 ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && 148 ctinfo == IP_CT_RELATED_REPLY)) &&
149 (ct->status & IPS_SRC_NAT_DONE)) { 149 (ct->status & IPS_SRC_NAT_DONE)) {
150 150
151 daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; 151 daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 925f715686a5..c0c3cda19712 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -798,7 +798,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
798 getnstimeofday(&ts); 798 getnstimeofday(&ts);
799 h.h2->tp_sec = ts.tv_sec; 799 h.h2->tp_sec = ts.tv_sec;
800 h.h2->tp_nsec = ts.tv_nsec; 800 h.h2->tp_nsec = ts.tv_nsec;
801 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); 801 if (vlan_tx_tag_present(skb)) {
802 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
803 status |= TP_STATUS_VLAN_VALID;
804 } else {
805 h.h2->tp_vlan_tci = 0;
806 }
807 h.h2->tp_padding = 0;
802 hdrlen = sizeof(*h.h2); 808 hdrlen = sizeof(*h.h2);
803 break; 809 break;
804 default: 810 default:
@@ -1725,8 +1731,13 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1725 aux.tp_snaplen = skb->len; 1731 aux.tp_snaplen = skb->len;
1726 aux.tp_mac = 0; 1732 aux.tp_mac = 0;
1727 aux.tp_net = skb_network_offset(skb); 1733 aux.tp_net = skb_network_offset(skb);
1728 aux.tp_vlan_tci = vlan_tx_tag_get(skb); 1734 if (vlan_tx_tag_present(skb)) {
1729 1735 aux.tp_vlan_tci = vlan_tx_tag_get(skb);
1736 aux.tp_status |= TP_STATUS_VLAN_VALID;
1737 } else {
1738 aux.tp_vlan_tci = 0;
1739 }
1740 aux.tp_padding = 0;
1730 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); 1741 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1731 } 1742 }
1732 1743
diff --git a/net/rds/ib.c b/net/rds/ib.c
index cce19f95c624..3b83086bcc30 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -325,7 +325,7 @@ static int rds_ib_laddr_check(__be32 addr)
325 /* Create a CMA ID and try to bind it. This catches both 325 /* Create a CMA ID and try to bind it. This catches both
326 * IB and iWARP capable NICs. 326 * IB and iWARP capable NICs.
327 */ 327 */
328 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); 328 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
329 if (IS_ERR(cm_id)) 329 if (IS_ERR(cm_id))
330 return PTR_ERR(cm_id); 330 return PTR_ERR(cm_id);
331 331
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index ee369d201a65..fd453dd5124b 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -587,7 +587,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
587 /* XXX I wonder what affect the port space has */ 587 /* XXX I wonder what affect the port space has */
588 /* delegate cm event handler to rdma_transport */ 588 /* delegate cm event handler to rdma_transport */
589 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, 589 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
590 RDMA_PS_TCP); 590 RDMA_PS_TCP, IB_QPT_RC);
591 if (IS_ERR(ic->i_cm_id)) { 591 if (IS_ERR(ic->i_cm_id)) {
592 ret = PTR_ERR(ic->i_cm_id); 592 ret = PTR_ERR(ic->i_cm_id);
593 ic->i_cm_id = NULL; 593 ic->i_cm_id = NULL;
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 5a9676fe594f..f7474844f096 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -226,7 +226,7 @@ static int rds_iw_laddr_check(__be32 addr)
226 /* Create a CMA ID and try to bind it. This catches both 226 /* Create a CMA ID and try to bind it. This catches both
227 * IB and iWARP capable NICs. 227 * IB and iWARP capable NICs.
228 */ 228 */
229 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); 229 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
230 if (IS_ERR(cm_id)) 230 if (IS_ERR(cm_id))
231 return PTR_ERR(cm_id); 231 return PTR_ERR(cm_id);
232 232
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 3a60a15d1b4a..c12db66f24c7 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -522,7 +522,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
522 /* XXX I wonder what affect the port space has */ 522 /* XXX I wonder what affect the port space has */
523 /* delegate cm event handler to rdma_transport */ 523 /* delegate cm event handler to rdma_transport */
524 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, 524 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
525 RDMA_PS_TCP); 525 RDMA_PS_TCP, IB_QPT_RC);
526 if (IS_ERR(ic->i_cm_id)) { 526 if (IS_ERR(ic->i_cm_id)) {
527 ret = PTR_ERR(ic->i_cm_id); 527 ret = PTR_ERR(ic->i_cm_id);
528 ic->i_cm_id = NULL; 528 ic->i_cm_id = NULL;
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 4195a0539829..f8760e1b6688 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -158,7 +158,8 @@ static int rds_rdma_listen_init(void)
158 struct rdma_cm_id *cm_id; 158 struct rdma_cm_id *cm_id;
159 int ret; 159 int ret;
160 160
161 cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP); 161 cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP,
162 IB_QPT_RC);
162 if (IS_ERR(cm_id)) { 163 if (IS_ERR(cm_id)) {
163 ret = PTR_ERR(cm_id); 164 ret = PTR_ERR(cm_id);
164 printk(KERN_ERR "RDS/RDMA: failed to setup listener, " 165 printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b1721d71c27c..b4c680900d7a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -251,9 +251,8 @@ static void dev_watchdog(unsigned long arg)
251 } 251 }
252 252
253 if (some_queue_timedout) { 253 if (some_queue_timedout) {
254 char drivername[64];
255 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", 254 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
256 dev->name, netdev_drivername(dev, drivername, 64), i); 255 dev->name, netdev_drivername(dev), i);
257 dev->netdev_ops->ndo_tx_timeout(dev); 256 dev->netdev_ops->ndo_tx_timeout(dev);
258 } 257 }
259 if (!mod_timer(&dev->watchdog_timer, 258 if (!mod_timer(&dev->watchdog_timer,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 525f97c467e9..4a62888f2e43 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -444,15 +444,7 @@ void sctp_association_free(struct sctp_association *asoc)
444 444
445 asoc->peer.transport_count = 0; 445 asoc->peer.transport_count = 0;
446 446
447 /* Free any cached ASCONF_ACK chunk. */ 447 sctp_asconf_queue_teardown(asoc);
448 sctp_assoc_free_asconf_acks(asoc);
449
450 /* Free the ASCONF queue. */
451 sctp_assoc_free_asconf_queue(asoc);
452
453 /* Free any cached ASCONF chunk. */
454 if (asoc->addip_last_asconf)
455 sctp_chunk_free(asoc->addip_last_asconf);
456 448
457 /* AUTH - Free the endpoint shared keys */ 449 /* AUTH - Free the endpoint shared keys */
458 sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); 450 sctp_auth_destroy_keys(&asoc->endpoint_shared_keys);
@@ -1646,3 +1638,16 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
1646 1638
1647 return NULL; 1639 return NULL;
1648} 1640}
1641
1642void sctp_asconf_queue_teardown(struct sctp_association *asoc)
1643{
1644 /* Free any cached ASCONF_ACK chunk. */
1645 sctp_assoc_free_asconf_acks(asoc);
1646
1647 /* Free the ASCONF queue. */
1648 sctp_assoc_free_asconf_queue(asoc);
1649
1650 /* Free any cached ASCONF chunk. */
1651 if (asoc->addip_last_asconf)
1652 sctp_chunk_free(asoc->addip_last_asconf);
1653}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d612ca1ca6c0..534c2e5feb05 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1670,6 +1670,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1670 case SCTP_CMD_SEND_NEXT_ASCONF: 1670 case SCTP_CMD_SEND_NEXT_ASCONF:
1671 sctp_cmd_send_asconf(asoc); 1671 sctp_cmd_send_asconf(asoc);
1672 break; 1672 break;
1673 case SCTP_CMD_PURGE_ASCONF_QUEUE:
1674 sctp_asconf_queue_teardown(asoc);
1675 break;
1673 default: 1676 default:
1674 pr_warn("Impossible command: %u, %p\n", 1677 pr_warn("Impossible command: %u, %p\n",
1675 cmd->verb, cmd->obj.ptr); 1678 cmd->verb, cmd->obj.ptr);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7f4a4f8368ee..a297283154d5 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1718,11 +1718,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
1718 return SCTP_DISPOSITION_CONSUME; 1718 return SCTP_DISPOSITION_CONSUME;
1719 } 1719 }
1720 1720
1721 /* For now, fail any unsent/unacked data. Consider the optional 1721 /* For now, stop pending T3-rtx and SACK timers, fail any unsent/unacked
1722 * choice of resending of this data. 1722 * data. Consider the optional choice of resending of this data.
1723 */ 1723 */
1724 sctp_add_cmd_sf(commands, SCTP_CMD_T3_RTX_TIMERS_STOP, SCTP_NULL());
1725 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
1726 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
1724 sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); 1727 sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
1725 1728
1729 /* Stop pending T4-rto timer, teardown ASCONF queue, ASCONF-ACK queue
1730 * and ASCONF-ACK cache.
1731 */
1732 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
1733 SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
1734 sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL());
1735
1726 repl = sctp_make_cookie_ack(new_asoc, chunk); 1736 repl = sctp_make_cookie_ack(new_asoc, chunk);
1727 if (!repl) 1737 if (!repl)
1728 goto nomem; 1738 goto nomem;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 67e31276682a..cd6e4aa19dbf 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
326 * Run memory cache shrinker. 326 * Run memory cache shrinker.
327 */ 327 */
328static int 328static int
329rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 329rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc)
330{ 330{
331 LIST_HEAD(free); 331 LIST_HEAD(free);
332 int res; 332 int res;
333 int nr_to_scan = sc->nr_to_scan;
334 gfp_t gfp_mask = sc->gfp_mask;
333 335
334 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 336 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
335 return (nr_to_scan == 0) ? 0 : -1; 337 return (nr_to_scan == 0) ? 0 : -1;
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 0a9a2ec2e469..c3b75333b821 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -43,6 +43,7 @@
43#include <linux/sunrpc/gss_krb5.h> 43#include <linux/sunrpc/gss_krb5.h>
44#include <linux/sunrpc/xdr.h> 44#include <linux/sunrpc/xdr.h>
45#include <linux/crypto.h> 45#include <linux/crypto.h>
46#include <linux/sunrpc/gss_krb5_enctypes.h>
46 47
47#ifdef RPC_DEBUG 48#ifdef RPC_DEBUG
48# define RPCDBG_FACILITY RPCDBG_AUTH 49# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -750,7 +751,7 @@ static struct gss_api_mech gss_kerberos_mech = {
750 .gm_ops = &gss_kerberos_ops, 751 .gm_ops = &gss_kerberos_ops,
751 .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), 752 .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
752 .gm_pfs = gss_kerberos_pfs, 753 .gm_pfs = gss_kerberos_pfs,
753 .gm_upcall_enctypes = "18,17,16,23,3,1,2", 754 .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES,
754}; 755};
755 756
756static int __init init_kerberos_module(void) 757static int __init init_kerberos_module(void)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8d83f9d48713..b84d7395535e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -13,10 +13,6 @@
13 * and need to be refreshed, or when a packet was damaged in transit. 13 * and need to be refreshed, or when a packet was damaged in transit.
14 * This may be have to be moved to the VFS layer. 14 * This may be have to be moved to the VFS layer.
15 * 15 *
16 * NB: BSD uses a more intelligent approach to guessing when a request
17 * or reply has been lost by keeping the RTO estimate for each procedure.
18 * We currently make do with a constant timeout value.
19 *
20 * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com> 16 * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
21 * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> 17 * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
22 */ 18 */
@@ -32,7 +28,9 @@
32#include <linux/slab.h> 28#include <linux/slab.h>
33#include <linux/utsname.h> 29#include <linux/utsname.h>
34#include <linux/workqueue.h> 30#include <linux/workqueue.h>
31#include <linux/in.h>
35#include <linux/in6.h> 32#include <linux/in6.h>
33#include <linux/un.h>
36 34
37#include <linux/sunrpc/clnt.h> 35#include <linux/sunrpc/clnt.h>
38#include <linux/sunrpc/rpc_pipe_fs.h> 36#include <linux/sunrpc/rpc_pipe_fs.h>
@@ -298,22 +296,27 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
298 * up a string representation of the passed-in address. 296 * up a string representation of the passed-in address.
299 */ 297 */
300 if (args->servername == NULL) { 298 if (args->servername == NULL) {
299 struct sockaddr_un *sun =
300 (struct sockaddr_un *)args->address;
301 struct sockaddr_in *sin =
302 (struct sockaddr_in *)args->address;
303 struct sockaddr_in6 *sin6 =
304 (struct sockaddr_in6 *)args->address;
305
301 servername[0] = '\0'; 306 servername[0] = '\0';
302 switch (args->address->sa_family) { 307 switch (args->address->sa_family) {
303 case AF_INET: { 308 case AF_LOCAL:
304 struct sockaddr_in *sin = 309 snprintf(servername, sizeof(servername), "%s",
305 (struct sockaddr_in *)args->address; 310 sun->sun_path);
311 break;
312 case AF_INET:
306 snprintf(servername, sizeof(servername), "%pI4", 313 snprintf(servername, sizeof(servername), "%pI4",
307 &sin->sin_addr.s_addr); 314 &sin->sin_addr.s_addr);
308 break; 315 break;
309 } 316 case AF_INET6:
310 case AF_INET6: {
311 struct sockaddr_in6 *sin =
312 (struct sockaddr_in6 *)args->address;
313 snprintf(servername, sizeof(servername), "%pI6", 317 snprintf(servername, sizeof(servername), "%pI6",
314 &sin->sin6_addr); 318 &sin6->sin6_addr);
315 break; 319 break;
316 }
317 default: 320 default:
318 /* caller wants default server name, but 321 /* caller wants default server name, but
319 * address family isn't recognized. */ 322 * address family isn't recognized. */
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c652e4cc9fe9..9a80a922c527 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -16,6 +16,7 @@
16 16
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/socket.h> 18#include <linux/socket.h>
19#include <linux/un.h>
19#include <linux/in.h> 20#include <linux/in.h>
20#include <linux/in6.h> 21#include <linux/in6.h>
21#include <linux/kernel.h> 22#include <linux/kernel.h>
@@ -32,6 +33,8 @@
32# define RPCDBG_FACILITY RPCDBG_BIND 33# define RPCDBG_FACILITY RPCDBG_BIND
33#endif 34#endif
34 35
36#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock"
37
35#define RPCBIND_PROGRAM (100000u) 38#define RPCBIND_PROGRAM (100000u)
36#define RPCBIND_PORT (111u) 39#define RPCBIND_PORT (111u)
37 40
@@ -158,20 +161,69 @@ static void rpcb_map_release(void *data)
158 kfree(map); 161 kfree(map);
159} 162}
160 163
161static const struct sockaddr_in rpcb_inaddr_loopback = { 164/*
162 .sin_family = AF_INET, 165 * Returns zero on success, otherwise a negative errno value
163 .sin_addr.s_addr = htonl(INADDR_LOOPBACK), 166 * is returned.
164 .sin_port = htons(RPCBIND_PORT), 167 */
165}; 168static int rpcb_create_local_unix(void)
169{
170 static const struct sockaddr_un rpcb_localaddr_rpcbind = {
171 .sun_family = AF_LOCAL,
172 .sun_path = RPCBIND_SOCK_PATHNAME,
173 };
174 struct rpc_create_args args = {
175 .net = &init_net,
176 .protocol = XPRT_TRANSPORT_LOCAL,
177 .address = (struct sockaddr *)&rpcb_localaddr_rpcbind,
178 .addrsize = sizeof(rpcb_localaddr_rpcbind),
179 .servername = "localhost",
180 .program = &rpcb_program,
181 .version = RPCBVERS_2,
182 .authflavor = RPC_AUTH_NULL,
183 };
184 struct rpc_clnt *clnt, *clnt4;
185 int result = 0;
186
187 /*
188 * Because we requested an RPC PING at transport creation time,
189 * this works only if the user space portmapper is rpcbind, and
190 * it's listening on AF_LOCAL on the named socket.
191 */
192 clnt = rpc_create(&args);
193 if (IS_ERR(clnt)) {
194 dprintk("RPC: failed to create AF_LOCAL rpcbind "
195 "client (errno %ld).\n", PTR_ERR(clnt));
196 result = -PTR_ERR(clnt);
197 goto out;
198 }
199
200 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
201 if (IS_ERR(clnt4)) {
202 dprintk("RPC: failed to bind second program to "
203 "rpcbind v4 client (errno %ld).\n",
204 PTR_ERR(clnt4));
205 clnt4 = NULL;
206 }
207
208 /* Protected by rpcb_create_local_mutex */
209 rpcb_local_clnt = clnt;
210 rpcb_local_clnt4 = clnt4;
166 211
167static DEFINE_MUTEX(rpcb_create_local_mutex); 212out:
213 return result;
214}
168 215
169/* 216/*
170 * Returns zero on success, otherwise a negative errno value 217 * Returns zero on success, otherwise a negative errno value
171 * is returned. 218 * is returned.
172 */ 219 */
173static int rpcb_create_local(void) 220static int rpcb_create_local_net(void)
174{ 221{
222 static const struct sockaddr_in rpcb_inaddr_loopback = {
223 .sin_family = AF_INET,
224 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
225 .sin_port = htons(RPCBIND_PORT),
226 };
175 struct rpc_create_args args = { 227 struct rpc_create_args args = {
176 .net = &init_net, 228 .net = &init_net,
177 .protocol = XPRT_TRANSPORT_TCP, 229 .protocol = XPRT_TRANSPORT_TCP,
@@ -186,13 +238,6 @@ static int rpcb_create_local(void)
186 struct rpc_clnt *clnt, *clnt4; 238 struct rpc_clnt *clnt, *clnt4;
187 int result = 0; 239 int result = 0;
188 240
189 if (rpcb_local_clnt)
190 return result;
191
192 mutex_lock(&rpcb_create_local_mutex);
193 if (rpcb_local_clnt)
194 goto out;
195
196 clnt = rpc_create(&args); 241 clnt = rpc_create(&args);
197 if (IS_ERR(clnt)) { 242 if (IS_ERR(clnt)) {
198 dprintk("RPC: failed to create local rpcbind " 243 dprintk("RPC: failed to create local rpcbind "
@@ -214,10 +259,34 @@ static int rpcb_create_local(void)
214 clnt4 = NULL; 259 clnt4 = NULL;
215 } 260 }
216 261
262 /* Protected by rpcb_create_local_mutex */
217 rpcb_local_clnt = clnt; 263 rpcb_local_clnt = clnt;
218 rpcb_local_clnt4 = clnt4; 264 rpcb_local_clnt4 = clnt4;
219 265
220out: 266out:
267 return result;
268}
269
270/*
271 * Returns zero on success, otherwise a negative errno value
272 * is returned.
273 */
274static int rpcb_create_local(void)
275{
276 static DEFINE_MUTEX(rpcb_create_local_mutex);
277 int result = 0;
278
279 if (rpcb_local_clnt)
280 return result;
281
282 mutex_lock(&rpcb_create_local_mutex);
283 if (rpcb_local_clnt)
284 goto out;
285
286 if (rpcb_create_local_unix() != 0)
287 result = rpcb_create_local_net();
288
289out:
221 mutex_unlock(&rpcb_create_local_mutex); 290 mutex_unlock(&rpcb_create_local_mutex);
222 return result; 291 return result;
223} 292}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 08e05a8ce025..2b90292e9505 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -942,6 +942,8 @@ static void svc_unregister(const struct svc_serv *serv)
942 if (progp->pg_vers[i]->vs_hidden) 942 if (progp->pg_vers[i]->vs_hidden)
943 continue; 943 continue;
944 944
945 dprintk("svc: attempting to unregister %sv%u\n",
946 progp->pg_name, i);
945 __svc_unregister(progp->pg_prog, i, progp->pg_name); 947 __svc_unregister(progp->pg_prog, i, progp->pg_name);
946 } 948 }
947 } 949 }
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b7d435c3f19e..af04f779ce9f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
387 return len; 387 return len;
388} 388}
389 389
390static int svc_partial_recvfrom(struct svc_rqst *rqstp,
391 struct kvec *iov, int nr,
392 int buflen, unsigned int base)
393{
394 size_t save_iovlen;
395 void __user *save_iovbase;
396 unsigned int i;
397 int ret;
398
399 if (base == 0)
400 return svc_recvfrom(rqstp, iov, nr, buflen);
401
402 for (i = 0; i < nr; i++) {
403 if (iov[i].iov_len > base)
404 break;
405 base -= iov[i].iov_len;
406 }
407 save_iovlen = iov[i].iov_len;
408 save_iovbase = iov[i].iov_base;
409 iov[i].iov_len -= base;
410 iov[i].iov_base += base;
411 ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
412 iov[i].iov_len = save_iovlen;
413 iov[i].iov_base = save_iovbase;
414 return ret;
415}
416
390/* 417/*
391 * Set socket snd and rcv buffer lengths 418 * Set socket snd and rcv buffer lengths
392 */ 419 */
@@ -409,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
409 lock_sock(sock->sk); 436 lock_sock(sock->sk);
410 sock->sk->sk_sndbuf = snd * 2; 437 sock->sk->sk_sndbuf = snd * 2;
411 sock->sk->sk_rcvbuf = rcv * 2; 438 sock->sk->sk_rcvbuf = rcv * 2;
412 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
413 sock->sk->sk_write_space(sock->sk); 439 sock->sk->sk_write_space(sock->sk);
414 release_sock(sock->sk); 440 release_sock(sock->sk);
415#endif 441#endif
@@ -884,6 +910,56 @@ failed:
884 return NULL; 910 return NULL;
885} 911}
886 912
913static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
914{
915 unsigned int i, len, npages;
916
917 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
918 return 0;
919 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
920 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
921 for (i = 0; i < npages; i++) {
922 if (rqstp->rq_pages[i] != NULL)
923 put_page(rqstp->rq_pages[i]);
924 BUG_ON(svsk->sk_pages[i] == NULL);
925 rqstp->rq_pages[i] = svsk->sk_pages[i];
926 svsk->sk_pages[i] = NULL;
927 }
928 rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
929 return len;
930}
931
932static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
933{
934 unsigned int i, len, npages;
935
936 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
937 return;
938 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
939 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
940 for (i = 0; i < npages; i++) {
941 svsk->sk_pages[i] = rqstp->rq_pages[i];
942 rqstp->rq_pages[i] = NULL;
943 }
944}
945
946static void svc_tcp_clear_pages(struct svc_sock *svsk)
947{
948 unsigned int i, len, npages;
949
950 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
951 goto out;
952 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
953 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
954 for (i = 0; i < npages; i++) {
955 BUG_ON(svsk->sk_pages[i] == NULL);
956 put_page(svsk->sk_pages[i]);
957 svsk->sk_pages[i] = NULL;
958 }
959out:
960 svsk->sk_tcplen = 0;
961}
962
887/* 963/*
888 * Receive data. 964 * Receive data.
889 * If we haven't gotten the record length yet, get the next four bytes. 965 * If we haven't gotten the record length yet, get the next four bytes.
@@ -893,31 +969,15 @@ failed:
893static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) 969static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
894{ 970{
895 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 971 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
972 unsigned int want;
896 int len; 973 int len;
897 974
898 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
899 /* sndbuf needs to have room for one request
900 * per thread, otherwise we can stall even when the
901 * network isn't a bottleneck.
902 *
903 * We count all threads rather than threads in a
904 * particular pool, which provides an upper bound
905 * on the number of threads which will access the socket.
906 *
907 * rcvbuf just needs to be able to hold a few requests.
908 * Normally they will be removed from the queue
909 * as soon a a complete request arrives.
910 */
911 svc_sock_setbufsize(svsk->sk_sock,
912 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
913 3 * serv->sv_max_mesg);
914
915 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 975 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
916 976
917 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 977 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
918 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
919 struct kvec iov; 978 struct kvec iov;
920 979
980 want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
921 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; 981 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
922 iov.iov_len = want; 982 iov.iov_len = want;
923 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) 983 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
@@ -927,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
927 if (len < want) { 987 if (len < want) {
928 dprintk("svc: short recvfrom while reading record " 988 dprintk("svc: short recvfrom while reading record "
929 "length (%d of %d)\n", len, want); 989 "length (%d of %d)\n", len, want);
930 goto err_again; /* record header not complete */ 990 return -EAGAIN;
931 } 991 }
932 992
933 svsk->sk_reclen = ntohl(svsk->sk_reclen); 993 svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -954,83 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
954 } 1014 }
955 } 1015 }
956 1016
957 /* Check whether enough data is available */ 1017 if (svsk->sk_reclen < 8)
958 len = svc_recv_available(svsk); 1018 goto err_delete; /* client is nuts. */
959 if (len < 0)
960 goto error;
961 1019
962 if (len < svsk->sk_reclen) {
963 dprintk("svc: incomplete TCP record (%d of %d)\n",
964 len, svsk->sk_reclen);
965 goto err_again; /* record not complete */
966 }
967 len = svsk->sk_reclen; 1020 len = svsk->sk_reclen;
968 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
969 1021
970 return len; 1022 return len;
971 error: 1023error:
972 if (len == -EAGAIN) 1024 dprintk("RPC: TCP recv_record got %d\n", len);
973 dprintk("RPC: TCP recv_record got EAGAIN\n");
974 return len; 1025 return len;
975 err_delete: 1026err_delete:
976 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1027 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
977 err_again:
978 return -EAGAIN; 1028 return -EAGAIN;
979} 1029}
980 1030
981static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, 1031static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
982 struct rpc_rqst **reqpp, struct kvec *vec)
983{ 1032{
1033 struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
984 struct rpc_rqst *req = NULL; 1034 struct rpc_rqst *req = NULL;
985 u32 *p; 1035 struct kvec *src, *dst;
986 u32 xid; 1036 __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
987 u32 calldir; 1037 __be32 xid;
988 int len; 1038 __be32 calldir;
989
990 len = svc_recvfrom(rqstp, vec, 1, 8);
991 if (len < 0)
992 goto error;
993 1039
994 p = (u32 *)rqstp->rq_arg.head[0].iov_base;
995 xid = *p++; 1040 xid = *p++;
996 calldir = *p; 1041 calldir = *p;
997 1042
998 if (calldir == 0) { 1043 if (bc_xprt)
999 /* REQUEST is the most common case */ 1044 req = xprt_lookup_rqst(bc_xprt, xid);
1000 vec[0] = rqstp->rq_arg.head[0];
1001 } else {
1002 /* REPLY */
1003 struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
1004
1005 if (bc_xprt)
1006 req = xprt_lookup_rqst(bc_xprt, xid);
1007
1008 if (!req) {
1009 printk(KERN_NOTICE
1010 "%s: Got unrecognized reply: "
1011 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1012 __func__, ntohl(calldir),
1013 bc_xprt, xid);
1014 vec[0] = rqstp->rq_arg.head[0];
1015 goto out;
1016 }
1017 1045
1018 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 1046 if (!req) {
1019 sizeof(struct xdr_buf)); 1047 printk(KERN_NOTICE
1020 /* copy the xid and call direction */ 1048 "%s: Got unrecognized reply: "
1021 memcpy(req->rq_private_buf.head[0].iov_base, 1049 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1022 rqstp->rq_arg.head[0].iov_base, 8); 1050 __func__, ntohl(calldir),
1023 vec[0] = req->rq_private_buf.head[0]; 1051 bc_xprt, xid);
1052 return -EAGAIN;
1024 } 1053 }
1025 out: 1054
1026 vec[0].iov_base += 8; 1055 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
1027 vec[0].iov_len -= 8; 1056 /*
1028 len = svsk->sk_reclen - 8; 1057 * XXX!: cheating for now! Only copying HEAD.
1029 error: 1058 * But we know this is good enough for now (in fact, for any
1030 *reqpp = req; 1059 * callback reply in the forseeable future).
1031 return len; 1060 */
1061 dst = &req->rq_private_buf.head[0];
1062 src = &rqstp->rq_arg.head[0];
1063 if (dst->iov_len < src->iov_len)
1064 return -EAGAIN; /* whatever; just giving up. */
1065 memcpy(dst->iov_base, src->iov_base, src->iov_len);
1066 xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
1067 rqstp->rq_arg.len = 0;
1068 return 0;
1032} 1069}
1033 1070
1071static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
1072{
1073 int i = 0;
1074 int t = 0;
1075
1076 while (t < len) {
1077 vec[i].iov_base = page_address(pages[i]);
1078 vec[i].iov_len = PAGE_SIZE;
1079 i++;
1080 t += PAGE_SIZE;
1081 }
1082 return i;
1083}
1084
1085
1034/* 1086/*
1035 * Receive data from a TCP socket. 1087 * Receive data from a TCP socket.
1036 */ 1088 */
@@ -1041,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1041 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 1093 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1042 int len; 1094 int len;
1043 struct kvec *vec; 1095 struct kvec *vec;
1044 int pnum, vlen; 1096 unsigned int want, base;
1045 struct rpc_rqst *req = NULL; 1097 __be32 *p;
1098 __be32 calldir;
1099 int pnum;
1046 1100
1047 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 1101 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1048 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), 1102 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
@@ -1053,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1053 if (len < 0) 1107 if (len < 0)
1054 goto error; 1108 goto error;
1055 1109
1110 base = svc_tcp_restore_pages(svsk, rqstp);
1111 want = svsk->sk_reclen - base;
1112
1056 vec = rqstp->rq_vec; 1113 vec = rqstp->rq_vec;
1057 vec[0] = rqstp->rq_arg.head[0];
1058 vlen = PAGE_SIZE;
1059 1114
1060 /* 1115 pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
1061 * We have enough data for the whole tcp record. Let's try and read the 1116 svsk->sk_reclen);
1062 * first 8 bytes to get the xid and the call direction. We can use this
1063 * to figure out if this is a call or a reply to a callback. If
1064 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
1065 * In that case, don't bother with the calldir and just read the data.
1066 * It will be rejected in svc_process.
1067 */
1068 if (len >= 8) {
1069 len = svc_process_calldir(svsk, rqstp, &req, vec);
1070 if (len < 0)
1071 goto err_again;
1072 vlen -= 8;
1073 }
1074 1117
1075 pnum = 1;
1076 while (vlen < len) {
1077 vec[pnum].iov_base = (req) ?
1078 page_address(req->rq_private_buf.pages[pnum - 1]) :
1079 page_address(rqstp->rq_pages[pnum]);
1080 vec[pnum].iov_len = PAGE_SIZE;
1081 pnum++;
1082 vlen += PAGE_SIZE;
1083 }
1084 rqstp->rq_respages = &rqstp->rq_pages[pnum]; 1118 rqstp->rq_respages = &rqstp->rq_pages[pnum];
1085 1119
1086 /* Now receive data */ 1120 /* Now receive data */
1087 len = svc_recvfrom(rqstp, vec, pnum, len); 1121 len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
1088 if (len < 0) 1122 if (len >= 0)
1089 goto err_again; 1123 svsk->sk_tcplen += len;
1090 1124 if (len != want) {
1091 /* 1125 if (len < 0 && len != -EAGAIN)
1092 * Account for the 8 bytes we read earlier 1126 goto err_other;
1093 */ 1127 svc_tcp_save_pages(svsk, rqstp);
1094 len += 8; 1128 dprintk("svc: incomplete TCP record (%d of %d)\n",
1095 1129 svsk->sk_tcplen, svsk->sk_reclen);
1096 if (req) { 1130 goto err_noclose;
1097 xprt_complete_rqst(req->rq_task, len);
1098 len = 0;
1099 goto out;
1100 } 1131 }
1101 dprintk("svc: TCP complete record (%d bytes)\n", len); 1132
1102 rqstp->rq_arg.len = len; 1133 rqstp->rq_arg.len = svsk->sk_reclen;
1103 rqstp->rq_arg.page_base = 0; 1134 rqstp->rq_arg.page_base = 0;
1104 if (len <= rqstp->rq_arg.head[0].iov_len) { 1135 if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
1105 rqstp->rq_arg.head[0].iov_len = len; 1136 rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
1106 rqstp->rq_arg.page_len = 0; 1137 rqstp->rq_arg.page_len = 0;
1107 } else { 1138 } else
1108 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 1139 rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
1109 }
1110 1140
1111 rqstp->rq_xprt_ctxt = NULL; 1141 rqstp->rq_xprt_ctxt = NULL;
1112 rqstp->rq_prot = IPPROTO_TCP; 1142 rqstp->rq_prot = IPPROTO_TCP;
1113 1143
1114out: 1144 p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
1145 calldir = p[1];
1146 if (calldir)
1147 len = receive_cb_reply(svsk, rqstp);
1148
1115 /* Reset TCP read info */ 1149 /* Reset TCP read info */
1116 svsk->sk_reclen = 0; 1150 svsk->sk_reclen = 0;
1117 svsk->sk_tcplen = 0; 1151 svsk->sk_tcplen = 0;
1152 /* If we have more data, signal svc_xprt_enqueue() to try again */
1153 if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
1154 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1155
1156 if (len < 0)
1157 goto error;
1118 1158
1119 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); 1159 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
1120 if (serv->sv_stats) 1160 if (serv->sv_stats)
1121 serv->sv_stats->nettcpcnt++; 1161 serv->sv_stats->nettcpcnt++;
1122 1162
1123 return len; 1163 dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
1164 return rqstp->rq_arg.len;
1124 1165
1125err_again:
1126 if (len == -EAGAIN) {
1127 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1128 return len;
1129 }
1130error: 1166error:
1131 if (len != -EAGAIN) { 1167 if (len != -EAGAIN)
1132 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1168 goto err_other;
1133 svsk->sk_xprt.xpt_server->sv_name, -len); 1169 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1134 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1135 }
1136 return -EAGAIN; 1170 return -EAGAIN;
1171err_other:
1172 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1173 svsk->sk_xprt.xpt_server->sv_name, -len);
1174 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1175err_noclose:
1176 return -EAGAIN; /* record not complete */
1137} 1177}
1138 1178
1139/* 1179/*
@@ -1304,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1304 1344
1305 svsk->sk_reclen = 0; 1345 svsk->sk_reclen = 0;
1306 svsk->sk_tcplen = 0; 1346 svsk->sk_tcplen = 0;
1347 memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
1307 1348
1308 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1349 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1309 1350
1310 /* initialise setting must have enough space to
1311 * receive and respond to one request.
1312 * svc_tcp_recvfrom will re-adjust if necessary
1313 */
1314 svc_sock_setbufsize(svsk->sk_sock,
1315 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1316 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1317
1318 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1319 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1351 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1320 if (sk->sk_state != TCP_ESTABLISHED) 1352 if (sk->sk_state != TCP_ESTABLISHED)
1321 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1353 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1379,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1379 /* Initialize the socket */ 1411 /* Initialize the socket */
1380 if (sock->type == SOCK_DGRAM) 1412 if (sock->type == SOCK_DGRAM)
1381 svc_udp_init(svsk, serv); 1413 svc_udp_init(svsk, serv);
1382 else 1414 else {
1415 /* initialise setting must have enough space to
1416 * receive and respond to one request.
1417 */
1418 svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
1419 4 * serv->sv_max_mesg);
1383 svc_tcp_init(svsk, serv); 1420 svc_tcp_init(svsk, serv);
1421 }
1384 1422
1385 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1423 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1386 svsk, svsk->sk_sk); 1424 svsk, svsk->sk_sk);
@@ -1562,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
1562 1600
1563 svc_sock_detach(xprt); 1601 svc_sock_detach(xprt);
1564 1602
1565 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1603 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
1604 svc_tcp_clear_pages(svsk);
1566 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); 1605 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
1606 }
1567} 1607}
1568 1608
1569/* 1609/*
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 679cd674b81d..f008c14ad34c 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
638} 638}
639EXPORT_SYMBOL_GPL(xdr_init_decode); 639EXPORT_SYMBOL_GPL(xdr_init_decode);
640 640
641/**
642 * xdr_init_decode - Initialize an xdr_stream for decoding data.
643 * @xdr: pointer to xdr_stream struct
644 * @buf: pointer to XDR buffer from which to decode data
645 * @pages: list of pages to decode into
646 * @len: length in bytes of buffer in pages
647 */
648void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
649 struct page **pages, unsigned int len)
650{
651 memset(buf, 0, sizeof(*buf));
652 buf->pages = pages;
653 buf->page_len = len;
654 buf->buflen = len;
655 buf->len = len;
656 xdr_init_decode(xdr, buf, NULL);
657}
658EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
659
641static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) 660static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
642{ 661{
643 __be32 *p = xdr->p; 662 __be32 *p = xdr->p;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6c014dd3a20b..c3c232a88d94 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -695,7 +695,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
695 return ERR_PTR(-ENOMEM); 695 return ERR_PTR(-ENOMEM);
696 xprt = &cma_xprt->sc_xprt; 696 xprt = &cma_xprt->sc_xprt;
697 697
698 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); 698 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
699 IB_QPT_RC);
699 if (IS_ERR(listen_id)) { 700 if (IS_ERR(listen_id)) {
700 ret = PTR_ERR(listen_id); 701 ret = PTR_ERR(listen_id);
701 dprintk("svcrdma: rdma_create_id failed = %d\n", ret); 702 dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index d4297dc43dc4..80f8da344df5 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -387,7 +387,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
387 387
388 init_completion(&ia->ri_done); 388 init_completion(&ia->ri_done);
389 389
390 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); 390 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
391 if (IS_ERR(id)) { 391 if (IS_ERR(id)) {
392 rc = PTR_ERR(id); 392 rc = PTR_ERR(id);
393 dprintk("RPC: %s: rdma_create_id() failed %i\n", 393 dprintk("RPC: %s: rdma_create_id() failed %i\n",
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bf005d3c65ef..72abb7358933 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/types.h> 21#include <linux/types.h>
22#include <linux/string.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/capability.h> 25#include <linux/capability.h>
@@ -28,6 +29,7 @@
28#include <linux/in.h> 29#include <linux/in.h>
29#include <linux/net.h> 30#include <linux/net.h>
30#include <linux/mm.h> 31#include <linux/mm.h>
32#include <linux/un.h>
31#include <linux/udp.h> 33#include <linux/udp.h>
32#include <linux/tcp.h> 34#include <linux/tcp.h>
33#include <linux/sunrpc/clnt.h> 35#include <linux/sunrpc/clnt.h>
@@ -45,6 +47,9 @@
45#include <net/tcp.h> 47#include <net/tcp.h>
46 48
47#include "sunrpc.h" 49#include "sunrpc.h"
50
51static void xs_close(struct rpc_xprt *xprt);
52
48/* 53/*
49 * xprtsock tunables 54 * xprtsock tunables
50 */ 55 */
@@ -261,6 +266,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
261 return (struct sockaddr *) &xprt->addr; 266 return (struct sockaddr *) &xprt->addr;
262} 267}
263 268
269static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt)
270{
271 return (struct sockaddr_un *) &xprt->addr;
272}
273
264static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) 274static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
265{ 275{
266 return (struct sockaddr_in *) &xprt->addr; 276 return (struct sockaddr_in *) &xprt->addr;
@@ -276,23 +286,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
276 struct sockaddr *sap = xs_addr(xprt); 286 struct sockaddr *sap = xs_addr(xprt);
277 struct sockaddr_in6 *sin6; 287 struct sockaddr_in6 *sin6;
278 struct sockaddr_in *sin; 288 struct sockaddr_in *sin;
289 struct sockaddr_un *sun;
279 char buf[128]; 290 char buf[128];
280 291
281 (void)rpc_ntop(sap, buf, sizeof(buf));
282 xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
283
284 switch (sap->sa_family) { 292 switch (sap->sa_family) {
293 case AF_LOCAL:
294 sun = xs_addr_un(xprt);
295 strlcpy(buf, sun->sun_path, sizeof(buf));
296 xprt->address_strings[RPC_DISPLAY_ADDR] =
297 kstrdup(buf, GFP_KERNEL);
298 break;
285 case AF_INET: 299 case AF_INET:
300 (void)rpc_ntop(sap, buf, sizeof(buf));
301 xprt->address_strings[RPC_DISPLAY_ADDR] =
302 kstrdup(buf, GFP_KERNEL);
286 sin = xs_addr_in(xprt); 303 sin = xs_addr_in(xprt);
287 snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); 304 snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
288 break; 305 break;
289 case AF_INET6: 306 case AF_INET6:
307 (void)rpc_ntop(sap, buf, sizeof(buf));
308 xprt->address_strings[RPC_DISPLAY_ADDR] =
309 kstrdup(buf, GFP_KERNEL);
290 sin6 = xs_addr_in6(xprt); 310 sin6 = xs_addr_in6(xprt);
291 snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); 311 snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
292 break; 312 break;
293 default: 313 default:
294 BUG(); 314 BUG();
295 } 315 }
316
296 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); 317 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
297} 318}
298 319
@@ -495,6 +516,70 @@ static int xs_nospace(struct rpc_task *task)
495 return ret; 516 return ret;
496} 517}
497 518
519/*
520 * Construct a stream transport record marker in @buf.
521 */
522static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
523{
524 u32 reclen = buf->len - sizeof(rpc_fraghdr);
525 rpc_fraghdr *base = buf->head[0].iov_base;
526 *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
527}
528
529/**
530 * xs_local_send_request - write an RPC request to an AF_LOCAL socket
531 * @task: RPC task that manages the state of an RPC request
532 *
533 * Return values:
534 * 0: The request has been sent
535 * EAGAIN: The socket was blocked, please call again later to
536 * complete the request
537 * ENOTCONN: Caller needs to invoke connect logic then call again
538 * other: Some other error occured, the request was not sent
539 */
540static int xs_local_send_request(struct rpc_task *task)
541{
542 struct rpc_rqst *req = task->tk_rqstp;
543 struct rpc_xprt *xprt = req->rq_xprt;
544 struct sock_xprt *transport =
545 container_of(xprt, struct sock_xprt, xprt);
546 struct xdr_buf *xdr = &req->rq_snd_buf;
547 int status;
548
549 xs_encode_stream_record_marker(&req->rq_snd_buf);
550
551 xs_pktdump("packet data:",
552 req->rq_svec->iov_base, req->rq_svec->iov_len);
553
554 status = xs_sendpages(transport->sock, NULL, 0,
555 xdr, req->rq_bytes_sent);
556 dprintk("RPC: %s(%u) = %d\n",
557 __func__, xdr->len - req->rq_bytes_sent, status);
558 if (likely(status >= 0)) {
559 req->rq_bytes_sent += status;
560 req->rq_xmit_bytes_sent += status;
561 if (likely(req->rq_bytes_sent >= req->rq_slen)) {
562 req->rq_bytes_sent = 0;
563 return 0;
564 }
565 status = -EAGAIN;
566 }
567
568 switch (status) {
569 case -EAGAIN:
570 status = xs_nospace(task);
571 break;
572 default:
573 dprintk("RPC: sendmsg returned unrecognized error %d\n",
574 -status);
575 case -EPIPE:
576 xs_close(xprt);
577 status = -ENOTCONN;
578 }
579
580 return status;
581}
582
498/** 583/**
499 * xs_udp_send_request - write an RPC request to a UDP socket 584 * xs_udp_send_request - write an RPC request to a UDP socket
500 * @task: address of RPC task that manages the state of an RPC request 585 * @task: address of RPC task that manages the state of an RPC request
@@ -574,13 +659,6 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
574 kernel_sock_shutdown(sock, SHUT_WR); 659 kernel_sock_shutdown(sock, SHUT_WR);
575} 660}
576 661
577static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
578{
579 u32 reclen = buf->len - sizeof(rpc_fraghdr);
580 rpc_fraghdr *base = buf->head[0].iov_base;
581 *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen);
582}
583
584/** 662/**
585 * xs_tcp_send_request - write an RPC request to a TCP socket 663 * xs_tcp_send_request - write an RPC request to a TCP socket
586 * @task: address of RPC task that manages the state of an RPC request 664 * @task: address of RPC task that manages the state of an RPC request
@@ -603,7 +681,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
603 struct xdr_buf *xdr = &req->rq_snd_buf; 681 struct xdr_buf *xdr = &req->rq_snd_buf;
604 int status; 682 int status;
605 683
606 xs_encode_tcp_record_marker(&req->rq_snd_buf); 684 xs_encode_stream_record_marker(&req->rq_snd_buf);
607 685
608 xs_pktdump("packet data:", 686 xs_pktdump("packet data:",
609 req->rq_svec->iov_base, 687 req->rq_svec->iov_base,
@@ -785,6 +863,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
785 return (struct rpc_xprt *) sk->sk_user_data; 863 return (struct rpc_xprt *) sk->sk_user_data;
786} 864}
787 865
866static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
867{
868 struct xdr_skb_reader desc = {
869 .skb = skb,
870 .offset = sizeof(rpc_fraghdr),
871 .count = skb->len - sizeof(rpc_fraghdr),
872 };
873
874 if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
875 return -1;
876 if (desc.count)
877 return -1;
878 return 0;
879}
880
881/**
882 * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
883 * @sk: socket with data to read
884 * @len: how much data to read
885 *
886 * Currently this assumes we can read the whole reply in a single gulp.
887 */
888static void xs_local_data_ready(struct sock *sk, int len)
889{
890 struct rpc_task *task;
891 struct rpc_xprt *xprt;
892 struct rpc_rqst *rovr;
893 struct sk_buff *skb;
894 int err, repsize, copied;
895 u32 _xid;
896 __be32 *xp;
897
898 read_lock_bh(&sk->sk_callback_lock);
899 dprintk("RPC: %s...\n", __func__);
900 xprt = xprt_from_sock(sk);
901 if (xprt == NULL)
902 goto out;
903
904 skb = skb_recv_datagram(sk, 0, 1, &err);
905 if (skb == NULL)
906 goto out;
907
908 if (xprt->shutdown)
909 goto dropit;
910
911 repsize = skb->len - sizeof(rpc_fraghdr);
912 if (repsize < 4) {
913 dprintk("RPC: impossible RPC reply size %d\n", repsize);
914 goto dropit;
915 }
916
917 /* Copy the XID from the skb... */
918 xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
919 if (xp == NULL)
920 goto dropit;
921
922 /* Look up and lock the request corresponding to the given XID */
923 spin_lock(&xprt->transport_lock);
924 rovr = xprt_lookup_rqst(xprt, *xp);
925 if (!rovr)
926 goto out_unlock;
927 task = rovr->rq_task;
928
929 copied = rovr->rq_private_buf.buflen;
930 if (copied > repsize)
931 copied = repsize;
932
933 if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
934 dprintk("RPC: sk_buff copy failed\n");
935 goto out_unlock;
936 }
937
938 xprt_complete_rqst(task, copied);
939
940 out_unlock:
941 spin_unlock(&xprt->transport_lock);
942 dropit:
943 skb_free_datagram(sk, skb);
944 out:
945 read_unlock_bh(&sk->sk_callback_lock);
946}
947
788/** 948/**
789 * xs_udp_data_ready - "data ready" callback for UDP sockets 949 * xs_udp_data_ready - "data ready" callback for UDP sockets
790 * @sk: socket with data to read 950 * @sk: socket with data to read
@@ -1344,7 +1504,6 @@ static void xs_tcp_state_change(struct sock *sk)
1344 case TCP_CLOSE_WAIT: 1504 case TCP_CLOSE_WAIT:
1345 /* The server initiated a shutdown of the socket */ 1505 /* The server initiated a shutdown of the socket */
1346 xprt_force_disconnect(xprt); 1506 xprt_force_disconnect(xprt);
1347 case TCP_SYN_SENT:
1348 xprt->connect_cookie++; 1507 xprt->connect_cookie++;
1349 case TCP_CLOSING: 1508 case TCP_CLOSING:
1350 /* 1509 /*
@@ -1571,11 +1730,31 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
1571 return err; 1730 return err;
1572} 1731}
1573 1732
1733/*
1734 * We don't support autobind on AF_LOCAL sockets
1735 */
1736static void xs_local_rpcbind(struct rpc_task *task)
1737{
1738 xprt_set_bound(task->tk_xprt);
1739}
1740
1741static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
1742{
1743}
1574 1744
1575#ifdef CONFIG_DEBUG_LOCK_ALLOC 1745#ifdef CONFIG_DEBUG_LOCK_ALLOC
1576static struct lock_class_key xs_key[2]; 1746static struct lock_class_key xs_key[2];
1577static struct lock_class_key xs_slock_key[2]; 1747static struct lock_class_key xs_slock_key[2];
1578 1748
1749static inline void xs_reclassify_socketu(struct socket *sock)
1750{
1751 struct sock *sk = sock->sk;
1752
1753 BUG_ON(sock_owned_by_user(sk));
1754 sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC",
1755 &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]);
1756}
1757
1579static inline void xs_reclassify_socket4(struct socket *sock) 1758static inline void xs_reclassify_socket4(struct socket *sock)
1580{ 1759{
1581 struct sock *sk = sock->sk; 1760 struct sock *sk = sock->sk;
@@ -1597,6 +1776,9 @@ static inline void xs_reclassify_socket6(struct socket *sock)
1597static inline void xs_reclassify_socket(int family, struct socket *sock) 1776static inline void xs_reclassify_socket(int family, struct socket *sock)
1598{ 1777{
1599 switch (family) { 1778 switch (family) {
1779 case AF_LOCAL:
1780 xs_reclassify_socketu(sock);
1781 break;
1600 case AF_INET: 1782 case AF_INET:
1601 xs_reclassify_socket4(sock); 1783 xs_reclassify_socket4(sock);
1602 break; 1784 break;
@@ -1606,6 +1788,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
1606 } 1788 }
1607} 1789}
1608#else 1790#else
1791static inline void xs_reclassify_socketu(struct socket *sock)
1792{
1793}
1794
1609static inline void xs_reclassify_socket4(struct socket *sock) 1795static inline void xs_reclassify_socket4(struct socket *sock)
1610{ 1796{
1611} 1797}
@@ -1644,6 +1830,94 @@ out:
1644 return ERR_PTR(err); 1830 return ERR_PTR(err);
1645} 1831}
1646 1832
1833static int xs_local_finish_connecting(struct rpc_xprt *xprt,
1834 struct socket *sock)
1835{
1836 struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
1837 xprt);
1838
1839 if (!transport->inet) {
1840 struct sock *sk = sock->sk;
1841
1842 write_lock_bh(&sk->sk_callback_lock);
1843
1844 xs_save_old_callbacks(transport, sk);
1845
1846 sk->sk_user_data = xprt;
1847 sk->sk_data_ready = xs_local_data_ready;
1848 sk->sk_write_space = xs_udp_write_space;
1849 sk->sk_error_report = xs_error_report;
1850 sk->sk_allocation = GFP_ATOMIC;
1851
1852 xprt_clear_connected(xprt);
1853
1854 /* Reset to new socket */
1855 transport->sock = sock;
1856 transport->inet = sk;
1857
1858 write_unlock_bh(&sk->sk_callback_lock);
1859 }
1860
1861 /* Tell the socket layer to start connecting... */
1862 xprt->stat.connect_count++;
1863 xprt->stat.connect_start = jiffies;
1864 return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
1865}
1866
1867/**
1868 * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
1869 * @xprt: RPC transport to connect
1870 * @transport: socket transport to connect
1871 * @create_sock: function to create a socket of the correct type
1872 *
1873 * Invoked by a work queue tasklet.
1874 */
1875static void xs_local_setup_socket(struct work_struct *work)
1876{
1877 struct sock_xprt *transport =
1878 container_of(work, struct sock_xprt, connect_worker.work);
1879 struct rpc_xprt *xprt = &transport->xprt;
1880 struct socket *sock;
1881 int status = -EIO;
1882
1883 if (xprt->shutdown)
1884 goto out;
1885
1886 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1887 status = __sock_create(xprt->xprt_net, AF_LOCAL,
1888 SOCK_STREAM, 0, &sock, 1);
1889 if (status < 0) {
1890 dprintk("RPC: can't create AF_LOCAL "
1891 "transport socket (%d).\n", -status);
1892 goto out;
1893 }
1894 xs_reclassify_socketu(sock);
1895
1896 dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
1897 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1898
1899 status = xs_local_finish_connecting(xprt, sock);
1900 switch (status) {
1901 case 0:
1902 dprintk("RPC: xprt %p connected to %s\n",
1903 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1904 xprt_set_connected(xprt);
1905 break;
1906 case -ENOENT:
1907 dprintk("RPC: xprt %p: socket %s does not exist\n",
1908 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1909 break;
1910 default:
1911 printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
1912 __func__, -status,
1913 xprt->address_strings[RPC_DISPLAY_ADDR]);
1914 }
1915
1916out:
1917 xprt_clear_connecting(xprt);
1918 xprt_wake_pending_tasks(xprt, status);
1919}
1920
1647static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1921static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1648{ 1922{
1649 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1923 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1758,6 +2032,7 @@ static void xs_tcp_reuse_connection(struct sock_xprt *transport)
1758static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 2032static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1759{ 2033{
1760 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2034 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2035 int ret = -ENOTCONN;
1761 2036
1762 if (!transport->inet) { 2037 if (!transport->inet) {
1763 struct sock *sk = sock->sk; 2038 struct sock *sk = sock->sk;
@@ -1789,12 +2064,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1789 } 2064 }
1790 2065
1791 if (!xprt_bound(xprt)) 2066 if (!xprt_bound(xprt))
1792 return -ENOTCONN; 2067 goto out;
1793 2068
1794 /* Tell the socket layer to start connecting... */ 2069 /* Tell the socket layer to start connecting... */
1795 xprt->stat.connect_count++; 2070 xprt->stat.connect_count++;
1796 xprt->stat.connect_start = jiffies; 2071 xprt->stat.connect_start = jiffies;
1797 return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); 2072 ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
2073 switch (ret) {
2074 case 0:
2075 case -EINPROGRESS:
2076 /* SYN_SENT! */
2077 xprt->connect_cookie++;
2078 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
2079 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2080 }
2081out:
2082 return ret;
1798} 2083}
1799 2084
1800/** 2085/**
@@ -1917,6 +2202,32 @@ static void xs_connect(struct rpc_task *task)
1917} 2202}
1918 2203
1919/** 2204/**
2205 * xs_local_print_stats - display AF_LOCAL socket-specifc stats
2206 * @xprt: rpc_xprt struct containing statistics
2207 * @seq: output file
2208 *
2209 */
2210static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
2211{
2212 long idle_time = 0;
2213
2214 if (xprt_connected(xprt))
2215 idle_time = (long)(jiffies - xprt->last_used) / HZ;
2216
2217 seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu "
2218 "%llu %llu\n",
2219 xprt->stat.bind_count,
2220 xprt->stat.connect_count,
2221 xprt->stat.connect_time,
2222 idle_time,
2223 xprt->stat.sends,
2224 xprt->stat.recvs,
2225 xprt->stat.bad_xids,
2226 xprt->stat.req_u,
2227 xprt->stat.bklog_u);
2228}
2229
2230/**
1920 * xs_udp_print_stats - display UDP socket-specifc stats 2231 * xs_udp_print_stats - display UDP socket-specifc stats
1921 * @xprt: rpc_xprt struct containing statistics 2232 * @xprt: rpc_xprt struct containing statistics
1922 * @seq: output file 2233 * @seq: output file
@@ -2014,10 +2325,7 @@ static int bc_sendto(struct rpc_rqst *req)
2014 unsigned long headoff; 2325 unsigned long headoff;
2015 unsigned long tailoff; 2326 unsigned long tailoff;
2016 2327
2017 /* 2328 xs_encode_stream_record_marker(xbufp);
2018 * Set up the rpc header and record marker stuff
2019 */
2020 xs_encode_tcp_record_marker(xbufp);
2021 2329
2022 tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; 2330 tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
2023 headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; 2331 headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
@@ -2089,6 +2397,21 @@ static void bc_destroy(struct rpc_xprt *xprt)
2089{ 2397{
2090} 2398}
2091 2399
2400static struct rpc_xprt_ops xs_local_ops = {
2401 .reserve_xprt = xprt_reserve_xprt,
2402 .release_xprt = xs_tcp_release_xprt,
2403 .rpcbind = xs_local_rpcbind,
2404 .set_port = xs_local_set_port,
2405 .connect = xs_connect,
2406 .buf_alloc = rpc_malloc,
2407 .buf_free = rpc_free,
2408 .send_request = xs_local_send_request,
2409 .set_retrans_timeout = xprt_set_retrans_timeout_def,
2410 .close = xs_close,
2411 .destroy = xs_destroy,
2412 .print_stats = xs_local_print_stats,
2413};
2414
2092static struct rpc_xprt_ops xs_udp_ops = { 2415static struct rpc_xprt_ops xs_udp_ops = {
2093 .set_buffer_size = xs_udp_set_buffer_size, 2416 .set_buffer_size = xs_udp_set_buffer_size,
2094 .reserve_xprt = xprt_reserve_xprt_cong, 2417 .reserve_xprt = xprt_reserve_xprt_cong,
@@ -2150,6 +2473,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap)
2150 }; 2473 };
2151 2474
2152 switch (family) { 2475 switch (family) {
2476 case AF_LOCAL:
2477 break;
2153 case AF_INET: 2478 case AF_INET:
2154 memcpy(sap, &sin, sizeof(sin)); 2479 memcpy(sap, &sin, sizeof(sin));
2155 break; 2480 break;
@@ -2197,6 +2522,70 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2197 return xprt; 2522 return xprt;
2198} 2523}
2199 2524
2525static const struct rpc_timeout xs_local_default_timeout = {
2526 .to_initval = 10 * HZ,
2527 .to_maxval = 10 * HZ,
2528 .to_retries = 2,
2529};
2530
2531/**
2532 * xs_setup_local - Set up transport to use an AF_LOCAL socket
2533 * @args: rpc transport creation arguments
2534 *
2535 * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP
2536 */
2537static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
2538{
2539 struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr;
2540 struct sock_xprt *transport;
2541 struct rpc_xprt *xprt;
2542 struct rpc_xprt *ret;
2543
2544 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2545 if (IS_ERR(xprt))
2546 return xprt;
2547 transport = container_of(xprt, struct sock_xprt, xprt);
2548
2549 xprt->prot = 0;
2550 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
2551 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
2552
2553 xprt->bind_timeout = XS_BIND_TO;
2554 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2555 xprt->idle_timeout = XS_IDLE_DISC_TO;
2556
2557 xprt->ops = &xs_local_ops;
2558 xprt->timeout = &xs_local_default_timeout;
2559
2560 switch (sun->sun_family) {
2561 case AF_LOCAL:
2562 if (sun->sun_path[0] != '/') {
2563 dprintk("RPC: bad AF_LOCAL address: %s\n",
2564 sun->sun_path);
2565 ret = ERR_PTR(-EINVAL);
2566 goto out_err;
2567 }
2568 xprt_set_bound(xprt);
2569 INIT_DELAYED_WORK(&transport->connect_worker,
2570 xs_local_setup_socket);
2571 xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
2572 break;
2573 default:
2574 ret = ERR_PTR(-EAFNOSUPPORT);
2575 goto out_err;
2576 }
2577
2578 dprintk("RPC: set up xprt to %s via AF_LOCAL\n",
2579 xprt->address_strings[RPC_DISPLAY_ADDR]);
2580
2581 if (try_module_get(THIS_MODULE))
2582 return xprt;
2583 ret = ERR_PTR(-EINVAL);
2584out_err:
2585 xprt_free(xprt);
2586 return ret;
2587}
2588
2200static const struct rpc_timeout xs_udp_default_timeout = { 2589static const struct rpc_timeout xs_udp_default_timeout = {
2201 .to_initval = 5 * HZ, 2590 .to_initval = 5 * HZ,
2202 .to_maxval = 30 * HZ, 2591 .to_maxval = 30 * HZ,
@@ -2438,6 +2827,14 @@ out_err:
2438 return ret; 2827 return ret;
2439} 2828}
2440 2829
2830static struct xprt_class xs_local_transport = {
2831 .list = LIST_HEAD_INIT(xs_local_transport.list),
2832 .name = "named UNIX socket",
2833 .owner = THIS_MODULE,
2834 .ident = XPRT_TRANSPORT_LOCAL,
2835 .setup = xs_setup_local,
2836};
2837
2441static struct xprt_class xs_udp_transport = { 2838static struct xprt_class xs_udp_transport = {
2442 .list = LIST_HEAD_INIT(xs_udp_transport.list), 2839 .list = LIST_HEAD_INIT(xs_udp_transport.list),
2443 .name = "udp", 2840 .name = "udp",
@@ -2473,6 +2870,7 @@ int init_socket_xprt(void)
2473 sunrpc_table_header = register_sysctl_table(sunrpc_table); 2870 sunrpc_table_header = register_sysctl_table(sunrpc_table);
2474#endif 2871#endif
2475 2872
2873 xprt_register_transport(&xs_local_transport);
2476 xprt_register_transport(&xs_udp_transport); 2874 xprt_register_transport(&xs_udp_transport);
2477 xprt_register_transport(&xs_tcp_transport); 2875 xprt_register_transport(&xs_tcp_transport);
2478 xprt_register_transport(&xs_bc_tcp_transport); 2876 xprt_register_transport(&xs_bc_tcp_transport);
@@ -2493,6 +2891,7 @@ void cleanup_socket_xprt(void)
2493 } 2891 }
2494#endif 2892#endif
2495 2893
2894 xprt_unregister_transport(&xs_local_transport);
2496 xprt_unregister_transport(&xs_udp_transport); 2895 xprt_unregister_transport(&xs_udp_transport);
2497 xprt_unregister_transport(&xs_tcp_transport); 2896 xprt_unregister_transport(&xs_tcp_transport);
2498 xprt_unregister_transport(&xs_bc_tcp_transport); 2897 xprt_unregister_transport(&xs_bc_tcp_transport);
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 47f1b8638df9..b11ea692bd7d 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -265,7 +265,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
265 bitnr = bitnr & 0x1F; 265 bitnr = bitnr & 0x1F;
266 replay_esn->bmp[nr] |= (1U << bitnr); 266 replay_esn->bmp[nr] |= (1U << bitnr);
267 } else { 267 } else {
268 nr = replay_esn->replay_window >> 5; 268 nr = (replay_esn->replay_window - 1) >> 5;
269 for (i = 0; i <= nr; i++) 269 for (i = 0; i <= nr; i++)
270 replay_esn->bmp[i] = 0; 270 replay_esn->bmp[i] = 0;
271 271
@@ -471,7 +471,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
471 bitnr = bitnr & 0x1F; 471 bitnr = bitnr & 0x1F;
472 replay_esn->bmp[nr] |= (1U << bitnr); 472 replay_esn->bmp[nr] |= (1U << bitnr);
473 } else { 473 } else {
474 nr = replay_esn->replay_window >> 5; 474 nr = (replay_esn->replay_window - 1) >> 5;
475 for (i = 0; i <= nr; i++) 475 for (i = 0; i <= nr; i++)
476 replay_esn->bmp[i] = 0; 476 replay_esn->bmp[i] = 0;
477 477