Diffstat (limited to 'net')
-rw-r--r--  net/802/garp.c  18
-rw-r--r--  net/802/stp.c  4
-rw-r--r--  net/8021q/vlan.c  6
-rw-r--r--  net/9p/client.c  178
-rw-r--r--  net/9p/protocol.c  5
-rw-r--r--  net/9p/trans_virtio.c  76
-rw-r--r--  net/atm/atm_sysfs.c  3
-rw-r--r--  net/atm/resources.c  7
-rw-r--r--  net/atm/resources.h  2
-rw-r--r--  net/ax25/af_ax25.c  2
-rw-r--r--  net/bridge/br_multicast.c  2
-rw-r--r--  net/caif/caif_config_util.c  13
-rw-r--r--  net/caif/caif_dev.c  2
-rw-r--r--  net/caif/caif_socket.c  45
-rw-r--r--  net/caif/cfcnfg.c  17
-rw-r--r--  net/caif/cfctrl.c  3
-rw-r--r--  net/caif/cfdbgl.c  14
-rw-r--r--  net/caif/cfrfml.c  2
-rw-r--r--  net/can/bcm.c  2
-rw-r--r--  net/ceph/Makefile  22
-rw-r--r--  net/ceph/buffer.c  2
-rw-r--r--  net/compat.c  10
-rw-r--r--  net/core/dev.c  40
-rw-r--r--  net/core/dst.c  1
-rw-r--r--  net/core/fib_rules.c  21
-rw-r--r--  net/core/filter.c  87
-rw-r--r--  net/core/iovec.c  20
-rw-r--r--  net/core/net-sysfs.c  26
-rw-r--r--  net/core/net_namespace.c  4
-rw-r--r--  net/core/pktgen.c  41
-rw-r--r--  net/core/request_sock.c  4
-rw-r--r--  net/core/rtnetlink.c  9
-rw-r--r--  net/core/sock.c  63
-rw-r--r--  net/core/sysctl_net_core.c  3
-rw-r--r--  net/core/timestamping.c  6
-rw-r--r--  net/dccp/ccid.h  34
-rw-r--r--  net/dccp/ccids/ccid2.c  23
-rw-r--r--  net/dccp/ccids/ccid2.h  5
-rw-r--r--  net/dccp/ccids/ccid3.c  12
-rw-r--r--  net/dccp/dccp.h  5
-rw-r--r--  net/dccp/input.c  3
-rw-r--r--  net/dccp/output.c  209
-rw-r--r--  net/dccp/proto.c  21
-rw-r--r--  net/dccp/timer.c  27
-rw-r--r--  net/decnet/af_decnet.c  4
-rw-r--r--  net/decnet/sysctl_net_decnet.c  4
-rw-r--r--  net/econet/af_econet.c  99
-rw-r--r--  net/ipv4/fib_frontend.c  2
-rw-r--r--  net/ipv4/fib_hash.c  54
-rw-r--r--  net/ipv4/fib_lookup.h  5
-rw-r--r--  net/ipv4/fib_trie.c  7
-rw-r--r--  net/ipv4/gre.c  5
-rw-r--r--  net/ipv4/icmp.c  3
-rw-r--r--  net/ipv4/igmp.c  4
-rw-r--r--  net/ipv4/inet_diag.c  27
-rw-r--r--  net/ipv4/inet_hashtables.c  3
-rw-r--r--  net/ipv4/inetpeer.c  138
-rw-r--r--  net/ipv4/ip_gre.c  7
-rw-r--r--  net/ipv4/ip_sockglue.c  10
-rw-r--r--  net/ipv4/ipip.c  1
-rw-r--r--  net/ipv4/netfilter/arp_tables.c  1
-rw-r--r--  net/ipv4/netfilter/ip_tables.c  1
-rw-r--r--  net/ipv4/netfilter/nf_nat_core.c  40
-rw-r--r--  net/ipv4/proc.c  9
-rw-r--r--  net/ipv4/protocol.c  8
-rw-r--r--  net/ipv4/route.c  75
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c  11
-rw-r--r--  net/ipv4/tcp.c  6
-rw-r--r--  net/ipv4/tcp_input.c  11
-rw-r--r--  net/ipv4/tcp_ipv4.c  12
-rw-r--r--  net/ipv4/tcp_minisocks.c  2
-rw-r--r--  net/ipv4/tcp_output.c  42
-rw-r--r--  net/ipv4/tunnel4.c  29
-rw-r--r--  net/ipv4/udp.c  7
-rw-r--r--  net/ipv4/udplite.c  1
-rw-r--r--  net/ipv6/addrconf.c  76
-rw-r--r--  net/ipv6/ip6_output.c  12
-rw-r--r--  net/ipv6/ip6_tunnel.c  9
-rw-r--r--  net/ipv6/ipv6_sockglue.c  4
-rw-r--r--  net/ipv6/netfilter/Kconfig  5
-rw-r--r--  net/ipv6/netfilter/Makefile  5
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c  1
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c  7
-rw-r--r--  net/ipv6/proc.c  4
-rw-r--r--  net/ipv6/protocol.c  8
-rw-r--r--  net/ipv6/raw.c  2
-rw-r--r--  net/ipv6/reassembly.c  2
-rw-r--r--  net/ipv6/route.c  15
-rw-r--r--  net/ipv6/sit.c  4
-rw-r--r--  net/ipv6/tunnel6.c  24
-rw-r--r--  net/ipv6/udp.c  3
-rw-r--r--  net/ipv6/udplite.c  1
-rw-r--r--  net/ipv6/xfrm6_output.c  16
-rw-r--r--  net/irda/irttp.c  30
-rw-r--r--  net/iucv/iucv.c  3
-rw-r--r--  net/l2tp/l2tp_core.c  53
-rw-r--r--  net/l2tp/l2tp_core.h  33
-rw-r--r--  net/l2tp/l2tp_debugfs.c  2
-rw-r--r--  net/l2tp/l2tp_ip.c  8
-rw-r--r--  net/llc/af_llc.c  5
-rw-r--r--  net/netfilter/Kconfig  2
-rw-r--r--  net/netfilter/ipvs/Kconfig  1
-rw-r--r--  net/netfilter/nf_conntrack_core.c  3
-rw-r--r--  net/netfilter/nf_conntrack_proto.c  6
-rw-r--r--  net/netfilter/xt_TPROXY.c  10
-rw-r--r--  net/netfilter/xt_socket.c  19
-rw-r--r--  net/netlink/af_netlink.c  65
-rw-r--r--  net/packet/af_packet.c  7
-rw-r--r--  net/rds/loop.c  4
-rw-r--r--  net/rds/message.c  7
-rw-r--r--  net/rds/rdma.c  128
-rw-r--r--  net/rds/send.c  4
-rw-r--r--  net/rds/tcp.c  6
-rw-r--r--  net/sched/cls_basic.c  4
-rw-r--r--  net/sched/cls_cgroup.c  2
-rw-r--r--  net/sched/em_text.c  3
-rw-r--r--  net/sched/sch_sfq.c  20
-rw-r--r--  net/sctp/protocol.c  2
-rw-r--r--  net/sctp/socket.c  14
-rw-r--r--  net/sctp/sysctl.c  4
-rw-r--r--  net/socket.c  35
-rw-r--r--  net/sunrpc/Kconfig  19
-rw-r--r--  net/sunrpc/auth.c  4
-rw-r--r--  net/sunrpc/auth_generic.c  2
-rw-r--r--  net/sunrpc/auth_gss/Makefile  5
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_mech.c  2
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_mech.c  247
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_seal.c  186
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_token.c  267
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_unseal.c  127
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c  51
-rw-r--r--  net/sunrpc/cache.c  288
-rw-r--r--  net/sunrpc/clnt.c  3
-rw-r--r--  net/sunrpc/netns.h  19
-rw-r--r--  net/sunrpc/rpc_pipe.c  19
-rw-r--r--  net/sunrpc/rpcb_clnt.c  60
-rw-r--r--  net/sunrpc/sched.c  2
-rw-r--r--  net/sunrpc/stats.c  43
-rw-r--r--  net/sunrpc/sunrpc_syms.c  58
-rw-r--r--  net/sunrpc/svc.c  3
-rw-r--r--  net/sunrpc/svc_xprt.c  59
-rw-r--r--  net/sunrpc/svcauth_unix.c  194
-rw-r--r--  net/sunrpc/svcsock.c  27
-rw-r--r--  net/sunrpc/xdr.c  61
-rw-r--r--  net/sunrpc/xprt.c  39
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma.c  11
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  19
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c  82
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c  49
-rw-r--r--  net/sunrpc/xprtrdma/transport.c  25
-rw-r--r--  net/sunrpc/xprtsock.c  358
-rw-r--r--  net/tipc/socket.c  1
-rw-r--r--  net/unix/af_unix.c  51
-rw-r--r--  net/unix/garbage.c  9
-rw-r--r--  net/x25/x25_facilities.c  20
-rw-r--r--  net/x25/x25_in.c  2
-rw-r--r--  net/x25/x25_link.c  1
-rw-r--r--  net/xfrm/xfrm_hash.c  2
-rw-r--r--  net/xfrm/xfrm_state.c  2
159 files changed, 2409 insertions, 2413 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 941f2a324d3a..c1df2dad8c6b 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -346,8 +346,8 @@ int garp_request_join(const struct net_device *dev,
 		       const struct garp_application *appl,
 		       const void *data, u8 len, u8 type)
 {
-	struct garp_port *port = dev->garp_port;
-	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_port *port = rtnl_dereference(dev->garp_port);
+	struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
 	struct garp_attr *attr;
 
 	spin_lock_bh(&app->lock);
@@ -366,8 +366,8 @@ void garp_request_leave(const struct net_device *dev,
 			const struct garp_application *appl,
 			const void *data, u8 len, u8 type)
 {
-	struct garp_port *port = dev->garp_port;
-	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_port *port = rtnl_dereference(dev->garp_port);
+	struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
 	struct garp_attr *attr;
 
 	spin_lock_bh(&app->lock);
@@ -546,11 +546,11 @@ static int garp_init_port(struct net_device *dev)
 
 static void garp_release_port(struct net_device *dev)
 {
-	struct garp_port *port = dev->garp_port;
+	struct garp_port *port = rtnl_dereference(dev->garp_port);
 	unsigned int i;
 
 	for (i = 0; i <= GARP_APPLICATION_MAX; i++) {
-		if (port->applicants[i])
+		if (rtnl_dereference(port->applicants[i]))
 			return;
 	}
 	rcu_assign_pointer(dev->garp_port, NULL);
@@ -565,7 +565,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
 
 	ASSERT_RTNL();
 
-	if (!dev->garp_port) {
+	if (!rtnl_dereference(dev->garp_port)) {
 		err = garp_init_port(dev);
 		if (err < 0)
 			goto err1;
@@ -601,8 +601,8 @@ EXPORT_SYMBOL_GPL(garp_init_applicant);
 
 void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl)
 {
-	struct garp_port *port = dev->garp_port;
-	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_port *port = rtnl_dereference(dev->garp_port);
+	struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
 
 	ASSERT_RTNL();
 
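
The garp.c hunks replace plain loads of RCU-managed pointers with rtnl_dereference(), which tells sparse (and the reader) that the RTNL lock, not rcu_read_lock(), protects the access. A minimal sketch of the pattern with hypothetical types, not taken from the patch:

#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>

struct foo_port { int id; };
struct foo_dev  { struct foo_port __rcu *port; };

static int foo_port_id(struct foo_dev *d)
{
	struct foo_port *p;

	ASSERT_RTNL();			/* update side: caller holds RTNL */
	p = rtnl_dereference(d->port);	/* checked read of an __rcu pointer */
	return p ? p->id : -1;
}
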
diff --git a/net/802/stp.c b/net/802/stp.c
index 53c8f77f0ccd..978c30b1b36b 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -21,8 +21,8 @@
 #define GARP_ADDR_MAX	0x2F
 #define GARP_ADDR_RANGE	(GARP_ADDR_MAX - GARP_ADDR_MIN)
 
-static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
-static const struct stp_proto *stp_proto __read_mostly;
+static const struct stp_proto __rcu *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
+static const struct stp_proto __rcu *stp_proto __read_mostly;
 
 static struct llc_sap *sap __read_mostly;
 static unsigned int sap_registered;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 05b867e43757..52077ca22072 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -112,7 +112,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 
 	ASSERT_RTNL();
 
-	grp = real_dev->vlgrp;
+	grp = rtnl_dereference(real_dev->vlgrp);
 	BUG_ON(!grp);
 
 	/* Take it out of our own structures, but be sure to interlock with
@@ -177,7 +177,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
 
-	grp = real_dev->vlgrp;
+	grp = rtnl_dereference(real_dev->vlgrp);
 	if (!grp) {
 		ngrp = grp = vlan_group_alloc(real_dev);
 		if (!grp)
@@ -385,7 +385,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
 	}
 
-	grp = dev->vlgrp;
+	grp = rtnl_dereference(dev->vlgrp);
 	if (!grp)
 		goto out;
 
diff --git a/net/9p/client.c b/net/9p/client.c
index 83bf0541d66f..a848bca9fbff 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -450,32 +450,43 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 		return err;
 	}
 
-	if (type == P9_RERROR) {
+	if (type == P9_RERROR || type == P9_RLERROR) {
 		int ecode;
-		char *ename;
 
-		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
-				  &ename, &ecode);
-		if (err) {
-			P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
-				   err);
-			return err;
-		}
-
-		if (p9_is_proto_dotu(c) ||
-		    p9_is_proto_dotl(c))
-			err = -ecode;
-
-		if (!err || !IS_ERR_VALUE(err))
-			err = p9_errstr2errno(ename, strlen(ename));
-
-		P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
-
-		kfree(ename);
+		if (!p9_is_proto_dotl(c)) {
+			char *ename;
+
+			err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+					  &ename, &ecode);
+			if (err)
+				goto out_err;
+
+			if (p9_is_proto_dotu(c))
+				err = -ecode;
+
+			if (!err || !IS_ERR_VALUE(err)) {
+				err = p9_errstr2errno(ename, strlen(ename));
+
+				P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+
+				kfree(ename);
+			}
+		} else {
+			err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+			err = -ecode;
+
+			P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+		}
+
 	} else
 		err = 0;
 
 	return err;
+
+out_err:
+	P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
+
+	return err;
 }
 
 /**
@@ -568,11 +579,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	va_start(ap, fmt);
 	err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
 	va_end(ap);
+	if (err)
+		goto reterr;
 	p9pdu_finalize(req->tc);
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
-		c->status = Disconnected;
+		if (err != -ERESTARTSYS)
+			c->status = Disconnected;
 		goto reterr;
 	}
 
@@ -1151,12 +1165,44 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
 }
 EXPORT_SYMBOL(p9_client_link);
 
+int p9_client_fsync(struct p9_fid *fid, int datasync)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
+			fid->fid, datasync);
+	err = 0;
+	clnt = fid->clnt;
+
+	req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
+
+	p9_free_req(clnt, req);
+
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_fsync);
+
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 
+	if (!fid) {
+		P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n");
+		dump_stack();
+		return 0;
+	}
+
 	P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
 	err = 0;
 	clnt = fid->clnt;
@@ -1240,16 +1286,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 
 	if (data) {
 		memmove(data, dataptr, count);
-	}
-
-	if (udata) {
+	} else {
 		err = copy_to_user(udata, dataptr, count);
 		if (err) {
 			err = -EFAULT;
 			goto free_and_error;
 		}
 	}
-
 	p9_free_req(clnt, req);
 	return count;
 
@@ -1761,3 +1804,96 @@ error:
 
 }
 EXPORT_SYMBOL(p9_client_mkdir_dotl);
+
+int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	err = 0;
+	clnt = fid->clnt;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "
+			"start %lld length %lld proc_id %d client_id %s\n",
+			fid->fid, flock->type, flock->flags, flock->start,
+			flock->length, flock->proc_id, flock->client_id);
+
+	req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type,
+			flock->flags, flock->start, flock->length,
+			flock->proc_id, flock->client_id);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
+error:
+	p9_free_req(clnt, req);
+	return err;
+
+}
+EXPORT_SYMBOL(p9_client_lock_dotl);
+
+int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	err = 0;
+	clnt = fid->clnt;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "
+		"length %lld proc_id %d client_id %s\n", fid->fid, glock->type,
+		glock->start, glock->length, glock->proc_id, glock->client_id);
+
+	req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type,
+		glock->start, glock->length, glock->proc_id, glock->client_id);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
+			&glock->start, &glock->length, &glock->proc_id,
+			&glock->client_id);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
+		"proc_id %d client_id %s\n", glock->type, glock->start,
+		glock->length, glock->proc_id, glock->client_id);
+error:
+	p9_free_req(clnt, req);
+	return err;
+}
+EXPORT_SYMBOL(p9_client_getlock_dotl);
+
+int p9_client_readlink(struct p9_fid *fid, char **target)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	err = 0;
+	clnt = fid->clnt;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
+
+	req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
error:
+	p9_free_req(clnt, req);
+	return err;
+}
+EXPORT_SYMBOL(p9_client_readlink);
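
The p9_check_errors() rework splits the two error-reply shapes: legacy 9P2000/9P2000.u servers return a string (plus an optional numeric errno under .u) that must be mapped with p9_errstr2errno(), while 9P2000.L servers return a native Linux errno that is negated and used directly. A schematic of the two payloads as the code now handles them; the struct names here are purely illustrative, not from the 9P headers:

/* 9P2000 / 9P2000.u "Rerror": decoded with format "s?d" */
struct rerror_legacy {
	char *ename;	/* error string, e.g. "No such file or directory" */
	int   ecode;	/* numeric errno, present only with 9P2000.u */
};

/* 9P2000.L "Rlerror": decoded with format "d" */
struct rerror_dotl {
	int ecode;	/* native Linux errno, returned as -ecode */
};
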
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 3acd3afb20c8..45c15f491401 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -122,9 +122,8 @@ static size_t
 pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 {
 	size_t len = MIN(pdu->capacity - pdu->size, size);
-	int err = copy_from_user(&pdu->sdata[pdu->size], udata, len);
-	if (err)
-		printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
+	if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
+		len = 0;
 
 	pdu->size += len;
 	return size - len;
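
The pdu_write_u() fix leans on the fact that copy_from_user() returns the number of bytes it could *not* copy, not an errno; any short copy now forces len to 0 so the PDU size never advances past bytes that were actually written. A condensed sketch of that convention (kernel context assumed):

#include <linux/uaccess.h>

static size_t copy_all_or_nothing(void *dst, const void __user *src, size_t len)
{
	if (copy_from_user(dst, src, len))	/* nonzero => partial copy */
		return 0;			/* caller treats it as no data */
	return len;
}
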
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index b88515936e4b..c8f3f72ab20e 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -75,6 +75,8 @@ struct virtio_chan {
 	struct p9_client *client;
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
+	int ring_bufs_avail;
+	wait_queue_head_t *vc_wq;
 
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
@@ -134,16 +136,30 @@ static void req_done(struct virtqueue *vq)
 	struct p9_fcall *rc;
 	unsigned int len;
 	struct p9_req_t *req;
+	unsigned long flags;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
 
-	while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) {
-		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
-		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
-		req = p9_tag_lookup(chan->client, rc->tag);
-		req->status = REQ_STATUS_RCVD;
-		p9_client_cb(chan->client, req);
-	}
+	do {
+		spin_lock_irqsave(&chan->lock, flags);
+		rc = virtqueue_get_buf(chan->vq, &len);
+
+		if (rc != NULL) {
+			if (!chan->ring_bufs_avail) {
+				chan->ring_bufs_avail = 1;
+				wake_up(chan->vc_wq);
+			}
+			spin_unlock_irqrestore(&chan->lock, flags);
+			P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+			P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
+				   rc->tag);
+			req = p9_tag_lookup(chan->client, rc->tag);
+			req->status = REQ_STATUS_RCVD;
+			p9_client_cb(chan->client, req);
+		} else {
+			spin_unlock_irqrestore(&chan->lock, flags);
+		}
+	} while (rc != NULL);
 }
 
 /**
@@ -199,23 +215,43 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 	int in, out;
 	struct virtio_chan *chan = client->trans;
 	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
+	unsigned long flags;
+	int err;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
+req_retry:
+	req->status = REQ_STATUS_SENT;
+
+	spin_lock_irqsave(&chan->lock, flags);
 	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
 			   req->tc->size);
 	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
 			  client->msize);
 
-	req->status = REQ_STATUS_SENT;
-
-	if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
-		P9_DPRINTK(P9_DEBUG_TRANS,
-			"9p debug: virtio rpc add_buf returned failure");
-		return -EIO;
+	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	if (err < 0) {
+		if (err == -ENOSPC) {
+			chan->ring_bufs_avail = 0;
+			spin_unlock_irqrestore(&chan->lock, flags);
+			err = wait_event_interruptible(*chan->vc_wq,
+							chan->ring_bufs_avail);
+			if (err == -ERESTARTSYS)
+				return err;
+
+			P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
+			goto req_retry;
+		} else {
+			spin_unlock_irqrestore(&chan->lock, flags);
+			P9_DPRINTK(P9_DEBUG_TRANS,
+					"9p debug: "
+					"virtio rpc add_buf returned failure");
+			return -EIO;
+		}
 	}
 
 	virtqueue_kick(chan->vq);
+	spin_unlock_irqrestore(&chan->lock, flags);
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
 	return 0;
@@ -290,14 +326,23 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	chan->tag_len = tag_len;
 	err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
 	if (err) {
-		kfree(tag);
-		goto out_free_vq;
+		goto out_free_tag;
 	}
+	chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+	if (!chan->vc_wq) {
+		err = -ENOMEM;
+		goto out_free_tag;
+	}
+	init_waitqueue_head(chan->vc_wq);
+	chan->ring_bufs_avail = 1;
+
 	mutex_lock(&virtio_9p_lock);
 	list_add_tail(&chan->chan_list, &virtio_chan_list);
 	mutex_unlock(&virtio_9p_lock);
 	return 0;
 
+out_free_tag:
+	kfree(tag);
 out_free_vq:
 	vdev->config->del_vqs(vdev);
 	kfree(chan);
@@ -371,6 +416,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	mutex_unlock(&virtio_9p_lock);
 	sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
 	kfree(chan->tag);
+	kfree(chan->vc_wq);
 	kfree(chan);
 
 }
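
Taken together, the trans_virtio.c hunks add backpressure: a writer that gets -ENOSPC from virtqueue_add_buf() clears ring_bufs_avail and sleeps on vc_wq, and req_done() wakes it on the first completed buffer. A condensed sketch of the handshake, where add_to_ring() is a hypothetical stand-in for the virtqueue call:

static int send_with_backpressure(struct virtio_chan *chan, void *buf)
{
	unsigned long flags;
	int err;
retry:
	spin_lock_irqsave(&chan->lock, flags);
	err = add_to_ring(chan, buf);	/* stand-in for virtqueue_add_buf() */
	if (err == -ENOSPC) {
		chan->ring_bufs_avail = 0;
		spin_unlock_irqrestore(&chan->lock, flags);
		err = wait_event_interruptible(*chan->vc_wq,
					       chan->ring_bufs_avail);
		if (err)
			return err;	/* -ERESTARTSYS: interrupted by signal */
		goto retry;		/* ring drained, try again */
	}
	spin_unlock_irqrestore(&chan->lock, flags);
	return err;
}
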
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 799c631f0fed..f7fa67c78766 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -143,12 +143,13 @@ static struct class atm_class = {
 	.dev_uevent		= atm_uevent,
 };
 
-int atm_register_sysfs(struct atm_dev *adev)
+int atm_register_sysfs(struct atm_dev *adev, struct device *parent)
 {
 	struct device *cdev = &adev->class_dev;
 	int i, j, err;
 
 	cdev->class = &atm_class;
+	cdev->parent = parent;
 	dev_set_drvdata(cdev, adev);
 
 	dev_set_name(cdev, "%s%d", adev->type, adev->number);
diff --git a/net/atm/resources.c b/net/atm/resources.c
index d29e58261511..23f45ce6f351 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -74,8 +74,9 @@ struct atm_dev *atm_dev_lookup(int number)
 }
 EXPORT_SYMBOL(atm_dev_lookup);
 
-struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops,
-				 int number, unsigned long *flags)
+struct atm_dev *atm_dev_register(const char *type, struct device *parent,
+				 const struct atmdev_ops *ops, int number,
+				 unsigned long *flags)
 {
 	struct atm_dev *dev, *inuse;
 
@@ -115,7 +116,7 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops,
 		goto out_fail;
 	}
 
-	if (atm_register_sysfs(dev) < 0) {
+	if (atm_register_sysfs(dev, parent) < 0) {
 		pr_err("atm_register_sysfs failed for dev %s\n", type);
 		atm_proc_dev_deregister(dev);
 		goto out_fail;
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 126fb1840dfb..521431e30507 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -42,6 +42,6 @@ static inline void atm_proc_dev_deregister(struct atm_dev *dev)
 
 #endif /* CONFIG_PROC_FS */
 
-int atm_register_sysfs(struct atm_dev *adev);
+int atm_register_sysfs(struct atm_dev *adev, struct device *parent);
 void atm_unregister_sysfs(struct atm_dev *adev);
 #endif
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 26eaebf4aaa9..bb86d2932394 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1392,6 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 	ax25_cb *ax25;
 	int err = 0;
 
+	memset(fsa, 0, sizeof(fsa));
 	lock_sock(sk);
 	ax25 = ax25_sk(sk);
 
@@ -1403,7 +1404,6 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 
 		fsa->fsa_ax25.sax25_family = AF_AX25;
 		fsa->fsa_ax25.sax25_call   = ax25->dest_addr;
-		fsa->fsa_ax25.sax25_ndigis = 0;
 
 		if (ax25->digipeat != NULL) {
 			ndigi = ax25->digipeat->ndigi;
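
The ax25_getname() change closes a kernel-to-user infoleak: every byte of the sockaddr copied back to userspace must be initialized, including padding the compiler inserts between fields, so clearing one field at a time is not enough. (Note that sizeof(fsa) here is the size of the *pointer*, so only the first 4 or 8 bytes are cleared; the complete fix is memset(fsa, 0, sizeof(*fsa)), which a follow-up change made.) The general copy-out rule, sketched:

static int copy_name_to_user(void __user *uaddr, const void *src, int alen)
{
	struct sockaddr_storage tmp;

	memset(&tmp, 0, sizeof(tmp));	/* clear padding, not just fields */
	memcpy(&tmp, src, alen);
	return copy_to_user(uaddr, &tmp, alen) ? -EFAULT : 0;
}
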
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index eb5b256ffc88..f19e347f56f6 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -437,7 +437,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	ip6h = ipv6_hdr(skb);
 
 	*(__force __be32 *)ip6h = htonl(0x60000000);
-	ip6h->payload_len = 8 + sizeof(*mldq);
+	ip6h->payload_len = htons(8 + sizeof(*mldq));
 	ip6h->nexthdr = IPPROTO_HOPOPTS;
 	ip6h->hop_limit = 1;
 	ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
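
The one-line br_multicast.c fix matters because payload_len is an on-wire big-endian field (__be16): storing the host-order sum works only on big-endian machines. Worked through for a total of 32 bytes: the field must carry 0x0020 in network order, but a raw store on a little-endian host emits the bytes 20 00, which a receiver reads as 0x2000 (8192). Minimal illustration:

#include <asm/byteorder.h>

/* wrong on little-endian: receiver sees 0x2000 (8192) */
__be16 raw = (__force __be16)0x0020;
/* correct everywhere: bytes 00 20 on the wire */
__be16 ok  = htons(0x0020);
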
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 76ae68303d3a..d522d8c1703e 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -16,11 +16,18 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
 {
 	struct dev_info *dev_info;
 	enum cfcnfg_phy_preference pref;
+	int res;
+
 	memset(l, 0, sizeof(*l));
-	l->priority = s->priority;
+	/* In caif protocol low value is high priority */
+	l->priority = CAIF_PRIO_MAX - s->priority + 1;
 
-	if (s->link_name[0] != '\0')
-		l->phyid = cfcnfg_get_named(cnfg, s->link_name);
+	if (s->ifindex != 0){
+		res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
+		if (res < 0)
+			return res;
+		l->phyid = res;
+	}
 	else {
 		switch (s->link_selector) {
 		case CAIF_LINK_HIGH_BANDW:
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index b99369a055d1..a42a408306e4 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -307,6 +307,8 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 
 	case NETDEV_UNREGISTER:
 		caifd = caif_get(dev);
+		if (caifd == NULL)
+			break;
 		netdev_info(dev, "unregister\n");
 		atomic_set(&caifd->state, what);
 		caif_device_destroy(dev);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2eca2dd0000f..1bf0cf503796 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -716,8 +716,7 @@ static int setsockopt(struct socket *sock,
 {
 	struct sock *sk = sock->sk;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	int prio, linksel;
-	struct ifreq ifreq;
+	int linksel;
 
 	if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
 		return -ENOPROTOOPT;
@@ -735,33 +734,6 @@ static int setsockopt(struct socket *sock,
 		release_sock(&cf_sk->sk);
 		return 0;
 
-	case SO_PRIORITY:
-		if (lvl != SOL_SOCKET)
-			goto bad_sol;
-		if (ol < sizeof(int))
-			return -EINVAL;
-		if (copy_from_user(&prio, ov, sizeof(int)))
-			return -EINVAL;
-		lock_sock(&(cf_sk->sk));
-		cf_sk->conn_req.priority = prio;
-		release_sock(&cf_sk->sk);
-		return 0;
-
-	case SO_BINDTODEVICE:
-		if (lvl != SOL_SOCKET)
-			goto bad_sol;
-		if (ol < sizeof(struct ifreq))
-			return -EINVAL;
-		if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
-			return -EFAULT;
-		lock_sock(&(cf_sk->sk));
-		strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
-			sizeof(cf_sk->conn_req.link_name));
-		cf_sk->conn_req.link_name
-			[sizeof(cf_sk->conn_req.link_name)-1] = 0;
-		release_sock(&cf_sk->sk);
-		return 0;
-
 	case CAIFSO_REQ_PARAM:
 		if (lvl != SOL_CAIF)
 			goto bad_sol;
@@ -880,6 +852,18 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	sock->state = SS_CONNECTING;
 	sk->sk_state = CAIF_CONNECTING;
 
+	/* Check priority value comming from socket */
+	/* if priority value is out of range it will be ajusted */
+	if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
+		cf_sk->conn_req.priority = CAIF_PRIO_MAX;
+	else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN)
+		cf_sk->conn_req.priority = CAIF_PRIO_MIN;
+	else
+		cf_sk->conn_req.priority = cf_sk->sk.sk_priority;
+
+	/*ifindex = id of the interface.*/
+	cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if;
+
 	dbfs_atomic_inc(&cnt.num_connect_req);
 	cf_sk->layer.receive = caif_sktrecv_cb;
 	err = caif_connect_client(&cf_sk->conn_req,
@@ -905,6 +889,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	cf_sk->maxframe = mtu - (headroom + tailroom);
 	if (cf_sk->maxframe < 1) {
 		pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
+		err = -ENODEV;
 		goto out;
 	}
 
@@ -1142,7 +1127,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 	set_rx_flow_on(cf_sk);
 
 	/* Set default options on configuration */
-	cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
+	cf_sk->sk.sk_priority= CAIF_PRIO_NORMAL;
 	cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
 	cf_sk->conn_req.protocol = protocol;
 	/* Increase the number of sockets created. */
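
With the private SO_PRIORITY and SO_BINDTODEVICE cases deleted, CAIF sockets fall back to the generic handling in net/core/sock.c: sk_priority is read at connect time (clamped to [CAIF_PRIO_MIN, CAIF_PRIO_MAX]) and sk_bound_dev_if supplies the ifindex that replaces the old link_name lookup. A userspace usage sketch; the interface name is illustrative:

#include <sys/socket.h>

static int tune_caif_socket(int fd)
{
	int prio = 4;			/* lands in cf_sk->sk.sk_priority */
	const char dev[] = "caif0";	/* resolved to sk_bound_dev_if */

	if (setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
		return -1;
	return setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, dev, sizeof(dev));
}
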
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 41adafd18914..21ede141018a 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -173,18 +173,15 @@ static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
 	return NULL;
 }
 
-int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
+
+int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
 {
 	int i;
-
-	/* Try to match with specified name */
-	for (i = 0; i < MAX_PHY_LAYERS; i++) {
-		if (cnfg->phy_layers[i].frm_layer != NULL
-		    && strcmp(cnfg->phy_layers[i].phy_layer->name,
-			      name) == 0)
-			return cnfg->phy_layers[i].frm_layer->id;
-	}
-	return 0;
+	for (i = 0; i < MAX_PHY_LAYERS; i++)
+		if (cnfg->phy_layers[i].frm_layer != NULL &&
+		    cnfg->phy_layers[i].ifindex == ifi)
+			return i;
+	return -ENODEV;
 }
 
 int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 08f267a109aa..3cd8f978e309 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -361,11 +361,10 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
 	struct cfctrl_request_info *p, *tmp;
 	struct cfctrl *ctrl = container_obj(layr);
 	spin_lock(&ctrl->info_list_lock);
-	pr_warn("enter\n");
 
 	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
 		if (p->client_layer == adap_layer) {
-			pr_warn("cancel req :%d\n", p->sequence_no);
+			pr_debug("cancel req :%d\n", p->sequence_no);
 			list_del(&p->list);
 			kfree(p);
 		}
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 496fda9ac66f..11a2af4c162a 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -12,6 +12,8 @@
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>
 
+#define container_obj(layr) ((struct cfsrvl *) layr)
+
 static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
@@ -38,5 +40,17 @@ static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
 
 static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
 {
+	struct cfsrvl *service = container_obj(layr);
+	struct caif_payload_info *info;
+	int ret;
+
+	if (!cfsrvl_ready(service, &ret))
+		return ret;
+
+	/* Add info for MUX-layer to route the packet out */
+	info = cfpkt_info(pkt);
+	info->channel_id = service->layer.id;
+	info->dev_info = &service->dev_info;
+
 	return layr->dn->transmit(layr->dn, pkt);
 }
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index bde8481e8d25..e2fb5fa75795 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -193,7 +193,7 @@ out:
 
 static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
 {
-	caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size);
+	caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size);
 
 	/* Add info for MUX-layer to route the packet out. */
 	cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 08ffe9e4be20..6faa8256e10c 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
 	struct list_head tx_ops;
 	unsigned long dropped_usr_msgs;
 	struct proc_dir_entry *bcm_proc_read;
-	char procname [9]; /* pointer printed in ASCII with \0 */
+	char procname [20]; /* pointer printed in ASCII with \0 */
 };
 
 static inline struct bcm_sock *bcm_sk(const struct sock *sk)
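
The procname bump is a sizing fix: the name is a kernel pointer rendered in ASCII, and 9 bytes only covers a 32-bit hex value plus the NUL. Worked out, assuming the "%p" format (the format string is outside this hunk): a 64-bit pointer prints as 16 hex digits, so 16 + 1 = 17 bytes are needed and the old buffer overflowed its heap allocation on 64-bit; 20 bytes leaves headroom.

char procname[20];
snprintf(procname, sizeof(procname), "%p", sk);	/* 64-bit: 17 bytes used */
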
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index aab1cabb8035..5f19415ec9c0 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,9 +1,6 @@
 #
 # Makefile for CEPH filesystem.
 #
-
-ifneq ($(KERNELRELEASE),)
-
 obj-$(CONFIG_CEPH_LIB) += libceph.o
 
 libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
@@ -16,22 +13,3 @@ libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 	ceph_fs.o ceph_strings.o ceph_hash.o \
 	pagevec.o
 
-else
-#Otherwise we were called directly from the command
-# line; invoke the kernel build system.
-
-KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-PWD := $(shell pwd)
-
-default: all
-
-all:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
-
-modules_install:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
-
-clean:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
-
-endif
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 53d8abfa25d5..bf3e6a13c215 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -19,7 +19,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
 	if (b->vec.iov_base) {
 		b->is_vmalloc = false;
 	} else {
-		b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL);
+		b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
 		if (!b->vec.iov_base) {
 			kfree(b);
 			return NULL;
diff --git a/net/compat.c b/net/compat.c
index 63d260e81472..3649d5895361 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -41,10 +41,12 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
 		compat_size_t len;
 
 		if (get_user(len, &uiov32->iov_len) ||
-		    get_user(buf, &uiov32->iov_base)) {
-			tot_len = -EFAULT;
-			break;
-		}
+		    get_user(buf, &uiov32->iov_base))
+			return -EFAULT;
+
+		if (len > INT_MAX - tot_len)
+			len = INT_MAX - tot_len;
+
 		tot_len += len;
 		kiov->iov_base = compat_ptr(buf);
 		kiov->iov_len = (__kernel_size_t) len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 78b5a89b0f40..0dd54a69dace 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1685,10 +1685,10 @@ EXPORT_SYMBOL(netif_device_attach);
 
 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 {
-	return ((features & NETIF_F_GEN_CSUM) ||
-		((features & NETIF_F_IP_CSUM) &&
+	return ((features & NETIF_F_NO_CSUM) ||
+		((features & NETIF_F_V4_CSUM) &&
 		 protocol == htons(ETH_P_IP)) ||
-		((features & NETIF_F_IPV6_CSUM) &&
+		((features & NETIF_F_V6_CSUM) &&
 		 protocol == htons(ETH_P_IPV6)) ||
 		((features & NETIF_F_FCOE_CRC) &&
 		 protocol == htons(ETH_P_FCOE)));
@@ -1696,22 +1696,18 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 
 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
 {
+	__be16 protocol = skb->protocol;
 	int features = dev->features;
 
-	if (vlan_tx_tag_present(skb))
+	if (vlan_tx_tag_present(skb)) {
 		features &= dev->vlan_features;
-
-	if (can_checksum_protocol(features, skb->protocol))
-		return true;
-
-	if (skb->protocol == htons(ETH_P_8021Q)) {
+	} else if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		if (can_checksum_protocol(dev->features & dev->vlan_features,
-					  veh->h_vlan_encapsulated_proto))
-			return true;
+		protocol = veh->h_vlan_encapsulated_proto;
+		features &= dev->vlan_features;
 	}
 
-	return false;
+	return can_checksum_protocol(features, protocol);
 }
 
 /**
@@ -2135,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	} else {
 		struct sock *sk = skb->sk;
 		queue_index = sk_tx_queue_get(sk);
-		if (queue_index < 0) {
+		if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
 
 			queue_index = 0;
 			if (dev->real_num_tx_queues > 1)
@@ -2213,7 +2209,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 }
 
 static DEFINE_PER_CPU(int, xmit_recursion);
-#define RECURSION_LIMIT 3
+#define RECURSION_LIMIT 10
 
 /**
  *	dev_queue_xmit - transmit a buffer
@@ -2413,7 +2409,7 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
 /*
@@ -2425,7 +2421,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		       struct rps_dev_flow **rflowp)
 {
 	struct netdev_rx_queue *rxqueue;
-	struct rps_map *map = NULL;
+	struct rps_map *map;
 	struct rps_dev_flow_table *flow_table;
 	struct rps_sock_flow_table *sock_flow_table;
 	int cpu = -1;
@@ -2444,15 +2440,15 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	} else
 		rxqueue = dev->_rx;
 
-	if (rxqueue->rps_map) {
-		map = rcu_dereference(rxqueue->rps_map);
-		if (map && map->len == 1) {
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		if (map->len == 1) {
 			tcpu = map->cpus[0];
 			if (cpu_online(tcpu))
 				cpu = tcpu;
 			goto done;
 		}
-	} else if (!rxqueue->rps_flow_table) {
+	} else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
 		goto done;
 	}
 
@@ -5416,7 +5412,7 @@ void netdev_run_todo(void)
 		/* paranoia */
 		BUG_ON(netdev_refcnt_read(dev));
 		WARN_ON(rcu_dereference_raw(dev->ip_ptr));
-		WARN_ON(dev->ip6_ptr);
+		WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
 		if (dev->destructor)
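
Besides the checksum-feature and RCU-annotation changes, raising RECURSION_LIMIT to 10 loosens the guard on nested dev_queue_xmit() calls, which stacked devices (VLAN over bonding over a physical NIC, tunnels, and so on) hit legitimately. The guard itself is the classic per-CPU counter pattern; a condensed sketch with a hypothetical do_xmit() standing in for the real transmit path:

static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10

static int xmit_guarded(struct sk_buff *skb, struct net_device *dev)
{
	int rc;

	if (this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
		return -ENETDOWN;	/* too deeply nested: drop */

	this_cpu_inc(xmit_recursion);
	rc = do_xmit(skb, dev);		/* hypothetical transmit call */
	this_cpu_dec(xmit_recursion);
	return rc;
}
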
diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f1..b99c7c7ffce2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
 
 static struct notifier_block dst_dev_notifier = {
 	.notifier_call	= dst_dev_event,
+	.priority = -10, /* must be called after other network notifiers */
 };
 
 void __init dst_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 1bc3f253ba6c..82a4369ae150 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -351,12 +351,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 		list_for_each_entry(r, &ops->rules_list, list) {
 			if (r->pref == rule->target) {
-				rule->ctarget = r;
+				RCU_INIT_POINTER(rule->ctarget, r);
 				break;
 			}
 		}
 
-		if (rule->ctarget == NULL)
+		if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
 			unresolved = 1;
 	} else if (rule->action == FR_ACT_GOTO)
 		goto errout_free;
@@ -373,6 +373,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	fib_rule_get(rule);
 
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, &ops->rules_list);
+
 	if (ops->unresolved_rules) {
 		/*
 		 * There are unresolved goto rules in the list, check if
@@ -381,7 +386,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		list_for_each_entry(r, &ops->rules_list, list) {
 			if (r->action == FR_ACT_GOTO &&
 			    r->target == rule->pref) {
-				BUG_ON(r->ctarget != NULL);
+				BUG_ON(rtnl_dereference(r->ctarget) != NULL);
 				rcu_assign_pointer(r->ctarget, rule);
 				if (--ops->unresolved_rules == 0)
 					break;
@@ -395,11 +400,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (unresolved)
 		ops->unresolved_rules++;
 
-	if (last)
-		list_add_rcu(&rule->list, &last->list);
-	else
-		list_add_rcu(&rule->list, &ops->rules_list);
-
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
@@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		 */
 		if (ops->nr_goto_rules > 0) {
 			list_for_each_entry(tmp, &ops->rules_list, list) {
-				if (tmp->ctarget == rule) {
+				if (rtnl_dereference(tmp->ctarget) == rule) {
 					rcu_assign_pointer(tmp->ctarget, NULL);
 					ops->unresolved_rules++;
 				}
@@ -545,7 +545,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	frh->action = rule->action;
 	frh->flags = rule->flags;
 
-	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+	if (rule->action == FR_ACT_GOTO &&
+	    rcu_dereference_raw(rule->ctarget) == NULL)
 		frh->flags |= FIB_RULE_UNRESOLVED;
 
 	if (rule->iifname[0]) {
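
The fib_rules.c hunks pair two publication primitives: RCU_INIT_POINTER() for stores that need no memory barrier (the rule is not yet reachable by readers, or the value is NULL) and rcu_assign_pointer() when handing an initialized object to concurrent readers — which is also why the list_add_rcu() moves earlier, so the rule is on the list before its ctarget can be resolved by others. A sketch of the distinction, with illustrative types:

struct item   { int v; };
struct holder { struct item __rcu *cur; };

static void set_item(struct holder *h, struct item *it, bool published)
{
	if (!published)
		RCU_INIT_POINTER(h->cur, it);	/* no barrier needed yet */
	else
		rcu_assign_pointer(h->cur, it);	/* orders init before publish */
}
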
diff --git a/net/core/filter.c b/net/core/filter.c
index 7adf50352918..ae21a0d3c4a2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -89,8 +89,8 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
89 rcu_read_lock_bh(); 89 rcu_read_lock_bh();
90 filter = rcu_dereference_bh(sk->sk_filter); 90 filter = rcu_dereference_bh(sk->sk_filter);
91 if (filter) { 91 if (filter) {
92 unsigned int pkt_len = sk_run_filter(skb, filter->insns, 92 unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
93 filter->len); 93
94 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 94 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
95 } 95 }
96 rcu_read_unlock_bh(); 96 rcu_read_unlock_bh();
@@ -112,39 +112,41 @@ EXPORT_SYMBOL(sk_filter);
112 */ 112 */
113unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) 113unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
114{ 114{
115 struct sock_filter *fentry; /* We walk down these */
116 void *ptr; 115 void *ptr;
117 u32 A = 0; /* Accumulator */ 116 u32 A = 0; /* Accumulator */
118 u32 X = 0; /* Index Register */ 117 u32 X = 0; /* Index Register */
119 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 118 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
119 unsigned long memvalid = 0;
120 u32 tmp; 120 u32 tmp;
121 int k; 121 int k;
122 int pc; 122 int pc;
123 123
124 BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
124 /* 125 /*
125 * Process array of filter instructions. 126 * Process array of filter instructions.
126 */ 127 */
127 for (pc = 0; pc < flen; pc++) { 128 for (pc = 0; pc < flen; pc++) {
128 fentry = &filter[pc]; 129 const struct sock_filter *fentry = &filter[pc];
130 u32 f_k = fentry->k;
129 131
130 switch (fentry->code) { 132 switch (fentry->code) {
131 case BPF_S_ALU_ADD_X: 133 case BPF_S_ALU_ADD_X:
132 A += X; 134 A += X;
133 continue; 135 continue;
134 case BPF_S_ALU_ADD_K: 136 case BPF_S_ALU_ADD_K:
135 A += fentry->k; 137 A += f_k;
136 continue; 138 continue;
137 case BPF_S_ALU_SUB_X: 139 case BPF_S_ALU_SUB_X:
138 A -= X; 140 A -= X;
139 continue; 141 continue;
140 case BPF_S_ALU_SUB_K: 142 case BPF_S_ALU_SUB_K:
141 A -= fentry->k; 143 A -= f_k;
142 continue; 144 continue;
143 case BPF_S_ALU_MUL_X: 145 case BPF_S_ALU_MUL_X:
144 A *= X; 146 A *= X;
145 continue; 147 continue;
146 case BPF_S_ALU_MUL_K: 148 case BPF_S_ALU_MUL_K:
147 A *= fentry->k; 149 A *= f_k;
148 continue; 150 continue;
149 case BPF_S_ALU_DIV_X: 151 case BPF_S_ALU_DIV_X:
150 if (X == 0) 152 if (X == 0)
@@ -152,49 +154,49 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
152 A /= X; 154 A /= X;
153 continue; 155 continue;
154 case BPF_S_ALU_DIV_K: 156 case BPF_S_ALU_DIV_K:
155 A /= fentry->k; 157 A /= f_k;
156 continue; 158 continue;
157 case BPF_S_ALU_AND_X: 159 case BPF_S_ALU_AND_X:
158 A &= X; 160 A &= X;
159 continue; 161 continue;
160 case BPF_S_ALU_AND_K: 162 case BPF_S_ALU_AND_K:
161 A &= fentry->k; 163 A &= f_k;
162 continue; 164 continue;
163 case BPF_S_ALU_OR_X: 165 case BPF_S_ALU_OR_X:
164 A |= X; 166 A |= X;
165 continue; 167 continue;
166 case BPF_S_ALU_OR_K: 168 case BPF_S_ALU_OR_K:
167 A |= fentry->k; 169 A |= f_k;
168 continue; 170 continue;
169 case BPF_S_ALU_LSH_X: 171 case BPF_S_ALU_LSH_X:
170 A <<= X; 172 A <<= X;
171 continue; 173 continue;
172 case BPF_S_ALU_LSH_K: 174 case BPF_S_ALU_LSH_K:
173 A <<= fentry->k; 175 A <<= f_k;
174 continue; 176 continue;
175 case BPF_S_ALU_RSH_X: 177 case BPF_S_ALU_RSH_X:
176 A >>= X; 178 A >>= X;
177 continue; 179 continue;
178 case BPF_S_ALU_RSH_K: 180 case BPF_S_ALU_RSH_K:
179 A >>= fentry->k; 181 A >>= f_k;
180 continue; 182 continue;
181 case BPF_S_ALU_NEG: 183 case BPF_S_ALU_NEG:
182 A = -A; 184 A = -A;
183 continue; 185 continue;
 		case BPF_S_JMP_JA:
-			pc += fentry->k;
+			pc += f_k;
 			continue;
 		case BPF_S_JMP_JGT_K:
-			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A > f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGE_K:
-			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A >= f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JEQ_K:
-			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A == f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JSET_K:
-			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A & f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGT_X:
 			pc += (A > X) ? fentry->jt : fentry->jf;
@@ -209,7 +211,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			pc += (A & X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_LD_W_ABS:
-			k = fentry->k;
+			k = f_k;
load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
@@ -218,7 +220,7 @@ load_w:
 			}
 			break;
 		case BPF_S_LD_H_ABS:
-			k = fentry->k;
+			k = f_k;
load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
@@ -227,7 +229,7 @@ load_h:
 			}
 			break;
 		case BPF_S_LD_B_ABS:
-			k = fentry->k;
+			k = f_k;
load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
 			if (ptr != NULL) {
@@ -242,32 +244,34 @@ load_b:
 			X = skb->len;
 			continue;
 		case BPF_S_LD_W_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_w;
 		case BPF_S_LD_H_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_h;
 		case BPF_S_LD_B_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_b;
 		case BPF_S_LDX_B_MSH:
-			ptr = load_pointer(skb, fentry->k, 1, &tmp);
+			ptr = load_pointer(skb, f_k, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
 		case BPF_S_LD_IMM:
-			A = fentry->k;
+			A = f_k;
 			continue;
 		case BPF_S_LDX_IMM:
-			X = fentry->k;
+			X = f_k;
 			continue;
 		case BPF_S_LD_MEM:
-			A = mem[fentry->k];
+			A = (memvalid & (1UL << f_k)) ?
+				mem[f_k] : 0;
 			continue;
 		case BPF_S_LDX_MEM:
-			X = mem[fentry->k];
+			X = (memvalid & (1UL << f_k)) ?
+				mem[f_k] : 0;
 			continue;
 		case BPF_S_MISC_TAX:
 			X = A;
@@ -276,14 +280,16 @@ load_b:
 			A = X;
 			continue;
 		case BPF_S_RET_K:
-			return fentry->k;
+			return f_k;
 		case BPF_S_RET_A:
 			return A;
 		case BPF_S_ST:
-			mem[fentry->k] = A;
+			memvalid |= 1UL << f_k;
+			mem[f_k] = A;
 			continue;
 		case BPF_S_STX:
-			mem[fentry->k] = X;
+			memvalid |= 1UL << f_k;
+			mem[f_k] = X;
 			continue;
 		default:
 			WARN_ON(1);
@@ -583,23 +589,16 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 EXPORT_SYMBOL(sk_chk_filter);
 
 /**
- * sk_filter_rcu_release: Release a socket filter by rcu_head
+ * sk_filter_release_rcu - Release a socket filter by rcu_head
  * @rcu: rcu_head that contains the sk_filter to free
  */
-static void sk_filter_rcu_release(struct rcu_head *rcu)
+void sk_filter_release_rcu(struct rcu_head *rcu)
 {
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
-	sk_filter_release(fp);
-}
-
-static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
-{
-	unsigned int size = sk_filter_len(fp);
-
-	atomic_sub(size, &sk->sk_omem_alloc);
-	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+	kfree(fp);
 }
+EXPORT_SYMBOL(sk_filter_release_rcu);
 
 /**
  * sk_attach_filter - attach a socket filter
@@ -643,7 +642,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	rcu_assign_pointer(sk->sk_filter, fp);
 
 	if (old_fp)
-		sk_filter_delayed_uncharge(sk, old_fp);
+		sk_filter_uncharge(sk, old_fp);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -657,7 +656,7 @@ int sk_detach_filter(struct sock *sk)
 			sock_owned_by_user(sk));
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
-		sk_filter_delayed_uncharge(sk, filter);
+		sk_filter_uncharge(sk, filter);
 		ret = 0;
 	}
 	return ret;
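
The memvalid change above closes an information leak: a filter could previously load BPF scratch-memory slots (M[k]) that were never stored to, reading stale kernel stack bytes. A minimal userspace sketch of the same guard, with illustrative names only (not the kernel code):

/* Model of the memvalid guard: a load from scratch memory M[k]
 * yields 0 unless a store to slot k happened first. */
#include <stdint.h>
#include <stdio.h>

#define BPF_MEMWORDS 16

static uint32_t mem[BPF_MEMWORDS];
static unsigned long memvalid;

static void st(uint32_t k, uint32_t val)
{
	memvalid |= 1UL << k;
	mem[k] = val;
}

static uint32_t ld(uint32_t k)
{
	return (memvalid & (1UL << k)) ? mem[k] : 0;
}

int main(void)
{
	printf("%u\n", ld(3));	/* 0: never stored, nothing leaks */
	st(3, 42);
	printf("%u\n", ld(3));	/* 42 */
	return 0;
}
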
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 72aceb1fe4fa..c40f27e7d208 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -35,10 +35,9 @@
  *	in any case.
  */
 
-long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
 {
-	int size, ct;
-	long err;
+	int size, ct, err;
 
 	if (m->msg_namelen) {
 		if (mode == VERIFY_READ) {
@@ -62,14 +61,13 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
 	err = 0;
 
 	for (ct = 0; ct < m->msg_iovlen; ct++) {
-		err += iov[ct].iov_len;
-		/*
-		 * Goal is not to verify user data, but to prevent returning
-		 * negative value, which is interpreted as errno.
-		 * Overflow is still possible, but it is harmless.
-		 */
-		if (err < 0)
-			return -EMSGSIZE;
+		size_t len = iov[ct].iov_len;
+
+		if (len > INT_MAX - err) {
+			len = INT_MAX - err;
+			iov[ct].iov_len = len;
+		}
+		err += len;
 	}
 
 	return err;
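
The verify_iovec() change replaces an after-the-fact signed-overflow test with a clamp: each iov_len is truncated so the running total can never exceed INT_MAX. A userspace sketch of that clamp (illustrative only):

#include <limits.h>
#include <stdio.h>
#include <sys/uio.h>

static int total_len(struct iovec *iov, int iovlen)
{
	int err = 0;
	int ct;

	for (ct = 0; ct < iovlen; ct++) {
		size_t len = iov[ct].iov_len;

		if (len > (size_t)(INT_MAX - err)) {
			len = INT_MAX - err;
			iov[ct].iov_len = len;	/* shrink, as the patch does */
		}
		err += len;
	}
	return err;
}

int main(void)
{
	char a[4], b[4];
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = sizeof(a) },
		{ .iov_base = b, .iov_len = (size_t)INT_MAX },
	};

	printf("%d\n", total_len(iov, 2));	/* INT_MAX, never negative */
	return 0;
}
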
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b143173e3eb2..7f902cad10f8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -598,7 +598,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	}
 
 	spin_lock(&rps_map_lock);
-	old_map = queue->rps_map;
+	old_map = rcu_dereference_protected(queue->rps_map,
+					    lockdep_is_held(&rps_map_lock));
 	rcu_assign_pointer(queue->rps_map, map);
 	spin_unlock(&rps_map_lock);
 
@@ -677,7 +678,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 		table = NULL;
 
 	spin_lock(&rps_dev_flow_lock);
-	old_table = queue->rps_flow_table;
+	old_table = rcu_dereference_protected(queue->rps_flow_table,
+					      lockdep_is_held(&rps_dev_flow_lock));
 	rcu_assign_pointer(queue->rps_flow_table, table);
 	spin_unlock(&rps_dev_flow_lock);
 
@@ -705,16 +707,26 @@ static void rx_queue_release(struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
 	struct netdev_rx_queue *first = queue->first;
+	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
 
-	if (queue->rps_map)
-		call_rcu(&queue->rps_map->rcu, rps_map_release);
 
-	if (queue->rps_flow_table)
-		call_rcu(&queue->rps_flow_table->rcu,
-			 rps_dev_flow_table_release);
+	map = rcu_dereference_raw(queue->rps_map);
+	if (map) {
+		RCU_INIT_POINTER(queue->rps_map, NULL);
+		call_rcu(&map->rcu, rps_map_release);
+	}
+
+	flow_table = rcu_dereference_raw(queue->rps_flow_table);
+	if (flow_table) {
+		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
+		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+	}
 
 	if (atomic_dec_and_test(&first->count))
 		kfree(first);
+	else
+		memset(kobj, 0, sizeof(*kobj));
 }
 
 static struct kobj_type rx_queue_ktype = {
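
The rx_queue_release() rewrite follows the standard RCU teardown ordering: unpublish the pointer first, so readers see NULL, and only then hand the old object to deferred reclamation. A userspace model of that ordering, with free() standing in for call_rcu() (names are illustrative, not the kernel's):

#include <stdlib.h>

struct rps_map_model {
	unsigned int len;
};

struct rx_queue_model {
	struct rps_map_model *rps_map;	/* pointer published to readers */
};

static void rx_queue_release_model(struct rx_queue_model *q)
{
	struct rps_map_model *map = q->rps_map;

	if (map) {
		q->rps_map = NULL;	/* RCU_INIT_POINTER(..., NULL) */
		free(map);		/* call_rcu(&map->rcu, ...) */
	}
}

int main(void)
{
	struct rx_queue_model q;

	q.rps_map = calloc(1, sizeof(*q.rps_map));
	rx_queue_release_model(&q);
	rx_queue_release_model(&q);	/* idempotent: already unpublished */
	return 0;
}
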
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c988e685433a..3f860261c5ee 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -42,7 +42,9 @@ static int net_assign_generic(struct net *net, int id, void *data)
 	BUG_ON(!mutex_is_locked(&net_mutex));
 	BUG_ON(id == 0);
 
-	ng = old_ng = net->gen;
+	old_ng = rcu_dereference_protected(net->gen,
+					   lockdep_is_held(&net_mutex));
+	ng = old_ng;
 	if (old_ng->len >= id)
 		goto assign;
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2c0df0f95b3d..33bc3823ac6f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -771,10 +771,10 @@ done:
 static unsigned long num_arg(const char __user * user_buffer,
 			     unsigned long maxlen, unsigned long *num)
 {
-	int i = 0;
+	int i;
 	*num = 0;
 
-	for (; i < maxlen; i++) {
+	for (i = 0; i < maxlen; i++) {
 		char c;
 		if (get_user(c, &user_buffer[i]))
 			return -EFAULT;
@@ -789,9 +789,9 @@ static unsigned long num_arg(const char __user * user_buffer,
 
 static int strn_len(const char __user * user_buffer, unsigned int maxlen)
 {
-	int i = 0;
+	int i;
 
-	for (; i < maxlen; i++) {
+	for (i = 0; i < maxlen; i++) {
 		char c;
 		if (get_user(c, &user_buffer[i]))
 			return -EFAULT;
@@ -846,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
 {
 	struct seq_file *seq = file->private_data;
 	struct pktgen_dev *pkt_dev = seq->private;
-	int i = 0, max, len;
+	int i, max, len;
 	char name[16], valstr[32];
 	unsigned long value = 0;
 	char *pg_result = NULL;
@@ -860,13 +860,13 @@ static ssize_t pktgen_if_write(struct file *file,
 		return -EINVAL;
 	}
 
-	max = count - i;
-	tmp = count_trail_chars(&user_buffer[i], max);
+	max = count;
+	tmp = count_trail_chars(user_buffer, max);
 	if (tmp < 0) {
 		pr_warning("illegal format\n");
 		return tmp;
 	}
-	i += tmp;
+	i = tmp;
 
 	/* Read variable name */
 
@@ -887,10 +887,11 @@ static ssize_t pktgen_if_write(struct file *file,
 	i += len;
 
 	if (debug) {
-		char tb[count + 1];
-		if (copy_from_user(tb, user_buffer, count))
+		size_t copy = min_t(size_t, count, 1023);
+		char tb[copy + 1];
+		if (copy_from_user(tb, user_buffer, copy))
 			return -EFAULT;
-		tb[count] = 0;
+		tb[copy] = 0;
 		printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name,
 		       (unsigned long)count, tb);
 	}
@@ -1764,7 +1765,7 @@ static ssize_t pktgen_thread_write(struct file *file,
 {
 	struct seq_file *seq = file->private_data;
 	struct pktgen_thread *t = seq->private;
-	int i = 0, max, len, ret;
+	int i, max, len, ret;
 	char name[40];
 	char *pg_result;
 
@@ -1773,12 +1774,12 @@ static ssize_t pktgen_thread_write(struct file *file,
 		return -EINVAL;
 	}
 
-	max = count - i;
-	len = count_trail_chars(&user_buffer[i], max);
+	max = count;
+	len = count_trail_chars(user_buffer, max);
 	if (len < 0)
 		return len;
 
-	i += len;
+	i = len;
 
 	/* Read variable name */
 
@@ -1975,7 +1976,7 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
 						 const char *ifname)
 {
 	char b[IFNAMSIZ+5];
-	int i = 0;
+	int i;
 
 	for (i = 0; ifname[i] != '@'; i++) {
 		if (i == IFNAMSIZ)
@@ -2519,8 +2520,8 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
 {
 	if (pkt_dev->cflows) {
 		/* let go of the SAs if we have them */
-		int i = 0;
-		for (;  i < pkt_dev->cflows; i++) {
+		int i;
+		for (i = 0; i < pkt_dev->cflows; i++) {
 			struct xfrm_state *x = pkt_dev->flows[i].x;
 			if (x) {
 				xfrm_state_put(x);
@@ -2611,8 +2612,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
-	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
 
 	datalen = (odev->hard_header_len + 16) & ~0xf;
 
@@ -2975,8 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
-	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
 
 	skb = __netdev_alloc_skb(odev,
 				 pkt_dev->cur_pkt_size + 64
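
The pktgen_if_write() debug path previously sized an on-stack VLA directly from the user-supplied write count; the patch caps the copy at 1023 bytes plus a terminator. A userspace sketch of the bounded copy (memcpy stands in for copy_from_user; the constant is from the patch):

#include <stdio.h>
#include <string.h>

#define DEBUG_COPY_MAX 1023

static void debug_dump(const char *user_buffer, size_t count)
{
	size_t copy = count < DEBUG_COPY_MAX ? count : DEBUG_COPY_MAX;
	char tb[copy + 1];	/* VLA now bounded regardless of count */

	memcpy(tb, user_buffer, copy);
	tb[copy] = 0;
	printf("pktgen: %zu buffer -:%s:-\n", count, tb);
}

int main(void)
{
	debug_dump("pkt_size 60", 11);
	return 0;
}
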
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7a..fceeb37d7161 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -45,9 +45,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
 	lopt_size += nr_table_entries * sizeof(struct request_sock *);
 	if (lopt_size > PAGE_SIZE)
-		lopt = __vmalloc(lopt_size,
-			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
-			PAGE_KERNEL);
+		lopt = vzalloc(lopt_size);
 	else
 		lopt = kzalloc(lopt_size, GFP_KERNEL);
 	if (lopt == NULL)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8121268ddbdd..841c287ef40a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -347,16 +347,17 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
 	if (!ops)
 		return 0;
 
-	size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
-	       nlmsg_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */
+	size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+	       nla_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */
 
 	if (ops->get_size)
 		/* IFLA_INFO_DATA + nested data */
-		size += nlmsg_total_size(sizeof(struct nlattr)) +
+		size += nla_total_size(sizeof(struct nlattr)) +
 			ops->get_size(dev);
 
 	if (ops->get_xstats_size)
-		size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */
+		/* IFLA_INFO_XSTATS */
+		size += nla_total_size(ops->get_xstats_size(dev));
 
 	return size;
 }
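
The sizing fix matters because netlink attributes carry a 4-byte nlattr header with 4-byte alignment, whereas nlmsg_total_size() accounts for the 16-byte netlink message header; using the latter per attribute both overestimates and, for IFLA_INFO_XSTATS, the old code reserved no attribute header at all. A userspace model of the attribute arithmetic (constants mirror include/net/netlink.h):

#include <stdio.h>

struct nlattr_model {
	unsigned short nla_len;
	unsigned short nla_type;
};

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	((int)NLA_ALIGN(sizeof(struct nlattr_model)))

static int nla_total_size_model(int payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	/* IFLA_INFO_KIND "veth" + NUL: 4-byte header + 5 bytes -> 12 */
	printf("%d\n", nla_total_size_model(sizeof("veth")));
	return 0;
}
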
diff --git a/net/core/sock.c b/net/core/sock.c
index 11db43632df8..e5af8d5d5b50 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1009,6 +1009,36 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
 #endif
 }
 
+/*
+ * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * un-modified. Special care is taken when initializing object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+	if (offsetof(struct sock, sk_node.next) != 0)
+		memset(sk, 0, offsetof(struct sock, sk_node.next));
+	memset(&sk->sk_node.pprev, 0,
+	       size - offsetof(struct sock, sk_node.pprev));
+}
+
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
+{
+	unsigned long nulls1, nulls2;
+
+	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
+	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
+	if (nulls1 > nulls2)
+		swap(nulls1, nulls2);
+
+	if (nulls1 != 0)
+		memset((char *)sk, 0, nulls1);
+	memset((char *)sk + nulls1 + sizeof(void *), 0,
+	       nulls2 - nulls1 - sizeof(void *));
+	memset((char *)sk + nulls2 + sizeof(void *), 0,
+	       size - nulls2 - sizeof(void *));
+}
+EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
+
 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 				  int family)
 {
@@ -1021,19 +1051,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		if (!sk)
 			return sk;
 		if (priority & __GFP_ZERO) {
-			/*
-			 * caches using SLAB_DESTROY_BY_RCU should let
-			 * sk_node.next un-modified. Special care is taken
-			 * when initializing object to zero.
-			 */
-			if (offsetof(struct sock, sk_node.next) != 0)
-				memset(sk, 0, offsetof(struct sock, sk_node.next));
-			memset(&sk->sk_node.pprev, 0,
-			       prot->obj_size - offsetof(struct sock,
-							 sk_node.pprev));
+			if (prot->clear_sk)
+				prot->clear_sk(sk, prot->obj_size);
+			else
+				sk_prot_clear_nulls(sk, prot->obj_size);
 		}
-	}
-	else
+	} else
 		sk = kmalloc(prot->obj_size, priority);
 
 	if (sk != NULL) {
@@ -1225,7 +1248,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		sock_reset_flag(newsk, SOCK_DONE);
 		skb_queue_head_init(&newsk->sk_error_queue);
 
-		filter = newsk->sk_filter;
+		filter = rcu_dereference_protected(newsk->sk_filter, 1);
 		if (filter != NULL)
 			sk_filter_charge(newsk, filter);
 
@@ -1653,10 +1676,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 {
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
-	int allocated;
+	long allocated;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-	allocated = atomic_add_return(amt, prot->memory_allocated);
+	allocated = atomic_long_add_return(amt, prot->memory_allocated);
 
 	/* Under limit. */
 	if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1737,7 @@ suppress_allocation:
 
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-	atomic_sub(amt, prot->memory_allocated);
+	atomic_long_sub(amt, prot->memory_allocated);
 	return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1750,12 @@ void __sk_mem_reclaim(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
-	atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
-		   prot->memory_allocated);
+	atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+			prot->memory_allocated);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (prot->memory_pressure && *prot->memory_pressure &&
-	    (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+	    (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
 		*prot->memory_pressure = 0;
 }
 EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2475,12 @@ static char proto_method_implemented(const void *method)
 
 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 {
-	seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
+	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
 		   proto->name,
 		   proto->obj_size,
 		   sock_prot_inuse_get(seq_file_net(seq), proto),
-		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+		   proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
 		   proto->max_header,
 		   proto->slab == NULL ? "no" : "yes",
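
The memory_allocated counters move from atomic_t to atomic_long_t because they count SK_MEM_QUANTUM-sized pages and are compared against sysctl_mem[] limits that are themselves longs; a long keeps the arithmetic in the native word size on 64-bit hosts. A small userspace illustration (figures are made up, not from the patch):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	long allocated = INT_MAX;	/* pages already accounted */
	int amt = 16;			/* sk_mem_pages(size) for one call */

	allocated += amt;		/* well-defined in a long on LP64 */
	printf("%ld exceeds INT_MAX: %d\n", allocated, allocated > INT_MAX);
	return 0;
}
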
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 01eee5d984be..385b6095fdc4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -34,7 +34,8 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
 
 	mutex_lock(&sock_flow_mutex);
 
-	orig_sock_table = rps_sock_flow_table;
+	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+						    lockdep_is_held(&sock_flow_mutex));
 	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
 
 	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85b..c19bb4ee405e 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -96,11 +96,13 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
 	struct phy_device *phydev;
 	unsigned int type;
 
-	skb_push(skb, ETH_HLEN);
+	if (skb_headroom(skb) < ETH_HLEN)
+		return false;
+	__skb_push(skb, ETH_HLEN);
 
 	type = classify(skb);
 
-	skb_pull(skb, ETH_HLEN);
+	__skb_pull(skb, ETH_HLEN);
 
 	switch (type) {
 	case PTP_CLASS_V1_IPV4:
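
Pushing the Ethernet header back in front of skb->data is only legal if that much headroom exists; the unchecked skb_push() would BUG() on underflow, so the patch tests skb_headroom() first and uses the non-checking __skb_push()/__skb_pull() pair. A userspace model of the guarded push (struct and names are illustrative):

#include <stdbool.h>
#include <stdio.h>

#define ETH_HLEN 14

struct skb_model {
	unsigned char *head;	/* start of the allocation */
	unsigned char *data;	/* current packet start */
};

static bool push_eth_header(struct skb_model *skb)
{
	if ((size_t)(skb->data - skb->head) < ETH_HLEN)
		return false;	/* not enough headroom: bail out */
	skb->data -= ETH_HLEN;	/* __skb_push(skb, ETH_HLEN) */
	return true;
}

int main(void)
{
	unsigned char buf[64];
	struct skb_model skb = { .head = buf, .data = buf + ETH_HLEN };

	printf("%d\n", push_eth_header(&skb));	/* 1 */
	printf("%d\n", push_eth_header(&skb));	/* 0: headroom exhausted */
	return 0;
}
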
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 117fb093dcaf..75c3582a7678 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -134,13 +134,41 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
 extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
 extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
 
+/*
+ * Congestion control of queued data packets via CCID decision.
+ *
+ * The TX CCID performs its congestion-control by indicating whether and when a
+ * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
+ * The following modes are supported via the symbolic constants below:
+ *  - timer-based pacing    (CCID returns a delay value in milliseconds);
+ *  - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
+ */
+
+enum ccid_dequeueing_decision {
+	CCID_PACKET_SEND_AT_ONCE =	 0x00000,  /* "green light": no delay */
+	CCID_PACKET_DELAY_MAX =		 0x0FFFF,  /* maximum delay in msecs */
+	CCID_PACKET_DELAY =		 0x10000,  /* CCID msec-delay mode */
+	CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000,  /* CCID autonomous mode */
+	CCID_PACKET_ERR =		 0xF0000,  /* error condition */
+};
+
+static inline int ccid_packet_dequeue_eval(const int return_code)
+{
+	if (return_code < 0)
+		return CCID_PACKET_ERR;
+	if (return_code == 0)
+		return CCID_PACKET_SEND_AT_ONCE;
+	if (return_code <= CCID_PACKET_DELAY_MAX)
+		return CCID_PACKET_DELAY;
+	return return_code;
+}
+
 static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
 					 struct sk_buff *skb)
 {
-	int rc = 0;
 	if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
-		rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
-	return rc;
+		return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
+	return CCID_PACKET_SEND_AT_ONCE;
 }
 
 static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
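
The return-code convention above packs three cases into one int: negatives are errors, 0..CCID_PACKET_DELAY_MAX are millisecond delays, and the high-bit flags pass through unchanged. This userspace copy of the enum and evaluator (taken verbatim from the hunk above) shows how callers decode it:

#include <stdio.h>

enum ccid_dequeueing_decision {
	CCID_PACKET_SEND_AT_ONCE       = 0x00000,
	CCID_PACKET_DELAY_MAX          = 0x0FFFF,
	CCID_PACKET_DELAY              = 0x10000,
	CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000,
	CCID_PACKET_ERR                = 0xF0000,
};

static int ccid_packet_dequeue_eval(const int return_code)
{
	if (return_code < 0)
		return CCID_PACKET_ERR;
	if (return_code == 0)
		return CCID_PACKET_SEND_AT_ONCE;
	if (return_code <= CCID_PACKET_DELAY_MAX)
		return CCID_PACKET_DELAY;
	return return_code;
}

int main(void)
{
	printf("%#x\n", ccid_packet_dequeue_eval(0));	/* send at once */
	printf("%#x\n", ccid_packet_dequeue_eval(250));	/* 250 ms -> DELAY */
	printf("%#x\n", ccid_packet_dequeue_eval(-12));	/* error */
	printf("%#x\n", ccid_packet_dequeue_eval(CCID_PACKET_WILL_DEQUEUE_LATER));
	return 0;
}
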
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d850e291f87c..6576eae9e779 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -78,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
 
 static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	if (hc->tx_pipe < hc->tx_cwnd)
-		return 0;
-
-	return 1; /* XXX CCID should dequeue when ready instead of polling */
+	if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
+		return CCID_PACKET_WILL_DEQUEUE_LATER;
+	return CCID_PACKET_SEND_AT_ONCE;
 }
 
 static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
@@ -115,6 +112,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -129,8 +127,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	if (hc->tx_rto > DCCP_RTO_MAX)
 		hc->tx_rto = DCCP_RTO_MAX;
 
-	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-
 	/* adjust pipe, cwnd etc */
 	hc->tx_ssthresh = hc->tx_cwnd / 2;
 	if (hc->tx_ssthresh < 2)
@@ -146,6 +142,12 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	hc->tx_rpseq = 0;
 	hc->tx_rpdupack = -1;
 	ccid2_change_l_ack_ratio(sk, 1);
+
+	/* if we were blocked before, we may now send cwnd=1 packet */
+	if (sender_was_blocked)
+		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+	/* restart backed-off timer */
+	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -434,6 +436,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
 	u64 ackno, seqno;
 	struct ccid2_seq *seqp;
 	unsigned char *vector;
@@ -631,6 +634,10 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		sk_stop_timer(sk, &hc->tx_rtotimer);
 	else
 		sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
+
+	/* check if incoming Acks allow pending packets to be sent */
+	if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
+		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
 }
 
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
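
The ccid2 change replaces polling with edge detection: the sender records whether it was cwnd-limited before processing an Ack or an RTO, and kicks the xmit tasklet only on the blocked-to-unblocked transition. A userspace model of that edge detection (types and figures are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct tx_model {
	unsigned int pipe;	/* packets in flight */
	unsigned int cwnd;	/* congestion window */
};

static bool network_limited(const struct tx_model *tx)
{
	return tx->pipe >= tx->cwnd;
}

static void ack_received(struct tx_model *tx, unsigned int newly_acked)
{
	bool sender_was_blocked = network_limited(tx);

	tx->pipe -= newly_acked;
	if (sender_was_blocked && !network_limited(tx))
		printf("tasklet_schedule(): sender may transmit again\n");
}

int main(void)
{
	struct tx_model tx = { .pipe = 4, .cwnd = 4 };

	ack_received(&tx, 1);	/* 4/4 -> 3/4: kick the tasklet once */
	ack_received(&tx, 1);	/* 3/4 -> 2/4: already unblocked, no kick */
	return 0;
}
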
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 9731c2dc1487..25cb6b216eda 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -81,6 +81,11 @@ struct ccid2_hc_tx_sock {
 	u64 tx_high_ack;
 };
 
+static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
+{
+	return hc->tx_pipe >= hc->tx_cwnd;
+}
+
 struct ccid2_hc_rx_sock {
 	int rx_data;
 };
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3060a60ed5ab..3d604e1349c0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -268,11 +268,11 @@ out:
 	sock_put(sk);
 }
 
-/*
- * returns
- *   > 0: delay (in msecs) that should pass before actually sending
- *   = 0: can send immediately
- *   < 0: error condition; do not send packet
+/**
+ * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets
+ * @skb: next packet candidate to send on @sk
+ * This function uses the convention of ccid_packet_dequeue_eval() and
+ * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
  */
 static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
@@ -348,7 +348,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 
 	/* set the nominal send time for the next following packet */
 	hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi);
-	return 0;
+	return CCID_PACKET_SEND_AT_ONCE;
 }
 
 static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3eb264b60823..a8ed459508b2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,8 +243,9 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
-extern void dccp_write_xmit(struct sock *sk, int block);
+extern void dccp_write_xmit(struct sock *sk);
 extern void dccp_write_space(struct sock *sk);
+extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
 
 extern void dccp_init_xmit_timers(struct sock *sk);
 static inline void dccp_clear_xmit_timers(struct sock *sk)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 265985370fa1..e424a09e83f6 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -239,7 +239,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
 		dccp_update_gsr(sk, seqno);
 
 		if (dh->dccph_type != DCCP_PKT_SYNC &&
-		    (ackno != DCCP_PKT_WITHOUT_ACK_SEQ))
+		    ackno != DCCP_PKT_WITHOUT_ACK_SEQ &&
+		    after48(ackno, dp->dccps_gar))
 			dp->dccps_gar = ackno;
 	} else {
 		unsigned long now = jiffies;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index a988fe9ffcba..45b91853f5ae 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -209,108 +209,150 @@ void dccp_write_space(struct sock *sk)
 }
 
 /**
- * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
+ * dccp_wait_for_ccid - Await CCID send permission
  * @sk: socket to wait for
- * @skb: current skb to pass on for waiting
- * @delay: sleep timeout in milliseconds (> 0)
- * This function is called by default when the socket is closed, and
- * when a non-zero linger time is set on the socket. For consistency
+ * @delay: timeout in jiffies
+ * This is used by CCIDs which need to delay the send time in process context.
  */
-static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
+static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
 	DEFINE_WAIT(wait);
-	unsigned long jiffdelay;
-	int rc;
+	long remaining;
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+	sk->sk_write_pending++;
+	release_sock(sk);
+
+	remaining = schedule_timeout(delay);
+
+	lock_sock(sk);
+	sk->sk_write_pending--;
+	finish_wait(sk_sleep(sk), &wait);
+
+	if (signal_pending(current) || sk->sk_err)
+		return -1;
+	return remaining;
+}
+
+/**
+ * dccp_xmit_packet - Send data packet under control of CCID
+ * Transmits next-queued payload and informs CCID to account for the packet.
+ */
+static void dccp_xmit_packet(struct sock *sk)
+{
+	int err, len;
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
 
-	do {
-		dccp_pr_debug("delayed send by %d msec\n", delay);
-		jiffdelay = msecs_to_jiffies(delay);
+	if (unlikely(skb == NULL))
+		return;
+	len = skb->len;
 
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+	if (sk->sk_state == DCCP_PARTOPEN) {
+		const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
+		/*
+		 * See 8.1.5 - Handshake Completion.
+		 *
+		 * For robustness we resend Confirm options until the client has
+		 * entered OPEN. During the initial feature negotiation, the MPS
+		 * is smaller than usual, reduced by the Change/Confirm options.
+		 */
+		if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
+			DCCP_WARN("Payload too large (%d) for featneg.\n", len);
+			dccp_send_ack(sk);
+			dccp_feat_list_purge(&dp->dccps_featneg);
+		}
 
-		sk->sk_write_pending++;
-		release_sock(sk);
-		schedule_timeout(jiffdelay);
-		lock_sock(sk);
-		sk->sk_write_pending--;
+		inet_csk_schedule_ack(sk);
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+					  inet_csk(sk)->icsk_rto,
+					  DCCP_RTO_MAX);
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+	} else if (dccp_ack_pending(sk)) {
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+	} else {
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
+	}
+
+	err = dccp_transmit_skb(sk, skb);
+	if (err)
+		dccp_pr_debug("transmit_skb() returned err=%d\n", err);
+	/*
+	 * Register this one as sent even if an error occurred. To the remote
+	 * end a local packet drop is indistinguishable from network loss, i.e.
+	 * any local drop will eventually be reported via receiver feedback.
+	 */
+	ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+}
 
-		if (sk->sk_err)
-			goto do_error;
-		if (signal_pending(current))
-			goto do_interrupted;
+/**
+ * dccp_flush_write_queue - Drain queue at end of connection
+ * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
+ * happen that the TX queue is not empty at the end of a connection. We give the
+ * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
+ * returns with a non-empty write queue, it will be purged later.
+ */
+void dccp_flush_write_queue(struct sock *sk, long *time_budget)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb;
+	long delay, rc;
 
+	while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
 		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-	} while ((delay = rc) > 0);
-out:
-	finish_wait(sk_sleep(sk), &wait);
-	return rc;
-
-do_error:
-	rc = -EPIPE;
-	goto out;
-do_interrupted:
-	rc = -EINTR;
-	goto out;
+
+		switch (ccid_packet_dequeue_eval(rc)) {
+		case CCID_PACKET_WILL_DEQUEUE_LATER:
+			/*
+			 * If the CCID determines when to send, the next sending
+			 * time is unknown or the CCID may not even send again
+			 * (e.g. remote host crashes or lost Ack packets).
+			 */
+			DCCP_WARN("CCID did not manage to send all packets\n");
+			return;
+		case CCID_PACKET_DELAY:
+			delay = msecs_to_jiffies(rc);
+			if (delay > *time_budget)
+				return;
+			rc = dccp_wait_for_ccid(sk, delay);
+			if (rc < 0)
+				return;
+			*time_budget -= (delay - rc);
+			/* check again if we can send now */
+			break;
+		case CCID_PACKET_SEND_AT_ONCE:
+			dccp_xmit_packet(sk);
+			break;
+		case CCID_PACKET_ERR:
+			skb_dequeue(&sk->sk_write_queue);
+			kfree_skb(skb);
+			dccp_pr_debug("packet discarded due to err=%ld\n", rc);
+		}
+	}
 }
 
-void dccp_write_xmit(struct sock *sk, int block)
+void dccp_write_xmit(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
 
 	while ((skb = skb_peek(&sk->sk_write_queue))) {
-		int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
-		if (err > 0) {
-			if (!block) {
-				sk_reset_timer(sk, &dp->dccps_xmit_timer,
-						msecs_to_jiffies(err)+jiffies);
-				break;
-			} else
-				err = dccp_wait_for_ccid(sk, skb, err);
-			if (err && err != -EINTR)
-				DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
-		}
+		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
-		skb_dequeue(&sk->sk_write_queue);
-		if (err == 0) {
-			struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-			const int len = skb->len;
-
-			if (sk->sk_state == DCCP_PARTOPEN) {
-				const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
-				/*
-				 * See 8.1.5 - Handshake Completion.
-				 *
-				 * For robustness we resend Confirm options until the client has
-				 * entered OPEN. During the initial feature negotiation, the MPS
-				 * is smaller than usual, reduced by the Change/Confirm options.
-				 */
-				if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
-					DCCP_WARN("Payload too large (%d) for featneg.\n", len);
-					dccp_send_ack(sk);
-					dccp_feat_list_purge(&dp->dccps_featneg);
-				}
-
-				inet_csk_schedule_ack(sk);
-				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						inet_csk(sk)->icsk_rto,
-						DCCP_RTO_MAX);
-				dcb->dccpd_type = DCCP_PKT_DATAACK;
-			} else if (dccp_ack_pending(sk))
-				dcb->dccpd_type = DCCP_PKT_DATAACK;
-			else
-				dcb->dccpd_type = DCCP_PKT_DATA;
-
-			err = dccp_transmit_skb(sk, skb);
-			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
-			if (err)
-				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
-					 err);
-		} else {
-			dccp_pr_debug("packet discarded due to err=%d\n", err);
+		switch (ccid_packet_dequeue_eval(rc)) {
+		case CCID_PACKET_WILL_DEQUEUE_LATER:
+			return;
+		case CCID_PACKET_DELAY:
+			sk_reset_timer(sk, &dp->dccps_xmit_timer,
+				       jiffies + msecs_to_jiffies(rc));
+			return;
+		case CCID_PACKET_SEND_AT_ONCE:
+			dccp_xmit_packet(sk);
+			break;
+		case CCID_PACKET_ERR:
+			skb_dequeue(&sk->sk_write_queue);
 			kfree_skb(skb);
+			dccp_pr_debug("packet discarded due to err=%d\n", rc);
 		}
 	}
 }
@@ -622,7 +664,6 @@ void dccp_send_close(struct sock *sk, const int active)
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
 
 	if (active) {
-		dccp_write_xmit(sk, 1);
 		dccp_skb_entail(sk, skb);
 		dccp_transmit_skb(sk, skb_clone(skb, prio));
 		/*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7e5fc04eb6d1..ef343d53fcea 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -726,7 +726,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		goto out_discard;
 
 	skb_queue_tail(&sk->sk_write_queue, skb);
-	dccp_write_xmit(sk,0);
+	/*
+	 * The xmit_timer is set if the TX CCID is rate-based and will expire
+	 * when congestion control permits to release further packets into the
+	 * network. Window-based CCIDs do not use this timer.
+	 */
+	if (!timer_pending(&dp->dccps_xmit_timer))
+		dccp_write_xmit(sk);
 out_release:
 	release_sock(sk);
 	return rc ? : len;
@@ -951,9 +957,22 @@ void dccp_close(struct sock *sk, long timeout)
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
 	} else if (sk->sk_state != DCCP_CLOSED) {
+		/*
+		 * Normal connection termination. May need to wait if there are
+		 * still packets in the TX queue that are delayed by the CCID.
+		 */
+		dccp_flush_write_queue(sk, &timeout);
 		dccp_terminate_connection(sk);
 	}
 
+	/*
+	 * Flush write queue. This may be necessary in several cases:
+	 * - we have been closed by the peer but still have application data;
+	 * - abortive termination (unread data or zero linger time),
+	 * - normal termination but queue could not be flushed within time limit
+	 */
+	__skb_queue_purge(&sk->sk_write_queue);
+
 	sk_stream_wait_close(sk, timeout);
 
 adjudge_to_death:
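
The timer_pending() gate in dccp_sendmsg() keeps newly queued data behind an already-armed pacing timer: if the CCID asked for a delay, dequeueing early would transmit ahead of the requested schedule. A trivial userspace model of the gate (names are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool xmit_timer_pending;

static void write_xmit(void)
{
	printf("dequeue attempt\n");
}

static void sendmsg_model(void)
{
	printf("packet queued\n");
	if (!xmit_timer_pending)	/* timer_pending(&dp->dccps_xmit_timer) */
		write_xmit();
}

int main(void)
{
	sendmsg_model();		/* no timer armed: try to send now */
	xmit_timer_pending = true;	/* CCID asked for a delay */
	sendmsg_model();		/* leave dequeueing to the timer */
	return 0;
}
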
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1a9aa05d4dc4..7587870b7040 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -237,32 +237,35 @@ out:
 	sock_put(sk);
 }
 
-/* Transmit-delay timer: used by the CCIDs to delay actual send time */
-static void dccp_write_xmit_timer(unsigned long data)
+/**
+ * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
+ * See the comments above %ccid_dequeueing_decision for supported modes.
+ */
+static void dccp_write_xmitlet(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
-	struct dccp_sock *dp = dccp_sk(sk);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk))
-		sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+		sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
 	else
-		dccp_write_xmit(sk, 0);
+		dccp_write_xmit(sk);
 	bh_unlock_sock(sk);
-	sock_put(sk);
 }
 
-static void dccp_init_write_xmit_timer(struct sock *sk)
+static void dccp_write_xmit_timer(unsigned long data)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-
-	setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
-			(unsigned long)sk);
+	dccp_write_xmitlet(data);
+	sock_put((struct sock *)data);
}
 
 void dccp_init_xmit_timers(struct sock *sk)
 {
-	dccp_init_write_xmit_timer(sk);
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
+	setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
+		    (unsigned long)sk);
 	inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
 				  &dccp_keepalive_timer);
 }
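
Both entry points (tasklet and pacing timer) now share one worker; only the timer path owns an extra socket reference to drop, which is why dccp_write_xmit_timer() shrinks to a two-line wrapper. A userspace model of that split (reference counts are illustrative):

#include <stdio.h>

struct sock_model {
	int refcnt;
};

static void write_xmitlet(struct sock_model *sk)
{
	printf("dequeue pass, refcnt=%d\n", sk->refcnt);
}

static void write_xmit_timer(struct sock_model *sk)
{
	write_xmitlet(sk);
	sk->refcnt--;		/* sock_put(): reference taken when armed */
}

int main(void)
{
	struct sock_model sk = { .refcnt = 2 };

	write_xmitlet(&sk);	/* tasklet path: no reference to drop */
	write_xmit_timer(&sk);	/* timer path: drops its reference */
	printf("refcnt=%d\n", sk.refcnt);
	return 0;
}
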
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790f..6f97268ed85f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_ops;
 static DEFINE_RWLOCK(dn_hash_lock);
 static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
 static struct hlist_head dn_wild_sk;
-static atomic_t decnet_memory_allocated;
+static atomic_long_t decnet_memory_allocated;
 
 static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
 static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
 	if (r_len > sizeof(struct linkinfo_dn))
 		r_len = sizeof(struct linkinfo_dn);
 
+	memset(&link, 0, sizeof(link));
+
 	switch(sock->state) {
 	case SS_CONNECTING:
 		link.idn_linkstate = LL_CONNECTING;
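
The added memset() is a classic copy-to-user infoleak fix: zero the whole struct before setting the fields you mean to report, so padding and untouched members never carry stale stack bytes out. A userspace model (struct layout is illustrative, not DECnet's real linkinfo_dn):

#include <stdio.h>
#include <string.h>

struct linkinfo_model {
	unsigned short idn_segsize;
	unsigned char  idn_linkstate;
	/* typically one byte of trailing padding */
};

static void fill_linkinfo(struct linkinfo_model *link, int connected)
{
	memset(link, 0, sizeof(*link));		/* the added memset() */
	link->idn_segsize = 230;
	if (connected)
		link->idn_linkstate = 1;	/* e.g. LL_RUNNING */
}

int main(void)
{
	struct linkinfo_model link;
	unsigned char *p = (unsigned char *)&link;
	size_t i;

	fill_linkinfo(&link, 1);
	for (i = 0; i < sizeof(link); i++)	/* no stale bytes remain */
		printf("%02x ", p[i]);
	printf("\n");
	return 0;
}
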
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index be3eb8e23288..28f8b5e5f73b 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -38,7 +38,7 @@ int decnet_log_martians = 1;
 int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
 
 /* Reasonable defaults, I hope, based on tcp's defaults */
-int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
 int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
 int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
 
@@ -324,7 +324,7 @@ static ctl_table dn_table[] = {
 		.data = &sysctl_decnet_mem,
 		.maxlen = sizeof(sysctl_decnet_mem),
 		.mode = 0644,
-		.proc_handler = proc_dointvec,
+		.proc_handler = proc_doulongvec_minmax
 	},
 	{
 		.procname = "decnet_rmem",
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f8c1ae4b41f0..15dcc1a586b4 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/udp.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/sock.h>
 #include <net/inet_common.h>
 #include <linux/stat.h>
@@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 #endif
 #ifdef CONFIG_ECONET_AUNUDP
 	struct msghdr udpmsg;
-	struct iovec iov[msg->msg_iovlen+1];
+	struct iovec iov[2];
 	struct aunhdr ah;
 	struct sockaddr_in udpdest;
 	__kernel_size_t size;
-	int i;
 	mm_segment_t oldfs;
+	char *userbuf;
 #endif
 
 	/*
@@ -297,23 +298,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	mutex_lock(&econet_mutex);
 
-	if (saddr == NULL) {
-		struct econet_sock *eo = ec_sk(sk);
-
-		addr.station = eo->station;
-		addr.net = eo->net;
-		port = eo->port;
-		cb = eo->cb;
-	} else {
-		if (msg->msg_namelen < sizeof(struct sockaddr_ec)) {
-			mutex_unlock(&econet_mutex);
-			return -EINVAL;
-		}
-		addr.station = saddr->addr.station;
-		addr.net = saddr->addr.net;
-		port = saddr->port;
-		cb = saddr->cb;
-	}
+	if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) {
+		mutex_unlock(&econet_mutex);
+		return -EINVAL;
+	}
+	addr.station = saddr->addr.station;
+	addr.net = saddr->addr.net;
+	port = saddr->port;
+	cb = saddr->cb;
 
 	/* Look for a device with the right network number. */
 	dev = net2dev_map[addr.net];
@@ -328,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		}
 	}
 
-	if (len + 15 > dev->mtu) {
-		mutex_unlock(&econet_mutex);
-		return -EMSGSIZE;
-	}
-
 	if (dev->type == ARPHRD_ECONET) {
 		/* Real hardware Econet. We're not worthy etc. */
 #ifdef CONFIG_ECONET_NATIVE
 		unsigned short proto = 0;
 		int res;
 
+		if (len + 15 > dev->mtu) {
+			mutex_unlock(&econet_mutex);
+			return -EMSGSIZE;
+		}
+
 		dev_hold(dev);
 
 		skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
@@ -351,7 +343,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 		eb = (struct ec_cb *)&skb->cb;
 
-		/* BUG: saddr may be NULL */
 		eb->cookie = saddr->cookie;
 		eb->sec = *saddr;
 		eb->sent = ec_tx_done;
@@ -415,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENETDOWN;	/* No socket - can't send */
 	}
 
+	if (len > 32768) {
+		err = -E2BIG;
+		goto error;
+	}
+
 	/* Make up a UDP datagram and hand it off to some higher intellect. */
 
 	memset(&udpdest, 0, sizeof(udpdest));
@@ -446,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	/* tack our header on the front of the iovec */
 	size = sizeof(struct aunhdr);
-	/*
-	 * XXX: that is b0rken. We can't mix userland and kernel pointers
-	 * in iovec, since on a lot of platforms copy_from_user() will
-	 * *not* work with the kernel and userland ones at the same time,
-	 * regardless of what we do with set_fs(). And we are talking about
-	 * econet-over-ethernet here, so "it's only ARM anyway" doesn't
-	 * apply. Any suggestions on fixing that code? -- AV
-	 */
 	iov[0].iov_base = (void *)&ah;
 	iov[0].iov_len = size;
-	for (i = 0; i < msg->msg_iovlen; i++) {
-		void __user *base = msg->msg_iov[i].iov_base;
-		size_t iov_len = msg->msg_iov[i].iov_len;
-		/* Check it now since we switch to KERNEL_DS later. */
-		if (!access_ok(VERIFY_READ, base, iov_len)) {
-			mutex_unlock(&econet_mutex);
-			return -EFAULT;
-		}
-		iov[i+1].iov_base = base;
-		iov[i+1].iov_len = iov_len;
-		size += iov_len;
+
+	userbuf = vmalloc(len);
+	if (userbuf == NULL) {
+		err = -ENOMEM;
+		goto error;
 	}
 
+	iov[1].iov_base = userbuf;
+	iov[1].iov_len = len;
+	err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
+	if (err)
+		goto error_free_buf;
+
 	/* Get a skbuff (no data, just holds our cb information) */
 	if ((skb = sock_alloc_send_skb(sk, 0,
 				       msg->msg_flags & MSG_DONTWAIT,
-				       &err)) == NULL) {
-		mutex_unlock(&econet_mutex);
-		return err;
-	}
+				       &err)) == NULL)
+		goto error_free_buf;
 
 	eb = (struct ec_cb *)&skb->cb;
 
@@ -491,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	udpmsg.msg_name = (void *)&udpdest;
 	udpmsg.msg_namelen = sizeof(udpdest);
 	udpmsg.msg_iov = &iov[0];
-	udpmsg.msg_iovlen = msg->msg_iovlen + 1;
+	udpmsg.msg_iovlen = 2;
 	udpmsg.msg_control = NULL;
 	udpmsg.msg_controllen = 0;
 	udpmsg.msg_flags=0;
@@ -499,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	oldfs = get_fs(); set_fs(KERNEL_DS);	/* More privs :-) */
 	err = sock_sendmsg(udpsock, &udpmsg, size);
 	set_fs(oldfs);
+
+error_free_buf:
+	vfree(userbuf);
 #else
 	err = -EPROTOTYPE;
 #endif
+	error:
 	mutex_unlock(&econet_mutex);
 
 	return err;
@@ -671,6 +661,11 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
 	err = 0;
 	switch (cmd) {
 	case SIOCSIFADDR:
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
+
 		edev = dev->ec_ptr;
 		if (edev == NULL) {
 			/* Magic up a new one. */
@@ -856,9 +851,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
 {
 	struct iphdr *ip = ip_hdr(skb);
 	unsigned char stn = ntohl(ip->saddr) & 0xff;
+	struct dst_entry *dst = skb_dst(skb);
+	struct ec_device *edev = NULL;
 	struct sock *sk = NULL;
 	struct sk_buff *newskb;
-	struct ec_device *edev = skb->dev->ec_ptr;
+
+	if (dst)
+		edev = dst->dev->ec_ptr;
 
 	if (! edev)
 		goto bad;
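
The sendmsg rework stops mixing user and kernel pointers in one iovec under KERNEL_DS: user data is copied once into a kernel bounce buffer, and the privileged send then touches kernel memory only. A userspace model of the two-element iovec construction, with writev() standing in for the AUN/UDP send and an invented header layout:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

struct aunhdr_model {
	unsigned char code, port, flag, pad;
	unsigned long handle;
};

static int send_aun(const struct iovec *user_iov, int user_iovlen, size_t len)
{
	struct aunhdr_model ah = { .code = 2 };	/* unicast, illustrative */
	struct iovec iov[2];
	char *userbuf = malloc(len);		/* vmalloc() in the patch */
	size_t off = 0;
	int i;

	if (!userbuf)
		return -1;
	for (i = 0; i < user_iovlen; i++) {	/* memcpy_fromiovec() */
		memcpy(userbuf + off, user_iov[i].iov_base, user_iov[i].iov_len);
		off += user_iov[i].iov_len;
	}
	iov[0].iov_base = &ah;			/* kernel header first... */
	iov[0].iov_len = sizeof(ah);
	iov[1].iov_base = userbuf;		/* ...then the bounce buffer */
	iov[1].iov_len = len;
	writev(STDOUT_FILENO, iov, 2);
	free(userbuf);				/* vfree() */
	return 0;
}

int main(void)
{
	char a[] = "hello ", b[] = "econet\n";
	struct iovec uiov[2] = {
		{ a, sizeof(a) - 1 },
		{ b, sizeof(b) - 1 },
	};

	return send_aun(uiov, 2, sizeof(a) + sizeof(b) - 2);
}
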
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 36e27c2107de..eb6f69a8f27a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net)
 		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
 			hlist_del(node);
 			fib_table_flush(tb);
-			kfree(tb);
+			fib_free_table(tb);
 		}
 	}
 	kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 43e1c594ce8f..b3acb0417b21 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -120,11 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
 		struct fib_node *f;
 
 		hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
-			struct hlist_head __rcu *new_head;
+			struct hlist_head *new_head;
 
 			hlist_del_rcu(&f->fn_hash);
 
-			new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+			new_head = rcu_dereference_protected(fz->fz_hash, 1) +
+				   fn_hash(f->fn_key, fz);
 			hlist_add_head_rcu(&f->fn_hash, new_head);
 		}
 	}
@@ -179,8 +180,8 @@ static void fn_rehash_zone(struct fn_zone *fz)
 		memcpy(&nfz, fz, sizeof(nfz));
 
 		write_seqlock_bh(&fz->fz_lock);
-		old_ht = fz->fz_hash;
-		nfz.fz_hash = ht;
+		old_ht = rcu_dereference_protected(fz->fz_hash, 1);
+		RCU_INIT_POINTER(nfz.fz_hash, ht);
 		nfz.fz_hashmask = new_hashmask;
 		nfz.fz_divisor = new_divisor;
 		fn_rebuild_zone(&nfz, old_ht, old_divisor);
@@ -236,7 +237,7 @@ fn_new_zone(struct fn_hash *table, int z)
 	seqlock_init(&fz->fz_lock);
 	fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
 	fz->fz_hashmask = fz->fz_divisor - 1;
-	fz->fz_hash = fz->fz_embedded_hash;
+	RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
 	fz->fz_order = z;
 	fz->fz_revorder = 32 - z;
 	fz->fz_mask = inet_make_mask(z);
@@ -272,7 +273,7 @@ int fib_table_lookup(struct fib_table *tb,
 	for (fz = rcu_dereference(t->fn_zone_list);
 	     fz != NULL;
 	     fz = rcu_dereference(fz->fz_next)) {
-		struct hlist_head __rcu *head;
+		struct hlist_head *head;
 		struct hlist_node *node;
 		struct fib_node *f;
 		__be32 k;
@@ -282,7 +283,7 @@ int fib_table_lookup(struct fib_table *tb,
 		seq = read_seqbegin(&fz->fz_lock);
 		k = fz_key(flp->fl4_dst, fz);
 
-		head = &fz->fz_hash[fn_hash(k, fz)];
+		head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
 		hlist_for_each_entry_rcu(f, node, head, fn_hash) {
 			if (f->fn_key != k)
 				continue;
@@ -311,6 +312,7 @@ void fib_table_select_default(struct fib_table *tb,
 	struct fib_info *last_resort;
 	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
 	struct fn_zone *fz = t->fn_zones[0];
+	struct hlist_head *head;
 
 	if (fz == NULL)
 		return;
@@ -320,7 +322,8 @@ void fib_table_select_default(struct fib_table *tb,
 	order = -1;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
+	head = rcu_dereference(fz->fz_hash);
+	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
 		struct fib_alias *fa;
 
 		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
@@ -374,7 +377,7 @@ out:
 /* Insert node F to FZ. */
 static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
 {
-	struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
 
 	hlist_add_head_rcu(&f->fn_hash, head);
 }
@@ -382,7 +385,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
 /* Return the node in FZ matching KEY. */
 static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
 {
-	struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
+	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
 	struct hlist_node *node;
 	struct fib_node *f;
 
@@ -662,7 +665,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 
 static int fn_flush_list(struct fn_zone *fz, int idx)
 {
-	struct hlist_head *head = &fz->fz_hash[idx];
+	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
 	struct hlist_node *node, *n;
 	struct fib_node *f;
 	int found = 0;
@@ -713,6 +716,24 @@ int fib_table_flush(struct fib_table *tb)
 	return found;
 }
 
+void fib_free_table(struct fib_table *tb)
+{
+	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
722 struct fn_zone *fz, *next;
723
724 next = table->fn_zone_list;
725 while (next != NULL) {
726 fz = next;
727 next = fz->fz_next;
728
729 if (fz->fz_hash != fz->fz_embedded_hash)
730 fz_hash_free(fz->fz_hash, fz->fz_divisor);
731
732 kfree(fz);
733 }
734
735 kfree(tb);
736}
716 737
717static inline int 738static inline int
718fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, 739fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -761,14 +782,15 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
761 struct fn_zone *fz) 782 struct fn_zone *fz)
762{ 783{
763 int h, s_h; 784 int h, s_h;
785 struct hlist_head *head = rcu_dereference(fz->fz_hash);
764 786
765 if (fz->fz_hash == NULL) 787 if (head == NULL)
766 return skb->len; 788 return skb->len;
767 s_h = cb->args[3]; 789 s_h = cb->args[3];
768 for (h = s_h; h < fz->fz_divisor; h++) { 790 for (h = s_h; h < fz->fz_divisor; h++) {
769 if (hlist_empty(&fz->fz_hash[h])) 791 if (hlist_empty(head + h))
770 continue; 792 continue;
771 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) { 793 if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
772 cb->args[3] = h; 794 cb->args[3] = h;
773 return -1; 795 return -1;
774 } 796 }
@@ -872,7 +894,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
872 if (!iter->zone->fz_nent) 894 if (!iter->zone->fz_nent)
873 continue; 895 continue;
874 896
875 iter->hash_head = iter->zone->fz_hash; 897 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
876 maxslot = iter->zone->fz_divisor; 898 maxslot = iter->zone->fz_divisor;
877 899
878 for (iter->bucket = 0; iter->bucket < maxslot; 900 for (iter->bucket = 0; iter->bucket < maxslot;
@@ -957,7 +979,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
957 goto out; 979 goto out;
958 980
959 iter->bucket = 0; 981 iter->bucket = 0;
960 iter->hash_head = iter->zone->fz_hash; 982 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
961 983
962 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { 984 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
963 list_for_each_entry(fa, &fn->fn_alias, fa_list) { 985 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
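
Throughout fib_hash.c the fz_hash table pointer is now __rcu-annotated, so sparse forces every access to declare its protection: rcu_dereference() on the lockless lookup path, rtnl_dereference()/rcu_dereference_protected() where the writer holds the relevant lock, and rcu_assign_pointer()/RCU_INIT_POINTER() for publication. A condensed sketch of the access modes, using a hypothetical zone-like struct rather than the real fn_zone:

    #include <linux/list.h>
    #include <linux/rcupdate.h>
    #include <linux/spinlock.h>

    struct zone {
        struct hlist_head __rcu *hash;   /* sparse checks every access */
        unsigned int mask;
    };
    static DEFINE_SPINLOCK(zone_lock);

    /* Reader: caller is inside rcu_read_lock(). */
    static struct hlist_head *zone_bucket(struct zone *z, unsigned int h)
    {
        return rcu_dereference(z->hash) + (h & z->mask);
    }

    /* Writer: caller holds zone_lock; swap in a freshly built table. */
    static struct hlist_head *zone_swap(struct zone *z, struct hlist_head *nt)
    {
        struct hlist_head *old = rcu_dereference_protected(z->hash,
                                        lockdep_is_held(&zone_lock));

        rcu_assign_pointer(z->hash, nt);   /* barrier, then publish */
        return old;                        /* free once readers drain */
    }

Note how fn_rebuild_zone() above passes 1 as the protection condition: the caller holds the zone seqlock, which lockdep cannot express, so safety is asserted rather than verified.
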
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a29edf2219c8..c079cc0ec651 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -47,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
47static inline void fib_result_assign(struct fib_result *res, 47static inline void fib_result_assign(struct fib_result *res,
48 struct fib_info *fi) 48 struct fib_info *fi)
49{ 49{
50 if (res->fi != NULL) 50 /* we used to play games with refcounts, but we now use RCU */
51 fib_info_put(res->fi);
52 res->fi = fi; 51 res->fi = fi;
53 if (fi != NULL)
54 atomic_inc(&fi->fib_clntref);
55} 52}
56 53
57#endif /* _FIB_LOOKUP_H */ 54#endif /* _FIB_LOOKUP_H */
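
fib_result_assign() can drop its get/put pair because fib_info lifetime is now RCU-managed, as the new comment says: the lookup runs inside an RCU read-side section, so the fib_info cannot be freed under the caller. A sketch of the general pattern this relies on (names are illustrative, not from the FIB code):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct item {
        int data;
        struct rcu_head rcu;
    };
    static struct item __rcu *slot;

    static void item_free_rcu(struct rcu_head *head)
    {
        kfree(container_of(head, struct item, rcu));
    }

    /* Writer: unpublish, then defer the free past all current readers. */
    static void item_retire(void)
    {
        struct item *it = rcu_dereference_protected(slot, 1);

        RCU_INIT_POINTER(slot, NULL);
        if (it)
            call_rcu(&it->rcu, item_free_rcu);
    }

    /* Reader: a borrowed pointer stays valid for the whole section,
     * so there is nothing to refcount. */
    static int item_peek(void)
    {
        struct item *it;
        int v = 0;

        rcu_read_lock();
        it = rcu_dereference(slot);
        if (it)
            v = it->data;
        rcu_read_unlock();
        return v;
    }
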
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b14450895102..0f280348e0fd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size)
365 if (size <= PAGE_SIZE) 365 if (size <= PAGE_SIZE)
366 return kzalloc(size, GFP_KERNEL); 366 return kzalloc(size, GFP_KERNEL);
367 else 367 else
368 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 368 return vzalloc(size);
369} 369}
370 370
371static void __tnode_vfree(struct work_struct *arg) 371static void __tnode_vfree(struct work_struct *arg)
@@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb)
1797 return found; 1797 return found;
1798} 1798}
1799 1799
1800void fib_free_table(struct fib_table *tb)
1801{
1802 kfree(tb);
1803}
1804
1800void fib_table_select_default(struct fib_table *tb, 1805void fib_table_select_default(struct fib_table *tb,
1801 const struct flowi *flp, 1806 const struct flowi *flp,
1802 struct fib_result *res) 1807 struct fib_result *res)
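
The tnode_alloc() change is a straight cleanup: vzalloc(), a recent addition at this point, is exactly __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL). The small/large zeroed-allocation idiom, sketched:

    #include <linux/mm.h>
    #include <linux/slab.h>
    #include <linux/vmalloc.h>

    /* Zeroed allocation: slab for small objects, vmalloc for anything
     * bigger than a page. Free with kfree() or vfree() to match. */
    static void *zalloc_any(size_t size)
    {
        if (size <= PAGE_SIZE)
            return kzalloc(size, GFP_KERNEL);
        return vzalloc(size);
    }
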
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index caea6885fdbd..c6933f2ea310 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -22,7 +22,7 @@
22#include <net/gre.h> 22#include <net/gre.h>
23 23
24 24
25static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly; 25static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
26static DEFINE_SPINLOCK(gre_proto_lock); 26static DEFINE_SPINLOCK(gre_proto_lock);
27 27
28int gre_add_protocol(const struct gre_protocol *proto, u8 version) 28int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -51,7 +51,8 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
51 goto err_out; 51 goto err_out;
52 52
53 spin_lock(&gre_proto_lock); 53 spin_lock(&gre_proto_lock);
54 if (gre_proto[version] != proto) 54 if (rcu_dereference_protected(gre_proto[version],
55 lockdep_is_held(&gre_proto_lock)) != proto)
55 goto err_out_unlock; 56 goto err_out_unlock;
56 rcu_assign_pointer(gre_proto[version], NULL); 57 rcu_assign_pointer(gre_proto[version], NULL);
57 spin_unlock(&gre_proto_lock); 58 spin_unlock(&gre_proto_lock);
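
With gre_proto[] marked __rcu, the equality check in gre_del_protocol() must go through rcu_dereference_protected() carrying a lockdep expression proving the spinlock is held. The usual unregister shape looks like the sketch below; the grace-period step is not visible in this hunk, but something must separate clearing the slot from freeing the handler (hypothetical struct and names):

    #include <linux/rcupdate.h>
    #include <linux/spinlock.h>

    struct handler { void (*rcv)(void); };
    static const struct handler __rcu *slots[2];
    static DEFINE_SPINLOCK(slots_lock);

    static int handler_del(const struct handler *h, unsigned int i)
    {
        spin_lock(&slots_lock);
        if (rcu_dereference_protected(slots[i],
                        lockdep_is_held(&slots_lock)) != h) {
            spin_unlock(&slots_lock);
            return -EINVAL;
        }
        rcu_assign_pointer(slots[i], NULL);
        spin_unlock(&slots_lock);
        synchronize_rcu();   /* readers still using h drain here */
        return 0;
    }
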
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 96bc7f9475a3..e5d1a44bcbdf 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -569,6 +569,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
569 /* No need to clone since we're just using its address. */ 569 /* No need to clone since we're just using its address. */
570 rt2 = rt; 570 rt2 = rt;
571 571
572 if (!fl.nl_u.ip4_u.saddr)
573 fl.nl_u.ip4_u.saddr = rt->rt_src;
574
572 err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); 575 err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
573 switch (err) { 576 switch (err) {
574 case 0: 577 case 0:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c7216..3c53c2d89e3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2306,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk)
2306 2306
2307 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); 2307 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
2308 (void) ip_mc_leave_src(sk, iml, in_dev); 2308 (void) ip_mc_leave_src(sk, iml, in_dev);
2309 if (in_dev != NULL) { 2309 if (in_dev != NULL)
2310 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2310 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
2311 in_dev_put(in_dev);
2312 }
2313 /* decrease mem now to avoid the memleak warning */ 2311 /* decrease mem now to avoid the memleak warning */
2314 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 2312 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
2315 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 2313 call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ba8042665849..2ada17129fce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
490{ 490{
491 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 491 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
492 492
493 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 493 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
494 struct inet_diag_entry entry; 494 struct inet_diag_entry entry;
495 struct rtattr *bc = (struct rtattr *)(r + 1); 495 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
496 sizeof(*r),
497 INET_DIAG_REQ_BYTECODE);
496 struct inet_sock *inet = inet_sk(sk); 498 struct inet_sock *inet = inet_sk(sk);
497 499
498 entry.family = sk->sk_family; 500 entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
512 entry.dport = ntohs(inet->inet_dport); 514 entry.dport = ntohs(inet->inet_dport);
513 entry.userlocks = sk->sk_userlocks; 515 entry.userlocks = sk->sk_userlocks;
514 516
515 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 517 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
516 return 0; 518 return 0;
517 } 519 }
518 520
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
527{ 529{
528 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 530 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
529 531
530 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 532 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
531 struct inet_diag_entry entry; 533 struct inet_diag_entry entry;
532 struct rtattr *bc = (struct rtattr *)(r + 1); 534 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
535 sizeof(*r),
536 INET_DIAG_REQ_BYTECODE);
533 537
534 entry.family = tw->tw_family; 538 entry.family = tw->tw_family;
535#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 539#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
548 entry.dport = ntohs(tw->tw_dport); 552 entry.dport = ntohs(tw->tw_dport);
549 entry.userlocks = 0; 553 entry.userlocks = 0;
550 554
551 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 555 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
552 return 0; 556 return 0;
553 } 557 }
554 558
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
618 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 622 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
619 struct inet_connection_sock *icsk = inet_csk(sk); 623 struct inet_connection_sock *icsk = inet_csk(sk);
620 struct listen_sock *lopt; 624 struct listen_sock *lopt;
621 struct rtattr *bc = NULL; 625 const struct nlattr *bc = NULL;
622 struct inet_sock *inet = inet_sk(sk); 626 struct inet_sock *inet = inet_sk(sk);
623 int j, s_j; 627 int j, s_j;
624 int reqnum, s_reqnum; 628 int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
638 if (!lopt || !lopt->qlen) 642 if (!lopt || !lopt->qlen)
639 goto out; 643 goto out;
640 644
641 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 645 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
642 bc = (struct rtattr *)(r + 1); 646 bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
647 INET_DIAG_REQ_BYTECODE);
643 entry.sport = inet->inet_num; 648 entry.sport = inet->inet_num;
644 entry.userlocks = sk->sk_userlocks; 649 entry.userlocks = sk->sk_userlocks;
645 } 650 }
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
672 &ireq->rmt_addr; 677 &ireq->rmt_addr;
673 entry.dport = ntohs(ireq->rmt_port); 678 entry.dport = ntohs(ireq->rmt_port);
674 679
675 if (!inet_diag_bc_run(RTA_DATA(bc), 680 if (!inet_diag_bc_run(nla_data(bc),
676 RTA_PAYLOAD(bc), &entry)) 681 nla_len(bc), &entry))
677 continue; 682 continue;
678 } 683 }
679 684
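
The raw (struct rtattr *)(r + 1) arithmetic and the magic "4 +" length check give way to the generic attribute helpers: nlmsg_attrlen() reports whether anything follows the fixed request header, and nlmsg_find_attr() locates an attribute by type, after which nla_data()/nla_len() replace RTA_DATA()/RTA_PAYLOAD(). Sketched usage (the attribute type is the one named in the patch):

    #include <linux/inet_diag.h>
    #include <net/netlink.h>

    /* Locate the bytecode filter trailing a request whose fixed header
     * is hdrlen bytes, or return NULL when no attributes are present. */
    static const struct nlattr *diag_bytecode(struct nlmsghdr *nlh,
                                              int hdrlen)
    {
        if (!nlmsg_attrlen(nlh, hdrlen))
            return NULL;
        return nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
    }
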
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1b344f30b463..3c0369a3a663 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)
133 } 133 }
134 } 134 }
135 } 135 }
136 sk_add_bind_node(child, &tb->owners); 136 inet_bind_hash(child, tb, port);
137 inet_csk(child)->icsk_bind_hash = tb;
138 spin_unlock(&head->lock); 137 spin_unlock(&head->lock);
139 138
140 return 0; 139 return 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
72#define node_height(x) x->avl_height 72#define node_height(x) x->avl_height
73 73
74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) 74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
75#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
75static const struct inet_peer peer_fake_node = { 76static const struct inet_peer peer_fake_node = {
76 .avl_left = peer_avl_empty, 77 .avl_left = peer_avl_empty_rcu,
77 .avl_right = peer_avl_empty, 78 .avl_right = peer_avl_empty_rcu,
78 .avl_height = 0 79 .avl_height = 0
79}; 80};
80 81
81static struct { 82static struct {
82 struct inet_peer *root; 83 struct inet_peer __rcu *root;
83 spinlock_t lock; 84 spinlock_t lock;
84 int total; 85 int total;
85} peers = { 86} peers = {
86 .root = peer_avl_empty, 87 .root = peer_avl_empty_rcu,
87 .lock = __SPIN_LOCK_UNLOCKED(peers.lock), 88 .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
88 .total = 0, 89 .total = 0,
89}; 90};
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
156 */ 157 */
157#define lookup(_daddr, _stack) \ 158#define lookup(_daddr, _stack) \
158({ \ 159({ \
159 struct inet_peer *u, **v; \ 160 struct inet_peer *u; \
161 struct inet_peer __rcu **v; \
160 \ 162 \
161 stackptr = _stack; \ 163 stackptr = _stack; \
162 *stackptr++ = &peers.root; \ 164 *stackptr++ = &peers.root; \
163 for (u = peers.root; u != peer_avl_empty; ) { \ 165 for (u = rcu_dereference_protected(peers.root, \
166 lockdep_is_held(&peers.lock)); \
167 u != peer_avl_empty; ) { \
164 if (_daddr == u->v4daddr) \ 168 if (_daddr == u->v4daddr) \
165 break; \ 169 break; \
166 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ 170 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
168 else \ 172 else \
169 v = &u->avl_right; \ 173 v = &u->avl_right; \
170 *stackptr++ = v; \ 174 *stackptr++ = v; \
171 u = *v; \ 175 u = rcu_dereference_protected(*v, \
176 lockdep_is_held(&peers.lock)); \
172 } \ 177 } \
173 u; \ 178 u; \
174}) 179})
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
209/* Called with local BH disabled and the pool lock held. */ 214/* Called with local BH disabled and the pool lock held. */
210#define lookup_rightempty(start) \ 215#define lookup_rightempty(start) \
211({ \ 216({ \
212 struct inet_peer *u, **v; \ 217 struct inet_peer *u; \
218 struct inet_peer __rcu **v; \
213 *stackptr++ = &start->avl_left; \ 219 *stackptr++ = &start->avl_left; \
214 v = &start->avl_left; \ 220 v = &start->avl_left; \
215 for (u = *v; u->avl_right != peer_avl_empty; ) { \ 221 for (u = rcu_dereference_protected(*v, \
222 lockdep_is_held(&peers.lock)); \
223 u->avl_right != peer_avl_empty_rcu; ) { \
216 v = &u->avl_right; \ 224 v = &u->avl_right; \
217 *stackptr++ = v; \ 225 *stackptr++ = v; \
218 u = *v; \ 226 u = rcu_dereference_protected(*v, \
227 lockdep_is_held(&peers.lock)); \
219 } \ 228 } \
220 u; \ 229 u; \
221}) 230})
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
224 * Variable names are the proof of operation correctness. 233 * Variable names are the proof of operation correctness.
225 * Look into mm/map_avl.c for more detail description of the ideas. 234 * Look into mm/map_avl.c for more detail description of the ideas.
226 */ 235 */
227static void peer_avl_rebalance(struct inet_peer **stack[], 236static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
228 struct inet_peer ***stackend) 237 struct inet_peer __rcu ***stackend)
229{ 238{
230 struct inet_peer **nodep, *node, *l, *r; 239 struct inet_peer __rcu **nodep;
240 struct inet_peer *node, *l, *r;
231 int lh, rh; 241 int lh, rh;
232 242
233 while (stackend > stack) { 243 while (stackend > stack) {
234 nodep = *--stackend; 244 nodep = *--stackend;
235 node = *nodep; 245 node = rcu_dereference_protected(*nodep,
236 l = node->avl_left; 246 lockdep_is_held(&peers.lock));
237 r = node->avl_right; 247 l = rcu_dereference_protected(node->avl_left,
248 lockdep_is_held(&peers.lock));
249 r = rcu_dereference_protected(node->avl_right,
250 lockdep_is_held(&peers.lock));
238 lh = node_height(l); 251 lh = node_height(l);
239 rh = node_height(r); 252 rh = node_height(r);
240 if (lh > rh + 1) { /* l: RH+2 */ 253 if (lh > rh + 1) { /* l: RH+2 */
241 struct inet_peer *ll, *lr, *lrl, *lrr; 254 struct inet_peer *ll, *lr, *lrl, *lrr;
242 int lrh; 255 int lrh;
243 ll = l->avl_left; 256 ll = rcu_dereference_protected(l->avl_left,
244 lr = l->avl_right; 257 lockdep_is_held(&peers.lock));
258 lr = rcu_dereference_protected(l->avl_right,
259 lockdep_is_held(&peers.lock));
245 lrh = node_height(lr); 260 lrh = node_height(lr);
246 if (lrh <= node_height(ll)) { /* ll: RH+1 */ 261 if (lrh <= node_height(ll)) { /* ll: RH+1 */
247 node->avl_left = lr; /* lr: RH or RH+1 */ 262 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
248 node->avl_right = r; /* r: RH */ 263 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
249 node->avl_height = lrh + 1; /* RH+1 or RH+2 */ 264 node->avl_height = lrh + 1; /* RH+1 or RH+2 */
250 l->avl_left = ll; /* ll: RH+1 */ 265 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
251 l->avl_right = node; /* node: RH+1 or RH+2 */ 266 RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
252 l->avl_height = node->avl_height + 1; 267 l->avl_height = node->avl_height + 1;
253 *nodep = l; 268 RCU_INIT_POINTER(*nodep, l);
254 } else { /* ll: RH, lr: RH+1 */ 269 } else { /* ll: RH, lr: RH+1 */
255 lrl = lr->avl_left; /* lrl: RH or RH-1 */ 270 lrl = rcu_dereference_protected(lr->avl_left,
256 lrr = lr->avl_right; /* lrr: RH or RH-1 */ 271 lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
257 node->avl_left = lrr; /* lrr: RH or RH-1 */ 272 lrr = rcu_dereference_protected(lr->avl_right,
258 node->avl_right = r; /* r: RH */ 273 lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
274 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
275 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
259 node->avl_height = rh + 1; /* node: RH+1 */ 276 node->avl_height = rh + 1; /* node: RH+1 */
260 l->avl_left = ll; /* ll: RH */ 277 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
261 l->avl_right = lrl; /* lrl: RH or RH-1 */ 278 RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
262 l->avl_height = rh + 1; /* l: RH+1 */ 279 l->avl_height = rh + 1; /* l: RH+1 */
263 lr->avl_left = l; /* l: RH+1 */ 280 RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
264 lr->avl_right = node; /* node: RH+1 */ 281 RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
265 lr->avl_height = rh + 2; 282 lr->avl_height = rh + 2;
266 *nodep = lr; 283 RCU_INIT_POINTER(*nodep, lr);
267 } 284 }
268 } else if (rh > lh + 1) { /* r: LH+2 */ 285 } else if (rh > lh + 1) { /* r: LH+2 */
269 struct inet_peer *rr, *rl, *rlr, *rll; 286 struct inet_peer *rr, *rl, *rlr, *rll;
270 int rlh; 287 int rlh;
271 rr = r->avl_right; 288 rr = rcu_dereference_protected(r->avl_right,
272 rl = r->avl_left; 289 lockdep_is_held(&peers.lock));
290 rl = rcu_dereference_protected(r->avl_left,
291 lockdep_is_held(&peers.lock));
273 rlh = node_height(rl); 292 rlh = node_height(rl);
274 if (rlh <= node_height(rr)) { /* rr: LH+1 */ 293 if (rlh <= node_height(rr)) { /* rr: LH+1 */
275 node->avl_right = rl; /* rl: LH or LH+1 */ 294 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
276 node->avl_left = l; /* l: LH */ 295 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
277 node->avl_height = rlh + 1; /* LH+1 or LH+2 */ 296 node->avl_height = rlh + 1; /* LH+1 or LH+2 */
278 r->avl_right = rr; /* rr: LH+1 */ 297 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
279 r->avl_left = node; /* node: LH+1 or LH+2 */ 298 RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
280 r->avl_height = node->avl_height + 1; 299 r->avl_height = node->avl_height + 1;
281 *nodep = r; 300 RCU_INIT_POINTER(*nodep, r);
282 } else { /* rr: RH, rl: RH+1 */ 301 } else { /* rr: RH, rl: RH+1 */
283 rlr = rl->avl_right; /* rlr: LH or LH-1 */ 302 rlr = rcu_dereference_protected(rl->avl_right,
284 rll = rl->avl_left; /* rll: LH or LH-1 */ 303 lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
285 node->avl_right = rll; /* rll: LH or LH-1 */ 304 rll = rcu_dereference_protected(rl->avl_left,
286 node->avl_left = l; /* l: LH */ 305 lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
306 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
307 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
287 node->avl_height = lh + 1; /* node: LH+1 */ 308 node->avl_height = lh + 1; /* node: LH+1 */
288 r->avl_right = rr; /* rr: LH */ 309 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
289 r->avl_left = rlr; /* rlr: LH or LH-1 */ 310 RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
290 r->avl_height = lh + 1; /* r: LH+1 */ 311 r->avl_height = lh + 1; /* r: LH+1 */
291 rl->avl_right = r; /* r: LH+1 */ 312 RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
292 rl->avl_left = node; /* node: LH+1 */ 313 RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
293 rl->avl_height = lh + 2; 314 rl->avl_height = lh + 2;
294 *nodep = rl; 315 RCU_INIT_POINTER(*nodep, rl);
295 } 316 }
296 } else { 317 } else {
297 node->avl_height = (lh > rh ? lh : rh) + 1; 318 node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
303#define link_to_pool(n) \ 324#define link_to_pool(n) \
304do { \ 325do { \
305 n->avl_height = 1; \ 326 n->avl_height = 1; \
306 n->avl_left = peer_avl_empty; \ 327 n->avl_left = peer_avl_empty_rcu; \
307 n->avl_right = peer_avl_empty; \ 328 n->avl_right = peer_avl_empty_rcu; \
308 smp_wmb(); /* lockless readers can catch us now */ \ 329 /* lockless readers can catch us now */ \
309 **--stackptr = n; \ 330 rcu_assign_pointer(**--stackptr, n); \
310 peer_avl_rebalance(stack, stackptr); \ 331 peer_avl_rebalance(stack, stackptr); \
311} while (0) 332} while (0)
312 333
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
330 * We use refcnt=-1 to alert lockless readers this entry is deleted. 351 * We use refcnt=-1 to alert lockless readers this entry is deleted.
331 */ 352 */
332 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { 353 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
333 struct inet_peer **stack[PEER_MAXDEPTH]; 354 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
334 struct inet_peer ***stackptr, ***delp; 355 struct inet_peer __rcu ***stackptr, ***delp;
335 if (lookup(p->v4daddr, stack) != p) 356 if (lookup(p->v4daddr, stack) != p)
336 BUG(); 357 BUG();
337 delp = stackptr - 1; /* *delp[0] == p */ 358 delp = stackptr - 1; /* *delp[0] == p */
338 if (p->avl_left == peer_avl_empty) { 359 if (p->avl_left == peer_avl_empty_rcu) {
339 *delp[0] = p->avl_right; 360 *delp[0] = p->avl_right;
340 --stackptr; 361 --stackptr;
341 } else { 362 } else {
342 /* look for a node to insert instead of p */ 363 /* look for a node to insert instead of p */
343 struct inet_peer *t; 364 struct inet_peer *t;
344 t = lookup_rightempty(p); 365 t = lookup_rightempty(p);
345 BUG_ON(*stackptr[-1] != t); 366 BUG_ON(rcu_dereference_protected(*stackptr[-1],
367 lockdep_is_held(&peers.lock)) != t);
346 **--stackptr = t->avl_left; 368 **--stackptr = t->avl_left;
347 /* t is removed, t->v4daddr > x->v4daddr for any 369 /* t is removed, t->v4daddr > x->v4daddr for any
348 * x in p->avl_left subtree. 370 * x in p->avl_left subtree.
349 * Put t in the old place of p. */ 371 * Put t in the old place of p. */
350 *delp[0] = t; 372 RCU_INIT_POINTER(*delp[0], t);
351 t->avl_left = p->avl_left; 373 t->avl_left = p->avl_left;
352 t->avl_right = p->avl_right; 374 t->avl_right = p->avl_right;
353 t->avl_height = p->avl_height; 375 t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
414struct inet_peer *inet_getpeer(__be32 daddr, int create) 436struct inet_peer *inet_getpeer(__be32 daddr, int create)
415{ 437{
416 struct inet_peer *p; 438 struct inet_peer *p;
417 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; 439 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
418 440
419 /* Look up for the address quickly, lockless. 441 /* Look up for the address quickly, lockless.
420 * Because of a concurrent writer, we might not find an existing entry. 442 * Because of a concurrent writer, we might not find an existing entry.
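
A detail worth noticing in the rebalance hunk above: every store uses RCU_INIT_POINTER(), not rcu_assign_pointer(). Rotations only relink nodes that lockless readers can already see in a fully initialized state, so the write barrier rcu_assign_pointer() implies buys nothing; link_to_pool(), where a brand-new node first becomes visible, keeps rcu_assign_pointer(). The distinction, sketched:

    #include <linux/rcupdate.h>

    struct node {
        int key;
        struct node __rcu *left;
    };
    static struct node __rcu *root;

    static void publish_new(struct node *n)
    {
        n->key = 42;                 /* fully initialize first ...      */
        rcu_assign_pointer(root, n); /* ... then publish with a barrier */
    }

    static void relink_existing(struct node *parent, struct node *child)
    {
        /* child is already reader-visible and initialized, so the
         * barrier-free form is sufficient -- and documents intent. */
        RCU_INIT_POINTER(parent->left, child);
    }
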
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d0ffcbe369b7..70ff77f02eee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1072,6 +1072,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1072 break; 1072 break;
1073 } 1073 }
1074 ipgre_tunnel_unlink(ign, t); 1074 ipgre_tunnel_unlink(ign, t);
1075 synchronize_net();
1075 t->parms.iph.saddr = p.iph.saddr; 1076 t->parms.iph.saddr = p.iph.saddr;
1076 t->parms.iph.daddr = p.iph.daddr; 1077 t->parms.iph.daddr = p.iph.daddr;
1077 t->parms.i_key = p.i_key; 1078 t->parms.i_key = p.i_key;
@@ -1324,7 +1325,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1324{ 1325{
1325 struct ip_tunnel *tunnel = netdev_priv(dev); 1326 struct ip_tunnel *tunnel = netdev_priv(dev);
1326 struct iphdr *iph = &tunnel->parms.iph; 1327 struct iphdr *iph = &tunnel->parms.iph;
1327 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1328 1328
1329 tunnel->dev = dev; 1329 tunnel->dev = dev;
1330 strcpy(tunnel->parms.name, dev->name); 1330 strcpy(tunnel->parms.name, dev->name);
@@ -1335,7 +1335,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1335 tunnel->hlen = sizeof(struct iphdr) + 4; 1335 tunnel->hlen = sizeof(struct iphdr) + 4;
1336 1336
1337 dev_hold(dev); 1337 dev_hold(dev);
1338 rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
1339} 1338}
1340 1339
1341 1340
@@ -1382,10 +1381,12 @@ static int __net_init ipgre_init_net(struct net *net)
1382 if ((err = register_netdev(ign->fb_tunnel_dev))) 1381 if ((err = register_netdev(ign->fb_tunnel_dev)))
1383 goto err_reg_dev; 1382 goto err_reg_dev;
1384 1383
1384 rcu_assign_pointer(ign->tunnels_wc[0],
1385 netdev_priv(ign->fb_tunnel_dev));
1385 return 0; 1386 return 0;
1386 1387
1387err_reg_dev: 1388err_reg_dev:
1388 free_netdev(ign->fb_tunnel_dev); 1389 ipgre_dev_free(ign->fb_tunnel_dev);
1389err_alloc_dev: 1390err_alloc_dev:
1390 return err; 1391 return err;
1391} 1392}
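
The ioctl path unlinks the tunnel from its hash and then rewrites t->parms -- the very fields lockless receivers hash and match on. Without a grace period, a receiver could still be walking the old chain while the keys change under it; the new synchronize_net() closes that window. (The ipip hunk below makes the identical fix.) The re-keying shape, sketched with hypothetical helpers standing in for ipgre_tunnel_unlink()/ipgre_tunnel_link():

    #include <linux/netdevice.h>
    #include <linux/types.h>

    struct tun {
        struct tun __rcu *next;
        u32 key;
    };

    static void tun_unhash(struct tun *t);   /* hypothetical */
    static void tun_hash(struct tun *t);     /* hypothetical */

    static void tun_rekey(struct tun *t, u32 newkey)
    {
        tun_unhash(t);       /* drop from the RCU hash */
        synchronize_net();   /* wait out lockless receivers */
        t->key = newkey;     /* safe: nobody can reach t now */
        tun_hash(t);         /* reinsert under the new key */
    }
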
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64b70ad162e3..3948c86e59ca 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -238,7 +238,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
238 but receiver should be enough clever f.e. to forward mtrace requests, 238 but receiver should be enough clever f.e. to forward mtrace requests,
239 sent to multicast group to reach destination designated router. 239 sent to multicast group to reach destination designated router.
240 */ 240 */
241struct ip_ra_chain *ip_ra_chain; 241struct ip_ra_chain __rcu *ip_ra_chain;
242static DEFINE_SPINLOCK(ip_ra_lock); 242static DEFINE_SPINLOCK(ip_ra_lock);
243 243
244 244
@@ -253,7 +253,8 @@ static void ip_ra_destroy_rcu(struct rcu_head *head)
253int ip_ra_control(struct sock *sk, unsigned char on, 253int ip_ra_control(struct sock *sk, unsigned char on,
254 void (*destructor)(struct sock *)) 254 void (*destructor)(struct sock *))
255{ 255{
256 struct ip_ra_chain *ra, *new_ra, **rap; 256 struct ip_ra_chain *ra, *new_ra;
257 struct ip_ra_chain __rcu **rap;
257 258
258 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) 259 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
259 return -EINVAL; 260 return -EINVAL;
@@ -261,7 +262,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
261 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 262 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
262 263
263 spin_lock_bh(&ip_ra_lock); 264 spin_lock_bh(&ip_ra_lock);
264 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { 265 for (rap = &ip_ra_chain;
266 (ra = rcu_dereference_protected(*rap,
267 lockdep_is_held(&ip_ra_lock))) != NULL;
268 rap = &ra->next) {
265 if (ra->sk == sk) { 269 if (ra->sk == sk) {
266 if (on) { 270 if (on) {
267 spin_unlock_bh(&ip_ra_lock); 271 spin_unlock_bh(&ip_ra_lock);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e9b816e6cd73..cd300aaee78f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -676,6 +676,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
676 } 676 }
677 t = netdev_priv(dev); 677 t = netdev_priv(dev);
678 ipip_tunnel_unlink(ipn, t); 678 ipip_tunnel_unlink(ipn, t);
679 synchronize_net();
679 t->parms.iph.saddr = p.iph.saddr; 680 t->parms.iph.saddr = p.iph.saddr;
680 t->parms.iph.daddr = p.iph.daddr; 681 t->parms.iph.daddr = p.iph.daddr;
681 memcpy(dev->dev_addr, &p.iph.saddr, 4); 682 memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3cad2591ace0..3fac340a28d5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
927 private = &tmp; 927 private = &tmp;
928 } 928 }
929#endif 929#endif
930 memset(&info, 0, sizeof(info));
930 info.valid_hooks = t->valid_hooks; 931 info.valid_hooks = t->valid_hooks;
931 memcpy(info.hook_entry, private->hook_entry, 932 memcpy(info.hook_entry, private->hook_entry,
932 sizeof(info.hook_entry)); 933 sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d31b007a6d80..a846d633b3b6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
1124 private = &tmp; 1124 private = &tmp;
1125 } 1125 }
1126#endif 1126#endif
1127 memset(&info, 0, sizeof(info));
1127 info.valid_hooks = t->valid_hooks; 1128 info.valid_hooks = t->valid_hooks;
1128 memcpy(info.hook_entry, private->hook_entry, 1129 memcpy(info.hook_entry, private->hook_entry,
1129 sizeof(info.hook_entry)); 1130 sizeof(info.hook_entry));
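
The arp_tables and ip_tables hunks are the same one-line security fix: get_info() builds its reply in a stack struct and copies the whole thing to userspace, so any field the code path does not write -- including compiler padding -- leaks old kernel stack contents. Zeroing first is the standard remedy; sketched with a stand-in struct (the real one is struct ipt_getinfo):

    #include <linux/string.h>
    #include <linux/uaccess.h>

    struct getinfo {                  /* stand-in, not the real layout */
        unsigned int valid_hooks;
        unsigned int hook_entry[5];
        unsigned int underflow[5];
    };

    static int reply_info(void __user *user, unsigned int hooks)
    {
        struct getinfo info;

        memset(&info, 0, sizeof(info));   /* no stale stack bytes */
        info.valid_hooks = hooks;
        /* ... fill only what this path knows ... */
        return copy_to_user(user, &info, sizeof(info)) ? -EFAULT : 0;
    }
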
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 295c97431e43..c04787ce1a71 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,26 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
47 return rcu_dereference(nf_nat_protos[protonum]); 47 return rcu_dereference(nf_nat_protos[protonum]);
48} 48}
49 49
50static const struct nf_nat_protocol *
51nf_nat_proto_find_get(u_int8_t protonum)
52{
53 const struct nf_nat_protocol *p;
54
55 rcu_read_lock();
56 p = __nf_nat_proto_find(protonum);
57 if (!try_module_get(p->me))
58 p = &nf_nat_unknown_protocol;
59 rcu_read_unlock();
60
61 return p;
62}
63
64static void
65nf_nat_proto_put(const struct nf_nat_protocol *p)
66{
67 module_put(p->me);
68}
69
70/* We keep an extra hash for each conntrack, for fast searching. */ 50/* We keep an extra hash for each conntrack, for fast searching. */
71static inline unsigned int 51static inline unsigned int
72hash_by_src(const struct net *net, u16 zone, 52hash_by_src(const struct net *net, u16 zone,
@@ -588,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
588#include <linux/netfilter/nfnetlink.h> 568#include <linux/netfilter/nfnetlink.h>
589#include <linux/netfilter/nfnetlink_conntrack.h> 569#include <linux/netfilter/nfnetlink_conntrack.h>
590 570
571static const struct nf_nat_protocol *
572nf_nat_proto_find_get(u_int8_t protonum)
573{
574 const struct nf_nat_protocol *p;
575
576 rcu_read_lock();
577 p = __nf_nat_proto_find(protonum);
578 if (!try_module_get(p->me))
579 p = &nf_nat_unknown_protocol;
580 rcu_read_unlock();
581
582 return p;
583}
584
585static void
586nf_nat_proto_put(const struct nf_nat_protocol *p)
587{
588 module_put(p->me);
589}
590
591static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { 591static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
592 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, 592 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
593 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, 593 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7cb..b14ec7d03b6e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
59 local_bh_enable(); 59 local_bh_enable();
60 60
61 socket_seq_show(seq); 61 socket_seq_show(seq);
62 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 62 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
63 sock_prot_inuse_get(net, &tcp_prot), orphans, 63 sock_prot_inuse_get(net, &tcp_prot), orphans,
64 tcp_death_row.tw_count, sockets, 64 tcp_death_row.tw_count, sockets,
65 atomic_read(&tcp_memory_allocated)); 65 atomic_long_read(&tcp_memory_allocated));
66 seq_printf(seq, "UDP: inuse %d mem %d\n", 66 seq_printf(seq, "UDP: inuse %d mem %ld\n",
67 sock_prot_inuse_get(net, &udp_prot), 67 sock_prot_inuse_get(net, &udp_prot),
68 atomic_read(&udp_memory_allocated)); 68 atomic_long_read(&udp_memory_allocated));
69 seq_printf(seq, "UDPLITE: inuse %d\n", 69 seq_printf(seq, "UDPLITE: inuse %d\n",
70 sock_prot_inuse_get(net, &udplite_prot)); 70 sock_prot_inuse_get(net, &udplite_prot));
71 seq_printf(seq, "RAW: inuse %d\n", 71 seq_printf(seq, "RAW: inuse %d\n",
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = {
253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), 253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), 254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
255 SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), 255 SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
256 SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
256 SNMP_MIB_SENTINEL 257 SNMP_MIB_SENTINEL
257}; 258};
258 259
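
The %d to %ld changes track a type change made elsewhere in this series: tcp_memory_allocated and udp_memory_allocated become atomic_long_t (see the tcp.c hunk below), so the page counters stop wrapping at 2^31 on 64-bit machines. The counter idiom, sketched:

    #include <linux/atomic.h>
    #include <linux/seq_file.h>

    static atomic_long_t pages_allocated;   /* was atomic_t */

    static void mem_charge(long pages)
    {
        atomic_long_add(pages, &pages_allocated);
    }

    static void mem_show(struct seq_file *seq)
    {
        seq_printf(seq, "mem %ld\n",
                   atomic_long_read(&pages_allocated));
    }
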
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 65699c24411c..9ae5c01cd0b2 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <net/protocol.h> 29#include <net/protocol.h>
30 30
31const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly; 31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
32 32
33/* 33/*
34 * Add a protocol handler to the hash tables 34 * Add a protocol handler to the hash tables
@@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
38{ 38{
39 int hash = protocol & (MAX_INET_PROTOS - 1); 39 int hash = protocol & (MAX_INET_PROTOS - 1);
40 40
41 return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1; 41 return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
42 NULL, prot) ? 0 : -1;
42} 43}
43EXPORT_SYMBOL(inet_add_protocol); 44EXPORT_SYMBOL(inet_add_protocol);
44 45
@@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
50{ 51{
51 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 52 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
52 53
53 ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1; 54 ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
55 prot, NULL) == prot) ? 0 : -1;
54 56
55 synchronize_net(); 57 synchronize_net();
56 58
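
inet_add_protocol() never took a lock; it claims a slot with cmpxchg(), which atomically installs prot only if the slot is still NULL. Now that inet_protos[] carries the __rcu annotation, the cast to a plain pointer type is there to keep sparse from complaining about handing an annotated pointer to cmpxchg(). Lockless slot registration, sketched with a hypothetical ops struct:

    #include <linux/atomic.h>
    #include <linux/rcupdate.h>

    struct pkt_ops { void (*rcv)(void); };   /* hypothetical */
    static const struct pkt_ops __rcu *slots[16];

    static int slot_register(const struct pkt_ops *p, unsigned int i)
    {
        /* succeeds iff slot i was empty; the cast placates sparse */
        return !cmpxchg((const struct pkt_ops **)&slots[i], NULL, p) ?
               0 : -1;
    }
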
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6cb2bfcd8e1..987bf9adb318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = {
198 */ 198 */
199 199
200struct rt_hash_bucket { 200struct rt_hash_bucket {
201 struct rtable *chain; 201 struct rtable __rcu *chain;
202}; 202};
203 203
204#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ 204#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
280 struct rtable *r = NULL; 280 struct rtable *r = NULL;
281 281
282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
283 if (!rt_hash_table[st->bucket].chain) 283 if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
284 continue; 284 continue;
285 rcu_read_lock_bh(); 285 rcu_read_lock_bh();
286 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 286 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
300{ 300{
301 struct rt_cache_iter_state *st = seq->private; 301 struct rt_cache_iter_state *st = seq->private;
302 302
303 r = r->dst.rt_next; 303 r = rcu_dereference_bh(r->dst.rt_next);
304 while (!r) { 304 while (!r) {
305 rcu_read_unlock_bh(); 305 rcu_read_unlock_bh();
306 do { 306 do {
307 if (--st->bucket < 0) 307 if (--st->bucket < 0)
308 return NULL; 308 return NULL;
309 } while (!rt_hash_table[st->bucket].chain); 309 } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
310 rcu_read_lock_bh(); 310 rcu_read_lock_bh();
311 r = rt_hash_table[st->bucket].chain; 311 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
312 } 312 }
313 return rcu_dereference_bh(r); 313 return r;
314} 314}
315 315
316static struct rtable *rt_cache_get_next(struct seq_file *seq, 316static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -721,19 +721,23 @@ static void rt_do_flush(int process_context)
721 for (i = 0; i <= rt_hash_mask; i++) { 721 for (i = 0; i <= rt_hash_mask; i++) {
722 if (process_context && need_resched()) 722 if (process_context && need_resched())
723 cond_resched(); 723 cond_resched();
724 rth = rt_hash_table[i].chain; 724 rth = rcu_dereference_raw(rt_hash_table[i].chain);
725 if (!rth) 725 if (!rth)
726 continue; 726 continue;
727 727
728 spin_lock_bh(rt_hash_lock_addr(i)); 728 spin_lock_bh(rt_hash_lock_addr(i));
729#ifdef CONFIG_NET_NS 729#ifdef CONFIG_NET_NS
730 { 730 {
731 struct rtable ** prev, * p; 731 struct rtable __rcu **prev;
732 struct rtable *p;
732 733
733 rth = rt_hash_table[i].chain; 734 rth = rcu_dereference_protected(rt_hash_table[i].chain,
735 lockdep_is_held(rt_hash_lock_addr(i)));
734 736
735 /* defer releasing the head of the list after spin_unlock */ 737 /* defer releasing the head of the list after spin_unlock */
736 for (tail = rth; tail; tail = tail->dst.rt_next) 738 for (tail = rth; tail;
739 tail = rcu_dereference_protected(tail->dst.rt_next,
740 lockdep_is_held(rt_hash_lock_addr(i))))
737 if (!rt_is_expired(tail)) 741 if (!rt_is_expired(tail))
738 break; 742 break;
739 if (rth != tail) 743 if (rth != tail)
@@ -741,8 +745,12 @@ static void rt_do_flush(int process_context)
741 745
742 /* call rt_free on entries after the tail requiring flush */ 746 /* call rt_free on entries after the tail requiring flush */
743 prev = &rt_hash_table[i].chain; 747 prev = &rt_hash_table[i].chain;
744 for (p = *prev; p; p = next) { 748 for (p = rcu_dereference_protected(*prev,
745 next = p->dst.rt_next; 749 lockdep_is_held(rt_hash_lock_addr(i)));
750 p != NULL;
751 p = next) {
752 next = rcu_dereference_protected(p->dst.rt_next,
753 lockdep_is_held(rt_hash_lock_addr(i)));
746 if (!rt_is_expired(p)) { 754 if (!rt_is_expired(p)) {
747 prev = &p->dst.rt_next; 755 prev = &p->dst.rt_next;
748 } else { 756 } else {
@@ -752,14 +760,15 @@ static void rt_do_flush(int process_context)
752 } 760 }
753 } 761 }
754#else 762#else
755 rth = rt_hash_table[i].chain; 763 rth = rcu_dereference_protected(rt_hash_table[i].chain,
756 rt_hash_table[i].chain = NULL; 764 lockdep_is_held(rt_hash_lock_addr(i)));
765 rcu_assign_pointer(rt_hash_table[i].chain, NULL);
757 tail = NULL; 766 tail = NULL;
758#endif 767#endif
759 spin_unlock_bh(rt_hash_lock_addr(i)); 768 spin_unlock_bh(rt_hash_lock_addr(i));
760 769
761 for (; rth != tail; rth = next) { 770 for (; rth != tail; rth = next) {
762 next = rth->dst.rt_next; 771 next = rcu_dereference_protected(rth->dst.rt_next, 1);
763 rt_free(rth); 772 rt_free(rth);
764 } 773 }
765 } 774 }
@@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
790 while (aux != rth) { 799 while (aux != rth) {
791 if (compare_hash_inputs(&aux->fl, &rth->fl)) 800 if (compare_hash_inputs(&aux->fl, &rth->fl))
792 return 0; 801 return 0;
793 aux = aux->dst.rt_next; 802 aux = rcu_dereference_protected(aux->dst.rt_next, 1);
794 } 803 }
795 return ONE; 804 return ONE;
796} 805}
@@ -799,7 +808,8 @@ static void rt_check_expire(void)
799{ 808{
800 static unsigned int rover; 809 static unsigned int rover;
801 unsigned int i = rover, goal; 810 unsigned int i = rover, goal;
802 struct rtable *rth, **rthp; 811 struct rtable *rth;
812 struct rtable __rcu **rthp;
803 unsigned long samples = 0; 813 unsigned long samples = 0;
804 unsigned long sum = 0, sum2 = 0; 814 unsigned long sum = 0, sum2 = 0;
805 unsigned long delta; 815 unsigned long delta;
@@ -825,11 +835,12 @@ static void rt_check_expire(void)
825 835
826 samples++; 836 samples++;
827 837
828 if (*rthp == NULL) 838 if (rcu_dereference_raw(*rthp) == NULL)
829 continue; 839 continue;
830 length = 0; 840 length = 0;
831 spin_lock_bh(rt_hash_lock_addr(i)); 841 spin_lock_bh(rt_hash_lock_addr(i));
832 while ((rth = *rthp) != NULL) { 842 while ((rth = rcu_dereference_protected(*rthp,
843 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
833 prefetch(rth->dst.rt_next); 844 prefetch(rth->dst.rt_next);
834 if (rt_is_expired(rth)) { 845 if (rt_is_expired(rth)) {
835 *rthp = rth->dst.rt_next; 846 *rthp = rth->dst.rt_next;
@@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
941 static unsigned long last_gc; 952 static unsigned long last_gc;
942 static int rover; 953 static int rover;
943 static int equilibrium; 954 static int equilibrium;
944 struct rtable *rth, **rthp; 955 struct rtable *rth;
956 struct rtable __rcu **rthp;
945 unsigned long now = jiffies; 957 unsigned long now = jiffies;
946 int goal; 958 int goal;
947 int entries = dst_entries_get_fast(&ipv4_dst_ops); 959 int entries = dst_entries_get_fast(&ipv4_dst_ops);
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
995 k = (k + 1) & rt_hash_mask; 1007 k = (k + 1) & rt_hash_mask;
996 rthp = &rt_hash_table[k].chain; 1008 rthp = &rt_hash_table[k].chain;
997 spin_lock_bh(rt_hash_lock_addr(k)); 1009 spin_lock_bh(rt_hash_lock_addr(k));
998 while ((rth = *rthp) != NULL) { 1010 while ((rth = rcu_dereference_protected(*rthp,
1011 lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
999 if (!rt_is_expired(rth) && 1012 if (!rt_is_expired(rth) &&
1000 !rt_may_expire(rth, tmo, expire)) { 1013 !rt_may_expire(rth, tmo, expire)) {
1001 tmo >>= 1; 1014 tmo >>= 1;
@@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head)
1071 1084
1072 while (rth) { 1085 while (rth) {
1073 length += has_noalias(head, rth); 1086 length += has_noalias(head, rth);
1074 rth = rth->dst.rt_next; 1087 rth = rcu_dereference_protected(rth->dst.rt_next, 1);
1075 } 1088 }
1076 return length >> FRACT_BITS; 1089 return length >> FRACT_BITS;
1077} 1090}
@@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head)
1079static int rt_intern_hash(unsigned hash, struct rtable *rt, 1092static int rt_intern_hash(unsigned hash, struct rtable *rt,
1080 struct rtable **rp, struct sk_buff *skb, int ifindex) 1093 struct rtable **rp, struct sk_buff *skb, int ifindex)
1081{ 1094{
1082 struct rtable *rth, **rthp; 1095 struct rtable *rth, *cand;
1096 struct rtable __rcu **rthp, **candp;
1083 unsigned long now; 1097 unsigned long now;
1084 struct rtable *cand, **candp;
1085 u32 min_score; 1098 u32 min_score;
1086 int chain_length; 1099 int chain_length;
1087 int attempts = !in_softirq(); 1100 int attempts = !in_softirq();
@@ -1128,7 +1141,8 @@ restart:
1128 rthp = &rt_hash_table[hash].chain; 1141 rthp = &rt_hash_table[hash].chain;
1129 1142
1130 spin_lock_bh(rt_hash_lock_addr(hash)); 1143 spin_lock_bh(rt_hash_lock_addr(hash));
1131 while ((rth = *rthp) != NULL) { 1144 while ((rth = rcu_dereference_protected(*rthp,
1145 lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
1132 if (rt_is_expired(rth)) { 1146 if (rt_is_expired(rth)) {
1133 *rthp = rth->dst.rt_next; 1147 *rthp = rth->dst.rt_next;
1134 rt_free(rth); 1148 rt_free(rth);
@@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident);
1324 1338
1325static void rt_del(unsigned hash, struct rtable *rt) 1339static void rt_del(unsigned hash, struct rtable *rt)
1326{ 1340{
1327 struct rtable **rthp, *aux; 1341 struct rtable __rcu **rthp;
1342 struct rtable *aux;
1328 1343
1329 rthp = &rt_hash_table[hash].chain; 1344 rthp = &rt_hash_table[hash].chain;
1330 spin_lock_bh(rt_hash_lock_addr(hash)); 1345 spin_lock_bh(rt_hash_lock_addr(hash));
1331 ip_rt_put(rt); 1346 ip_rt_put(rt);
1332 while ((aux = *rthp) != NULL) { 1347 while ((aux = rcu_dereference_protected(*rthp,
1348 lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
1333 if (aux == rt || rt_is_expired(aux)) { 1349 if (aux == rt || rt_is_expired(aux)) {
1334 *rthp = aux->dst.rt_next; 1350 *rthp = aux->dst.rt_next;
1335 rt_free(aux); 1351 rt_free(aux);
@@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1346{ 1362{
1347 int i, k; 1363 int i, k;
1348 struct in_device *in_dev = __in_dev_get_rcu(dev); 1364 struct in_device *in_dev = __in_dev_get_rcu(dev);
1349 struct rtable *rth, **rthp; 1365 struct rtable *rth;
1366 struct rtable __rcu **rthp;
1350 __be32 skeys[2] = { saddr, 0 }; 1367 __be32 skeys[2] = { saddr, 0 };
1351 int ikeys[2] = { dev->ifindex, 0 }; 1368 int ikeys[2] = { dev->ifindex, 0 };
1352 struct netevent_redirect netevent; 1369 struct netevent_redirect netevent;
@@ -1379,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1379 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1396 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
1380 rt_genid(net)); 1397 rt_genid(net));
1381 1398
1382 rthp=&rt_hash_table[hash].chain; 1399 rthp = &rt_hash_table[hash].chain;
1383 1400
1384 while ((rth = rcu_dereference(*rthp)) != NULL) { 1401 while ((rth = rcu_dereference(*rthp)) != NULL) {
1385 struct rtable *rt; 1402 struct rtable *rt;
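
The route-cache conversion is effectively a catalogue of the rcu_dereference() flavours, each encoding the caller's protection: rcu_dereference_raw() for the lockless is-the-bucket-empty peek (no protection claimed, NULL test only), rcu_dereference_bh() inside rcu_read_lock_bh() sections, rcu_dereference_protected(..., lockdep_is_held(lock)) under the per-bucket spinlock, and rcu_dereference_protected(..., 1) where exclusion is known but not expressible to lockdep. Condensed into one hypothetical bucket:

    #include <linux/rcupdate.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct entry { struct entry __rcu *next; };
    static struct entry __rcu *chain;
    static DEFINE_SPINLOCK(chain_lock);

    static bool chain_maybe_empty(void)
    {
        /* lockless hint; may be stale by the time it returns */
        return rcu_dereference_raw(chain) == NULL;
    }

    static struct entry *chain_first_rx(void)
    {
        /* caller is inside rcu_read_lock_bh() */
        return rcu_dereference_bh(chain);
    }

    static struct entry *chain_first_locked(void)
    {
        /* caller holds chain_lock; lockdep verifies the claim */
        return rcu_dereference_protected(chain,
                        lockdep_is_held(&chain_lock));
    }
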
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17c..1b4ec21497a4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
26static int tcp_retr1_max = 255; 26static int tcp_retr1_max = 255;
27static int ip_local_port_range_min[] = { 1, 1 }; 27static int ip_local_port_range_min[] = { 1, 1 };
28static int ip_local_port_range_max[] = { 65535, 65535 }; 28static int ip_local_port_range_max[] = { 65535, 65535 };
29static int tcp_adv_win_scale_min = -31;
30static int tcp_adv_win_scale_max = 31;
29 31
30/* Update system visible IP port range */ 32/* Update system visible IP port range */
31static void set_local_port_range(int range[2]) 33static void set_local_port_range(int range[2])
@@ -398,7 +400,7 @@ static struct ctl_table ipv4_table[] = {
398 .data = &sysctl_tcp_mem, 400 .data = &sysctl_tcp_mem,
399 .maxlen = sizeof(sysctl_tcp_mem), 401 .maxlen = sizeof(sysctl_tcp_mem),
400 .mode = 0644, 402 .mode = 0644,
401 .proc_handler = proc_dointvec 403 .proc_handler = proc_doulongvec_minmax
402 }, 404 },
403 { 405 {
404 .procname = "tcp_wmem", 406 .procname = "tcp_wmem",
@@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = {
426 .data = &sysctl_tcp_adv_win_scale, 428 .data = &sysctl_tcp_adv_win_scale,
427 .maxlen = sizeof(int), 429 .maxlen = sizeof(int),
428 .mode = 0644, 430 .mode = 0644,
429 .proc_handler = proc_dointvec 431 .proc_handler = proc_dointvec_minmax,
432 .extra1 = &tcp_adv_win_scale_min,
433 .extra2 = &tcp_adv_win_scale_max,
430 }, 434 },
431 { 435 {
432 .procname = "tcp_tw_reuse", 436 .procname = "tcp_tw_reuse",
@@ -602,8 +606,7 @@ static struct ctl_table ipv4_table[] = {
602 .data = &sysctl_udp_mem, 606 .data = &sysctl_udp_mem,
603 .maxlen = sizeof(sysctl_udp_mem), 607 .maxlen = sizeof(sysctl_udp_mem),
604 .mode = 0644, 608 .mode = 0644,
605 .proc_handler = proc_dointvec_minmax, 609 .proc_handler = proc_doulongvec_minmax,
606 .extra1 = &zero
607 }, 610 },
608 { 611 {
609 .procname = "udp_rmem_min", 612 .procname = "udp_rmem_min",
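
Two distinct fixes share this file. First, sysctl_tcp_mem and sysctl_udp_mem become arrays of long (the tcp.c hunk below has the type change), so their handlers must be proc_doulongvec_minmax -- keeping an int handler would misread the long-sized slots. Second, tcp_adv_win_scale feeds a shift, and shifting a 32-bit int by 32 or more is undefined behaviour, hence the new clamp to the range -31..31. A bounded-int sysctl entry, sketched with a hypothetical knob:

    #include <linux/sysctl.h>

    static int scale_min = -31;
    static int scale_max = 31;
    static int my_win_scale = 1;   /* hypothetical tunable */

    static struct ctl_table my_table[] = {
        {
            .procname     = "win_scale",
            .data         = &my_win_scale,
            .maxlen       = sizeof(int),
            .mode         = 0644,
            .proc_handler = proc_dointvec_minmax,
            .extra1       = &scale_min,   /* writes below -31 rejected */
            .extra2       = &scale_max,   /* writes above 31 rejected */
        },
        { }
    };
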
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb8..f15c36a706ec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
282struct percpu_counter tcp_orphan_count; 282struct percpu_counter tcp_orphan_count;
283EXPORT_SYMBOL_GPL(tcp_orphan_count); 283EXPORT_SYMBOL_GPL(tcp_orphan_count);
284 284
285int sysctl_tcp_mem[3] __read_mostly; 285long sysctl_tcp_mem[3] __read_mostly;
286int sysctl_tcp_wmem[3] __read_mostly; 286int sysctl_tcp_wmem[3] __read_mostly;
287int sysctl_tcp_rmem[3] __read_mostly; 287int sysctl_tcp_rmem[3] __read_mostly;
288 288
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
290EXPORT_SYMBOL(sysctl_tcp_rmem); 290EXPORT_SYMBOL(sysctl_tcp_rmem);
291EXPORT_SYMBOL(sysctl_tcp_wmem); 291EXPORT_SYMBOL(sysctl_tcp_wmem);
292 292
293atomic_t tcp_memory_allocated; /* Current allocated memory. */ 293atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
294EXPORT_SYMBOL(tcp_memory_allocated); 294EXPORT_SYMBOL(tcp_memory_allocated);
295 295
296/* 296/*
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2246 /* Values greater than interface MTU won't take effect. However 2246 /* Values greater than interface MTU won't take effect. However
2247 * at the point when this call is done we typically don't yet 2247 * at the point when this call is done we typically don't yet
2248 * know which interface is going to be used */ 2248 * know which interface is going to be used */
2249 if (val < 8 || val > MAX_TCP_WINDOW) { 2249 if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
2250 err = -EINVAL; 2250 err = -EINVAL;
2251 break; 2251 break;
2252 } 2252 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3357f69e353d..6d8ab1c4efc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
259 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + 259 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
260 sizeof(struct sk_buff); 260 sizeof(struct sk_buff);
261 261
262 if (sk->sk_sndbuf < 3 * sndmem) 262 if (sk->sk_sndbuf < 3 * sndmem) {
263 sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]); 263 sk->sk_sndbuf = 3 * sndmem;
264 if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
265 sk->sk_sndbuf = sysctl_tcp_wmem[2];
266 }
264} 267}
265 268
266/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) 269/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
396 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && 399 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
397 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && 400 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
398 !tcp_memory_pressure && 401 !tcp_memory_pressure &&
399 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { 402 atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
400 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), 403 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
401 sysctl_tcp_rmem[2]); 404 sysctl_tcp_rmem[2]);
402 } 405 }
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
4861 return 0; 4864 return 0;
4862 4865
4863 /* If we are under soft global TCP memory pressure, do not expand. */ 4866 /* If we are under soft global TCP memory pressure, do not expand. */
4864 if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) 4867 if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4865 return 0; 4868 return 0;
4866 4869
4867 /* If we filled the congestion window, do not expand. */ 4870 /* If we filled the congestion window, do not expand. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f8527d41682..e13da6de1fc7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
415 !icsk->icsk_backoff) 415 !icsk->icsk_backoff)
416 break; 416 break;
417 417
418 if (sock_owned_by_user(sk))
419 break;
420
418 icsk->icsk_backoff--; 421 icsk->icsk_backoff--;
419 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << 422 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
420 icsk->icsk_backoff; 423 icsk->icsk_backoff;
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
429 if (remaining) { 432 if (remaining) {
430 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 433 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
431 remaining, TCP_RTO_MAX); 434 remaining, TCP_RTO_MAX);
432 } else if (sock_owned_by_user(sk)) {
433 /* RTO revert clocked out retransmission,
434 * but socket is locked. Will defer. */
435 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
436 HZ/20, TCP_RTO_MAX);
437 } else { 435 } else {
438 /* RTO revert clocked out retransmission. 436 /* RTO revert clocked out retransmission.
439 * Will retransmit now */ 437 * Will retransmit now */
@@ -2045,7 +2043,9 @@ get_req:
2045 } 2043 }
2046get_sk: 2044get_sk:
2047 sk_nulls_for_each_from(sk, node) { 2045 sk_nulls_for_each_from(sk, node) {
2048 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { 2046 if (!net_eq(sock_net(sk), net))
2047 continue;
2048 if (sk->sk_family == st->family) {
2049 cur = sk; 2049 cur = sk;
2050 goto out; 2050 goto out;
2051 } 2051 }
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 43cf901d7659..a66735f75963 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -347,7 +347,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
347 * socket up. We've got bigger problems than 347 * socket up. We've got bigger problems than
348 * non-graceful socket closings. 348 * non-graceful socket closings.
349 */ 349 */
350 LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n"); 350 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
351 } 351 }
352 352
353 tcp_update_metrics(sk); 353 tcp_update_metrics(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 05b1ecf36763..61c2463e2753 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -231,11 +231,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
231 /* when initializing use the value from init_rcv_wnd 231 /* when initializing use the value from init_rcv_wnd
232 * rather than the default from above 232 * rather than the default from above
233 */ 233 */
234 if (init_rcv_wnd && 234 if (init_rcv_wnd)
235 (*rcv_wnd > init_rcv_wnd * mss)) 235 *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
236 *rcv_wnd = init_rcv_wnd * mss; 236 else
237 else if (*rcv_wnd > init_cwnd * mss) 237 *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
238 *rcv_wnd = init_cwnd * mss;
239 } 238 }
240 239
241 /* Set the clamp no higher than max representable value */ 240 /* Set the clamp no higher than max representable value */
@@ -386,27 +385,30 @@ struct tcp_out_options {
386 */ 385 */
387static u8 tcp_cookie_size_check(u8 desired) 386static u8 tcp_cookie_size_check(u8 desired)
388{ 387{
389 if (desired > 0) { 388 int cookie_size;
389
390 if (desired > 0)
390 /* previously specified */ 391 /* previously specified */
391 return desired; 392 return desired;
392 } 393
393 if (sysctl_tcp_cookie_size <= 0) { 394 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
395 if (cookie_size <= 0)
394 /* no default specified */ 396 /* no default specified */
395 return 0; 397 return 0;
396 } 398
397 if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) { 399 if (cookie_size <= TCP_COOKIE_MIN)
398 /* value too small, specify minimum */ 400 /* value too small, specify minimum */
399 return TCP_COOKIE_MIN; 401 return TCP_COOKIE_MIN;
400 } 402
401 if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) { 403 if (cookie_size >= TCP_COOKIE_MAX)
402 /* value too large, specify maximum */ 404 /* value too large, specify maximum */
403 return TCP_COOKIE_MAX; 405 return TCP_COOKIE_MAX;
404 } 406
405 if (0x1 & sysctl_tcp_cookie_size) { 407 if (cookie_size & 1)
406 /* 8-bit multiple, illegal, fix it */ 408 /* 8-bit multiple, illegal, fix it */
407 return (u8)(sysctl_tcp_cookie_size + 0x1); 409 cookie_size++;
408 } 410
409 return (u8)sysctl_tcp_cookie_size; 411 return (u8)cookie_size;
410} 412}
411 413
412/* Write previously computed TCP options to the packet. 414/* Write previously computed TCP options to the packet.
@@ -1513,6 +1515,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1513 struct tcp_sock *tp = tcp_sk(sk); 1515 struct tcp_sock *tp = tcp_sk(sk);
1514 const struct inet_connection_sock *icsk = inet_csk(sk); 1516 const struct inet_connection_sock *icsk = inet_csk(sk);
1515 u32 send_win, cong_win, limit, in_flight; 1517 u32 send_win, cong_win, limit, in_flight;
1518 int win_divisor;
1516 1519
1517 if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) 1520 if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
1518 goto send_now; 1521 goto send_now;
@@ -1544,13 +1547,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1544 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) 1547 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1545 goto send_now; 1548 goto send_now;
1546 1549
1547 if (sysctl_tcp_tso_win_divisor) { 1550 win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
1551 if (win_divisor) {
1548 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); 1552 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1549 1553
1550 /* If at least some fraction of a window is available, 1554 /* If at least some fraction of a window is available,
1551 * just use it. 1555 * just use it.
1552 */ 1556 */
1553 chunk /= sysctl_tcp_tso_win_divisor; 1557 chunk /= win_divisor;
1554 if (limit >= chunk) 1558 if (limit >= chunk)
1555 goto send_now; 1559 goto send_now;
1556 } else { 1560 } else {
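
[Editor's note] Both tcp_output.c hunks apply the same fix: snapshot a sysctl that another CPU may rewrite with ACCESS_ONCE() and do all range checks and arithmetic on the local copy, so the checks and the final use cannot observe different values. A userspace sketch of the idea — the volatile-cast macro approximates ACCESS_ONCE(), and the names are illustrative:

#include <stdio.h>

#define READ_ONCE_INT(x) (*(volatile int *)&(x))

int tunable_cookie_size;   /* may be written concurrently elsewhere */

static unsigned char cookie_size_check(void)
{
	int cookie_size = READ_ONCE_INT(tunable_cookie_size);

	if (cookie_size <= 0)
		return 0;
	if (cookie_size & 1)       /* round odd values up, as above */
		cookie_size++;
	return (unsigned char)cookie_size;
}

int main(void)
{
	tunable_cookie_size = 7;
	printf("%u\n", cookie_size_check());   /* prints 8 */
	return 0;
}
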
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 9a17bd2a0a37..ac3b3ee4b07c 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,27 +14,32 @@
14#include <net/protocol.h> 14#include <net/protocol.h>
15#include <net/xfrm.h> 15#include <net/xfrm.h>
16 16
17static struct xfrm_tunnel *tunnel4_handlers __read_mostly; 17static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
18static struct xfrm_tunnel *tunnel64_handlers __read_mostly; 18static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
19static DEFINE_MUTEX(tunnel4_mutex); 19static DEFINE_MUTEX(tunnel4_mutex);
20 20
21static inline struct xfrm_tunnel **fam_handlers(unsigned short family) 21static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
22{ 22{
23 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; 23 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
24} 24}
25 25
26int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) 26int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
27{ 27{
28 struct xfrm_tunnel **pprev; 28 struct xfrm_tunnel __rcu **pprev;
29 struct xfrm_tunnel *t;
30
29 int ret = -EEXIST; 31 int ret = -EEXIST;
30 int priority = handler->priority; 32 int priority = handler->priority;
31 33
32 mutex_lock(&tunnel4_mutex); 34 mutex_lock(&tunnel4_mutex);
33 35
34 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { 36 for (pprev = fam_handlers(family);
35 if ((*pprev)->priority > priority) 37 (t = rcu_dereference_protected(*pprev,
38 lockdep_is_held(&tunnel4_mutex))) != NULL;
39 pprev = &t->next) {
40 if (t->priority > priority)
36 break; 41 break;
37 if ((*pprev)->priority == priority) 42 if (t->priority == priority)
38 goto err; 43 goto err;
39 } 44 }
40 45
@@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register);
52 57
53int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) 58int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
54{ 59{
55 struct xfrm_tunnel **pprev; 60 struct xfrm_tunnel __rcu **pprev;
61 struct xfrm_tunnel *t;
56 int ret = -ENOENT; 62 int ret = -ENOENT;
57 63
58 mutex_lock(&tunnel4_mutex); 64 mutex_lock(&tunnel4_mutex);
59 65
60 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { 66 for (pprev = fam_handlers(family);
61 if (*pprev == handler) { 67 (t = rcu_dereference_protected(*pprev,
68 lockdep_is_held(&tunnel4_mutex))) != NULL;
69 pprev = &t->next) {
70 if (t == handler) {
62 *pprev = handler->next; 71 *pprev = handler->next;
63 ret = 0; 72 ret = 0;
64 break; 73 break;
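
[Editor's note] The tunnel4.c conversion (mirrored for tunnel6.c further down) annotates the handler chains __rcu and fetches each node with rcu_dereference_protected() under the registration mutex. Stripped of the RCU annotations, both loops are the classic pprev walk over a priority-ordered singly linked list, which needs no "previous node" special case for insertion or removal. A standalone sketch with illustrative names and no locking:

#include <stdio.h>

struct handler {
	struct handler *next;
	int priority;
};

static int register_handler(struct handler **head, struct handler *h)
{
	struct handler **pprev, *t;

	for (pprev = head; (t = *pprev) != NULL; pprev = &t->next) {
		if (t->priority > h->priority)
			break;              /* insert before higher priority */
		if (t->priority == h->priority)
			return -1;          /* -EEXIST */
	}
	h->next = *pprev;
	*pprev = h;
	return 0;
}

int main(void)
{
	struct handler *head = NULL;
	struct handler a = { NULL, 10 }, b = { NULL, 5 };

	register_handler(&head, &a);
	register_handler(&head, &b);
	printf("first priority: %d\n", head->priority);  /* prints 5 */
	return 0;
}
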
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b3f7e8cf18ac..2d3ded4d0786 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
110struct udp_table udp_table __read_mostly; 110struct udp_table udp_table __read_mostly;
111EXPORT_SYMBOL(udp_table); 111EXPORT_SYMBOL(udp_table);
112 112
113int sysctl_udp_mem[3] __read_mostly; 113long sysctl_udp_mem[3] __read_mostly;
114EXPORT_SYMBOL(sysctl_udp_mem); 114EXPORT_SYMBOL(sysctl_udp_mem);
115 115
116int sysctl_udp_rmem_min __read_mostly; 116int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
119int sysctl_udp_wmem_min __read_mostly; 119int sysctl_udp_wmem_min __read_mostly;
120EXPORT_SYMBOL(sysctl_udp_wmem_min); 120EXPORT_SYMBOL(sysctl_udp_wmem_min);
121 121
122atomic_t udp_memory_allocated; 122atomic_long_t udp_memory_allocated;
123EXPORT_SYMBOL(udp_memory_allocated); 123EXPORT_SYMBOL(udp_memory_allocated);
124 124
125#define MAX_UDP_PORTS 65536 125#define MAX_UDP_PORTS 65536
@@ -1413,7 +1413,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1413 } 1413 }
1414 } 1414 }
1415 1415
1416 if (sk->sk_filter) { 1416 if (rcu_dereference_raw(sk->sk_filter)) {
1417 if (udp_lib_checksum_complete(skb)) 1417 if (udp_lib_checksum_complete(skb))
1418 goto drop; 1418 goto drop;
1419 } 1419 }
@@ -1899,6 +1899,7 @@ struct proto udp_prot = {
1899 .compat_setsockopt = compat_udp_setsockopt, 1899 .compat_setsockopt = compat_udp_setsockopt,
1900 .compat_getsockopt = compat_udp_getsockopt, 1900 .compat_getsockopt = compat_udp_getsockopt,
1901#endif 1901#endif
1902 .clear_sk = sk_prot_clear_portaddr_nulls,
1902}; 1903};
1903EXPORT_SYMBOL(udp_prot); 1904EXPORT_SYMBOL(udp_prot);
1904 1905
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index ab76aa928fa9..aee9963f7f5a 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -57,6 +57,7 @@ struct proto udplite_prot = {
57 .compat_setsockopt = compat_udp_setsockopt, 57 .compat_setsockopt = compat_udp_setsockopt,
58 .compat_getsockopt = compat_udp_getsockopt, 58 .compat_getsockopt = compat_udp_getsockopt,
59#endif 59#endif
60 .clear_sk = sk_prot_clear_portaddr_nulls,
60}; 61};
61EXPORT_SYMBOL(udplite_prot); 62EXPORT_SYMBOL(udplite_prot);
62 63
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ec7a91d9e865..848b35591042 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -98,7 +98,11 @@
98#endif 98#endif
99 99
100#define INFINITY_LIFE_TIME 0xFFFFFFFF 100#define INFINITY_LIFE_TIME 0xFFFFFFFF
101#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b))) 101
102static inline u32 cstamp_delta(unsigned long cstamp)
103{
104 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
105}
102 106
103#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) 107#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
104#define ADDRCONF_TIMER_FUZZ (HZ / 4) 108#define ADDRCONF_TIMER_FUZZ (HZ / 4)
@@ -836,7 +840,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
836{ 840{
837 struct inet6_dev *idev = ifp->idev; 841 struct inet6_dev *idev = ifp->idev;
838 struct in6_addr addr, *tmpaddr; 842 struct in6_addr addr, *tmpaddr;
839 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp; 843 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
840 unsigned long regen_advance; 844 unsigned long regen_advance;
841 int tmp_plen; 845 int tmp_plen;
842 int ret = 0; 846 int ret = 0;
@@ -886,12 +890,13 @@ retry:
886 goto out; 890 goto out;
887 } 891 }
888 memcpy(&addr.s6_addr[8], idev->rndid, 8); 892 memcpy(&addr.s6_addr[8], idev->rndid, 8);
893 age = (jiffies - ifp->tstamp) / HZ;
889 tmp_valid_lft = min_t(__u32, 894 tmp_valid_lft = min_t(__u32,
890 ifp->valid_lft, 895 ifp->valid_lft,
891 idev->cnf.temp_valid_lft); 896 idev->cnf.temp_valid_lft + age);
892 tmp_prefered_lft = min_t(__u32, 897 tmp_prefered_lft = min_t(__u32,
893 ifp->prefered_lft, 898 ifp->prefered_lft,
894 idev->cnf.temp_prefered_lft - 899 idev->cnf.temp_prefered_lft + age -
895 idev->cnf.max_desync_factor); 900 idev->cnf.max_desync_factor);
896 tmp_plen = ifp->prefix_len; 901 tmp_plen = ifp->prefix_len;
897 max_addresses = idev->cnf.max_addresses; 902 max_addresses = idev->cnf.max_addresses;
@@ -1426,8 +1431,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1426{ 1431{
1427 struct inet6_dev *idev = ifp->idev; 1432 struct inet6_dev *idev = ifp->idev;
1428 1433
1429 if (addrconf_dad_end(ifp)) 1434 if (addrconf_dad_end(ifp)) {
1435 in6_ifa_put(ifp);
1430 return; 1436 return;
1437 }
1431 1438
1432 if (net_ratelimit()) 1439 if (net_ratelimit())
1433 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n", 1440 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
@@ -2021,10 +2028,11 @@ ok:
2021 ipv6_ifa_notify(0, ift); 2028 ipv6_ifa_notify(0, ift);
2022 } 2029 }
2023 2030
2024 if (create && in6_dev->cnf.use_tempaddr > 0) { 2031 if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
2025 /* 2032 /*
2026 * When a new public address is created as described in [ADDRCONF], 2033 * When a new public address is created as described in [ADDRCONF],
2027 * also create a new temporary address. 2034 * also create a new temporary address. Also create a temporary
2035 * address if it's enabled but no temporary address currently exists.
2028 */ 2036 */
2029 read_unlock_bh(&in6_dev->lock); 2037 read_unlock_bh(&in6_dev->lock);
2030 ipv6_create_tempaddr(ifp, NULL); 2038 ipv6_create_tempaddr(ifp, NULL);
@@ -2661,7 +2669,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2661 2669
2662 ASSERT_RTNL(); 2670 ASSERT_RTNL();
2663 2671
2664 rt6_ifdown(net, dev); 2672 /* Flush routes if device is being removed or it is not loopback */
2673 if (how || !(dev->flags & IFF_LOOPBACK))
2674 rt6_ifdown(net, dev);
2665 neigh_ifdown(&nd_tbl, dev); 2675 neigh_ifdown(&nd_tbl, dev);
2666 2676
2667 idev = __in6_dev_get(dev); 2677 idev = __in6_dev_get(dev);
@@ -2736,10 +2746,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2736 /* Flag it for later restoration when link comes up */ 2746 /* Flag it for later restoration when link comes up */
2737 ifa->flags |= IFA_F_TENTATIVE; 2747 ifa->flags |= IFA_F_TENTATIVE;
2738 ifa->state = INET6_IFADDR_STATE_DAD; 2748 ifa->state = INET6_IFADDR_STATE_DAD;
2739
2740 write_unlock_bh(&idev->lock);
2741
2742 in6_ifa_hold(ifa);
2743 } else { 2749 } else {
2744 list_del(&ifa->if_list); 2750 list_del(&ifa->if_list);
2745 2751
@@ -2754,19 +2760,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2754 ifa->state = INET6_IFADDR_STATE_DEAD; 2760 ifa->state = INET6_IFADDR_STATE_DEAD;
2755 spin_unlock_bh(&ifa->state_lock); 2761 spin_unlock_bh(&ifa->state_lock);
2756 2762
2757 if (state == INET6_IFADDR_STATE_DEAD) 2763 if (state != INET6_IFADDR_STATE_DEAD) {
2758 goto put_ifa; 2764 __ipv6_ifa_notify(RTM_DELADDR, ifa);
2759 } 2765 atomic_notifier_call_chain(&inet6addr_chain,
2760 2766 NETDEV_DOWN, ifa);
2761 __ipv6_ifa_notify(RTM_DELADDR, ifa); 2767 }
2762 if (ifa->state == INET6_IFADDR_STATE_DEAD)
2763 atomic_notifier_call_chain(&inet6addr_chain,
2764 NETDEV_DOWN, ifa);
2765
2766put_ifa:
2767 in6_ifa_put(ifa);
2768 2768
2769 write_lock_bh(&idev->lock); 2769 in6_ifa_put(ifa);
2770 write_lock_bh(&idev->lock);
2771 }
2770 } 2772 }
2771 2773
2772 list_splice(&keep_list, &idev->addr_list); 2774 list_splice(&keep_list, &idev->addr_list);
@@ -3448,10 +3450,8 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
3448{ 3450{
3449 struct ifa_cacheinfo ci; 3451 struct ifa_cacheinfo ci;
3450 3452
3451 ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 3453 ci.cstamp = cstamp_delta(cstamp);
3452 + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); 3454 ci.tstamp = cstamp_delta(tstamp);
3453 ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
3454 + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
3455 ci.ifa_prefered = preferred; 3455 ci.ifa_prefered = preferred;
3456 ci.ifa_valid = valid; 3456 ci.ifa_valid = valid;
3457 3457
@@ -3802,8 +3802,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
3802 array[DEVCONF_AUTOCONF] = cnf->autoconf; 3802 array[DEVCONF_AUTOCONF] = cnf->autoconf;
3803 array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; 3803 array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
3804 array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; 3804 array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
3805 array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval; 3805 array[DEVCONF_RTR_SOLICIT_INTERVAL] =
3806 array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay; 3806 jiffies_to_msecs(cnf->rtr_solicit_interval);
3807 array[DEVCONF_RTR_SOLICIT_DELAY] =
3808 jiffies_to_msecs(cnf->rtr_solicit_delay);
3807 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; 3809 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
3808#ifdef CONFIG_IPV6_PRIVACY 3810#ifdef CONFIG_IPV6_PRIVACY
3809 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; 3811 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
@@ -3817,7 +3819,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
3817 array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; 3819 array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
3818#ifdef CONFIG_IPV6_ROUTER_PREF 3820#ifdef CONFIG_IPV6_ROUTER_PREF
3819 array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; 3821 array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
3820 array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; 3822 array[DEVCONF_RTR_PROBE_INTERVAL] =
3823 jiffies_to_msecs(cnf->rtr_probe_interval);
3821#ifdef CONFIG_IPV6_ROUTE_INFO 3824#ifdef CONFIG_IPV6_ROUTE_INFO
3822 array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; 3825 array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
3823#endif 3826#endif
@@ -3933,10 +3936,9 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
3933 NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); 3936 NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
3934 3937
3935 ci.max_reasm_len = IPV6_MAXPLEN; 3938 ci.max_reasm_len = IPV6_MAXPLEN;
3936 ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 3939 ci.tstamp = cstamp_delta(idev->tstamp);
3937 + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); 3940 ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
3938 ci.reachable_time = idev->nd_parms->reachable_time; 3941 ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
3939 ci.retrans_time = idev->nd_parms->retrans_time;
3940 NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); 3942 NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
3941 3943
3942 nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); 3944 nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
@@ -4021,11 +4023,11 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
4021 kfree_skb(skb); 4023 kfree_skb(skb);
4022 goto errout; 4024 goto errout;
4023 } 4025 }
4024 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 4026 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
4025 return; 4027 return;
4026errout: 4028errout:
4027 if (err < 0) 4029 if (err < 0)
4028 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); 4030 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
4029} 4031}
4030 4032
4031static inline size_t inet6_prefix_nlmsg_size(void) 4033static inline size_t inet6_prefix_nlmsg_size(void)
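
[Editor's note] cstamp_delta() replaces the open-coded TIME_DELTA arithmetic in three places above: it turns a jiffies timestamp into hundredths of a second since boot in one expression. A compilable sketch with stand-in HZ and INITIAL_JIFFIES values (the kernel defines both differently):

#include <stdio.h>

#define HZ               250UL
#define INITIAL_JIFFIES  ((unsigned long)-300UL * HZ)   /* stand-in */

static unsigned int cstamp_delta(unsigned long cstamp)
{
	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}

int main(void)
{
	unsigned long now = INITIAL_JIFFIES + 5 * HZ;   /* 5s after boot */
	printf("%u centiseconds\n", cstamp_delta(now)); /* prints 500 */
	return 0;
}
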
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 99157b4cd56e..94b5bf132b2e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,7 +56,7 @@
56#include <net/checksum.h> 56#include <net/checksum.h>
57#include <linux/mroute6.h> 57#include <linux/mroute6.h>
58 58
59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 59int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60 60
61int __ip6_local_out(struct sk_buff *skb) 61int __ip6_local_out(struct sk_buff *skb)
62{ 62{
@@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb)
145 return -EINVAL; 145 return -EINVAL;
146} 146}
147 147
148static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
149{
150 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
151
152 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
153 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
154}
155
156static int ip6_finish_output(struct sk_buff *skb) 148static int ip6_finish_output(struct sk_buff *skb)
157{ 149{
158 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 150 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
@@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
601 return offset; 593 return offset;
602} 594}
603 595
604static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 596int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
605{ 597{
606 struct sk_buff *frag; 598 struct sk_buff *frag;
607 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); 599 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c2c0f89397b1..70e891a20fb9 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1175,6 +1175,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1175 sizeof (struct ipv6hdr); 1175 sizeof (struct ipv6hdr);
1176 1176
1177 dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr); 1177 dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
1178 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 1179 		dev->mtu -= 8;
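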
1178 1180
1179 if (dev->mtu < IPV6_MIN_MTU) 1181 if (dev->mtu < IPV6_MIN_MTU)
1180 dev->mtu = IPV6_MIN_MTU; 1182 dev->mtu = IPV6_MIN_MTU;
@@ -1284,6 +1286,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1284 t = netdev_priv(dev); 1286 t = netdev_priv(dev);
1285 1287
1286 ip6_tnl_unlink(ip6n, t); 1288 ip6_tnl_unlink(ip6n, t);
1289 synchronize_net();
1287 err = ip6_tnl_change(t, &p); 1290 err = ip6_tnl_change(t, &p);
1288 ip6_tnl_link(ip6n, t); 1291 ip6_tnl_link(ip6n, t);
1289 netdev_state_change(dev); 1292 netdev_state_change(dev);
@@ -1362,15 +1365,21 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
1362 1365
1363static void ip6_tnl_dev_setup(struct net_device *dev) 1366static void ip6_tnl_dev_setup(struct net_device *dev)
1364{ 1367{
1368 struct ip6_tnl *t;
1369
1365 dev->netdev_ops = &ip6_tnl_netdev_ops; 1370 dev->netdev_ops = &ip6_tnl_netdev_ops;
1366 dev->destructor = ip6_dev_free; 1371 dev->destructor = ip6_dev_free;
1367 1372
1368 dev->type = ARPHRD_TUNNEL6; 1373 dev->type = ARPHRD_TUNNEL6;
1369 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); 1374 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
1370 dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); 1375 dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
1376 t = netdev_priv(dev);
1377 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 1378 		dev->mtu -= 8;
1371 dev->flags |= IFF_NOARP; 1379 dev->flags |= IFF_NOARP;
1372 dev->addr_len = sizeof(struct in6_addr); 1380 dev->addr_len = sizeof(struct in6_addr);
1373 dev->features |= NETIF_F_NETNS_LOCAL; 1381 dev->features |= NETIF_F_NETNS_LOCAL;
1382 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1374} 1383}
1375 1384
1376 1385
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0553867a317f..d1770e061c08 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -343,6 +343,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
343 break; 343 break;
344 344
345 case IPV6_TRANSPARENT: 345 case IPV6_TRANSPARENT:
346 if (!capable(CAP_NET_ADMIN)) {
347 retv = -EPERM;
348 break;
349 }
346 if (optlen < sizeof(int)) 350 if (optlen < sizeof(int))
347 goto e_inval; 351 goto e_inval;
348 /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */ 352 /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 44d2eeac089b..448464844a25 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -5,10 +5,15 @@
5menu "IPv6: Netfilter Configuration" 5menu "IPv6: Netfilter Configuration"
6 depends on INET && IPV6 && NETFILTER 6 depends on INET && IPV6 && NETFILTER
7 7
8config NF_DEFRAG_IPV6
9 tristate
10 default n
11
8config NF_CONNTRACK_IPV6 12config NF_CONNTRACK_IPV6
9 tristate "IPv6 connection tracking support" 13 tristate "IPv6 connection tracking support"
10 depends on INET && IPV6 && NF_CONNTRACK 14 depends on INET && IPV6 && NF_CONNTRACK
11 default m if NETFILTER_ADVANCED=n 15 default m if NETFILTER_ADVANCED=n
16 select NF_DEFRAG_IPV6
12 ---help--- 17 ---help---
13 Connection tracking keeps a record of what packets have passed 18 Connection tracking keeps a record of what packets have passed
14 through your machine, in order to figure out how they are related 19 through your machine, in order to figure out how they are related
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 3f8e4a3d83ce..0a432c9b0795 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -12,11 +12,14 @@ obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
12 12
13# objects for l3 independent conntrack 13# objects for l3 independent conntrack
14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
16 15
17# l3 independent conntrack 16# l3 independent conntrack
18obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o 17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
19 18
19# defrag
20nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
21obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
22
20# matches 23# matches
21obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 24obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
22obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o 25obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 51df035897e7..455582384ece 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1137,6 +1137,7 @@ static int get_info(struct net *net, void __user *user,
1137 private = &tmp; 1137 private = &tmp;
1138 } 1138 }
1139#endif 1139#endif
1140 memset(&info, 0, sizeof(info));
1140 info.valid_hooks = t->valid_hooks; 1141 info.valid_hooks = t->valid_hooks;
1141 memcpy(info.hook_entry, private->hook_entry, 1142 memcpy(info.hook_entry, private->hook_entry,
1142 sizeof(info.hook_entry)); 1143 sizeof(info.hook_entry));
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 489d71b844ac..79d43aa8fa8d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -286,7 +286,7 @@ found:
286 286
287 /* Check for overlap with preceding fragment. */ 287 /* Check for overlap with preceding fragment. */
288 if (prev && 288 if (prev &&
289 (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) 289 (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
290 goto discard_fq; 290 goto discard_fq;
291 291
292 /* Look for overlap with succeeding segment. */ 292 /* Look for overlap with succeeding segment. */
@@ -625,21 +625,24 @@ int nf_ct_frag6_init(void)
625 inet_frags_init_net(&nf_init_frags); 625 inet_frags_init_net(&nf_init_frags);
626 inet_frags_init(&nf_frags); 626 inet_frags_init(&nf_frags);
627 627
628#ifdef CONFIG_SYSCTL
628 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, 629 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
629 nf_ct_frag6_sysctl_table); 630 nf_ct_frag6_sysctl_table);
630 if (!nf_ct_frag6_sysctl_header) { 631 if (!nf_ct_frag6_sysctl_header) {
631 inet_frags_fini(&nf_frags); 632 inet_frags_fini(&nf_frags);
632 return -ENOMEM; 633 return -ENOMEM;
633 } 634 }
635#endif
634 636
635 return 0; 637 return 0;
636} 638}
637 639
638void nf_ct_frag6_cleanup(void) 640void nf_ct_frag6_cleanup(void)
639{ 641{
642#ifdef CONFIG_SYSCTL
640 unregister_sysctl_table(nf_ct_frag6_sysctl_header); 643 unregister_sysctl_table(nf_ct_frag6_sysctl_header);
641 nf_ct_frag6_sysctl_header = NULL; 644 nf_ct_frag6_sysctl_header = NULL;
642 645#endif
643 inet_frags_fini(&nf_frags); 646 inet_frags_fini(&nf_frags);
644 647
645 nf_init_frags.low_thresh = 0; 648 nf_init_frags.low_thresh = 0;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index d082eaeefa25..24b3558b8e67 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -126,6 +126,8 @@ static const struct snmp_mib snmp6_udp6_list[] = {
126 SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), 126 SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
127 SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS), 127 SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
128 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS), 128 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
129 SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
130 SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
129 SNMP_MIB_SENTINEL 131 SNMP_MIB_SENTINEL
130}; 132};
131 133
@@ -134,6 +136,8 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
134 SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), 136 SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
135 SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), 137 SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
136 SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), 138 SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
139 SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
140 SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
137 SNMP_MIB_SENTINEL 141 SNMP_MIB_SENTINEL
138}; 142};
139 143
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9bb936ae2452..9a7978fdc02a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,13 +25,14 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <net/protocol.h> 26#include <net/protocol.h>
27 27
28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly; 28const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
29 29
30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
31{ 31{
32 int hash = protocol & (MAX_INET_PROTOS - 1); 32 int hash = protocol & (MAX_INET_PROTOS - 1);
33 33
34 return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1; 34 return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
35 NULL, prot) ? 0 : -1;
35} 36}
36EXPORT_SYMBOL(inet6_add_protocol); 37EXPORT_SYMBOL(inet6_add_protocol);
37 38
@@ -43,7 +44,8 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
43{ 44{
44 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 45 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
45 46
46 ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1; 47 ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
48 prot, NULL) == prot) ? 0 : -1;
47 49
48 synchronize_net(); 50 synchronize_net();
49 51
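
[Editor's note] The inet6_protos slots above are claimed and released with cmpxchg(): registration succeeds only if the slot is still NULL, deregistration only if the caller actually owns it. A userspace analogue using C11 compare-and-swap in place of the kernel's cmpxchg() — the slot count and names are illustrative:

#include <stdatomic.h>
#include <stdio.h>

#define MAX_PROTOS 256

static _Atomic(const void *) protos[MAX_PROTOS];

static int add_protocol(const void *prot, unsigned char num)
{
	const void *expected = NULL;

	return atomic_compare_exchange_strong(&protos[num], &expected, prot)
	       ? 0 : -1;
}

static int del_protocol(const void *prot, unsigned char num)
{
	const void *expected = prot;

	return atomic_compare_exchange_strong(&protos[num], &expected, NULL)
	       ? 0 : -1;
}

int main(void)
{
	int x;

	printf("add: %d\n", add_protocol(&x, 6));   /* 0  */
	printf("dup: %d\n", add_protocol(&x, 6));   /* -1 */
	printf("del: %d\n", del_protocol(&x, 6));   /* 0  */
	return 0;
}
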
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 45e6efb7f171..86c39526ba5e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -373,7 +373,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
373 373
374static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) 374static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
375{ 375{
376 if ((raw6_sk(sk)->checksum || sk->sk_filter) && 376 if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) &&
377 skb_checksum_complete(skb)) { 377 skb_checksum_complete(skb)) {
378 atomic_inc(&sk->sk_drops); 378 atomic_inc(&sk->sk_drops);
379 kfree_skb(skb); 379 kfree_skb(skb);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index c7ba3149633f..0f2766453759 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -349,7 +349,7 @@ found:
349 349
350 /* Check for overlap with preceding fragment. */ 350 /* Check for overlap with preceding fragment. */
351 if (prev && 351 if (prev &&
352 (FRAG6_CB(prev)->offset + prev->len) - offset > 0) 352 (FRAG6_CB(prev)->offset + prev->len) > offset)
353 goto discard_fq; 353 goto discard_fq;
354 354
355 /* Look for overlap with succeeding segment. */ 355 /* Look for overlap with succeeding segment. */
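
[Editor's note] Both fragment-overlap hunks (nf_conntrack_reasm.c above and reassembly.c here) make the same correction: once the arithmetic is promoted to an unsigned type, as it is when skb->len participates, (end - offset) > 0 is true for every end != offset and so cannot mean "end lies past offset". Comparing directly does. A two-line demonstration:

#include <stdio.h>

int main(void)
{
	unsigned int prev_end = 10, offset = 20;   /* no overlap */

	printf("buggy:   %d\n", (prev_end - offset) > 0); /* 1 (wrong) */
	printf("correct: %d\n", prev_end > offset);       /* 0 */
	return 0;
}
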
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 25661f968f3f..7659d6f16e6b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1565,11 +1565,16 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1565{ 1565{
1566 struct rt6_info *rt, *nrt; 1566 struct rt6_info *rt, *nrt;
1567 int allfrag = 0; 1567 int allfrag = 0;
1568 1568again:
1569 rt = rt6_lookup(net, daddr, saddr, ifindex, 0); 1569 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1570 if (rt == NULL) 1570 if (rt == NULL)
1571 return; 1571 return;
1572 1572
1573 if (rt6_check_expired(rt)) {
1574 ip6_del_rt(rt);
1575 goto again;
1576 }
1577
1573 if (pmtu >= dst_mtu(&rt->dst)) 1578 if (pmtu >= dst_mtu(&rt->dst))
1574 goto out; 1579 goto out;
1575 1580
@@ -1945,8 +1950,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1945 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1950 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1946 struct neighbour *neigh; 1951 struct neighbour *neigh;
1947 1952
1948 if (rt == NULL) 1953 if (rt == NULL) {
1954 if (net_ratelimit())
1955 pr_warning("IPv6: Maximum number of routes reached,"
1956 " consider increasing route/max_size.\n");
1949 return ERR_PTR(-ENOMEM); 1957 return ERR_PTR(-ENOMEM);
1958 }
1950 1959
1951 dev_hold(net->loopback_dev); 1960 dev_hold(net->loopback_dev);
1952 in6_dev_hold(idev); 1961 in6_dev_hold(idev);
@@ -2741,6 +2750,7 @@ static void __net_exit ip6_route_net_exit(struct net *net)
2741 kfree(net->ipv6.ip6_prohibit_entry); 2750 kfree(net->ipv6.ip6_prohibit_entry);
2742 kfree(net->ipv6.ip6_blk_hole_entry); 2751 kfree(net->ipv6.ip6_blk_hole_entry);
2743#endif 2752#endif
2753 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2744} 2754}
2745 2755
2746static struct pernet_operations ip6_route_net_ops = { 2756static struct pernet_operations ip6_route_net_ops = {
@@ -2832,5 +2842,6 @@ void ip6_route_cleanup(void)
2832 xfrm6_fini(); 2842 xfrm6_fini();
2833 fib6_gc_cleanup(); 2843 fib6_gc_cleanup();
2834 unregister_pernet_subsys(&ip6_route_net_ops); 2844 unregister_pernet_subsys(&ip6_route_net_ops);
2845 dst_entries_destroy(&ip6_dst_blackhole_ops);
2835 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2846 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2836} 2847}
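
[Editor's note] rt6_do_pmtu_disc() gains an "again:" loop above: if the lookup hands back an expired route, delete it and retry so the PMTU update lands on a live entry. A minimal sketch of that lookup/check/delete/retry shape, using a single-slot table and stand-in types rather than the kernel's API:

#include <stdbool.h>
#include <stdio.h>

struct route { bool expired; int mtu; };

static struct route *lookup(struct route **tbl, int key)
{
	return tbl[key];
}

static void del_route(struct route **tbl, int key)
{
	tbl[key] = NULL;   /* real code also unlinks and frees the entry */
}

static void update_pmtu(struct route **tbl, int key, int pmtu)
{
	struct route *rt;
again:
	rt = lookup(tbl, key);
	if (!rt)
		return;
	if (rt->expired) {
		del_route(tbl, key);
		goto again;        /* stale entry removed; look again */
	}
	if (pmtu < rt->mtu)
		rt->mtu = pmtu;
}

int main(void)
{
	struct route r = { true, 1500 };
	struct route *tbl[1] = { &r };

	update_pmtu(tbl, 0, 1400);
	printf("entry %s\n", tbl[0] ? "kept" : "removed"); /* removed */
	return 0;
}
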
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 367a6cc584cc..8c4d00c7cd2b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -606,8 +606,9 @@ static int ipip6_rcv(struct sk_buff *skb)
606 return 0; 606 return 0;
607 } 607 }
608 608
609 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 609 /* no tunnel matched, let upstream know, ipsec may handle it */
610 rcu_read_unlock(); 610 rcu_read_unlock();
611 return 1;
611out: 612out:
612 kfree_skb(skb); 613 kfree_skb(skb);
613 return 0; 614 return 0;
@@ -963,6 +964,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
963 } 964 }
964 t = netdev_priv(dev); 965 t = netdev_priv(dev);
965 ipip6_tunnel_unlink(sitn, t); 966 ipip6_tunnel_unlink(sitn, t);
967 synchronize_net();
966 t->parms.iph.saddr = p.iph.saddr; 968 t->parms.iph.saddr = p.iph.saddr;
967 t->parms.iph.daddr = p.iph.daddr; 969 t->parms.iph.daddr = p.iph.daddr;
968 memcpy(dev->dev_addr, &p.iph.saddr, 4); 970 memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index d9864725d0c6..4f3cec12aa85 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,23 +30,26 @@
30#include <net/protocol.h> 30#include <net/protocol.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32 32
33static struct xfrm6_tunnel *tunnel6_handlers __read_mostly; 33static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
34static struct xfrm6_tunnel *tunnel46_handlers __read_mostly; 34static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
35static DEFINE_MUTEX(tunnel6_mutex); 35static DEFINE_MUTEX(tunnel6_mutex);
36 36
37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) 37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
38{ 38{
39 struct xfrm6_tunnel **pprev; 39 struct xfrm6_tunnel __rcu **pprev;
40 struct xfrm6_tunnel *t;
40 int ret = -EEXIST; 41 int ret = -EEXIST;
41 int priority = handler->priority; 42 int priority = handler->priority;
42 43
43 mutex_lock(&tunnel6_mutex); 44 mutex_lock(&tunnel6_mutex);
44 45
45 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; 46 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
46 *pprev; pprev = &(*pprev)->next) { 47 (t = rcu_dereference_protected(*pprev,
47 if ((*pprev)->priority > priority) 48 lockdep_is_held(&tunnel6_mutex))) != NULL;
49 pprev = &t->next) {
50 if (t->priority > priority)
48 break; 51 break;
49 if ((*pprev)->priority == priority) 52 if (t->priority == priority)
50 goto err; 53 goto err;
51 } 54 }
52 55
@@ -65,14 +68,17 @@ EXPORT_SYMBOL(xfrm6_tunnel_register);
65 68
66int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) 69int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
67{ 70{
68 struct xfrm6_tunnel **pprev; 71 struct xfrm6_tunnel __rcu **pprev;
72 struct xfrm6_tunnel *t;
69 int ret = -ENOENT; 73 int ret = -ENOENT;
70 74
71 mutex_lock(&tunnel6_mutex); 75 mutex_lock(&tunnel6_mutex);
72 76
73 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; 77 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
74 *pprev; pprev = &(*pprev)->next) { 78 (t = rcu_dereference_protected(*pprev,
75 if (*pprev == handler) { 79 lockdep_is_held(&tunnel6_mutex))) != NULL;
80 pprev = &t->next) {
81 if (t == handler) {
76 *pprev = handler->next; 82 *pprev = handler->next;
77 ret = 0; 83 ret = 0;
78 break; 84 break;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c84dad432114..cd6cb7c3e563 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -527,7 +527,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
527 } 527 }
528 } 528 }
529 529
530 if (sk->sk_filter) { 530 if (rcu_dereference_raw(sk->sk_filter)) {
531 if (udp_lib_checksum_complete(skb)) 531 if (udp_lib_checksum_complete(skb))
532 goto drop; 532 goto drop;
533 } 533 }
@@ -1477,6 +1477,7 @@ struct proto udpv6_prot = {
1477 .compat_setsockopt = compat_udpv6_setsockopt, 1477 .compat_setsockopt = compat_udpv6_setsockopt,
1478 .compat_getsockopt = compat_udpv6_getsockopt, 1478 .compat_getsockopt = compat_udpv6_getsockopt,
1479#endif 1479#endif
1480 .clear_sk = sk_prot_clear_portaddr_nulls,
1480}; 1481};
1481 1482
1482static struct inet_protosw udpv6_protosw = { 1483static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5f48fadc27f7..986c4de5292e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -55,6 +55,7 @@ struct proto udplitev6_prot = {
55 .compat_setsockopt = compat_udpv6_setsockopt, 55 .compat_setsockopt = compat_udpv6_setsockopt,
56 .compat_getsockopt = compat_udpv6_getsockopt, 56 .compat_getsockopt = compat_udpv6_getsockopt,
57#endif 57#endif
58 .clear_sk = sk_prot_clear_portaddr_nulls,
58}; 59};
59 60
60static struct inet_protosw udplite6_protosw = { 61static struct inet_protosw udplite6_protosw = {
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6434bd5ce088..8e688b3de9ab 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -17,6 +17,7 @@
17#include <linux/netfilter_ipv6.h> 17#include <linux/netfilter_ipv6.h>
18#include <net/dst.h> 18#include <net/dst.h>
19#include <net/ipv6.h> 19#include <net/ipv6.h>
20#include <net/ip6_route.h>
20#include <net/xfrm.h> 21#include <net/xfrm.h>
21 22
22int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, 23int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
@@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb)
88 return xfrm_output(skb); 89 return xfrm_output(skb);
89} 90}
90 91
92static int __xfrm6_output(struct sk_buff *skb)
93{
94 struct dst_entry *dst = skb_dst(skb);
95 struct xfrm_state *x = dst->xfrm;
96
97 if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
98 ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
99 dst_allfrag(skb_dst(skb)))) {
100 return ip6_fragment(skb, xfrm6_output_finish);
101 }
102 return xfrm6_output_finish(skb);
103}
104
91int xfrm6_output(struct sk_buff *skb) 105int xfrm6_output(struct sk_buff *skb)
92{ 106{
93 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, 107 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
94 skb_dst(skb)->dev, xfrm6_output_finish); 108 skb_dst(skb)->dev, __xfrm6_output);
95} 109}
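
[Editor's note] __xfrm6_output() adds one decision in front of xfrm6_output_finish(): tunnel-mode packets that exceed the path MTU and are not GSO go through ip6_fragment(), which this series un-statics for exactly this purpose. The predicate, reduced to a hedged stand-alone sketch with illustrative flags and names:

#include <stdbool.h>
#include <stdio.h>

static const char *output_path(bool tunnel_mode, unsigned int len,
			       unsigned int mtu, bool gso)
{
	if (tunnel_mode && len > mtu && !gso)
		return "ip6_fragment";
	return "xfrm6_output_finish";
}

int main(void)
{
	printf("%s\n", output_path(true, 2000, 1280, false)); /* fragment */
	printf("%s\n", output_path(false, 2000, 1280, false));
	return 0;
}
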
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 285761e77d90..f6054f9ccbe3 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -550,22 +550,30 @@ EXPORT_SYMBOL(irttp_close_tsap);
550 */ 550 */
551int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb) 551int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
552{ 552{
553 int ret;
554
553 IRDA_ASSERT(self != NULL, return -1;); 555 IRDA_ASSERT(self != NULL, return -1;);
554 IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); 556 IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
555 IRDA_ASSERT(skb != NULL, return -1;); 557 IRDA_ASSERT(skb != NULL, return -1;);
556 558
557 IRDA_DEBUG(4, "%s()\n", __func__); 559 IRDA_DEBUG(4, "%s()\n", __func__);
558 560
561 /* Take shortcut on zero byte packets */
562 if (skb->len == 0) {
563 ret = 0;
564 goto err;
565 }
566
559 /* Check that nothing bad happens */ 567 /* Check that nothing bad happens */
560 if ((skb->len == 0) || (!self->connected)) { 568 if (!self->connected) {
561 IRDA_DEBUG(1, "%s(), No data, or not connected\n", 569 IRDA_WARNING("%s(), Not connected\n", __func__);
562 __func__); 570 ret = -ENOTCONN;
563 goto err; 571 goto err;
564 } 572 }
565 573
566 if (skb->len > self->max_seg_size) { 574 if (skb->len > self->max_seg_size) {
567 IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n", 575 IRDA_ERROR("%s(), UData is too large for IrLAP!\n", __func__);
568 __func__); 576 ret = -EMSGSIZE;
569 goto err; 577 goto err;
570 } 578 }
571 579
@@ -576,7 +584,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
576 584
577err: 585err:
578 dev_kfree_skb(skb); 586 dev_kfree_skb(skb);
579 return -1; 587 return ret;
580} 588}
581EXPORT_SYMBOL(irttp_udata_request); 589EXPORT_SYMBOL(irttp_udata_request);
582 590
@@ -599,9 +607,15 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
599 IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__, 607 IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__,
600 skb_queue_len(&self->tx_queue)); 608 skb_queue_len(&self->tx_queue));
601 609
610 /* Take shortcut on zero byte packets */
611 if (skb->len == 0) {
612 ret = 0;
613 goto err;
614 }
615
602 /* Check that nothing bad happens */ 616 /* Check that nothing bad happens */
603 if ((skb->len == 0) || (!self->connected)) { 617 if (!self->connected) {
604 IRDA_WARNING("%s: No data, or not connected\n", __func__); 618 IRDA_WARNING("%s: Not connected\n", __func__);
605 ret = -ENOTCONN; 619 ret = -ENOTCONN;
606 goto err; 620 goto err;
607 } 621 }
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 499c045d6910..f7db676de77d 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1798,7 +1798,8 @@ static void iucv_work_fn(struct work_struct *work)
1798 * Handles external interrupts coming in from CP. 1798 * Handles external interrupts coming in from CP.
1799 * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn(). 1799 * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn().
1800 */ 1800 */
1801static void iucv_external_interrupt(u16 code) 1801static void iucv_external_interrupt(unsigned int ext_int_code,
1802 unsigned int param32, unsigned long param64)
1802{ 1803{
1803 struct iucv_irq_data *p; 1804 struct iucv_irq_data *p;
1804 struct iucv_irq_list *work; 1805 struct iucv_irq_list *work;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1712af1c7b3f..c64ce0a0bb03 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -111,6 +111,10 @@ struct l2tp_net {
111 spinlock_t l2tp_session_hlist_lock; 111 spinlock_t l2tp_session_hlist_lock;
112}; 112};
113 113
114static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
115static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
116static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
117
114static inline struct l2tp_net *l2tp_pernet(struct net *net) 118static inline struct l2tp_net *l2tp_pernet(struct net *net)
115{ 119{
116 BUG_ON(!net); 120 BUG_ON(!net);
@@ -118,6 +122,34 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net)
118 return net_generic(net, l2tp_net_id); 122 return net_generic(net, l2tp_net_id);
119} 123}
120 124
125
126/* Tunnel reference counts. Incremented per session that is added to
127 * the tunnel.
128 */
129static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
130{
131 atomic_inc(&tunnel->ref_count);
132}
133
134static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
135{
136 if (atomic_dec_and_test(&tunnel->ref_count))
137 l2tp_tunnel_free(tunnel);
138}
139#ifdef L2TP_REFCNT_DEBUG
140#define l2tp_tunnel_inc_refcount(_t) do { \
141 printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
142 l2tp_tunnel_inc_refcount_1(_t); \
143 } while (0)
144#define l2tp_tunnel_dec_refcount(_t) do { \
145 printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
146 l2tp_tunnel_dec_refcount_1(_t); \
147 } while (0)
148#else
149#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
150#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
151#endif
152
121/* Session hash global list for L2TPv3. 153/* Session hash global list for L2TPv3.
122 * The session_id SHOULD be random according to RFC3931, but several 154 * The session_id SHOULD be random according to RFC3931, but several
123 * L2TP implementations use incrementing session_ids. So we do a real 155 * L2TP implementations use incrementing session_ids. So we do a real
@@ -699,8 +731,8 @@ EXPORT_SYMBOL(l2tp_recv_common);
699 * Returns 1 if the packet was not a good data packet and could not be 731 * Returns 1 if the packet was not a good data packet and could not be
700 * forwarded. All such packets are passed up to userspace to deal with. 732 * forwarded. All such packets are passed up to userspace to deal with.
701 */ 733 */
702int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, 734static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
703 int (*payload_hook)(struct sk_buff *skb)) 735 int (*payload_hook)(struct sk_buff *skb))
704{ 736{
705 struct l2tp_session *session = NULL; 737 struct l2tp_session *session = NULL;
706 unsigned char *ptr, *optr; 738 unsigned char *ptr, *optr;
@@ -812,7 +844,6 @@ error:
812 844
813 return 1; 845 return 1;
814} 846}
815EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
816 847
817/* UDP encapsulation receive handler. See net/ipv4/udp.c. 848/* UDP encapsulation receive handler. See net/ipv4/udp.c.
818 * Return codes: 849 * Return codes:
@@ -922,7 +953,8 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
922 return bufp - optr; 953 return bufp - optr;
923} 954}
924 955
925int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len) 956static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
957 size_t data_len)
926{ 958{
927 struct l2tp_tunnel *tunnel = session->tunnel; 959 struct l2tp_tunnel *tunnel = session->tunnel;
928 unsigned int len = skb->len; 960 unsigned int len = skb->len;
@@ -970,7 +1002,6 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
970 1002
971 return 0; 1003 return 0;
972} 1004}
973EXPORT_SYMBOL_GPL(l2tp_xmit_core);
974 1005
975/* Automatically called when the skb is freed. 1006/* Automatically called when the skb is freed.
976 */ 1007 */
@@ -1089,7 +1120,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
1089 * The tunnel context is deleted only when all session sockets have been 1120 * The tunnel context is deleted only when all session sockets have been
1090 * closed. 1121 * closed.
1091 */ 1122 */
1092void l2tp_tunnel_destruct(struct sock *sk) 1123static void l2tp_tunnel_destruct(struct sock *sk)
1093{ 1124{
1094 struct l2tp_tunnel *tunnel; 1125 struct l2tp_tunnel *tunnel;
1095 1126
@@ -1128,11 +1159,10 @@ void l2tp_tunnel_destruct(struct sock *sk)
1128end: 1159end:
1129 return; 1160 return;
1130} 1161}
1131EXPORT_SYMBOL(l2tp_tunnel_destruct);
1132 1162
1133/* When the tunnel is closed, all the attached sessions need to go too. 1163/* When the tunnel is closed, all the attached sessions need to go too.
1134 */ 1164 */
1135void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) 1165static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
1136{ 1166{
1137 int hash; 1167 int hash;
1138 struct hlist_node *walk; 1168 struct hlist_node *walk;
@@ -1193,12 +1223,11 @@ again:
1193 } 1223 }
1194 write_unlock_bh(&tunnel->hlist_lock); 1224 write_unlock_bh(&tunnel->hlist_lock);
1195} 1225}
1196EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
1197 1226
1198/* Really kill the tunnel. 1227/* Really kill the tunnel.
1199 * Come here only when all sessions have been cleared from the tunnel. 1228 * Come here only when all sessions have been cleared from the tunnel.
1200 */ 1229 */
1201void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) 1230static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
1202{ 1231{
1203 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); 1232 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1204 1233
@@ -1217,7 +1246,6 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
1217 atomic_dec(&l2tp_tunnel_count); 1246 atomic_dec(&l2tp_tunnel_count);
1218 kfree(tunnel); 1247 kfree(tunnel);
1219} 1248}
1220EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
1221 1249
1222/* Create a socket for the tunnel, if one isn't set up by 1250/* Create a socket for the tunnel, if one isn't set up by
1223 * userspace. This is used for static tunnels where there is no 1251 * userspace. This is used for static tunnels where there is no
@@ -1512,7 +1540,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete);
1512/* We come here whenever a session's send_seq, cookie_len or 1540/* We come here whenever a session's send_seq, cookie_len or
1513 * l2specific_len parameters are set. 1541 * l2specific_len parameters are set.
1514 */ 1542 */
1515void l2tp_session_set_header_len(struct l2tp_session *session, int version) 1543static void l2tp_session_set_header_len(struct l2tp_session *session, int version)
1516{ 1544{
1517 if (version == L2TP_HDR_VER_2) { 1545 if (version == L2TP_HDR_VER_2) {
1518 session->hdr_len = 6; 1546 session->hdr_len = 6;
@@ -1525,7 +1553,6 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
1525 } 1553 }
1526 1554
1527} 1555}
1528EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
1529 1556
1530struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) 1557struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
1531{ 1558{
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index f0f318edd3f1..a16a48e79fab 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -231,48 +231,15 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i
231extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); 231extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
232extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); 232extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
233extern int l2tp_session_delete(struct l2tp_session *session); 233extern int l2tp_session_delete(struct l2tp_session *session);
234extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
235extern void l2tp_session_free(struct l2tp_session *session); 234extern void l2tp_session_free(struct l2tp_session *session);
236extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); 235extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
237extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
238extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); 236extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
239 237
240extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
241extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); 238extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
242extern void l2tp_tunnel_destruct(struct sock *sk);
243extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
244extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
245 239
246extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); 240extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
247extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); 241extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
248 242
249/* Tunnel reference counts. Incremented per session that is added to
250 * the tunnel.
251 */
252static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
253{
254 atomic_inc(&tunnel->ref_count);
255}
256
257static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
258{
259 if (atomic_dec_and_test(&tunnel->ref_count))
260 l2tp_tunnel_free(tunnel);
261}
262#ifdef L2TP_REFCNT_DEBUG
263#define l2tp_tunnel_inc_refcount(_t) do { \
264 printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
265 l2tp_tunnel_inc_refcount_1(_t); \
266 } while (0)
267#define l2tp_tunnel_dec_refcount(_t) do { \
268 printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
269 l2tp_tunnel_dec_refcount_1(_t); \
270 } while (0)
271#else
272#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
273#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
274#endif
275
276/* Session reference counts. Incremented when code obtains a reference 243/* Session reference counts. Incremented when code obtains a reference
277 * to a session. 244 * to a session.
278 */ 245 */
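
[Editor's note] The tunnel refcount helpers move out of l2tp_core.h and become private to l2tp_core.c alongside the now-static l2tp_tunnel_free(). The pattern itself is the usual last-reference-frees scheme; a userspace sketch with C11 atomics standing in for atomic_t (illustrative only, no debug macros):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct tunnel {
	atomic_int ref_count;
};

static void tunnel_inc(struct tunnel *t)
{
	atomic_fetch_add(&t->ref_count, 1);
}

static void tunnel_dec(struct tunnel *t)
{
	/* fetch_sub returns the old value; 1 means we were last */
	if (atomic_fetch_sub(&t->ref_count, 1) == 1) {
		printf("freeing tunnel\n");
		free(t);
	}
}

int main(void)
{
	struct tunnel *t = malloc(sizeof(*t));

	atomic_init(&t->ref_count, 1);
	tunnel_inc(t);
	tunnel_dec(t);
	tunnel_dec(t);   /* last reference: frees the tunnel */
	return 0;
}
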
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 104ec3b283d4..b8dbae82fab8 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -249,7 +249,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
249 struct seq_file *seq; 249 struct seq_file *seq;
250 int rc = -ENOMEM; 250 int rc = -ENOMEM;
251 251
252 pd = kzalloc(GFP_KERNEL, sizeof(*pd)); 252 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
253 if (pd == NULL) 253 if (pd == NULL)
254 goto out; 254 goto out;
255 255
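
[Editor's note] The l2tp_debugfs.c hunk is a swapped-argument fix: kzalloc(size, flags) had been called as kzalloc(flags, size), which compiles cleanly because both parameters are integer-like. A userspace illustration of why the compiler cannot catch it — the wrapper and the GFP value here are stand-ins, not kernel definitions:

#include <stdlib.h>

#define GFP_KERNEL 0xd0u   /* stand-in value, not the kernel's */

static void *kzalloc_like(size_t size, unsigned int flags)
{
	(void)flags;               /* allocation policy ignored here */
	return calloc(1, size);
}

struct pd { int a; char buf[64]; };

int main(void)
{
	/* wrong: allocates GFP_KERNEL (208) bytes, ignores sizeof */
	void *bad  = kzalloc_like(GFP_KERNEL, sizeof(struct pd));
	/* right: allocates sizeof(struct pd) bytes */
	void *good = kzalloc_like(sizeof(struct pd), GFP_KERNEL);

	free(bad);
	free(good);
	return 0;
}
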
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 1c770c0644d1..522e219f3558 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -576,7 +576,7 @@ out:
576 return copied; 576 return copied;
577} 577}
578 578
579struct proto l2tp_ip_prot = { 579static struct proto l2tp_ip_prot = {
580 .name = "L2TP/IP", 580 .name = "L2TP/IP",
581 .owner = THIS_MODULE, 581 .owner = THIS_MODULE,
582 .init = l2tp_ip_open, 582 .init = l2tp_ip_open,
@@ -674,4 +674,8 @@ MODULE_LICENSE("GPL");
674MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); 674MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
675MODULE_DESCRIPTION("L2TP over IP"); 675MODULE_DESCRIPTION("L2TP over IP");
676MODULE_VERSION("1.0"); 676MODULE_VERSION("1.0");
677MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP); 677
 678/* Use the value of SOCK_DGRAM (2) directly, because __stringify doesn't like
679 * enums
680 */
681MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 582612998211..e35dbe55f520 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -317,8 +317,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
317 goto out; 317 goto out;
318 rc = -ENODEV; 318 rc = -ENODEV;
319 rtnl_lock(); 319 rtnl_lock();
320 rcu_read_lock();
320 if (sk->sk_bound_dev_if) { 321 if (sk->sk_bound_dev_if) {
321 llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); 322 llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
322 if (llc->dev) { 323 if (llc->dev) {
323 if (!addr->sllc_arphrd) 324 if (!addr->sllc_arphrd)
324 addr->sllc_arphrd = llc->dev->type; 325 addr->sllc_arphrd = llc->dev->type;
@@ -329,13 +330,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
329 !llc_mac_match(addr->sllc_mac, 330 !llc_mac_match(addr->sllc_mac,
330 llc->dev->dev_addr)) { 331 llc->dev->dev_addr)) {
331 rc = -EINVAL; 332 rc = -EINVAL;
332 dev_put(llc->dev);
333 llc->dev = NULL; 333 llc->dev = NULL;
334 } 334 }
335 } 335 }
336 } else 336 } else
337 llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, 337 llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd,
338 addr->sllc_mac); 338 addr->sllc_mac);
339 rcu_read_unlock();
339 rtnl_unlock(); 340 rtnl_unlock();
340 if (!llc->dev) 341 if (!llc->dev)
341 goto out; 342 goto out;
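The llc_ui_bind() change swaps a refcounted lookup for an RCU one: dev_get_by_index() takes a device reference that every exit path must drop with dev_put(), while dev_get_by_index_rcu() returns a pointer that is only stable inside the rcu_read_lock() section, so the error-path dev_put() disappears. A sketch of the lookup pattern, assuming a caller that only inspects the device and keeps no pointer afterwards:

#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>

static unsigned short dev_type_by_index(int ifindex)
{
	struct net_device *dev;
	unsigned short type = 0;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(&init_net, ifindex);
	if (dev)
		type = dev->type;	/* valid only under rcu_read_lock() */
	rcu_read_unlock();
	/* dev must not be dereferenced past this point without a reference */
	return type;
}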
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 43288259f4a1..1534f2b44caf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -525,6 +525,7 @@ config NETFILTER_XT_TARGET_TPROXY
525 depends on NETFILTER_XTABLES 525 depends on NETFILTER_XTABLES
526 depends on NETFILTER_ADVANCED 526 depends on NETFILTER_ADVANCED
527 select NF_DEFRAG_IPV4 527 select NF_DEFRAG_IPV4
528 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
528 help 529 help
529 This option adds a `TPROXY' target, which is somewhat similar to 530 This option adds a `TPROXY' target, which is somewhat similar to
530 REDIRECT. It can only be used in the mangle table and is useful 531 REDIRECT. It can only be used in the mangle table and is useful
@@ -927,6 +928,7 @@ config NETFILTER_XT_MATCH_SOCKET
927 depends on NETFILTER_ADVANCED 928 depends on NETFILTER_ADVANCED
928 depends on !NF_CONNTRACK || NF_CONNTRACK 929 depends on !NF_CONNTRACK || NF_CONNTRACK
929 select NF_DEFRAG_IPV4 930 select NF_DEFRAG_IPV4
931 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
930 help 932 help
931 This option adds a `socket' match, which can be used to match 933 This option adds a `socket' match, which can be used to match
932 packets for which a TCP or UDP socket lookup finds a valid socket. 934 packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index a22dac227055..70bd1d0774c6 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -4,6 +4,7 @@
4menuconfig IP_VS 4menuconfig IP_VS
5 tristate "IP virtual server support" 5 tristate "IP virtual server support"
6 depends on NET && INET && NETFILTER 6 depends on NET && INET && NETFILTER
7 depends on (NF_CONNTRACK || NF_CONNTRACK=n)
7 ---help--- 8 ---help---
8 IP Virtual Server support will let you build a high-performance 9 IP Virtual Server support will let you build a high-performance
9 virtual server based on cluster of two or more real servers. This 10 virtual server based on cluster of two or more real servers. This
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1eacf8d9966a..27a5ea6b6a0f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1312,7 +1312,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
1312 if (!hash) { 1312 if (!hash) {
1313 *vmalloced = 1; 1313 *vmalloced = 1;
1314 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1314 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1315 hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 1315 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1316 PAGE_KERNEL);
1316 } 1317 }
1317 1318
1318 if (hash && nulls) 1319 if (hash && nulls)
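Adding __GFP_HIGHMEM to the vmalloc fallback lets the conntrack hash table use highmem pages on 32-bit systems: vmalloc establishes its own kernel mapping anyway, so the pages need not come from the directly mapped lowmem zone. A sketch of the alloc-then-fall-back shape (the surrounding allocation strategy is assumed from context, not quoted from the patch):

#include <linux/gfp.h>
#include <linux/vmalloc.h>

static void *alloc_big_zeroed(size_t sz, int *vmalloced)
{
	void *p;

	p = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
				     get_order(sz));
	if (!p) {
		*vmalloced = 1;		/* caller must vfree(), not free_pages() */
		p = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			      PAGE_KERNEL);
	}
	return p;
}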
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ed6d92958023..dc7bb74110df 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -292,6 +292,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
292 292
293 for (i = 0; i < MAX_NF_CT_PROTO; i++) 293 for (i = 0; i < MAX_NF_CT_PROTO; i++)
294 proto_array[i] = &nf_conntrack_l4proto_generic; 294 proto_array[i] = &nf_conntrack_l4proto_generic;
295
296 /* Before making proto_array visible to lockless readers,
297 * we must make sure its content is committed to memory.
298 */
299 smp_wmb();
300
295 nf_ct_protos[l4proto->l3proto] = proto_array; 301 nf_ct_protos[l4proto->l3proto] = proto_array;
296 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != 302 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
297 &nf_conntrack_l4proto_generic) { 303 &nf_conntrack_l4proto_generic) {
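The smp_wmb() here orders the array initialization against the pointer store that publishes it: nf_ct_protos is walked by lockless readers, and without the barrier a reader could observe the new proto_array pointer before the generic-protocol entries behind it. This is the same publish ordering rcu_assign_pointer() bundles; a bare sketch of the pattern (names illustrative, headers abbreviated):

#include <linux/kernel.h>

struct item;

static struct item *slots[16];
static struct item **published;	/* read locklessly elsewhere */

static void publish_slots(struct item *filler)
{
	int i;

	for (i = 0; i < 16; i++)
		slots[i] = filler;

	/* commit the contents to memory before exposing the pointer */
	smp_wmb();

	published = slots;
}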
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 19c482caf30b..640678f47a2a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -21,7 +21,9 @@
21#include <linux/netfilter_ipv4/ip_tables.h> 21#include <linux/netfilter_ipv4/ip_tables.h>
22 22
23#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 24
25#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
26#define XT_TPROXY_HAVE_IPV6 1
25#include <net/if_inet6.h> 27#include <net/if_inet6.h>
26#include <net/addrconf.h> 28#include <net/addrconf.h>
27#include <linux/netfilter_ipv6/ip6_tables.h> 29#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -172,7 +174,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
172 return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); 174 return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
173} 175}
174 176
175#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 177#ifdef XT_TPROXY_HAVE_IPV6
176 178
177static inline const struct in6_addr * 179static inline const struct in6_addr *
178tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, 180tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
@@ -372,7 +374,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
372 .hooks = 1 << NF_INET_PRE_ROUTING, 374 .hooks = 1 << NF_INET_PRE_ROUTING,
373 .me = THIS_MODULE, 375 .me = THIS_MODULE,
374 }, 376 },
375#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 377#ifdef XT_TPROXY_HAVE_IPV6
376 { 378 {
377 .name = "TPROXY", 379 .name = "TPROXY",
378 .family = NFPROTO_IPV6, 380 .family = NFPROTO_IPV6,
@@ -391,7 +393,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
391static int __init tproxy_tg_init(void) 393static int __init tproxy_tg_init(void)
392{ 394{
393 nf_defrag_ipv4_enable(); 395 nf_defrag_ipv4_enable();
394#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 396#ifdef XT_TPROXY_HAVE_IPV6
395 nf_defrag_ipv6_enable(); 397 nf_defrag_ipv6_enable();
396#endif 398#endif
397 399
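Both this file and xt_socket.c below previously gated their IPv6 paths on CONFIG_IPV6, but what the code actually requires is ip6_tables support; the patch tests the right symbol and hides the check behind a single feature macro. Tristate Kconfig options define either FOO (built in) or FOO_MODULE (built as a module), so both spellings must be tested; a sketch of the idiom, using the macro name the patch itself introduces:

/* Feature test done once; the rest of the file uses the short form. */
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
#define XT_TPROXY_HAVE_IPV6 1
#endif

#ifdef XT_TPROXY_HAVE_IPV6
/* IPv6-only includes, helpers, and the NFPROTO_IPV6 target entry */
#endif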
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2dbd4c857735..00d6ae838303 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -14,7 +14,6 @@
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv4/ip_tables.h> 16#include <linux/netfilter_ipv4/ip_tables.h>
17#include <linux/netfilter_ipv6/ip6_tables.h>
18#include <net/tcp.h> 17#include <net/tcp.h>
19#include <net/udp.h> 18#include <net/udp.h>
20#include <net/icmp.h> 19#include <net/icmp.h>
@@ -22,7 +21,12 @@
22#include <net/inet_sock.h> 21#include <net/inet_sock.h>
23#include <net/netfilter/nf_tproxy_core.h> 22#include <net/netfilter/nf_tproxy_core.h>
24#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24
25#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
26#define XT_SOCKET_HAVE_IPV6 1
27#include <linux/netfilter_ipv6/ip6_tables.h>
25#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 28#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
29#endif
26 30
27#include <linux/netfilter/xt_socket.h> 31#include <linux/netfilter/xt_socket.h>
28 32
@@ -186,12 +190,12 @@ socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
186 return socket_match(skb, par, par->matchinfo); 190 return socket_match(skb, par, par->matchinfo);
187} 191}
188 192
189#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 193#ifdef XT_SOCKET_HAVE_IPV6
190 194
191static int 195static int
192extract_icmp6_fields(const struct sk_buff *skb, 196extract_icmp6_fields(const struct sk_buff *skb,
193 unsigned int outside_hdrlen, 197 unsigned int outside_hdrlen,
194 u8 *protocol, 198 int *protocol,
195 struct in6_addr **raddr, 199 struct in6_addr **raddr,
196 struct in6_addr **laddr, 200 struct in6_addr **laddr,
197 __be16 *rport, 201 __be16 *rport,
@@ -248,8 +252,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
248 struct sock *sk; 252 struct sock *sk;
249 struct in6_addr *daddr, *saddr; 253 struct in6_addr *daddr, *saddr;
250 __be16 dport, sport; 254 __be16 dport, sport;
251 int thoff; 255 int thoff, tproto;
252 u8 tproto;
253 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; 256 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
254 257
255 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); 258 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
@@ -301,7 +304,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
301 sk = NULL; 304 sk = NULL;
302 } 305 }
303 306
304 pr_debug("proto %hhu %pI6:%hu -> %pI6:%hu " 307 pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
305 "(orig %pI6:%hu) sock %p\n", 308 "(orig %pI6:%hu) sock %p\n",
306 tproto, saddr, ntohs(sport), 309 tproto, saddr, ntohs(sport),
307 daddr, ntohs(dport), 310 daddr, ntohs(dport),
@@ -331,7 +334,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
331 (1 << NF_INET_LOCAL_IN), 334 (1 << NF_INET_LOCAL_IN),
332 .me = THIS_MODULE, 335 .me = THIS_MODULE,
333 }, 336 },
334#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 337#ifdef XT_SOCKET_HAVE_IPV6
335 { 338 {
336 .name = "socket", 339 .name = "socket",
337 .revision = 1, 340 .revision = 1,
@@ -348,7 +351,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
348static int __init socket_mt_init(void) 351static int __init socket_mt_init(void)
349{ 352{
350 nf_defrag_ipv4_enable(); 353 nf_defrag_ipv4_enable();
351#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 354#ifdef XT_SOCKET_HAVE_IPV6
352 nf_defrag_ipv6_enable(); 355 nf_defrag_ipv6_enable();
353#endif 356#endif
354 357
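The tproto change from u8 to int in socket_mt6_v1() is a sign-preservation fix: ipv6_find_hdr() returns either a protocol number or a negative errno, and a negative value stored in a u8 wraps to a large positive one, leaving any later error check dead. A sketch of the corrected shape, mirroring the call as it appears in the hunk (the pr_debug text is illustrative):

#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netfilter_ipv6/ip6_tables.h>

static bool find_transport(const struct sk_buff *skb)
{
	int thoff;
	int tproto;	/* int, not u8: the return value may be a negative errno */

	tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
	if (tproto < 0) {	/* dead code while tproto was u8 */
		pr_debug("unable to find transport header in IPv6 packet\n");
		return false;
	}
	return true;
}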
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index cd96ed3ccee4..478181d53c55 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -83,9 +83,9 @@ struct netlink_sock {
83 struct module *module; 83 struct module *module;
84}; 84};
85 85
86struct listeners_rcu_head { 86struct listeners {
87 struct rcu_head rcu_head; 87 struct rcu_head rcu;
88 void *ptr; 88 unsigned long masks[0];
89}; 89};
90 90
91#define NETLINK_KERNEL_SOCKET 0x1 91#define NETLINK_KERNEL_SOCKET 0x1
@@ -119,7 +119,7 @@ struct nl_pid_hash {
119struct netlink_table { 119struct netlink_table {
120 struct nl_pid_hash hash; 120 struct nl_pid_hash hash;
121 struct hlist_head mc_list; 121 struct hlist_head mc_list;
122 unsigned long *listeners; 122 struct listeners __rcu *listeners;
123 unsigned int nl_nonroot; 123 unsigned int nl_nonroot;
124 unsigned int groups; 124 unsigned int groups;
125 struct mutex *cb_mutex; 125 struct mutex *cb_mutex;
@@ -338,7 +338,7 @@ netlink_update_listeners(struct sock *sk)
338 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 338 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
339 mask |= nlk_sk(sk)->groups[i]; 339 mask |= nlk_sk(sk)->groups[i];
340 } 340 }
341 tbl->listeners[i] = mask; 341 tbl->listeners->masks[i] = mask;
342 } 342 }
343 /* this function is only called with the netlink table "grabbed", which 343 /* this function is only called with the netlink table "grabbed", which
344 * makes sure updates are visible before bind or setsockopt return. */ 344 * makes sure updates are visible before bind or setsockopt return. */
@@ -936,7 +936,7 @@ EXPORT_SYMBOL(netlink_unicast);
936int netlink_has_listeners(struct sock *sk, unsigned int group) 936int netlink_has_listeners(struct sock *sk, unsigned int group)
937{ 937{
938 int res = 0; 938 int res = 0;
939 unsigned long *listeners; 939 struct listeners *listeners;
940 940
941 BUG_ON(!netlink_is_kernel(sk)); 941 BUG_ON(!netlink_is_kernel(sk));
942 942
@@ -944,7 +944,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
944 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 944 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
945 945
946 if (group - 1 < nl_table[sk->sk_protocol].groups) 946 if (group - 1 < nl_table[sk->sk_protocol].groups)
947 res = test_bit(group - 1, listeners); 947 res = test_bit(group - 1, listeners->masks);
948 948
949 rcu_read_unlock(); 949 rcu_read_unlock();
950 950
@@ -1498,7 +1498,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1498 struct socket *sock; 1498 struct socket *sock;
1499 struct sock *sk; 1499 struct sock *sk;
1500 struct netlink_sock *nlk; 1500 struct netlink_sock *nlk;
1501 unsigned long *listeners = NULL; 1501 struct listeners *listeners = NULL;
1502 1502
1503 BUG_ON(!nl_table); 1503 BUG_ON(!nl_table);
1504 1504
@@ -1523,8 +1523,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1523 if (groups < 32) 1523 if (groups < 32)
1524 groups = 32; 1524 groups = 32;
1525 1525
1526 listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), 1526 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1527 GFP_KERNEL);
1528 if (!listeners) 1527 if (!listeners)
1529 goto out_sock_release; 1528 goto out_sock_release;
1530 1529
@@ -1541,7 +1540,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1541 netlink_table_grab(); 1540 netlink_table_grab();
1542 if (!nl_table[unit].registered) { 1541 if (!nl_table[unit].registered) {
1543 nl_table[unit].groups = groups; 1542 nl_table[unit].groups = groups;
1544 nl_table[unit].listeners = listeners; 1543 rcu_assign_pointer(nl_table[unit].listeners, listeners);
1545 nl_table[unit].cb_mutex = cb_mutex; 1544 nl_table[unit].cb_mutex = cb_mutex;
1546 nl_table[unit].module = module; 1545 nl_table[unit].module = module;
1547 nl_table[unit].registered = 1; 1546 nl_table[unit].registered = 1;
@@ -1572,43 +1571,28 @@ netlink_kernel_release(struct sock *sk)
1572EXPORT_SYMBOL(netlink_kernel_release); 1571EXPORT_SYMBOL(netlink_kernel_release);
1573 1572
1574 1573
1575static void netlink_free_old_listeners(struct rcu_head *rcu_head) 1574static void listeners_free_rcu(struct rcu_head *head)
1576{ 1575{
1577 struct listeners_rcu_head *lrh; 1576 kfree(container_of(head, struct listeners, rcu));
1578
1579 lrh = container_of(rcu_head, struct listeners_rcu_head, rcu_head);
1580 kfree(lrh->ptr);
1581} 1577}
1582 1578
1583int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 1579int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1584{ 1580{
1585 unsigned long *listeners, *old = NULL; 1581 struct listeners *new, *old;
1586 struct listeners_rcu_head *old_rcu_head;
1587 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 1582 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1588 1583
1589 if (groups < 32) 1584 if (groups < 32)
1590 groups = 32; 1585 groups = 32;
1591 1586
1592 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 1587 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1593 listeners = kzalloc(NLGRPSZ(groups) + 1588 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1594 sizeof(struct listeners_rcu_head), 1589 if (!new)
1595 GFP_ATOMIC);
1596 if (!listeners)
1597 return -ENOMEM; 1590 return -ENOMEM;
1598 old = tbl->listeners; 1591 old = rcu_dereference_raw(tbl->listeners);
1599 memcpy(listeners, old, NLGRPSZ(tbl->groups)); 1592 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1600 rcu_assign_pointer(tbl->listeners, listeners); 1593 rcu_assign_pointer(tbl->listeners, new);
1601 /* 1594
1602 * Free the old memory after an RCU grace period so we 1595 call_rcu(&old->rcu, listeners_free_rcu);
1603 * don't leak it. We use call_rcu() here in order to be
1604 * able to call this function from atomic contexts. The
1605 * allocation of this memory will have reserved enough
1606 * space for struct listeners_rcu_head at the end.
1607 */
1608 old_rcu_head = (void *)(tbl->listeners +
1609 NLGRPLONGS(tbl->groups));
1610 old_rcu_head->ptr = old;
1611 call_rcu(&old_rcu_head->rcu_head, netlink_free_old_listeners);
1612 } 1596 }
1613 tbl->groups = groups; 1597 tbl->groups = groups;
1614 1598
@@ -2104,18 +2088,17 @@ static void __net_exit netlink_net_exit(struct net *net)
2104 2088
2105static void __init netlink_add_usersock_entry(void) 2089static void __init netlink_add_usersock_entry(void)
2106{ 2090{
2107 unsigned long *listeners; 2091 struct listeners *listeners;
2108 int groups = 32; 2092 int groups = 32;
2109 2093
2110 listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), 2094 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2111 GFP_KERNEL);
2112 if (!listeners) 2095 if (!listeners)
2113 panic("netlink_add_usersock_entry: Cannot allocate listneres\n"); 2096 panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2114 2097
2115 netlink_table_grab(); 2098 netlink_table_grab();
2116 2099
2117 nl_table[NETLINK_USERSOCK].groups = groups; 2100 nl_table[NETLINK_USERSOCK].groups = groups;
2118 nl_table[NETLINK_USERSOCK].listeners = listeners; 2101 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2119 nl_table[NETLINK_USERSOCK].module = THIS_MODULE; 2102 nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2120 nl_table[NETLINK_USERSOCK].registered = 1; 2103 nl_table[NETLINK_USERSOCK].registered = 1;
2121 2104
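This listeners rework replaces an awkward trailer (a listeners_rcu_head stashed past the end of the bitmap allocation) with the standard idiom: embed the rcu_head in a struct next to a flexible array member, publish replacements with rcu_assign_pointer(), and recover the containing object with container_of() in the RCU callback. A condensed sketch of the resize path under that idiom (names illustrative; locking and table lookup elided):

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>

struct bitmap_blk {
	struct rcu_head	rcu;
	unsigned long	masks[0];	/* sized at allocation time */
};

static struct bitmap_blk __rcu *current_blk;

static void bitmap_blk_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct bitmap_blk, rcu));
}

static int bitmap_grow(size_t old_bytes, size_t new_bytes)
{
	struct bitmap_blk *new, *old;

	new = kzalloc(sizeof(*new) + new_bytes, GFP_ATOMIC);
	if (!new)
		return -ENOMEM;

	old = rcu_dereference_raw(current_blk);
	memcpy(new->masks, old->masks, old_bytes);
	rcu_assign_pointer(current_blk, new);

	/* old stays readable until every reader leaves its RCU section */
	call_rcu(&old->rcu, bitmap_blk_free_rcu);
	return 0;
}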
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27b9d46..8298e676f5a0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1610,9 +1610,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1610 1610
1611 err = -EINVAL; 1611 err = -EINVAL;
1612 vnet_hdr_len = sizeof(vnet_hdr); 1612 vnet_hdr_len = sizeof(vnet_hdr);
1613 if ((len -= vnet_hdr_len) < 0) 1613 if (len < vnet_hdr_len)
1614 goto out_free; 1614 goto out_free;
1615 1615
1616 len -= vnet_hdr_len;
1617
1616 if (skb_is_gso(skb)) { 1618 if (skb_is_gso(skb)) {
1617 struct skb_shared_info *sinfo = skb_shinfo(skb); 1619 struct skb_shared_info *sinfo = skb_shinfo(skb);
1618 1620
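In packet_recvmsg() the length is unsigned, so the old test `(len -= vnet_hdr_len) < 0` could never fire: the subtraction wraps to a huge value instead of going negative. The fix compares before subtracting, the safe shape for any unsigned length check; a minimal sketch:

#include <linux/errno.h>
#include <linux/types.h>

static int reserve_hdr(size_t *len, size_t hdr_len)
{
	if (*len < hdr_len)	/* check first: unsigned math never goes negative */
		return -EINVAL;
	*len -= hdr_len;	/* now guaranteed not to wrap */
	return 0;
}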
@@ -1719,7 +1721,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1719 rcu_read_lock(); 1721 rcu_read_lock();
1720 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); 1722 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1721 if (dev) 1723 if (dev)
1722 strlcpy(uaddr->sa_data, dev->name, 15); 1724 strncpy(uaddr->sa_data, dev->name, 14);
1723 else 1725 else
1724 memset(uaddr->sa_data, 0, 14); 1726 memset(uaddr->sa_data, 0, 14);
1725 rcu_read_unlock(); 1727 rcu_read_unlock();
@@ -1742,6 +1744,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1742 sll->sll_family = AF_PACKET; 1744 sll->sll_family = AF_PACKET;
1743 sll->sll_ifindex = po->ifindex; 1745 sll->sll_ifindex = po->ifindex;
1744 sll->sll_protocol = po->num; 1746 sll->sll_protocol = po->num;
1747 sll->sll_pkttype = 0;
1745 rcu_read_lock(); 1748 rcu_read_lock();
1746 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); 1749 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
1747 if (dev) { 1750 if (dev) {
diff --git a/net/rds/loop.c b/net/rds/loop.c
index c390156b426f..aeec1d483b17 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -134,8 +134,12 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
134static void rds_loop_conn_free(void *arg) 134static void rds_loop_conn_free(void *arg)
135{ 135{
136 struct rds_loop_connection *lc = arg; 136 struct rds_loop_connection *lc = arg;
137 unsigned long flags;
138
137 rdsdebug("lc %p\n", lc); 139 rdsdebug("lc %p\n", lc);
140 spin_lock_irqsave(&loop_conns_lock, flags);
138 list_del(&lc->loop_node); 141 list_del(&lc->loop_node);
142 spin_unlock_irqrestore(&loop_conns_lock, flags);
139 kfree(lc); 143 kfree(lc);
140} 144}
141 145
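Both rds_loop_conn_free() here and rds_tcp_conn_free() further down fix the same race: the connection was unlinked from a global list with no lock held, while other paths walk that list under a spinlock. The teardown path must take the same lock; the irqsave form is assumed to match the contexts the lock's other users run in. A sketch of the pattern:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct conn {
	struct list_head node;
};

static LIST_HEAD(conn_list);
static DEFINE_SPINLOCK(conn_lock);

static void conn_free(struct conn *c)
{
	unsigned long flags;

	/* list_del() needs the same lock every other list user takes */
	spin_lock_irqsave(&conn_lock, flags);
	list_del(&c->node);
	spin_unlock_irqrestore(&conn_lock, flags);

	kfree(c);
}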
diff --git a/net/rds/message.c b/net/rds/message.c
index a84545dae370..1fd3d29023d7 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -224,6 +224,9 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
224 WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs); 224 WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
225 WARN_ON(!nents); 225 WARN_ON(!nents);
226 226
227 if (rm->m_used_sgs + nents > rm->m_total_sgs)
228 return NULL;
229
227 sg_ret = &sg_first[rm->m_used_sgs]; 230 sg_ret = &sg_first[rm->m_used_sgs];
228 sg_init_table(sg_ret, nents); 231 sg_init_table(sg_ret, nents);
229 rm->m_used_sgs += nents; 232 rm->m_used_sgs += nents;
@@ -246,6 +249,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
246 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 249 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
247 rm->data.op_nents = ceil(total_len, PAGE_SIZE); 250 rm->data.op_nents = ceil(total_len, PAGE_SIZE);
248 rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); 251 rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
252 if (!rm->data.op_sg) {
253 rds_message_put(rm);
254 return ERR_PTR(-ENOMEM);
255 }
249 256
250 for (i = 0; i < rm->data.op_nents; ++i) { 257 for (i = 0; i < rm->data.op_nents; ++i) {
251 sg_set_page(&rm->data.op_sg[i], 258 sg_set_page(&rm->data.op_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 1a41debca1ce..4e37c1cbe8b2 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
479 479
480 480
481/* 481/*
482 * Count the number of pages needed to describe an incoming iovec. 482 * Count the number of pages needed to describe an incoming iovec array.
483 */ 483 */
484static int rds_rdma_pages(struct rds_rdma_args *args) 484static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
485{
486 int tot_pages = 0;
487 unsigned int nr_pages;
488 unsigned int i;
489
490 /* figure out the number of pages in the vector */
491 for (i = 0; i < nr_iovecs; i++) {
492 nr_pages = rds_pages_in_vec(&iov[i]);
493 if (nr_pages == 0)
494 return -EINVAL;
495
496 tot_pages += nr_pages;
497
498 /*
499 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
500 * so tot_pages cannot overflow without first going negative.
501 */
502 if (tot_pages < 0)
503 return -EINVAL;
504 }
505
506 return tot_pages;
507}
508
509int rds_rdma_extra_size(struct rds_rdma_args *args)
485{ 510{
486 struct rds_iovec vec; 511 struct rds_iovec vec;
487 struct rds_iovec __user *local_vec; 512 struct rds_iovec __user *local_vec;
488 unsigned int tot_pages = 0; 513 int tot_pages = 0;
489 unsigned int nr_pages; 514 unsigned int nr_pages;
490 unsigned int i; 515 unsigned int i;
491 516
@@ -502,14 +527,16 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
502 return -EINVAL; 527 return -EINVAL;
503 528
504 tot_pages += nr_pages; 529 tot_pages += nr_pages;
505 }
506 530
507 return tot_pages; 531 /*
508} 532 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
533 * so tot_pages cannot overflow without first going negative.
534 */
535 if (tot_pages < 0)
536 return -EINVAL;
537 }
509 538
510int rds_rdma_extra_size(struct rds_rdma_args *args) 539 return tot_pages * sizeof(struct scatterlist);
511{
512 return rds_rdma_pages(args) * sizeof(struct scatterlist);
513} 540}
514 541
515/* 542/*
@@ -520,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
520 struct cmsghdr *cmsg) 547 struct cmsghdr *cmsg)
521{ 548{
522 struct rds_rdma_args *args; 549 struct rds_rdma_args *args;
523 struct rds_iovec vec;
524 struct rm_rdma_op *op = &rm->rdma; 550 struct rm_rdma_op *op = &rm->rdma;
525 int nr_pages; 551 int nr_pages;
526 unsigned int nr_bytes; 552 unsigned int nr_bytes;
527 struct page **pages = NULL; 553 struct page **pages = NULL;
528 struct rds_iovec __user *local_vec; 554 struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
529 unsigned int nr; 555 int iov_size;
530 unsigned int i, j; 556 unsigned int i, j;
531 int ret = 0; 557 int ret = 0;
532 558
@@ -541,14 +567,31 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
541 goto out; 567 goto out;
542 } 568 }
543 569
544 if (args->nr_local > (u64)UINT_MAX) { 570 if (args->nr_local > UIO_MAXIOV) {
545 ret = -EMSGSIZE; 571 ret = -EMSGSIZE;
546 goto out; 572 goto out;
547 } 573 }
548 574
549 nr_pages = rds_rdma_pages(args); 575 /* Check whether to allocate the iovec area */
550 if (nr_pages < 0) 576 iov_size = args->nr_local * sizeof(struct rds_iovec);
577 if (args->nr_local > UIO_FASTIOV) {
578 iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
579 if (!iovs) {
580 ret = -ENOMEM;
581 goto out;
582 }
583 }
584
585 if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
586 ret = -EFAULT;
587 goto out;
588 }
589
590 nr_pages = rds_rdma_pages(iovs, args->nr_local);
591 if (nr_pages < 0) {
592 ret = -EINVAL;
551 goto out; 593 goto out;
594 }
552 595
553 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 596 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
554 if (!pages) { 597 if (!pages) {
@@ -564,6 +607,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
564 op->op_recverr = rs->rs_recverr; 607 op->op_recverr = rs->rs_recverr;
565 WARN_ON(!nr_pages); 608 WARN_ON(!nr_pages);
566 op->op_sg = rds_message_alloc_sgs(rm, nr_pages); 609 op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
610 if (!op->op_sg) {
611 ret = -ENOMEM;
612 goto out;
613 }
567 614
568 if (op->op_notify || op->op_recverr) { 615 if (op->op_notify || op->op_recverr) {
569 /* We allocate an uninitialized notifier here, because 616 /* We allocate an uninitialized notifier here, because
@@ -597,50 +644,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
597 (unsigned long long)args->remote_vec.addr, 644 (unsigned long long)args->remote_vec.addr,
598 op->op_rkey); 645 op->op_rkey);
599 646
600 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
601
602 for (i = 0; i < args->nr_local; i++) { 647 for (i = 0; i < args->nr_local; i++) {
603 if (copy_from_user(&vec, &local_vec[i], 648 struct rds_iovec *iov = &iovs[i];
604 sizeof(struct rds_iovec))) { 649 /* no need to check: rds_rdma_pages() already verified nr is nonzero */
605 ret = -EFAULT; 650 unsigned int nr = rds_pages_in_vec(iov);
606 goto out;
607 }
608
609 nr = rds_pages_in_vec(&vec);
610 if (nr == 0) {
611 ret = -EINVAL;
612 goto out;
613 }
614 651
615 rs->rs_user_addr = vec.addr; 652 rs->rs_user_addr = iov->addr;
616 rs->rs_user_bytes = vec.bytes; 653 rs->rs_user_bytes = iov->bytes;
617 654
618 /* If it's a WRITE operation, we want to pin the pages for reading. 655 /* If it's a WRITE operation, we want to pin the pages for reading.
619 * If it's a READ operation, we need to pin the pages for writing. 656 * If it's a READ operation, we need to pin the pages for writing.
620 */ 657 */
621 ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write); 658 ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
622 if (ret < 0) 659 if (ret < 0)
623 goto out; 660 goto out;
624 661
625 rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n", 662 rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
626 nr_bytes, nr, vec.bytes, vec.addr); 663 nr_bytes, nr, iov->bytes, iov->addr);
627 664
628 nr_bytes += vec.bytes; 665 nr_bytes += iov->bytes;
629 666
630 for (j = 0; j < nr; j++) { 667 for (j = 0; j < nr; j++) {
631 unsigned int offset = vec.addr & ~PAGE_MASK; 668 unsigned int offset = iov->addr & ~PAGE_MASK;
632 struct scatterlist *sg; 669 struct scatterlist *sg;
633 670
634 sg = &op->op_sg[op->op_nents + j]; 671 sg = &op->op_sg[op->op_nents + j];
635 sg_set_page(sg, pages[j], 672 sg_set_page(sg, pages[j],
636 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), 673 min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
637 offset); 674 offset);
638 675
639 rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n", 676 rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
640 sg->offset, sg->length, vec.addr, vec.bytes); 677 sg->offset, sg->length, iov->addr, iov->bytes);
641 678
642 vec.addr += sg->length; 679 iov->addr += sg->length;
643 vec.bytes -= sg->length; 680 iov->bytes -= sg->length;
644 } 681 }
645 682
646 op->op_nents += nr; 683 op->op_nents += nr;
@@ -655,13 +692,14 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
655 } 692 }
656 op->op_bytes = nr_bytes; 693 op->op_bytes = nr_bytes;
657 694
658 ret = 0;
659out: 695out:
696 if (iovs != iovstack)
697 sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
660 kfree(pages); 698 kfree(pages);
661 if (ret) 699 if (ret)
662 rds_rdma_free_op(op); 700 rds_rdma_free_op(op);
663 701 else
664 rds_stats_inc(s_send_rdma); 702 rds_stats_inc(s_send_rdma);
665 703
666 return ret; 704 return ret;
667} 705}
@@ -773,6 +811,10 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
773 rm->atomic.op_active = 1; 811 rm->atomic.op_active = 1;
774 rm->atomic.op_recverr = rs->rs_recverr; 812 rm->atomic.op_recverr = rs->rs_recverr;
775 rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); 813 rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
814 if (!rm->atomic.op_sg) {
815 ret = -ENOMEM;
816 goto err;
817 }
776 818
777 /* verify 8 byte-aligned */ 819 /* verify 8 byte-aligned */
778 if (args->local_addr & 0x7) { 820 if (args->local_addr & 0x7) {
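Two hardening themes run through this rds/rdma.c rework. First, the page count is computed from one privately held copy of the user's iovec array instead of re-reading user memory for the pinning pass, closing the window in which userspace could change the vector between counting and pinning; tot_pages also becomes signed so the overflow check can actually trip. Second, the iovec copy uses the classic fast path: a small on-stack array for up to UIO_FASTIOV entries, a socket-accounted allocation above that, with nr_local capped at UIO_MAXIOV. A condensed sketch of that copy pattern (validation and page pinning elided; the function name is illustrative):

#include <linux/rds.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
#include <net/sock.h>

static int with_user_iovecs(struct sock *sk,
			    const struct rds_iovec __user *uvec,
			    unsigned long nr)
{
	struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
	int iov_size, ret = 0;

	if (nr > UIO_MAXIOV)
		return -EMSGSIZE;

	iov_size = nr * sizeof(struct rds_iovec);
	if (nr > UIO_FASTIOV) {
		iovs = sock_kmalloc(sk, iov_size, GFP_KERNEL);
		if (!iovs)
			return -ENOMEM;
	}

	if (copy_from_user(iovs, uvec, iov_size)) {
		ret = -EFAULT;
		goto out;
	}

	/* ... count pages from iovs[], pin, build the scatterlist ... */

out:
	if (iovs != iovstack)
		sock_kfree_s(sk, iovs, iov_size);
	return ret;
}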
diff --git a/net/rds/send.c b/net/rds/send.c
index 0bc9db17a87d..35b9c2e9caf1 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -973,6 +973,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
973 /* Attach data to the rm */ 973 /* Attach data to the rm */
974 if (payload_len) { 974 if (payload_len) {
975 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE)); 975 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
976 if (!rm->data.op_sg) {
977 ret = -ENOMEM;
978 goto out;
979 }
976 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len); 980 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
977 if (ret) 981 if (ret)
978 goto out; 982 goto out;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08a8c6cf2d10..8e0a32001c90 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -221,7 +221,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
221static void rds_tcp_conn_free(void *arg) 221static void rds_tcp_conn_free(void *arg)
222{ 222{
223 struct rds_tcp_connection *tc = arg; 223 struct rds_tcp_connection *tc = arg;
224 unsigned long flags;
224 rdsdebug("freeing tc %p\n", tc); 225 rdsdebug("freeing tc %p\n", tc);
226
227 spin_lock_irqsave(&rds_tcp_conn_lock, flags);
228 list_del(&tc->t_tcp_node);
229 spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
230
225 kmem_cache_free(rds_tcp_conn_slab, tc); 231 kmem_cache_free(rds_tcp_conn_slab, tc);
226} 232}
227 233
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd050..f23d9155b1ef 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -268,6 +268,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
268 goto nla_put_failure; 268 goto nla_put_failure;
269 269
270 nla_nest_end(skb, nest); 270 nla_nest_end(skb, nest);
271
272 if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
273 goto nla_put_failure;
274
271 return skb->len; 275 return skb->len;
272 276
273nla_put_failure: 277nla_put_failure:
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 37dff78e9cb1..d49c40fb7e09 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
34 .populate = cgrp_populate, 34 .populate = cgrp_populate,
35#ifdef CONFIG_NET_CLS_CGROUP 35#ifdef CONFIG_NET_CLS_CGROUP
36 .subsys_id = net_cls_subsys_id, 36 .subsys_id = net_cls_subsys_id,
37#else
38#define net_cls_subsys_id net_cls_subsys.subsys_id
39#endif 37#endif
40 .module = THIS_MODULE, 38 .module = THIS_MODULE,
41}; 39};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 763253257411..ea8f566e720c 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -103,7 +103,8 @@ retry:
103 103
104static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) 104static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
105{ 105{
106 textsearch_destroy(EM_TEXT_PRIV(m)->config); 106 if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
107 textsearch_destroy(EM_TEXT_PRIV(m)->config);
107} 108}
108 109
109static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) 110static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3cf478d012dd..7150705f1d0b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -270,7 +270,6 @@ static unsigned int sfq_drop(struct Qdisc *sch)
270 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ 270 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
271 d = q->next[q->tail]; 271 d = q->next[q->tail];
272 q->next[q->tail] = q->next[d]; 272 q->next[q->tail] = q->next[d];
273 q->allot[q->next[d]] += q->quantum;
274 skb = q->qs[d].prev; 273 skb = q->qs[d].prev;
275 len = qdisc_pkt_len(skb); 274 len = qdisc_pkt_len(skb);
276 __skb_unlink(skb, &q->qs[d]); 275 __skb_unlink(skb, &q->qs[d]);
@@ -321,14 +320,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
321 sfq_inc(q, x); 320 sfq_inc(q, x);
322 if (q->qs[x].qlen == 1) { /* The flow is new */ 321 if (q->qs[x].qlen == 1) { /* The flow is new */
323 if (q->tail == SFQ_DEPTH) { /* It is the first flow */ 322 if (q->tail == SFQ_DEPTH) { /* It is the first flow */
324 q->tail = x;
325 q->next[x] = x; 323 q->next[x] = x;
326 q->allot[x] = q->quantum;
327 } else { 324 } else {
328 q->next[x] = q->next[q->tail]; 325 q->next[x] = q->next[q->tail];
329 q->next[q->tail] = x; 326 q->next[q->tail] = x;
330 q->tail = x;
331 } 327 }
328 q->tail = x;
329 q->allot[x] = q->quantum;
332 } 330 }
333 if (++sch->q.qlen <= q->limit) { 331 if (++sch->q.qlen <= q->limit) {
334 sch->bstats.bytes += qdisc_pkt_len(skb); 332 sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -359,13 +357,13 @@ sfq_dequeue(struct Qdisc *sch)
359{ 357{
360 struct sfq_sched_data *q = qdisc_priv(sch); 358 struct sfq_sched_data *q = qdisc_priv(sch);
361 struct sk_buff *skb; 359 struct sk_buff *skb;
362 sfq_index a, old_a; 360 sfq_index a, next_a;
363 361
364 /* No active slots */ 362 /* No active slots */
365 if (q->tail == SFQ_DEPTH) 363 if (q->tail == SFQ_DEPTH)
366 return NULL; 364 return NULL;
367 365
368 a = old_a = q->next[q->tail]; 366 a = q->next[q->tail];
369 367
370 /* Grab packet */ 368 /* Grab packet */
371 skb = __skb_dequeue(&q->qs[a]); 369 skb = __skb_dequeue(&q->qs[a]);
@@ -376,17 +374,15 @@ sfq_dequeue(struct Qdisc *sch)
376 /* Is the slot empty? */ 374 /* Is the slot empty? */
377 if (q->qs[a].qlen == 0) { 375 if (q->qs[a].qlen == 0) {
378 q->ht[q->hash[a]] = SFQ_DEPTH; 376 q->ht[q->hash[a]] = SFQ_DEPTH;
379 a = q->next[a]; 377 next_a = q->next[a];
380 if (a == old_a) { 378 if (a == next_a) {
381 q->tail = SFQ_DEPTH; 379 q->tail = SFQ_DEPTH;
382 return skb; 380 return skb;
383 } 381 }
384 q->next[q->tail] = a; 382 q->next[q->tail] = next_a;
385 q->allot[a] += q->quantum;
386 } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { 383 } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
387 q->tail = a;
388 a = q->next[a];
389 q->allot[a] += q->quantum; 384 q->allot[a] += q->quantum;
385 q->tail = a;
390 } 386 }
391 return skb; 387 return skb;
392} 388}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ef29c74d85e..e58f9476f29c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -92,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specific;
92struct kmem_cache *sctp_chunk_cachep __read_mostly; 92struct kmem_cache *sctp_chunk_cachep __read_mostly;
93struct kmem_cache *sctp_bucket_cachep __read_mostly; 93struct kmem_cache *sctp_bucket_cachep __read_mostly;
94 94
95int sysctl_sctp_mem[3]; 95long sysctl_sctp_mem[3];
96int sysctl_sctp_rmem[3]; 96int sysctl_sctp_rmem[3];
97int sysctl_sctp_wmem[3]; 97int sysctl_sctp_wmem[3];
98 98
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e34ca9cc1167..fff0926b1111 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -111,12 +111,12 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
111static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; 111static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
112 112
113extern struct kmem_cache *sctp_bucket_cachep; 113extern struct kmem_cache *sctp_bucket_cachep;
114extern int sysctl_sctp_mem[3]; 114extern long sysctl_sctp_mem[3];
115extern int sysctl_sctp_rmem[3]; 115extern int sysctl_sctp_rmem[3];
116extern int sysctl_sctp_wmem[3]; 116extern int sysctl_sctp_wmem[3];
117 117
118static int sctp_memory_pressure; 118static int sctp_memory_pressure;
119static atomic_t sctp_memory_allocated; 119static atomic_long_t sctp_memory_allocated;
120struct percpu_counter sctp_sockets_allocated; 120struct percpu_counter sctp_sockets_allocated;
121 121
122static void sctp_enter_memory_pressure(struct sock *sk) 122static void sctp_enter_memory_pressure(struct sock *sk)
@@ -2932,6 +2932,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
2932 struct sctp_association *asoc = NULL; 2932 struct sctp_association *asoc = NULL;
2933 struct sctp_setpeerprim prim; 2933 struct sctp_setpeerprim prim;
2934 struct sctp_chunk *chunk; 2934 struct sctp_chunk *chunk;
2935 struct sctp_af *af;
2935 int err; 2936 int err;
2936 2937
2937 sp = sctp_sk(sk); 2938 sp = sctp_sk(sk);
@@ -2959,6 +2960,13 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
2959 if (!sctp_state(asoc, ESTABLISHED)) 2960 if (!sctp_state(asoc, ESTABLISHED))
2960 return -ENOTCONN; 2961 return -ENOTCONN;
2961 2962
2963 af = sctp_get_af_specific(prim.sspp_addr.ss_family);
2964 if (!af)
2965 return -EINVAL;
2966
2967 if (!af->addr_valid((union sctp_addr *)&prim.sspp_addr, sp, NULL))
2968 return -EADDRNOTAVAIL;
2969
2962 if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr)) 2970 if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr))
2963 return -EADDRNOTAVAIL; 2971 return -EADDRNOTAVAIL;
2964 2972
@@ -5045,7 +5053,7 @@ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
5045 if (copy_to_user(optval, &val, len)) 5053 if (copy_to_user(optval, &val, len))
5046 return -EFAULT; 5054 return -EFAULT;
5047 5055
5048 return -ENOTSUPP; 5056 return 0;
5049} 5057}
5050 5058
5051/* 5059/*
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 832590bbe0c0..50cb57f0919e 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -54,7 +54,7 @@ static int sack_timer_max = 500;
54static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ 54static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
55static int rwnd_scale_max = 16; 55static int rwnd_scale_max = 16;
56 56
57extern int sysctl_sctp_mem[3]; 57extern long sysctl_sctp_mem[3];
58extern int sysctl_sctp_rmem[3]; 58extern int sysctl_sctp_rmem[3];
59extern int sysctl_sctp_wmem[3]; 59extern int sysctl_sctp_wmem[3];
60 60
@@ -203,7 +203,7 @@ static ctl_table sctp_table[] = {
203 .data = &sysctl_sctp_mem, 203 .data = &sysctl_sctp_mem,
204 .maxlen = sizeof(sysctl_sctp_mem), 204 .maxlen = sizeof(sysctl_sctp_mem),
205 .mode = 0644, 205 .mode = 0644,
206 .proc_handler = proc_dointvec, 206 .proc_handler = proc_doulongvec_minmax
207 }, 207 },
208 { 208 {
209 .procname = "sctp_rmem", 209 .procname = "sctp_rmem",
diff --git a/net/socket.c b/net/socket.c
index abf3e2561521..088fb3fd45e0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -305,19 +305,17 @@ static const struct super_operations sockfs_ops = {
305 .statfs = simple_statfs, 305 .statfs = simple_statfs,
306}; 306};
307 307
308static int sockfs_get_sb(struct file_system_type *fs_type, 308static struct dentry *sockfs_mount(struct file_system_type *fs_type,
309 int flags, const char *dev_name, void *data, 309 int flags, const char *dev_name, void *data)
310 struct vfsmount *mnt)
311{ 310{
312 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 311 return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
313 mnt);
314} 312}
315 313
316static struct vfsmount *sock_mnt __read_mostly; 314static struct vfsmount *sock_mnt __read_mostly;
317 315
318static struct file_system_type sock_fs_type = { 316static struct file_system_type sock_fs_type = {
319 .name = "sockfs", 317 .name = "sockfs",
320 .get_sb = sockfs_get_sb, 318 .mount = sockfs_mount,
321 .kill_sb = kill_anon_super, 319 .kill_sb = kill_anon_super,
322}; 320};
323 321
@@ -377,7 +375,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
377 &socket_file_ops); 375 &socket_file_ops);
378 if (unlikely(!file)) { 376 if (unlikely(!file)) {
379 /* drop dentry, keep inode */ 377 /* drop dentry, keep inode */
380 atomic_inc(&path.dentry->d_inode->i_count); 378 ihold(path.dentry->d_inode);
381 path_put(&path); 379 path_put(&path);
382 put_unused_fd(fd); 380 put_unused_fd(fd);
383 return -ENFILE; 381 return -ENFILE;
@@ -480,6 +478,7 @@ static struct socket *sock_alloc(void)
480 sock = SOCKET_I(inode); 478 sock = SOCKET_I(inode);
481 479
482 kmemcheck_annotate_bitfield(sock, type); 480 kmemcheck_annotate_bitfield(sock, type);
481 inode->i_ino = get_next_ino();
483 inode->i_mode = S_IFSOCK | S_IRWXUGO; 482 inode->i_mode = S_IFSOCK | S_IRWXUGO;
484 inode->i_uid = current_fsuid(); 483 inode->i_uid = current_fsuid();
485 inode->i_gid = current_fsgid(); 484 inode->i_gid = current_fsgid();
@@ -733,6 +732,21 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
733 return ret; 732 return ret;
734} 733}
735 734
735/**
736 * kernel_recvmsg - Receive a message from a socket (kernel space)
737 * @sock: The socket to receive the message from
738 * @msg: Received message
739 * @vec: Input s/g array for message data
740 * @num: Size of input s/g array
741 * @size: Number of bytes to read
742 * @flags: Message flags (MSG_DONTWAIT, etc...)
743 *
744 * On return the msg structure contains the scatter/gather array passed in the
745 * vec argument. The array is modified so that it consists of the unfilled
746 * portion of the original array.
747 *
748 * The returned value is the total number of bytes received, or an error.
749 */
736int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 750int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
737 struct kvec *vec, size_t num, size_t size, int flags) 751 struct kvec *vec, size_t num, size_t size, int flags)
738{ 752{
@@ -1145,7 +1159,7 @@ call_kill:
1145} 1159}
1146EXPORT_SYMBOL(sock_wake_async); 1160EXPORT_SYMBOL(sock_wake_async);
1147 1161
1148static int __sock_create(struct net *net, int family, int type, int protocol, 1162int __sock_create(struct net *net, int family, int type, int protocol,
1149 struct socket **res, int kern) 1163 struct socket **res, int kern)
1150{ 1164{
1151 int err; 1165 int err;
@@ -1257,6 +1271,7 @@ out_release:
1257 rcu_read_unlock(); 1271 rcu_read_unlock();
1258 goto out_sock_release; 1272 goto out_sock_release;
1259} 1273}
1274EXPORT_SYMBOL(__sock_create);
1260 1275
1261int sock_create(int family, int type, int protocol, struct socket **res) 1276int sock_create(int family, int type, int protocol, struct socket **res)
1262{ 1277{
@@ -1652,6 +1667,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1652 struct iovec iov; 1667 struct iovec iov;
1653 int fput_needed; 1668 int fput_needed;
1654 1669
1670 if (len > INT_MAX)
1671 len = INT_MAX;
1655 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1672 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1656 if (!sock) 1673 if (!sock)
1657 goto out; 1674 goto out;
@@ -1709,6 +1726,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1709 int err, err2; 1726 int err, err2;
1710 int fput_needed; 1727 int fput_needed;
1711 1728
1729 if (size > INT_MAX)
1730 size = INT_MAX;
1712 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1731 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1713 if (!sock) 1732 if (!sock)
1714 goto out; 1733 goto out;
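The clamps added to sendto() and recvfrom() reflect the fact that the socket layer's transfer paths report the byte count through an int return value: a size_t request above INT_MAX would otherwise come back through a type that cannot represent it. Capping the request rather than rejecting it matches read()/write() semantics, where short transfers are always possible. A sketch of the guard:

#include <linux/kernel.h>

static inline size_t clamp_sock_size(size_t size)
{
	/* the byte count travels back through an int return value */
	return size > INT_MAX ? INT_MAX : size;
}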
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 3376d7657185..8873fd8ddacd 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -36,22 +36,3 @@ config RPCSEC_GSS_KRB5
36 Kerberos support should be installed. 36 Kerberos support should be installed.
37 37
38 If unsure, say Y. 38 If unsure, say Y.
39
40config RPCSEC_GSS_SPKM3
41 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
42 depends on SUNRPC && EXPERIMENTAL
43 select SUNRPC_GSS
44 select CRYPTO
45 select CRYPTO_MD5
46 select CRYPTO_DES
47 select CRYPTO_CAST5
48 select CRYPTO_CBC
49 help
50 Choose Y here to enable Secure RPC using the SPKM3 public key
51 GSS-API mechanism (RFC 2025).
52
53 Secure RPC calls with SPKM3 require an auxiliary userspace
54 daemon which may be found in the Linux nfs-utils package
55 available from http://linux-nfs.org/.
56
57 If unsure, say N.
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index e9eaaf7d43c1..afe67849269f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -595,7 +595,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
595int 595int
596rpcauth_refreshcred(struct rpc_task *task) 596rpcauth_refreshcred(struct rpc_task *task)
597{ 597{
598 struct rpc_cred *cred = task->tk_rqstp->rq_cred; 598 struct rpc_cred *cred;
599 int err; 599 int err;
600 600
601 cred = task->tk_rqstp->rq_cred; 601 cred = task->tk_rqstp->rq_cred;
@@ -658,7 +658,7 @@ out1:
658 return err; 658 return err;
659} 659}
660 660
661void __exit rpcauth_remove_module(void) 661void rpcauth_remove_module(void)
662{ 662{
663 rpc_destroy_authunix(); 663 rpc_destroy_authunix();
664 rpc_destroy_generic_auth(); 664 rpc_destroy_generic_auth();
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 43162bb3b78f..e010a015d996 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -158,7 +158,7 @@ int __init rpc_init_generic_auth(void)
158 return rpcauth_init_credcache(&generic_auth); 158 return rpcauth_init_credcache(&generic_auth);
159} 159}
160 160
161void __exit rpc_destroy_generic_auth(void) 161void rpc_destroy_generic_auth(void)
162{ 162{
163 rpcauth_destroy_credcache(&generic_auth); 163 rpcauth_destroy_credcache(&generic_auth);
164} 164}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 74a231735f67..7350d86a32ee 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -11,8 +11,3 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ 12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o 13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
14
15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
16
17rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \
18 gss_spkm3_token.o
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 778e5dfc5144..f375decc024b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -427,7 +427,7 @@ static int
427context_derive_keys_rc4(struct krb5_ctx *ctx) 427context_derive_keys_rc4(struct krb5_ctx *ctx)
428{ 428{
429 struct crypto_hash *hmac; 429 struct crypto_hash *hmac;
430 char sigkeyconstant[] = "signaturekey"; 430 static const char sigkeyconstant[] = "signaturekey";
431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ 431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
432 struct hash_desc desc; 432 struct hash_desc desc;
433 struct scatterlist sg[1]; 433 struct scatterlist sg[1];
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
deleted file mode 100644
index adade3d313f2..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ /dev/null
@@ -1,247 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_mech.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 * J. Bruce Fields <bfields@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#include <linux/err.h>
38#include <linux/module.h>
39#include <linux/init.h>
40#include <linux/types.h>
41#include <linux/slab.h>
42#include <linux/sunrpc/auth.h>
43#include <linux/in.h>
44#include <linux/sunrpc/svcauth_gss.h>
45#include <linux/sunrpc/gss_spkm3.h>
46#include <linux/sunrpc/xdr.h>
47#include <linux/crypto.h>
48
49#ifdef RPC_DEBUG
50# define RPCDBG_FACILITY RPCDBG_AUTH
51#endif
52
53static const void *
54simple_get_bytes(const void *p, const void *end, void *res, int len)
55{
56 const void *q = (const void *)((const char *)p + len);
57 if (unlikely(q > end || q < p))
58 return ERR_PTR(-EFAULT);
59 memcpy(res, p, len);
60 return q;
61}
62
63static const void *
64simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
65{
66 const void *q;
67 unsigned int len;
68 p = simple_get_bytes(p, end, &len, sizeof(len));
69 if (IS_ERR(p))
70 return p;
71 res->len = len;
72 if (len == 0) {
73 res->data = NULL;
74 return p;
75 }
76 q = (const void *)((const char *)p + len);
77 if (unlikely(q > end || q < p))
78 return ERR_PTR(-EFAULT);
79 res->data = kmemdup(p, len, GFP_NOFS);
80 if (unlikely(res->data == NULL))
81 return ERR_PTR(-ENOMEM);
82 return q;
83}
84
85static int
86gss_import_sec_context_spkm3(const void *p, size_t len,
87 struct gss_ctx *ctx_id,
88 gfp_t gfp_mask)
89{
90 const void *end = (const void *)((const char *)p + len);
91 struct spkm3_ctx *ctx;
92 int version;
93
94 if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
95 goto out_err;
96
97 p = simple_get_bytes(p, end, &version, sizeof(version));
98 if (IS_ERR(p))
99 goto out_err_free_ctx;
100 if (version != 1) {
101 dprintk("RPC: unknown spkm3 token format: "
102 "obsolete nfs-utils?\n");
103 p = ERR_PTR(-EINVAL);
104 goto out_err_free_ctx;
105 }
106
107 p = simple_get_netobj(p, end, &ctx->ctx_id);
108 if (IS_ERR(p))
109 goto out_err_free_ctx;
110
111 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
112 if (IS_ERR(p))
113 goto out_err_free_ctx_id;
114
115 p = simple_get_netobj(p, end, &ctx->mech_used);
116 if (IS_ERR(p))
117 goto out_err_free_ctx_id;
118
119 p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
120 if (IS_ERR(p))
121 goto out_err_free_mech;
122
123 p = simple_get_netobj(p, end, &ctx->conf_alg);
124 if (IS_ERR(p))
125 goto out_err_free_mech;
126
127 p = simple_get_netobj(p, end, &ctx->derived_conf_key);
128 if (IS_ERR(p))
129 goto out_err_free_conf_alg;
130
131 p = simple_get_netobj(p, end, &ctx->intg_alg);
132 if (IS_ERR(p))
133 goto out_err_free_conf_key;
134
135 p = simple_get_netobj(p, end, &ctx->derived_integ_key);
136 if (IS_ERR(p))
137 goto out_err_free_intg_alg;
138
139 if (p != end) {
140 p = ERR_PTR(-EFAULT);
141 goto out_err_free_intg_key;
142 }
143
144 ctx_id->internal_ctx_id = ctx;
145
146 dprintk("RPC: Successfully imported new spkm context.\n");
147 return 0;
148
149out_err_free_intg_key:
150 kfree(ctx->derived_integ_key.data);
151out_err_free_intg_alg:
152 kfree(ctx->intg_alg.data);
153out_err_free_conf_key:
154 kfree(ctx->derived_conf_key.data);
155out_err_free_conf_alg:
156 kfree(ctx->conf_alg.data);
157out_err_free_mech:
158 kfree(ctx->mech_used.data);
159out_err_free_ctx_id:
160 kfree(ctx->ctx_id.data);
161out_err_free_ctx:
162 kfree(ctx);
163out_err:
164 return PTR_ERR(p);
165}
166
167static void
168gss_delete_sec_context_spkm3(void *internal_ctx)
169{
170 struct spkm3_ctx *sctx = internal_ctx;
171
172 kfree(sctx->derived_integ_key.data);
173 kfree(sctx->intg_alg.data);
174 kfree(sctx->derived_conf_key.data);
175 kfree(sctx->conf_alg.data);
176 kfree(sctx->mech_used.data);
177 kfree(sctx->ctx_id.data);
178 kfree(sctx);
179}
180
181static u32
182gss_verify_mic_spkm3(struct gss_ctx *ctx,
183 struct xdr_buf *signbuf,
184 struct xdr_netobj *checksum)
185{
186 u32 maj_stat = 0;
187 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
188
189 maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
190
191 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
192 return maj_stat;
193}
194
195static u32
196gss_get_mic_spkm3(struct gss_ctx *ctx,
197 struct xdr_buf *message_buffer,
198 struct xdr_netobj *message_token)
199{
200 u32 err = 0;
201 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
202
203 err = spkm3_make_token(sctx, message_buffer,
204 message_token, SPKM_MIC_TOK);
205 dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
206 return err;
207}
208
209static const struct gss_api_ops gss_spkm3_ops = {
210 .gss_import_sec_context = gss_import_sec_context_spkm3,
211 .gss_get_mic = gss_get_mic_spkm3,
212 .gss_verify_mic = gss_verify_mic_spkm3,
213 .gss_delete_sec_context = gss_delete_sec_context_spkm3,
214};
215
216static struct pf_desc gss_spkm3_pfs[] = {
217 {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"},
218 {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
219};
220
221static struct gss_api_mech gss_spkm3_mech = {
222 .gm_name = "spkm3",
223 .gm_owner = THIS_MODULE,
224 .gm_oid = {7, "\053\006\001\005\005\001\003"},
225 .gm_ops = &gss_spkm3_ops,
226 .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs),
227 .gm_pfs = gss_spkm3_pfs,
228};
229
230static int __init init_spkm3_module(void)
231{
232 int status;
233
234 status = gss_mech_register(&gss_spkm3_mech);
235 if (status)
236 printk("Failed to register spkm3 gss mechanism!\n");
237 return status;
238}
239
240static void __exit cleanup_spkm3_module(void)
241{
242 gss_mech_unregister(&gss_spkm3_mech);
243}
244
245MODULE_LICENSE("GPL");
246module_init(init_spkm3_module);
247module_exit(cleanup_spkm3_module);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
deleted file mode 100644
index 5a3a65a0e2b4..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ /dev/null
@@ -1,186 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_seal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/jiffies.h>
38#include <linux/sunrpc/gss_spkm3.h>
39#include <linux/random.h>
40#include <linux/crypto.h>
41#include <linux/pagemap.h>
42#include <linux/scatterlist.h>
43#include <linux/sunrpc/xdr.h>
44
45#ifdef RPC_DEBUG
46# define RPCDBG_FACILITY RPCDBG_AUTH
47#endif
48
49const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
50const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
51
52/*
53 * spkm3_make_token()
54 *
55 * Only SPKM_MIC_TOK with md5 intg-alg is supported
56 */
57
58u32
59spkm3_make_token(struct spkm3_ctx *ctx,
60 struct xdr_buf * text, struct xdr_netobj * token,
61 int toktype)
62{
63 s32 checksum_type;
64 char tokhdrbuf[25];
65 char cksumdata[16];
66 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
67 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
68 int tokenlen = 0;
69 unsigned char *ptr;
70 s32 now;
71 int ctxelen = 0, ctxzbit = 0;
72 int md5elen = 0, md5zbit = 0;
73
74 now = jiffies;
75
76 if (ctx->ctx_id.len != 16) {
77 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
78 ctx->ctx_id.len);
79 goto out_err;
80 }
81
82 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
83 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
84 "algorithm. only support hmac-md5 I-ALG.\n");
85 goto out_err;
86 } else
87 checksum_type = CKSUMTYPE_HMAC_MD5;
88
89 if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
90 dprintk("RPC: gss_spkm3_seal: unsupported C-ALG "
91 "algorithm\n");
92 goto out_err;
93 }
94
95 if (toktype == SPKM_MIC_TOK) {
96 /* Calculate checksum over the mic-header */
97 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
98 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
99 ctxelen, ctxzbit);
100 if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
101 (char *)mic_hdr.data, mic_hdr.len,
102 text, 0, &md5cksum))
103 goto out_err;
104
105 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
106 tokenlen = 10 + ctxelen + 1 + md5elen + 1;
107
108 /* Create token header using generic routines */
109 token->len = g_token_size(&ctx->mech_used, tokenlen + 2);
110
111 ptr = token->data;
112 g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr);
113
114 spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
115 } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
116 dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK "
117 "not supported\n");
118 goto out_err;
119 }
120
121 /* XXX need to implement sequence numbers, and ctx->expired */
122
123 return GSS_S_COMPLETE;
124out_err:
125 token->data = NULL;
126 token->len = 0;
127 return GSS_S_FAILURE;
128}
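
For the one supported case (SPKM_MIC_TOK over HMAC-MD5), the sizing above can be checked by hand. A worked example, assuming a 16-octet context id and a 16-octet MD5 checksum with no trailing zero octets, so asn1_bitstring_len() shortens nothing:

	ctxelen  = 16
	md5elen  = 16
	tokenlen = 10 + ctxelen + 1 + md5elen + 1 = 44

which matches the "innertoken length (max 44)" noted in the token-layout comment in gss_spkm3_token.c. The extra 2 passed to g_token_size() and g_make_token_header() covers the leading 0xa4 tag and length octet that spkm3_make_mic_token() writes.
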
129
130static int
131spkm3_checksummer(struct scatterlist *sg, void *data)
132{
133 struct hash_desc *desc = data;
134
135 return crypto_hash_update(desc, sg, sg->length);
136}
137
138/* checksum the plaintext data and hdrlen bytes of the token header */
139s32
140make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
141 unsigned int hdrlen, struct xdr_buf *body,
142 unsigned int body_offset, struct xdr_netobj *cksum)
143{
144 char *cksumname;
145 struct hash_desc desc; /* XXX add to ctx? */
146 struct scatterlist sg[1];
147 int err;
148
149 switch (cksumtype) {
150 case CKSUMTYPE_HMAC_MD5:
151 cksumname = "hmac(md5)";
152 break;
153 default:
154 dprintk("RPC: spkm3_make_checksum:"
155 " unsupported checksum %d", cksumtype);
156 return GSS_S_FAILURE;
157 }
158
159 if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE;
160
161 desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
162 if (IS_ERR(desc.tfm))
163 return GSS_S_FAILURE;
164 cksum->len = crypto_hash_digestsize(desc.tfm);
165 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
166
167 err = crypto_hash_setkey(desc.tfm, key->data, key->len);
168 if (err)
169 goto out;
170
171 err = crypto_hash_init(&desc);
172 if (err)
173 goto out;
174
175 sg_init_one(sg, header, hdrlen);
176 crypto_hash_update(&desc, sg, sg->length);
177
178 xdr_process_buf(body, body_offset, body->len - body_offset,
179 spkm3_checksummer, &desc);
180 crypto_hash_final(&desc, cksum->data);
181
182out:
183 crypto_free_hash(desc.tfm);
184
185 return err ? GSS_S_FAILURE : 0;
186}
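
Stripped of the XDR iteration, make_spkm3_checksum() is the standard synchronous hashing pattern of this kernel generation. A minimal sketch of the same HMAC over a flat buffer, assuming the legacy <linux/crypto.h> hash interface used above (since replaced by crypto_shash; this will not build on modern kernels):

	#include <linux/crypto.h>
	#include <linux/scatterlist.h>

	static int hmac_md5_digest(const u8 *key, unsigned int keylen,
				   const u8 *buf, unsigned int buflen, u8 *out)
	{
		struct hash_desc desc;
		struct scatterlist sg[1];
		int err;

		desc.tfm = crypto_alloc_hash("hmac(md5)", 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(desc.tfm))
			return PTR_ERR(desc.tfm);
		desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;

		err = crypto_hash_setkey(desc.tfm, key, keylen);
		if (!err) {
			sg_init_one(sg, buf, buflen);
			err = crypto_hash_init(&desc) ?:
			      crypto_hash_update(&desc, sg, buflen) ?:
			      crypto_hash_final(&desc, out);
		}
		crypto_free_hash(desc.tfm);
		return err;
	}
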
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
deleted file mode 100644
index a99825d7caa0..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ /dev/null
@@ -1,267 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_token.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/random.h>
41#include <linux/crypto.h>
42
43#ifdef RPC_DEBUG
44# define RPCDBG_FACILITY RPCDBG_AUTH
45#endif
46
47/*
48 * asn1_bitstring_len()
49 *
50 * calculate the asn1 bitstring length of the xdr_netobject
51 */
52void
53asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
54{
55	int i, zbit = 0, elen = in->len;
56 char *ptr;
57
58	ptr = &in->data[in->len - 1];
59
60 /* count trailing 0's */
61	for (i = in->len; i > 0; i--) {
62 if (*ptr == 0) {
63 ptr--;
64 elen--;
65 } else
66 break;
67 }
68
69 /* count number of 0 bits in final octet */
70 ptr = &in->data[elen - 1];
71	for (i = 0; i < 8; i++) {
72 short mask = 0x01;
73
74 if (!((mask << i) & *ptr))
75 zbit++;
76 else
77 break;
78 }
79 *enclen = elen;
80 *zerobits = zbit;
81}
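
A worked example of the computation above, with a hypothetical input: for a 16-octet value whose final two octets are 0x40 0x00, the trailing zero octet is dropped (enclen = 15), and the last remaining octet 0x40 = 0b01000000 has six low-order zero bits (zerobits = 6); this is the unused-bits count that a DER BIT STRING carries in its leading octet.
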
82
83/*
84 * decode_asn1_bitstring()
85 *
86 * decode a bitstring into a buffer of the expected length.
87 * enclen = bit string length
88 * explen = expected length (define in rfc)
89 */
90int
91decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
92{
93	if (!(out->data = kzalloc(explen, GFP_NOFS)))
94 return 0;
95 out->len = explen;
96 memcpy(out->data, in, enclen);
97 return 1;
98}
99
100/*
101 * SPKMInnerContextToken choice SPKM_MIC asn1 token layout
102 *
103 * contextid is always 16 bytes plain data. max asn1 bitstring len = 17.
104 *
105 * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum)
106 *
107 * pos value
108 * ----------
109 * [0] a4 SPKM-MIC tag
110 * [1] ?? innertoken length (max 44)
111 *
112 *
113 * mic_header piece of checksum data starts here
114 *
115 * the maximum mic-header len = 9 + 17 = 26
116 * mic-header
117 * ----------
118 * [2] 30 SEQUENCE tag
119 * [3] ?? mic-header length: (max 23) = TokenID + ContextID
120 *
121 * TokenID - all fields constant and can be hardcoded
122 * -------
123 * [4] 02 Type 2
124 * [5] 02 Length 2
125 * [6][7] 01 01 TokenID (SPKM_MIC_TOK)
126 *
127 * ContextID - encoded length not constant, calculated
128 * ---------
129 * [8] 03 Type 3
130 * [9] ?? encoded length
131 * [10] ?? ctxzbit
132 * [11] contextid
133 *
134 * mic_header piece of checksum data ends here.
135 *
136 * int-cksum - encoded length not constant, calculated
137 * ---------
138 * [??] 03 Type 3
139 * [??] ?? encoded length
140 * [??] ?? md5zbit
141 * [??] int-cksum (NID_md5 = 16)
142 *
143 * maximum SPKM-MIC innercontext token length =
144 * 10 + encoded contextid_size(17 max) + 2 + encoded
145 * cksum_size (17 max for NID_md5) = 46
146 */
147
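
Concretely, for a context id and an MD5 checksum of 16 nonzero octets each (hypothetical values; the zbit octets are 0x00), the inner token described above lays out as:

	a4 2c                      SPKM-MIC tag, inner length (44)
	30 17                      SEQUENCE, mic-header length (23 = 4 + 2 + 17)
	02 02 01 01                TokenID (SPKM_MIC_TOK)
	03 11 00 <16 ctxid octets> ContextID bit string (17 = elen + zbit octet)
	03 11 00 <16 md5 octets>   int-cksum bit string (17 = elen + zbit octet)

for a total of 46 octets, the maximum quoted at the end of the comment.
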
148/*
149 * spkm3_mic_header()
150 *
151 * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
152 * elen: asn1 bitstring encoded length of the 16-byte context id
153 */
154void
155spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
156{
157 char *hptr = *hdrbuf;
158 char *top = *hdrbuf;
159
160 *(u8 *)hptr++ = 0x30;
161 *(u8 *)hptr++ = elen + 7; /* on the wire header length */
162
163 /* tokenid */
164 *(u8 *)hptr++ = 0x02;
165 *(u8 *)hptr++ = 0x02;
166 *(u8 *)hptr++ = 0x01;
167 *(u8 *)hptr++ = 0x01;
168
169	/* contextid */
170 *(u8 *)hptr++ = 0x03;
171 *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */
172 *(u8 *)hptr++ = zbit;
173 memcpy(hptr, ctxdata, elen);
174 hptr += elen;
175 *hdrlen = hptr - top;
176}
177
178/*
179 * spkm3_mic_innercontext_token()
180 *
181 * *tokp points to the beginning of the SPKM_MIC token described
182 * in rfc 2025, section 3.2.1:
183 *
184 * toklen is the inner token length
185 */
186void
187spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
188{
189 unsigned char *ict = *tokp;
190
191 *(u8 *)ict++ = 0xa4;
192 *(u8 *)ict++ = toklen;
193 memcpy(ict, mic_hdr->data, mic_hdr->len);
194 ict += mic_hdr->len;
195
196 *(u8 *)ict++ = 0x03;
197 *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */
198 *(u8 *)ict++ = md5zbit;
199 memcpy(ict, md5cksum->data, md5elen);
200}
201
202u32
203spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum)
204{
205	struct xdr_netobj spkm3_ctx_id = {.len = 0, .data = NULL};
206 unsigned char *ptr = *tokp;
207 int ctxelen;
208 u32 ret = GSS_S_DEFECTIVE_TOKEN;
209
210 /* spkm3 innercontext token preamble */
211 if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) {
212 dprintk("RPC: BAD SPKM ictoken preamble\n");
213 goto out;
214 }
215
216 *mic_hdrlen = ptr[3];
217
218 /* token type */
219 if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) {
220 dprintk("RPC: BAD asn1 SPKM3 token type\n");
221 goto out;
222 }
223
224 /* only support SPKM_MIC_TOK */
225	if ((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
226 dprintk("RPC: ERROR unsupported SPKM3 token\n");
227 goto out;
228 }
229
230 /* contextid */
231 if (ptr[8] != 0x03) {
232 dprintk("RPC: BAD SPKM3 asn1 context-id type\n");
233 goto out;
234 }
235
236 ctxelen = ptr[9];
237 if (ctxelen > 17) { /* length includes asn1 zbit octet */
238 dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen);
239 goto out;
240 }
241
242 /* ignore ptr[10] */
243
244	if (!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16))
245 goto out;
246
247 /*
248 * in the current implementation: the optional int-alg is not present
249 * so the default int-alg (md5) is used the optional snd-seq field is
250 * also not present
251 */
252
253 if (*mic_hdrlen != 6 + ctxelen) {
254		dprintk("RPC: BAD SPKM_MIC_TOK header len %d: we only "
255 "support default int-alg (should be absent) "
256 "and do not support snd-seq\n", *mic_hdrlen);
257 goto out;
258 }
259 /* checksum */
260	*cksum = (&ptr[10] + ctxelen); /* ctxelen includes the zbit octet at ptr[10] */
261
262 ret = GSS_S_COMPLETE;
263out:
264 kfree(spkm3_ctx_id.data);
265 return ret;
266}
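
The *mic_hdrlen != 6 + ctxelen test above is consistent with the layout comment earlier in this file: ctxelen (ptr[9]) already counts the zbit octet, so the mic-header is 4 octets of TokenID plus 2 octets of ContextID type/length plus ctxelen octets of payload. At the maximal ctxelen of 17 this gives 6 + 17 = 23, the "max 23" quoted for pos[3].
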
267
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
deleted file mode 100644
index cc21ee860bb6..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_unseal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/crypto.h>
41
42#ifdef RPC_DEBUG
43# define RPCDBG_FACILITY RPCDBG_AUTH
44#endif
45
46/*
47 * spkm3_read_token()
48 *
49 * only SPKM_MIC_TOK with md5 intg-alg is supported
50 */
51u32
52spkm3_read_token(struct spkm3_ctx *ctx,
53 struct xdr_netobj *read_token, /* checksum */
54 struct xdr_buf *message_buffer, /* signbuf */
55 int toktype)
56{
57 s32 checksum_type;
58 s32 code;
59	struct xdr_netobj wire_cksum = {.len = 0, .data = NULL};
60 char cksumdata[16];
61 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
62 unsigned char *ptr = (unsigned char *)read_token->data;
63 unsigned char *cksum;
64 int bodysize, md5elen;
65 int mic_hdrlen;
66 u32 ret = GSS_S_DEFECTIVE_TOKEN;
67
68 if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
69 &bodysize, &ptr, read_token->len))
70 goto out;
71
72 /* decode the token */
73
74 if (toktype != SPKM_MIC_TOK) {
75 dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
76 goto out;
77 }
78
79 if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
80 goto out;
81
82 if (*cksum++ != 0x03) {
83 dprintk("RPC: spkm3_read_token BAD checksum type\n");
84 goto out;
85 }
86 md5elen = *cksum++;
87 cksum++; /* move past the zbit */
88
89 if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
90 goto out;
91
92 /* HARD CODED FOR MD5 */
93
94 /* compute the checksum of the message.
95 * ptr + 2 = start of header piece of checksum
96 * mic_hdrlen + 2 = length of header piece of checksum
97 */
98 ret = GSS_S_DEFECTIVE_TOKEN;
99 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
100 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
101 "algorithm\n");
102 goto out;
103 }
104
105 checksum_type = CKSUMTYPE_HMAC_MD5;
106
107 code = make_spkm3_checksum(checksum_type,
108 &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
109 message_buffer, 0, &md5cksum);
110
111 if (code)
112 goto out;
113
114 ret = GSS_S_BAD_SIG;
115 code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
116 if (code) {
117 dprintk("RPC: bad MIC checksum\n");
118 goto out;
119 }
120
121
122 /* XXX: need to add expiration and sequencing */
123 ret = GSS_S_COMPLETE;
124out:
125 kfree(wire_cksum.data);
126 return ret;
127}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index cc385b3a59c2..dec2a6fc7c12 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -964,7 +964,7 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
964 if (rqstp->rq_gssclient == NULL) 964 if (rqstp->rq_gssclient == NULL)
965 return SVC_DENIED; 965 return SVC_DENIED;
966 stat = svcauth_unix_set_client(rqstp); 966 stat = svcauth_unix_set_client(rqstp);
967 if (stat == SVC_DROP) 967 if (stat == SVC_DROP || stat == SVC_CLOSE)
968 return stat; 968 return stat;
969 return SVC_OK; 969 return SVC_OK;
970} 970}
@@ -1018,7 +1018,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1018 return SVC_DENIED; 1018 return SVC_DENIED;
1019 memset(&rsikey, 0, sizeof(rsikey)); 1019 memset(&rsikey, 0, sizeof(rsikey));
1020 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) 1020 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
1021 return SVC_DROP; 1021 return SVC_CLOSE;
1022 *authp = rpc_autherr_badverf; 1022 *authp = rpc_autherr_badverf;
1023 if (svc_safe_getnetobj(argv, &tmpobj)) { 1023 if (svc_safe_getnetobj(argv, &tmpobj)) {
1024 kfree(rsikey.in_handle.data); 1024 kfree(rsikey.in_handle.data);
@@ -1026,38 +1026,35 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1026 } 1026 }
1027 if (dup_netobj(&rsikey.in_token, &tmpobj)) { 1027 if (dup_netobj(&rsikey.in_token, &tmpobj)) {
1028 kfree(rsikey.in_handle.data); 1028 kfree(rsikey.in_handle.data);
1029 return SVC_DROP; 1029 return SVC_CLOSE;
1030 } 1030 }
1031 1031
1032 /* Perform upcall, or find upcall result: */ 1032 /* Perform upcall, or find upcall result: */
1033 rsip = rsi_lookup(&rsikey); 1033 rsip = rsi_lookup(&rsikey);
1034 rsi_free(&rsikey); 1034 rsi_free(&rsikey);
1035 if (!rsip) 1035 if (!rsip)
1036 return SVC_DROP; 1036 return SVC_CLOSE;
1037 switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { 1037 if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0)
1038 case -EAGAIN:
1039 case -ETIMEDOUT:
1040 case -ENOENT:
1041 /* No upcall result: */ 1038 /* No upcall result: */
1042 return SVC_DROP; 1039 return SVC_CLOSE;
1043 case 0: 1040
1044 ret = SVC_DROP; 1041 ret = SVC_CLOSE;
1045 /* Got an answer to the upcall; use it: */ 1042 /* Got an answer to the upcall; use it: */
1046 if (gss_write_init_verf(rqstp, rsip)) 1043 if (gss_write_init_verf(rqstp, rsip))
1047 goto out; 1044 goto out;
1048 if (resv->iov_len + 4 > PAGE_SIZE) 1045 if (resv->iov_len + 4 > PAGE_SIZE)
1049 goto out; 1046 goto out;
1050 svc_putnl(resv, RPC_SUCCESS); 1047 svc_putnl(resv, RPC_SUCCESS);
1051 if (svc_safe_putnetobj(resv, &rsip->out_handle)) 1048 if (svc_safe_putnetobj(resv, &rsip->out_handle))
1052 goto out; 1049 goto out;
1053 if (resv->iov_len + 3 * 4 > PAGE_SIZE) 1050 if (resv->iov_len + 3 * 4 > PAGE_SIZE)
1054 goto out; 1051 goto out;
1055 svc_putnl(resv, rsip->major_status); 1052 svc_putnl(resv, rsip->major_status);
1056 svc_putnl(resv, rsip->minor_status); 1053 svc_putnl(resv, rsip->minor_status);
1057 svc_putnl(resv, GSS_SEQ_WIN); 1054 svc_putnl(resv, GSS_SEQ_WIN);
1058 if (svc_safe_putnetobj(resv, &rsip->out_token)) 1055 if (svc_safe_putnetobj(resv, &rsip->out_token))
1059 goto out; 1056 goto out;
1060 } 1057
1061 ret = SVC_COMPLETE; 1058 ret = SVC_COMPLETE;
1062out: 1059out:
1063 cache_put(&rsip->h, &rsi_cache); 1060 cache_put(&rsip->h, &rsi_cache);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7dce81a926c5..e433e7580e27 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,15 +33,16 @@
33#include <linux/sunrpc/cache.h> 33#include <linux/sunrpc/cache.h>
34#include <linux/sunrpc/stats.h> 34#include <linux/sunrpc/stats.h>
35#include <linux/sunrpc/rpc_pipe_fs.h> 35#include <linux/sunrpc/rpc_pipe_fs.h>
36#include "netns.h"
36 37
37#define RPCDBG_FACILITY RPCDBG_CACHE 38#define RPCDBG_FACILITY RPCDBG_CACHE
38 39
39static int cache_defer_req(struct cache_req *req, struct cache_head *item); 40static void cache_defer_req(struct cache_req *req, struct cache_head *item);
40static void cache_revisit_request(struct cache_head *item); 41static void cache_revisit_request(struct cache_head *item);
41 42
42static void cache_init(struct cache_head *h) 43static void cache_init(struct cache_head *h)
43{ 44{
44 time_t now = get_seconds(); 45 time_t now = seconds_since_boot();
45 h->next = NULL; 46 h->next = NULL;
46 h->flags = 0; 47 h->flags = 0;
47 kref_init(&h->ref); 48 kref_init(&h->ref);
@@ -51,7 +52,7 @@ static void cache_init(struct cache_head *h)
51 52
52static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) 53static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
53{ 54{
54 return (h->expiry_time < get_seconds()) || 55 return (h->expiry_time < seconds_since_boot()) ||
55 (detail->flush_time > h->last_refresh); 56 (detail->flush_time > h->last_refresh);
56} 57}
57 58
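
From here on the patch systematically replaces wall-clock get_seconds() with boot-relative time, so that cache expiry is immune to clock steps. The helpers are introduced on the header side of this series (not shown in this diff); roughly, and treating the exact bodies as an assumption, they amount to:

	static inline time_t seconds_since_boot(void)
	{
		struct timespec boot;

		getboottime(&boot);
		return get_seconds() - boot.tv_sec;
	}

	static inline time_t convert_to_wallclock(time_t sinceboot)
	{
		struct timespec boot;

		getboottime(&boot);
		return boot.tv_sec + sinceboot;
	}
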
@@ -126,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
126static void cache_fresh_locked(struct cache_head *head, time_t expiry) 127static void cache_fresh_locked(struct cache_head *head, time_t expiry)
127{ 128{
128 head->expiry_time = expiry; 129 head->expiry_time = expiry;
129 head->last_refresh = get_seconds(); 130 head->last_refresh = seconds_since_boot();
130 set_bit(CACHE_VALID, &head->flags); 131 set_bit(CACHE_VALID, &head->flags);
131} 132}
132 133
@@ -237,7 +238,7 @@ int cache_check(struct cache_detail *detail,
237 238
238 /* now see if we want to start an upcall */ 239 /* now see if we want to start an upcall */
239 refresh_age = (h->expiry_time - h->last_refresh); 240 refresh_age = (h->expiry_time - h->last_refresh);
240 age = get_seconds() - h->last_refresh; 241 age = seconds_since_boot() - h->last_refresh;
241 242
242 if (rqstp == NULL) { 243 if (rqstp == NULL) {
243 if (rv == -EAGAIN) 244 if (rv == -EAGAIN)
@@ -252,7 +253,7 @@ int cache_check(struct cache_detail *detail,
252 cache_revisit_request(h); 253 cache_revisit_request(h);
253 if (rv == -EAGAIN) { 254 if (rv == -EAGAIN) {
254 set_bit(CACHE_NEGATIVE, &h->flags); 255 set_bit(CACHE_NEGATIVE, &h->flags);
255 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); 256 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
256 cache_fresh_unlocked(h, detail); 257 cache_fresh_unlocked(h, detail);
257 rv = -ENOENT; 258 rv = -ENOENT;
258 } 259 }
@@ -267,7 +268,8 @@ int cache_check(struct cache_detail *detail,
267 } 268 }
268 269
269 if (rv == -EAGAIN) { 270 if (rv == -EAGAIN) {
270 if (cache_defer_req(rqstp, h) < 0) { 271 cache_defer_req(rqstp, h);
272 if (!test_bit(CACHE_PENDING, &h->flags)) {
271 /* Request is not deferred */ 273 /* Request is not deferred */
272 rv = cache_is_valid(detail, h); 274 rv = cache_is_valid(detail, h);
273 if (rv == -EAGAIN) 275 if (rv == -EAGAIN)
@@ -387,11 +389,11 @@ static int cache_clean(void)
387 return -1; 389 return -1;
388 } 390 }
389 current_detail = list_entry(next, struct cache_detail, others); 391 current_detail = list_entry(next, struct cache_detail, others);
390 if (current_detail->nextcheck > get_seconds()) 392 if (current_detail->nextcheck > seconds_since_boot())
391 current_index = current_detail->hash_size; 393 current_index = current_detail->hash_size;
392 else { 394 else {
393 current_index = 0; 395 current_index = 0;
394 current_detail->nextcheck = get_seconds()+30*60; 396 current_detail->nextcheck = seconds_since_boot()+30*60;
395 } 397 }
396 } 398 }
397 399
@@ -476,7 +478,7 @@ EXPORT_SYMBOL_GPL(cache_flush);
476void cache_purge(struct cache_detail *detail) 478void cache_purge(struct cache_detail *detail)
477{ 479{
478 detail->flush_time = LONG_MAX; 480 detail->flush_time = LONG_MAX;
479 detail->nextcheck = get_seconds(); 481 detail->nextcheck = seconds_since_boot();
480 cache_flush(); 482 cache_flush();
481 detail->flush_time = 1; 483 detail->flush_time = 1;
482} 484}
@@ -505,81 +507,155 @@ EXPORT_SYMBOL_GPL(cache_purge);
505 507
506static DEFINE_SPINLOCK(cache_defer_lock); 508static DEFINE_SPINLOCK(cache_defer_lock);
507static LIST_HEAD(cache_defer_list); 509static LIST_HEAD(cache_defer_list);
508static struct list_head cache_defer_hash[DFR_HASHSIZE]; 510static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
509static int cache_defer_cnt; 511static int cache_defer_cnt;
510 512
511static int cache_defer_req(struct cache_req *req, struct cache_head *item) 513static void __unhash_deferred_req(struct cache_deferred_req *dreq)
514{
515 hlist_del_init(&dreq->hash);
516 if (!list_empty(&dreq->recent)) {
517 list_del_init(&dreq->recent);
518 cache_defer_cnt--;
519 }
520}
521
522static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
512{ 523{
513 struct cache_deferred_req *dreq, *discard;
514 int hash = DFR_HASH(item); 524 int hash = DFR_HASH(item);
515 525
516 if (cache_defer_cnt >= DFR_MAX) { 526 INIT_LIST_HEAD(&dreq->recent);
517 /* too much in the cache, randomly drop this one, 527 hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
518 * or continue and drop the oldest below 528}
519 */ 529
520 if (net_random()&1) 530static void setup_deferral(struct cache_deferred_req *dreq,
521 return -ENOMEM; 531 struct cache_head *item,
522 } 532 int count_me)
523 dreq = req->defer(req); 533{
524 if (dreq == NULL)
525 return -ENOMEM;
526 534
527 dreq->item = item; 535 dreq->item = item;
528 536
529 spin_lock(&cache_defer_lock); 537 spin_lock(&cache_defer_lock);
530 538
531 list_add(&dreq->recent, &cache_defer_list); 539 __hash_deferred_req(dreq, item);
532
533 if (cache_defer_hash[hash].next == NULL)
534 INIT_LIST_HEAD(&cache_defer_hash[hash]);
535 list_add(&dreq->hash, &cache_defer_hash[hash]);
536 540
537 /* it is in, now maybe clean up */ 541 if (count_me) {
538 discard = NULL; 542 cache_defer_cnt++;
539 if (++cache_defer_cnt > DFR_MAX) { 543 list_add(&dreq->recent, &cache_defer_list);
540 discard = list_entry(cache_defer_list.prev,
541 struct cache_deferred_req, recent);
542 list_del_init(&discard->recent);
543 list_del_init(&discard->hash);
544 cache_defer_cnt--;
545 } 544 }
545
546 spin_unlock(&cache_defer_lock); 546 spin_unlock(&cache_defer_lock);
547 547
548}
549
550struct thread_deferred_req {
551 struct cache_deferred_req handle;
552 struct completion completion;
553};
554
555static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
556{
557 struct thread_deferred_req *dr =
558 container_of(dreq, struct thread_deferred_req, handle);
559 complete(&dr->completion);
560}
561
562static void cache_wait_req(struct cache_req *req, struct cache_head *item)
563{
564 struct thread_deferred_req sleeper;
565 struct cache_deferred_req *dreq = &sleeper.handle;
566
567 sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
568 dreq->revisit = cache_restart_thread;
569
570 setup_deferral(dreq, item, 0);
571
572 if (!test_bit(CACHE_PENDING, &item->flags) ||
573 wait_for_completion_interruptible_timeout(
574 &sleeper.completion, req->thread_wait) <= 0) {
575 /* The completion wasn't completed, so we need
576 * to clean up
577 */
578 spin_lock(&cache_defer_lock);
579 if (!hlist_unhashed(&sleeper.handle.hash)) {
580 __unhash_deferred_req(&sleeper.handle);
581 spin_unlock(&cache_defer_lock);
582 } else {
583 /* cache_revisit_request already removed
584 * this from the hash table, but hasn't
585 * called ->revisit yet. It will very soon
586 * and we need to wait for it.
587 */
588 spin_unlock(&cache_defer_lock);
589 wait_for_completion(&sleeper.completion);
590 }
591 }
592}
593
594static void cache_limit_defers(void)
595{
596	/* Make sure we haven't exceeded the limit of allowed deferred
597 * requests.
598 */
599 struct cache_deferred_req *discard = NULL;
600
601 if (cache_defer_cnt <= DFR_MAX)
602 return;
603
604 spin_lock(&cache_defer_lock);
605
606 /* Consider removing either the first or the last */
607 if (cache_defer_cnt > DFR_MAX) {
608 if (net_random() & 1)
609 discard = list_entry(cache_defer_list.next,
610 struct cache_deferred_req, recent);
611 else
612 discard = list_entry(cache_defer_list.prev,
613 struct cache_deferred_req, recent);
614 __unhash_deferred_req(discard);
615 }
616 spin_unlock(&cache_defer_lock);
548 if (discard) 617 if (discard)
549 /* there was one too many */
550 discard->revisit(discard, 1); 618 discard->revisit(discard, 1);
619}
551 620
552 if (!test_bit(CACHE_PENDING, &item->flags)) { 621static void cache_defer_req(struct cache_req *req, struct cache_head *item)
553 /* must have just been validated... */ 622{
554 cache_revisit_request(item); 623 struct cache_deferred_req *dreq;
555 return -EAGAIN; 624
625 if (req->thread_wait) {
626 cache_wait_req(req, item);
627 if (!test_bit(CACHE_PENDING, &item->flags))
628 return;
556 } 629 }
557 return 0; 630 dreq = req->defer(req);
631 if (dreq == NULL)
632 return;
633 setup_deferral(dreq, item, 1);
634 if (!test_bit(CACHE_PENDING, &item->flags))
635 /* Bit could have been cleared before we managed to
636 * set up the deferral, so need to revisit just in case
637 */
638 cache_revisit_request(item);
639
640 cache_limit_defers();
558} 641}
559 642
560static void cache_revisit_request(struct cache_head *item) 643static void cache_revisit_request(struct cache_head *item)
561{ 644{
562 struct cache_deferred_req *dreq; 645 struct cache_deferred_req *dreq;
563 struct list_head pending; 646 struct list_head pending;
564 647 struct hlist_node *lp, *tmp;
565 struct list_head *lp;
566 int hash = DFR_HASH(item); 648 int hash = DFR_HASH(item);
567 649
568 INIT_LIST_HEAD(&pending); 650 INIT_LIST_HEAD(&pending);
569 spin_lock(&cache_defer_lock); 651 spin_lock(&cache_defer_lock);
570 652
571 lp = cache_defer_hash[hash].next; 653 hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
572 if (lp) { 654 if (dreq->item == item) {
573 while (lp != &cache_defer_hash[hash]) { 655 __unhash_deferred_req(dreq);
574 dreq = list_entry(lp, struct cache_deferred_req, hash); 656 list_add(&dreq->recent, &pending);
575 lp = lp->next;
576 if (dreq->item == item) {
577 list_del_init(&dreq->hash);
578 list_move(&dreq->recent, &pending);
579 cache_defer_cnt--;
580 }
581 } 657 }
582 } 658
583 spin_unlock(&cache_defer_lock); 659 spin_unlock(&cache_defer_lock);
584 660
585 while (!list_empty(&pending)) { 661 while (!list_empty(&pending)) {
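
The cache_wait_req() added above is an instance of a common kernel idiom: an on-stack completion embedded beside a callback handle, where the delicate part is teardown when the wait times out first. Reduced to a skeleton (the names here are illustrative, not from the patch):

	#include <linux/completion.h>
	#include <linux/kernel.h>

	struct sleeper {
		struct cache_deferred_req handle;	/* lives on the defer hash */
		struct completion done;
	};

	static void sleeper_revisit(struct cache_deferred_req *dreq, int too_many)
	{
		struct sleeper *s = container_of(dreq, struct sleeper, handle);

		complete(&s->done);	/* may fire after the waiter gave up */
	}

The else branch in cache_wait_req() exists for exactly that last comment: once cache_revisit_request() has unhashed the handle, ->revisit may already be in flight, so the waiter must block on the completion unconditionally rather than let its stack frame disappear underneath the callback.
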
@@ -600,9 +676,8 @@ void cache_clean_deferred(void *owner)
600 676
601 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { 677 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
602 if (dreq->owner == owner) { 678 if (dreq->owner == owner) {
603 list_del_init(&dreq->hash); 679 __unhash_deferred_req(dreq);
604 list_move(&dreq->recent, &pending); 680 list_add(&dreq->recent, &pending);
605 cache_defer_cnt--;
606 } 681 }
607 } 682 }
608 spin_unlock(&cache_defer_lock); 683 spin_unlock(&cache_defer_lock);
@@ -901,7 +976,7 @@ static int cache_release(struct inode *inode, struct file *filp,
901 filp->private_data = NULL; 976 filp->private_data = NULL;
902 kfree(rp); 977 kfree(rp);
903 978
904 cd->last_close = get_seconds(); 979 cd->last_close = seconds_since_boot();
905 atomic_dec(&cd->readers); 980 atomic_dec(&cd->readers);
906 } 981 }
907 module_put(cd->owner); 982 module_put(cd->owner);
@@ -1014,6 +1089,23 @@ static void warn_no_listener(struct cache_detail *detail)
1014 } 1089 }
1015} 1090}
1016 1091
1092static bool cache_listeners_exist(struct cache_detail *detail)
1093{
1094 if (atomic_read(&detail->readers))
1095 return true;
1096 if (detail->last_close == 0)
1097 /* This cache was never opened */
1098 return false;
1099 if (detail->last_close < seconds_since_boot() - 30)
1100 /*
1101 * We allow for the possibility that someone might
1102 * restart a userspace daemon without restarting the
1103 * server; but after 30 seconds, we give up.
1104 */
1105 return false;
1106 return true;
1107}
1108
1017/* 1109/*
1018 * register an upcall request to user-space and queue it up for read() by the 1110 * register an upcall request to user-space and queue it up for read() by the
1019 * upcall daemon. 1111 * upcall daemon.
@@ -1032,10 +1124,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
1032 char *bp; 1124 char *bp;
1033 int len; 1125 int len;
1034 1126
1035 if (atomic_read(&detail->readers) == 0 && 1127 if (!cache_listeners_exist(detail)) {
1036 detail->last_close < get_seconds() - 30) { 1128 warn_no_listener(detail);
1037 warn_no_listener(detail); 1129 return -EINVAL;
1038 return -EINVAL;
1039 } 1130 }
1040 1131
1041 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1132 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -1094,13 +1185,19 @@ int qword_get(char **bpp, char *dest, int bufsize)
1094 if (bp[0] == '\\' && bp[1] == 'x') { 1185 if (bp[0] == '\\' && bp[1] == 'x') {
1095 /* HEX STRING */ 1186 /* HEX STRING */
1096 bp += 2; 1187 bp += 2;
1097 while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) { 1188 while (len < bufsize) {
1098 int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1189 int h, l;
1099 bp++; 1190
1100 byte <<= 4; 1191 h = hex_to_bin(bp[0]);
1101 byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1192 if (h < 0)
1102 *dest++ = byte; 1193 break;
1103 bp++; 1194
1195 l = hex_to_bin(bp[1]);
1196 if (l < 0)
1197 break;
1198
1199 *dest++ = (h << 4) | l;
1200 bp += 2;
1104 len++; 1201 len++;
1105 } 1202 }
1106 } else { 1203 } else {
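
hex_to_bin() returns the value of a single hex digit, or -1 for anything else, which is what lets the rewritten loop drop the isdigit()/toupper() arithmetic and also validate the second nibble of each pair. The same helper in isolation (an illustrative sketch, not part of the patch):

	#include <linux/kernel.h>	/* hex_to_bin() */

	/* decode 2*n hex chars at src into n bytes at dst; -EINVAL on a bad digit */
	static int hex2mem(u8 *dst, const char *src, size_t n)
	{
		size_t i;

		for (i = 0; i < n; i++) {
			int hi = hex_to_bin(src[2 * i]);
			int lo = hex_to_bin(src[2 * i + 1]);

			if (hi < 0 || lo < 0)
				return -EINVAL;
			dst[i] = (hi << 4) | lo;
		}
		return 0;
	}
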
@@ -1218,7 +1315,8 @@ static int c_show(struct seq_file *m, void *p)
1218 1315
1219 ifdebug(CACHE) 1316 ifdebug(CACHE)
1220 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", 1317 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
1221 cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); 1318 convert_to_wallclock(cp->expiry_time),
1319 atomic_read(&cp->ref.refcount), cp->flags);
1222 cache_get(cp); 1320 cache_get(cp);
1223 if (cache_check(cd, cp, NULL)) 1321 if (cache_check(cd, cp, NULL))
1224 /* cache_check does a cache_put on failure */ 1322 /* cache_check does a cache_put on failure */
@@ -1284,7 +1382,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
1284 unsigned long p = *ppos; 1382 unsigned long p = *ppos;
1285 size_t len; 1383 size_t len;
1286 1384
1287 sprintf(tbuf, "%lu\n", cd->flush_time); 1385 sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time));
1288 len = strlen(tbuf); 1386 len = strlen(tbuf);
1289 if (p >= len) 1387 if (p >= len)
1290 return 0; 1388 return 0;
@@ -1302,19 +1400,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1302 struct cache_detail *cd) 1400 struct cache_detail *cd)
1303{ 1401{
1304 char tbuf[20]; 1402 char tbuf[20];
1305 char *ep; 1403 char *bp, *ep;
1306 long flushtime; 1404
1307 if (*ppos || count > sizeof(tbuf)-1) 1405 if (*ppos || count > sizeof(tbuf)-1)
1308 return -EINVAL; 1406 return -EINVAL;
1309 if (copy_from_user(tbuf, buf, count)) 1407 if (copy_from_user(tbuf, buf, count))
1310 return -EFAULT; 1408 return -EFAULT;
1311 tbuf[count] = 0; 1409 tbuf[count] = 0;
1312 flushtime = simple_strtoul(tbuf, &ep, 0); 1410 simple_strtoul(tbuf, &ep, 0);
1313 if (*ep && *ep != '\n') 1411 if (*ep && *ep != '\n')
1314 return -EINVAL; 1412 return -EINVAL;
1315 1413
1316 cd->flush_time = flushtime; 1414 bp = tbuf;
1317 cd->nextcheck = get_seconds(); 1415 cd->flush_time = get_expiry(&bp);
1416 cd->nextcheck = seconds_since_boot();
1318 cache_flush(); 1417 cache_flush();
1319 1418
1320 *ppos += count; 1419 *ppos += count;
@@ -1438,8 +1537,10 @@ static const struct file_operations cache_flush_operations_procfs = {
1438 .llseek = no_llseek, 1537 .llseek = no_llseek,
1439}; 1538};
1440 1539
1441static void remove_cache_proc_entries(struct cache_detail *cd) 1540static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
1442{ 1541{
1542 struct sunrpc_net *sn;
1543
1443 if (cd->u.procfs.proc_ent == NULL) 1544 if (cd->u.procfs.proc_ent == NULL)
1444 return; 1545 return;
1445 if (cd->u.procfs.flush_ent) 1546 if (cd->u.procfs.flush_ent)
@@ -1449,15 +1550,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
1449 if (cd->u.procfs.content_ent) 1550 if (cd->u.procfs.content_ent)
1450 remove_proc_entry("content", cd->u.procfs.proc_ent); 1551 remove_proc_entry("content", cd->u.procfs.proc_ent);
1451 cd->u.procfs.proc_ent = NULL; 1552 cd->u.procfs.proc_ent = NULL;
1452 remove_proc_entry(cd->name, proc_net_rpc); 1553 sn = net_generic(net, sunrpc_net_id);
1554 remove_proc_entry(cd->name, sn->proc_net_rpc);
1453} 1555}
1454 1556
1455#ifdef CONFIG_PROC_FS 1557#ifdef CONFIG_PROC_FS
1456static int create_cache_proc_entries(struct cache_detail *cd) 1558static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1457{ 1559{
1458 struct proc_dir_entry *p; 1560 struct proc_dir_entry *p;
1561 struct sunrpc_net *sn;
1459 1562
1460 cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); 1563 sn = net_generic(net, sunrpc_net_id);
1564 cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
1461 if (cd->u.procfs.proc_ent == NULL) 1565 if (cd->u.procfs.proc_ent == NULL)
1462 goto out_nomem; 1566 goto out_nomem;
1463 cd->u.procfs.channel_ent = NULL; 1567 cd->u.procfs.channel_ent = NULL;
@@ -1488,11 +1592,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
1488 } 1592 }
1489 return 0; 1593 return 0;
1490out_nomem: 1594out_nomem:
1491 remove_cache_proc_entries(cd); 1595 remove_cache_proc_entries(cd, net);
1492 return -ENOMEM; 1596 return -ENOMEM;
1493} 1597}
1494#else /* CONFIG_PROC_FS */ 1598#else /* CONFIG_PROC_FS */
1495static int create_cache_proc_entries(struct cache_detail *cd) 1599static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1496{ 1600{
1497 return 0; 1601 return 0;
1498} 1602}
@@ -1503,23 +1607,33 @@ void __init cache_initialize(void)
1503 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); 1607 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
1504} 1608}
1505 1609
1506int cache_register(struct cache_detail *cd) 1610int cache_register_net(struct cache_detail *cd, struct net *net)
1507{ 1611{
1508 int ret; 1612 int ret;
1509 1613
1510 sunrpc_init_cache_detail(cd); 1614 sunrpc_init_cache_detail(cd);
1511 ret = create_cache_proc_entries(cd); 1615 ret = create_cache_proc_entries(cd, net);
1512 if (ret) 1616 if (ret)
1513 sunrpc_destroy_cache_detail(cd); 1617 sunrpc_destroy_cache_detail(cd);
1514 return ret; 1618 return ret;
1515} 1619}
1620
1621int cache_register(struct cache_detail *cd)
1622{
1623 return cache_register_net(cd, &init_net);
1624}
1516EXPORT_SYMBOL_GPL(cache_register); 1625EXPORT_SYMBOL_GPL(cache_register);
1517 1626
1518void cache_unregister(struct cache_detail *cd) 1627void cache_unregister_net(struct cache_detail *cd, struct net *net)
1519{ 1628{
1520 remove_cache_proc_entries(cd); 1629 remove_cache_proc_entries(cd, net);
1521 sunrpc_destroy_cache_detail(cd); 1630 sunrpc_destroy_cache_detail(cd);
1522} 1631}
1632
1633void cache_unregister(struct cache_detail *cd)
1634{
1635 cache_unregister_net(cd, &init_net);
1636}
1523EXPORT_SYMBOL_GPL(cache_unregister); 1637EXPORT_SYMBOL_GPL(cache_unregister);
1524 1638
1525static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, 1639static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa5549079d79..9dab9573be41 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -284,6 +284,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
284 struct rpc_xprt *xprt; 284 struct rpc_xprt *xprt;
285 struct rpc_clnt *clnt; 285 struct rpc_clnt *clnt;
286 struct xprt_create xprtargs = { 286 struct xprt_create xprtargs = {
287 .net = args->net,
287 .ident = args->protocol, 288 .ident = args->protocol,
288 .srcaddr = args->saddress, 289 .srcaddr = args->saddress,
289 .dstaddr = args->address, 290 .dstaddr = args->address,
@@ -1675,7 +1676,7 @@ rpc_verify_header(struct rpc_task *task)
1675 rpcauth_invalcred(task); 1676 rpcauth_invalcred(task);
1676 /* Ensure we obtain a new XID! */ 1677 /* Ensure we obtain a new XID! */
1677 xprt_release(task); 1678 xprt_release(task);
1678 task->tk_action = call_refresh; 1679 task->tk_action = call_reserve;
1679 goto out_retry; 1680 goto out_retry;
1680 case RPC_AUTH_BADCRED: 1681 case RPC_AUTH_BADCRED:
1681 case RPC_AUTH_BADVERF: 1682 case RPC_AUTH_BADVERF:
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
new file mode 100644
index 000000000000..d013bf211cae
--- /dev/null
+++ b/net/sunrpc/netns.h
@@ -0,0 +1,19 @@
1#ifndef __SUNRPC_NETNS_H__
2#define __SUNRPC_NETNS_H__
3
4#include <net/net_namespace.h>
5#include <net/netns/generic.h>
6
7struct cache_detail;
8
9struct sunrpc_net {
10 struct proc_dir_entry *proc_net_rpc;
11 struct cache_detail *ip_map_cache;
12};
13
14extern int sunrpc_net_id;
15
16int ip_map_cache_create(struct net *);
17void ip_map_cache_destroy(struct net *);
18
19#endif
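
The new header defines the per-namespace state that the rest of the series threads through. Every consumer retrieves it the same way; a sketch mirroring the net_generic() calls that appear in stats.c and sunrpc_syms.c below (the helper name is illustrative):

	#include <net/netns/generic.h>
	#include "netns.h"

	static struct proc_dir_entry *rpc_proc_dir(struct net *net)
	{
		/* sunrpc_net_id indexes the slot that register_pernet_subsys()
		 * allocates when sunrpc_net_ops is registered */
		struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);

		return sn->proc_net_rpc;
	}
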
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 52f252432144..10a17a37ec4e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -28,7 +28,7 @@
28#include <linux/sunrpc/rpc_pipe_fs.h> 28#include <linux/sunrpc/rpc_pipe_fs.h>
29#include <linux/sunrpc/cache.h> 29#include <linux/sunrpc/cache.h>
30 30
31static struct vfsmount *rpc_mount __read_mostly; 31static struct vfsmount *rpc_mnt __read_mostly;
32static int rpc_mount_count; 32static int rpc_mount_count;
33 33
34static struct file_system_type rpc_pipe_fs_type; 34static struct file_system_type rpc_pipe_fs_type;
@@ -417,16 +417,16 @@ struct vfsmount *rpc_get_mount(void)
417{ 417{
418 int err; 418 int err;
419 419
420 err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); 420 err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count);
421 if (err != 0) 421 if (err != 0)
422 return ERR_PTR(err); 422 return ERR_PTR(err);
423 return rpc_mount; 423 return rpc_mnt;
424} 424}
425EXPORT_SYMBOL_GPL(rpc_get_mount); 425EXPORT_SYMBOL_GPL(rpc_get_mount);
426 426
427void rpc_put_mount(void) 427void rpc_put_mount(void)
428{ 428{
429 simple_release_fs(&rpc_mount, &rpc_mount_count); 429 simple_release_fs(&rpc_mnt, &rpc_mount_count);
430} 430}
431EXPORT_SYMBOL_GPL(rpc_put_mount); 431EXPORT_SYMBOL_GPL(rpc_put_mount);
432 432
@@ -445,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
445 struct inode *inode = new_inode(sb); 445 struct inode *inode = new_inode(sb);
446 if (!inode) 446 if (!inode)
447 return NULL; 447 return NULL;
448 inode->i_ino = get_next_ino();
448 inode->i_mode = mode; 449 inode->i_mode = mode;
449 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 450 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
450 switch(mode & S_IFMT) { 451 switch(mode & S_IFMT) {
@@ -1017,17 +1018,17 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1017 return 0; 1018 return 0;
1018} 1019}
1019 1020
1020static int 1021static struct dentry *
1021rpc_get_sb(struct file_system_type *fs_type, 1022rpc_mount(struct file_system_type *fs_type,
1022 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1023 int flags, const char *dev_name, void *data)
1023{ 1024{
1024 return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt); 1025 return mount_single(fs_type, flags, data, rpc_fill_super);
1025} 1026}
1026 1027
1027static struct file_system_type rpc_pipe_fs_type = { 1028static struct file_system_type rpc_pipe_fs_type = {
1028 .owner = THIS_MODULE, 1029 .owner = THIS_MODULE,
1029 .name = "rpc_pipefs", 1030 .name = "rpc_pipefs",
1030 .get_sb = rpc_get_sb, 1031 .mount = rpc_mount,
1031 .kill_sb = kill_litter_super, 1032 .kill_sb = kill_litter_super,
1032}; 1033};
1033 1034
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index dac219a56ae1..fa6d7ca2c851 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -177,6 +177,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex);
177static int rpcb_create_local(void) 177static int rpcb_create_local(void)
178{ 178{
179 struct rpc_create_args args = { 179 struct rpc_create_args args = {
180 .net = &init_net,
180 .protocol = XPRT_TRANSPORT_TCP, 181 .protocol = XPRT_TRANSPORT_TCP,
181 .address = (struct sockaddr *)&rpcb_inaddr_loopback, 182 .address = (struct sockaddr *)&rpcb_inaddr_loopback,
182 .addrsize = sizeof(rpcb_inaddr_loopback), 183 .addrsize = sizeof(rpcb_inaddr_loopback),
@@ -211,8 +212,9 @@ static int rpcb_create_local(void)
211 */ 212 */
212 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); 213 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
213 if (IS_ERR(clnt4)) { 214 if (IS_ERR(clnt4)) {
214 dprintk("RPC: failed to create local rpcbind v4 " 215 dprintk("RPC: failed to bind second program to "
215 "cleint (errno %ld).\n", PTR_ERR(clnt4)); 216 "rpcbind v4 client (errno %ld).\n",
217 PTR_ERR(clnt4));
216 clnt4 = NULL; 218 clnt4 = NULL;
217 } 219 }
218 220
@@ -228,6 +230,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
228 size_t salen, int proto, u32 version) 230 size_t salen, int proto, u32 version)
229{ 231{
230 struct rpc_create_args args = { 232 struct rpc_create_args args = {
233 .net = &init_net,
231 .protocol = proto, 234 .protocol = proto,
232 .address = srvaddr, 235 .address = srvaddr,
233 .addrsize = salen, 236 .addrsize = salen,
@@ -247,7 +250,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
247 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); 250 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
248 break; 251 break;
249 default: 252 default:
250 return NULL; 253 return ERR_PTR(-EAFNOSUPPORT);
251 } 254 }
252 255
253 return rpc_create(&args); 256 return rpc_create(&args);
@@ -475,57 +478,6 @@ int rpcb_v4_register(const u32 program, const u32 version,
475 return -EAFNOSUPPORT; 478 return -EAFNOSUPPORT;
476} 479}
477 480
478/**
479 * rpcb_getport_sync - obtain the port for an RPC service on a given host
480 * @sin: address of remote peer
481 * @prog: RPC program number to bind
482 * @vers: RPC version number to bind
483 * @prot: transport protocol to use to make this request
484 *
485 * Return value is the requested advertised port number,
486 * or a negative errno value.
487 *
488 * Called from outside the RPC client in a synchronous task context.
489 * Uses default timeout parameters specified by underlying transport.
490 *
491 * XXX: Needs to support IPv6
492 */
493int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
494{
495 struct rpcbind_args map = {
496 .r_prog = prog,
497 .r_vers = vers,
498 .r_prot = prot,
499 .r_port = 0,
500 };
501 struct rpc_message msg = {
502 .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
503 .rpc_argp = &map,
504 .rpc_resp = &map,
505 };
506 struct rpc_clnt *rpcb_clnt;
507 int status;
508
509 dprintk("RPC: %s(%pI4, %u, %u, %d)\n",
510 __func__, &sin->sin_addr.s_addr, prog, vers, prot);
511
512 rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
513 sizeof(*sin), prot, RPCBVERS_2);
514 if (IS_ERR(rpcb_clnt))
515 return PTR_ERR(rpcb_clnt);
516
517 status = rpc_call_sync(rpcb_clnt, &msg, 0);
518 rpc_shutdown_client(rpcb_clnt);
519
520 if (status >= 0) {
521 if (map.r_port != 0)
522 return map.r_port;
523 status = -EACCES;
524 }
525 return status;
526}
527EXPORT_SYMBOL_GPL(rpcb_getport_sync);
528
529static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) 481static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
530{ 482{
531 struct rpc_message msg = { 483 struct rpc_message msg = {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index aa5dbda6608c..243fc09b164e 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -908,7 +908,7 @@ static int rpciod_start(void)
908 * Create the rpciod thread and wait for it to start. 908 * Create the rpciod thread and wait for it to start.
909 */ 909 */
910 dprintk("RPC: creating workqueue rpciod\n"); 910 dprintk("RPC: creating workqueue rpciod\n");
911 wq = create_workqueue("rpciod"); 911 wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
912 rpciod_workqueue = wq; 912 rpciod_workqueue = wq;
913 return rpciod_workqueue != NULL; 913 return rpciod_workqueue != NULL;
914} 914}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index ea1046f3f9a3..f71a73107ae9 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -22,11 +22,10 @@
22#include <linux/sunrpc/clnt.h> 22#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/svcsock.h> 23#include <linux/sunrpc/svcsock.h>
24#include <linux/sunrpc/metrics.h> 24#include <linux/sunrpc/metrics.h>
25#include <net/net_namespace.h>
26 25
27#define RPCDBG_FACILITY RPCDBG_MISC 26#include "netns.h"
28 27
29struct proc_dir_entry *proc_net_rpc = NULL; 28#define RPCDBG_FACILITY RPCDBG_MISC
30 29
31/* 30/*
32 * Get RPC client stats 31 * Get RPC client stats
@@ -218,10 +217,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats);
218static inline struct proc_dir_entry * 217static inline struct proc_dir_entry *
219do_register(const char *name, void *data, const struct file_operations *fops) 218do_register(const char *name, void *data, const struct file_operations *fops)
220{ 219{
221 rpc_proc_init(); 220 struct sunrpc_net *sn;
222 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
223 221
224 return proc_create_data(name, 0, proc_net_rpc, fops, data); 222 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
223 sn = net_generic(&init_net, sunrpc_net_id);
224 return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
225} 225}
226 226
227struct proc_dir_entry * 227struct proc_dir_entry *
@@ -234,7 +234,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register);
234void 234void
235rpc_proc_unregister(const char *name) 235rpc_proc_unregister(const char *name)
236{ 236{
237 remove_proc_entry(name, proc_net_rpc); 237 struct sunrpc_net *sn;
238
239 sn = net_generic(&init_net, sunrpc_net_id);
240 remove_proc_entry(name, sn->proc_net_rpc);
238} 241}
239EXPORT_SYMBOL_GPL(rpc_proc_unregister); 242EXPORT_SYMBOL_GPL(rpc_proc_unregister);
240 243
@@ -248,25 +251,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register);
248void 251void
249svc_proc_unregister(const char *name) 252svc_proc_unregister(const char *name)
250{ 253{
251 remove_proc_entry(name, proc_net_rpc); 254 struct sunrpc_net *sn;
255
256 sn = net_generic(&init_net, sunrpc_net_id);
257 remove_proc_entry(name, sn->proc_net_rpc);
252} 258}
253EXPORT_SYMBOL_GPL(svc_proc_unregister); 259EXPORT_SYMBOL_GPL(svc_proc_unregister);
254 260
255void 261int rpc_proc_init(struct net *net)
256rpc_proc_init(void)
257{ 262{
263 struct sunrpc_net *sn;
264
258 dprintk("RPC: registering /proc/net/rpc\n"); 265 dprintk("RPC: registering /proc/net/rpc\n");
259 if (!proc_net_rpc) 266 sn = net_generic(net, sunrpc_net_id);
260 proc_net_rpc = proc_mkdir("rpc", init_net.proc_net); 267 sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
268 if (sn->proc_net_rpc == NULL)
269 return -ENOMEM;
270
271 return 0;
261} 272}
262 273
263void 274void rpc_proc_exit(struct net *net)
264rpc_proc_exit(void)
265{ 275{
266 dprintk("RPC: unregistering /proc/net/rpc\n"); 276 dprintk("RPC: unregistering /proc/net/rpc\n");
267 if (proc_net_rpc) { 277 remove_proc_entry("rpc", net->proc_net);
268 proc_net_rpc = NULL;
269 remove_proc_entry("rpc", init_net.proc_net);
270 }
271} 278}
272 279
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c0d085013a2b..9d0809160994 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -22,7 +22,44 @@
22#include <linux/sunrpc/rpc_pipe_fs.h> 22#include <linux/sunrpc/rpc_pipe_fs.h>
23#include <linux/sunrpc/xprtsock.h> 23#include <linux/sunrpc/xprtsock.h>
24 24
25extern struct cache_detail ip_map_cache, unix_gid_cache; 25#include "netns.h"
26
27int sunrpc_net_id;
28
29static __net_init int sunrpc_init_net(struct net *net)
30{
31 int err;
32
33 err = rpc_proc_init(net);
34 if (err)
35 goto err_proc;
36
37 err = ip_map_cache_create(net);
38 if (err)
39 goto err_ipmap;
40
41 return 0;
42
43err_ipmap:
44 rpc_proc_exit(net);
45err_proc:
46 return err;
47}
48
49static __net_exit void sunrpc_exit_net(struct net *net)
50{
51 ip_map_cache_destroy(net);
52 rpc_proc_exit(net);
53}
54
55static struct pernet_operations sunrpc_net_ops = {
56 .init = sunrpc_init_net,
57 .exit = sunrpc_exit_net,
58 .id = &sunrpc_net_id,
59 .size = sizeof(struct sunrpc_net),
60};
61
62extern struct cache_detail unix_gid_cache;
26 63
27extern void cleanup_rpcb_clnt(void); 64extern void cleanup_rpcb_clnt(void);
28 65
@@ -38,18 +75,22 @@ init_sunrpc(void)
38 err = rpcauth_init_module(); 75 err = rpcauth_init_module();
39 if (err) 76 if (err)
40 goto out3; 77 goto out3;
78
79 cache_initialize();
80
81 err = register_pernet_subsys(&sunrpc_net_ops);
82 if (err)
83 goto out4;
41#ifdef RPC_DEBUG 84#ifdef RPC_DEBUG
42 rpc_register_sysctl(); 85 rpc_register_sysctl();
43#endif 86#endif
44#ifdef CONFIG_PROC_FS
45 rpc_proc_init();
46#endif
47 cache_initialize();
48 cache_register(&ip_map_cache);
49 cache_register(&unix_gid_cache); 87 cache_register(&unix_gid_cache);
50 svc_init_xprt_sock(); /* svc sock transport */ 88 svc_init_xprt_sock(); /* svc sock transport */
51 init_socket_xprt(); /* clnt sock transport */ 89 init_socket_xprt(); /* clnt sock transport */
52 return 0; 90 return 0;
91
92out4:
93 rpcauth_remove_module();
53out3: 94out3:
54 rpc_destroy_mempool(); 95 rpc_destroy_mempool();
55out2: 96out2:
@@ -67,14 +108,11 @@ cleanup_sunrpc(void)
67 svc_cleanup_xprt_sock(); 108 svc_cleanup_xprt_sock();
68 unregister_rpc_pipefs(); 109 unregister_rpc_pipefs();
69 rpc_destroy_mempool(); 110 rpc_destroy_mempool();
70 cache_unregister(&ip_map_cache);
71 cache_unregister(&unix_gid_cache); 111 cache_unregister(&unix_gid_cache);
112 unregister_pernet_subsys(&sunrpc_net_ops);
72#ifdef RPC_DEBUG 113#ifdef RPC_DEBUG
73 rpc_unregister_sysctl(); 114 rpc_unregister_sysctl();
74#endif 115#endif
75#ifdef CONFIG_PROC_FS
76 rpc_proc_exit();
77#endif
78 rcu_barrier(); /* Wait for completion of call_rcu()'s */ 116 rcu_barrier(); /* Wait for completion of call_rcu()'s */
79} 117}
80MODULE_LICENSE("GPL"); 118MODULE_LICENSE("GPL");
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d9017d64597e..6359c42c4941 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1055,6 +1055,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1055 goto err_bad; 1055 goto err_bad;
1056 case SVC_DENIED: 1056 case SVC_DENIED:
1057 goto err_bad_auth; 1057 goto err_bad_auth;
1058 case SVC_CLOSE:
1059 if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
1060 svc_close_xprt(rqstp->rq_xprt);
1058 case SVC_DROP: 1061 case SVC_DROP:
1059 goto dropit; 1062 goto dropit;
1060 case SVC_COMPLETE: 1063 case SVC_COMPLETE:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cbc084939dd8..c82fe739fbdc 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -100,16 +100,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
100 */ 100 */
101int svc_print_xprts(char *buf, int maxlen) 101int svc_print_xprts(char *buf, int maxlen)
102{ 102{
103 struct list_head *le; 103 struct svc_xprt_class *xcl;
104 char tmpstr[80]; 104 char tmpstr[80];
105 int len = 0; 105 int len = 0;
106 buf[0] = '\0'; 106 buf[0] = '\0';
107 107
108 spin_lock(&svc_xprt_class_lock); 108 spin_lock(&svc_xprt_class_lock);
109 list_for_each(le, &svc_xprt_class_list) { 109 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
110 int slen; 110 int slen;
111 struct svc_xprt_class *xcl =
112 list_entry(le, struct svc_xprt_class, xcl_list);
113 111
114 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); 112 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
115 slen = strlen(tmpstr); 113 slen = strlen(tmpstr);
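
The open-coded walk is converted to list_for_each_entry(), which folds the list_entry() container lookup into the iterator itself. The general shape of the idiom, for reference (a generic sketch, not this file's types):

	#include <linux/list.h>

	struct item {
		int value;
		struct list_head link;
	};

	static int sum_items(struct list_head *head)
	{
		struct item *it;
		int sum = 0;

		/* 'it' points at each containing struct item in turn */
		list_for_each_entry(it, head, link)
			sum += it->value;
		return sum;
	}
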
@@ -128,9 +126,9 @@ static void svc_xprt_free(struct kref *kref)
128 struct svc_xprt *xprt = 126 struct svc_xprt *xprt =
129 container_of(kref, struct svc_xprt, xpt_ref); 127 container_of(kref, struct svc_xprt, xpt_ref);
130 struct module *owner = xprt->xpt_class->xcl_owner; 128 struct module *owner = xprt->xpt_class->xcl_owner;
131 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && 129 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
132 xprt->xpt_auth_cache != NULL) 130 svcauth_unix_info_release(xprt);
133 svcauth_unix_info_release(xprt->xpt_auth_cache); 131 put_net(xprt->xpt_net);
134 xprt->xpt_ops->xpo_free(xprt); 132 xprt->xpt_ops->xpo_free(xprt);
135 module_put(owner); 133 module_put(owner);
136} 134}
@@ -156,15 +154,18 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
156 INIT_LIST_HEAD(&xprt->xpt_list); 154 INIT_LIST_HEAD(&xprt->xpt_list);
157 INIT_LIST_HEAD(&xprt->xpt_ready); 155 INIT_LIST_HEAD(&xprt->xpt_ready);
158 INIT_LIST_HEAD(&xprt->xpt_deferred); 156 INIT_LIST_HEAD(&xprt->xpt_deferred);
157 INIT_LIST_HEAD(&xprt->xpt_users);
159 mutex_init(&xprt->xpt_mutex); 158 mutex_init(&xprt->xpt_mutex);
160 spin_lock_init(&xprt->xpt_lock); 159 spin_lock_init(&xprt->xpt_lock);
161 set_bit(XPT_BUSY, &xprt->xpt_flags); 160 set_bit(XPT_BUSY, &xprt->xpt_flags);
162 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); 161 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
162 xprt->xpt_net = get_net(&init_net);
163} 163}
164EXPORT_SYMBOL_GPL(svc_xprt_init); 164EXPORT_SYMBOL_GPL(svc_xprt_init);
165 165
166static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, 166static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
167 struct svc_serv *serv, 167 struct svc_serv *serv,
168 struct net *net,
168 const int family, 169 const int family,
169 const unsigned short port, 170 const unsigned short port,
170 int flags) 171 int flags)
@@ -199,12 +200,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
199 return ERR_PTR(-EAFNOSUPPORT); 200 return ERR_PTR(-EAFNOSUPPORT);
200 } 201 }
201 202
202 return xcl->xcl_ops->xpo_create(serv, sap, len, flags); 203 return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
203} 204}
204 205
205int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 206int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
206 const int family, const unsigned short port, 207 struct net *net, const int family,
207 int flags) 208 const unsigned short port, int flags)
208{ 209{
209 struct svc_xprt_class *xcl; 210 struct svc_xprt_class *xcl;
210 211
@@ -220,7 +221,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
220 goto err; 221 goto err;
221 222
222 spin_unlock(&svc_xprt_class_lock); 223 spin_unlock(&svc_xprt_class_lock);
223 newxprt = __svc_xpo_create(xcl, serv, family, port, flags); 224 newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
224 if (IS_ERR(newxprt)) { 225 if (IS_ERR(newxprt)) {
225 module_put(xcl->xcl_owner); 226 module_put(xcl->xcl_owner);
226 return PTR_ERR(newxprt); 227 return PTR_ERR(newxprt);
@@ -329,12 +330,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
329 "svc_xprt_enqueue: " 330 "svc_xprt_enqueue: "
330 "threads and transports both waiting??\n"); 331 "threads and transports both waiting??\n");
331 332
332 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
333 /* Don't enqueue dead transports */
334 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
335 goto out_unlock;
336 }
337
338 pool->sp_stats.packets++; 333 pool->sp_stats.packets++;
339 334
340 /* Mark transport as busy. It will remain in this state until 335 /* Mark transport as busy. It will remain in this state until
@@ -651,6 +646,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
651 if (signalled() || kthread_should_stop()) 646 if (signalled() || kthread_should_stop())
652 return -EINTR; 647 return -EINTR;
653 648
649 /* Normally we will wait up to 5 seconds for any required
650 * cache information to be provided.
651 */
652 rqstp->rq_chandle.thread_wait = 5*HZ;
653
654 spin_lock_bh(&pool->sp_lock); 654 spin_lock_bh(&pool->sp_lock);
655 xprt = svc_xprt_dequeue(pool); 655 xprt = svc_xprt_dequeue(pool);
656 if (xprt) { 656 if (xprt) {
@@ -658,6 +658,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
658 svc_xprt_get(xprt); 658 svc_xprt_get(xprt);
659 rqstp->rq_reserved = serv->sv_max_mesg; 659 rqstp->rq_reserved = serv->sv_max_mesg;
660 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 660 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
661
662 /* As there is a shortage of threads and this request
663 * had to be queued, don't allow the thread to wait so
664 * long for cache updates.
665 */
666 rqstp->rq_chandle.thread_wait = 1*HZ;
661 } else { 667 } else {
662 /* No data pending. Go to sleep */ 668 /* No data pending. Go to sleep */
663 svc_thread_enqueue(pool, rqstp); 669 svc_thread_enqueue(pool, rqstp);
@@ -868,6 +874,19 @@ static void svc_age_temp_xprts(unsigned long closure)
868 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); 874 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
869} 875}
870 876
877static void call_xpt_users(struct svc_xprt *xprt)
878{
879 struct svc_xpt_user *u;
880
881 spin_lock(&xprt->xpt_lock);
882 while (!list_empty(&xprt->xpt_users)) {
883 u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
884 list_del(&u->list);
885 u->callback(u);
886 }
887 spin_unlock(&xprt->xpt_lock);
888}
889
871/* 890/*
872 * Remove a dead transport 891 * Remove a dead transport
873 */ 892 */
@@ -878,7 +897,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
878 897
879 /* Only do this once */ 898 /* Only do this once */
880 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) 899 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
881 return; 900 BUG();
882 901
883 dprintk("svc: svc_delete_xprt(%p)\n", xprt); 902 dprintk("svc: svc_delete_xprt(%p)\n", xprt);
884 xprt->xpt_ops->xpo_detach(xprt); 903 xprt->xpt_ops->xpo_detach(xprt);
@@ -900,6 +919,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
900 while ((dr = svc_deferred_dequeue(xprt)) != NULL) 919 while ((dr = svc_deferred_dequeue(xprt)) != NULL)
901 kfree(dr); 920 kfree(dr);
902 921
922 call_xpt_users(xprt);
903 svc_xprt_put(xprt); 923 svc_xprt_put(xprt);
904} 924}
905 925
@@ -910,10 +930,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
910 /* someone else will have to effect the close */ 930 /* someone else will have to effect the close */
911 return; 931 return;
912 932
913 svc_xprt_get(xprt);
914 svc_delete_xprt(xprt); 933 svc_delete_xprt(xprt);
915 clear_bit(XPT_BUSY, &xprt->xpt_flags);
916 svc_xprt_put(xprt);
917} 934}
918EXPORT_SYMBOL_GPL(svc_close_xprt); 935EXPORT_SYMBOL_GPL(svc_close_xprt);
919 936
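call_xpt_users() above drains a new xpt_users list when a transport is deleted. The other half of that API is not in this excerpt; a sketch inferred from the fields actually used (struct layout from list_first_entry() and u->callback(u); the helper name is hypothetical):

    struct svc_xpt_user {
            struct list_head list;
            void (*callback)(struct svc_xpt_user *);
    };

    /* hypothetical helper: arrange for u->callback to fire once when
     * xprt dies; additions take xpt_lock, mirroring call_xpt_users() */
    static void svc_xprt_add_user(struct svc_xprt *xprt,
                                  struct svc_xpt_user *u)
    {
            spin_lock(&xprt->xpt_lock);
            list_add(&u->list, &xprt->xpt_users);
            spin_unlock(&xprt->xpt_lock);
    }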
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 207311610988..560677d187f1 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -18,6 +18,8 @@
18 18
19#include <linux/sunrpc/clnt.h> 19#include <linux/sunrpc/clnt.h>
20 20
21#include "netns.h"
22
21/* 23/*
22 * AUTHUNIX and AUTHNULL credentials are both handled here. 24 * AUTHUNIX and AUTHNULL credentials are both handled here.
23 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid 25 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid
@@ -92,7 +94,6 @@ struct ip_map {
92 struct unix_domain *m_client; 94 struct unix_domain *m_client;
93 int m_add_change; 95 int m_add_change;
94}; 96};
95static struct cache_head *ip_table[IP_HASHMAX];
96 97
97static void ip_map_put(struct kref *kref) 98static void ip_map_put(struct kref *kref)
98{ 99{
@@ -178,8 +179,8 @@ static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
178 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); 179 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request);
179} 180}
180 181
181static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); 182static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
182static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); 183static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
183 184
184static int ip_map_parse(struct cache_detail *cd, 185static int ip_map_parse(struct cache_detail *cd,
185 char *mesg, int mlen) 186 char *mesg, int mlen)
@@ -219,10 +220,9 @@ static int ip_map_parse(struct cache_detail *cd,
219 switch (address.sa.sa_family) { 220 switch (address.sa.sa_family) {
220 case AF_INET: 221 case AF_INET:
221 /* Form a mapped IPv4 address in sin6 */ 222 /* Form a mapped IPv4 address in sin6 */
222 memset(&sin6, 0, sizeof(sin6));
223 sin6.sin6_family = AF_INET6; 223 sin6.sin6_family = AF_INET6;
224 sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); 224 ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
225 sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; 225 &sin6.sin6_addr);
226 break; 226 break;
227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
228 case AF_INET6: 228 case AF_INET6:
@@ -249,9 +249,9 @@ static int ip_map_parse(struct cache_detail *cd,
249 dom = NULL; 249 dom = NULL;
250 250
251 /* IPv6 scope IDs are ignored for now */ 251 /* IPv6 scope IDs are ignored for now */
252 ipmp = ip_map_lookup(class, &sin6.sin6_addr); 252 ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
253 if (ipmp) { 253 if (ipmp) {
254 err = ip_map_update(ipmp, 254 err = __ip_map_update(cd, ipmp,
255 container_of(dom, struct unix_domain, h), 255 container_of(dom, struct unix_domain, h),
256 expiry); 256 expiry);
257 } else 257 } else
@@ -294,29 +294,15 @@ static int ip_map_show(struct seq_file *m,
294} 294}
295 295
296 296
297struct cache_detail ip_map_cache = { 297static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
298 .owner = THIS_MODULE, 298 struct in6_addr *addr)
299 .hash_size = IP_HASHMAX,
300 .hash_table = ip_table,
301 .name = "auth.unix.ip",
302 .cache_put = ip_map_put,
303 .cache_upcall = ip_map_upcall,
304 .cache_parse = ip_map_parse,
305 .cache_show = ip_map_show,
306 .match = ip_map_match,
307 .init = ip_map_init,
308 .update = update,
309 .alloc = ip_map_alloc,
310};
311
312static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
313{ 299{
314 struct ip_map ip; 300 struct ip_map ip;
315 struct cache_head *ch; 301 struct cache_head *ch;
316 302
317 strcpy(ip.m_class, class); 303 strcpy(ip.m_class, class);
318 ipv6_addr_copy(&ip.m_addr, addr); 304 ipv6_addr_copy(&ip.m_addr, addr);
319 ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, 305 ch = sunrpc_cache_lookup(cd, &ip.h,
320 hash_str(class, IP_HASHBITS) ^ 306 hash_str(class, IP_HASHBITS) ^
321 hash_ip6(*addr)); 307 hash_ip6(*addr));
322 308
@@ -326,7 +312,17 @@ static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
326 return NULL; 312 return NULL;
327} 313}
328 314
329static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) 315static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
316 struct in6_addr *addr)
317{
318 struct sunrpc_net *sn;
319
320 sn = net_generic(net, sunrpc_net_id);
321 return __ip_map_lookup(sn->ip_map_cache, class, addr);
322}
323
324static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
325 struct unix_domain *udom, time_t expiry)
330{ 326{
331 struct ip_map ip; 327 struct ip_map ip;
332 struct cache_head *ch; 328 struct cache_head *ch;
@@ -344,17 +340,25 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
344 ip.m_add_change++; 340 ip.m_add_change++;
345 } 341 }
346 ip.h.expiry_time = expiry; 342 ip.h.expiry_time = expiry;
347 ch = sunrpc_cache_update(&ip_map_cache, 343 ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
348 &ip.h, &ipm->h,
349 hash_str(ipm->m_class, IP_HASHBITS) ^ 344 hash_str(ipm->m_class, IP_HASHBITS) ^
350 hash_ip6(ipm->m_addr)); 345 hash_ip6(ipm->m_addr));
351 if (!ch) 346 if (!ch)
352 return -ENOMEM; 347 return -ENOMEM;
353 cache_put(ch, &ip_map_cache); 348 cache_put(ch, cd);
354 return 0; 349 return 0;
355} 350}
356 351
357int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) 352static inline int ip_map_update(struct net *net, struct ip_map *ipm,
353 struct unix_domain *udom, time_t expiry)
354{
355 struct sunrpc_net *sn;
356
357 sn = net_generic(net, sunrpc_net_id);
358 return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
359}
360
361int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom)
358{ 362{
359 struct unix_domain *udom; 363 struct unix_domain *udom;
360 struct ip_map *ipmp; 364 struct ip_map *ipmp;
@@ -362,10 +366,10 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
362 if (dom->flavour != &svcauth_unix) 366 if (dom->flavour != &svcauth_unix)
363 return -EINVAL; 367 return -EINVAL;
364 udom = container_of(dom, struct unix_domain, h); 368 udom = container_of(dom, struct unix_domain, h);
365 ipmp = ip_map_lookup("nfsd", addr); 369 ipmp = ip_map_lookup(net, "nfsd", addr);
366 370
367 if (ipmp) 371 if (ipmp)
368 return ip_map_update(ipmp, udom, NEVER); 372 return ip_map_update(net, ipmp, udom, NEVER);
369 else 373 else
370 return -ENOMEM; 374 return -ENOMEM;
371} 375}
@@ -383,16 +387,18 @@ int auth_unix_forget_old(struct auth_domain *dom)
383} 387}
384EXPORT_SYMBOL_GPL(auth_unix_forget_old); 388EXPORT_SYMBOL_GPL(auth_unix_forget_old);
385 389
386struct auth_domain *auth_unix_lookup(struct in6_addr *addr) 390struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr)
387{ 391{
388 struct ip_map *ipm; 392 struct ip_map *ipm;
389 struct auth_domain *rv; 393 struct auth_domain *rv;
394 struct sunrpc_net *sn;
390 395
391 ipm = ip_map_lookup("nfsd", addr); 396 sn = net_generic(net, sunrpc_net_id);
397 ipm = ip_map_lookup(net, "nfsd", addr);
392 398
393 if (!ipm) 399 if (!ipm)
394 return NULL; 400 return NULL;
395 if (cache_check(&ip_map_cache, &ipm->h, NULL)) 401 if (cache_check(sn->ip_map_cache, &ipm->h, NULL))
396 return NULL; 402 return NULL;
397 403
398 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { 404 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
@@ -403,22 +409,29 @@ struct auth_domain *auth_unix_lookup(struct in6_addr *addr)
403 rv = &ipm->m_client->h; 409 rv = &ipm->m_client->h;
404 kref_get(&rv->ref); 410 kref_get(&rv->ref);
405 } 411 }
406 cache_put(&ipm->h, &ip_map_cache); 412 cache_put(&ipm->h, sn->ip_map_cache);
407 return rv; 413 return rv;
408} 414}
409EXPORT_SYMBOL_GPL(auth_unix_lookup); 415EXPORT_SYMBOL_GPL(auth_unix_lookup);
410 416
411void svcauth_unix_purge(void) 417void svcauth_unix_purge(void)
412{ 418{
413 cache_purge(&ip_map_cache); 419 struct net *net;
420
421 for_each_net(net) {
422 struct sunrpc_net *sn;
423
424 sn = net_generic(net, sunrpc_net_id);
425 cache_purge(sn->ip_map_cache);
426 }
414} 427}
415EXPORT_SYMBOL_GPL(svcauth_unix_purge); 428EXPORT_SYMBOL_GPL(svcauth_unix_purge);
416 429
417static inline struct ip_map * 430static inline struct ip_map *
418ip_map_cached_get(struct svc_rqst *rqstp) 431ip_map_cached_get(struct svc_xprt *xprt)
419{ 432{
420 struct ip_map *ipm = NULL; 433 struct ip_map *ipm = NULL;
421 struct svc_xprt *xprt = rqstp->rq_xprt; 434 struct sunrpc_net *sn;
422 435
423 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 436 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
424 spin_lock(&xprt->xpt_lock); 437 spin_lock(&xprt->xpt_lock);
@@ -430,9 +443,10 @@ ip_map_cached_get(struct svc_rqst *rqstp)
430 * remembered, e.g. by a second mount from the 443 * remembered, e.g. by a second mount from the
431 * same IP address. 444 * same IP address.
432 */ 445 */
446 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
433 xprt->xpt_auth_cache = NULL; 447 xprt->xpt_auth_cache = NULL;
434 spin_unlock(&xprt->xpt_lock); 448 spin_unlock(&xprt->xpt_lock);
435 cache_put(&ipm->h, &ip_map_cache); 449 cache_put(&ipm->h, sn->ip_map_cache);
436 return NULL; 450 return NULL;
437 } 451 }
438 cache_get(&ipm->h); 452 cache_get(&ipm->h);
@@ -443,10 +457,8 @@ ip_map_cached_get(struct svc_rqst *rqstp)
443} 457}
444 458
445static inline void 459static inline void
446ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) 460ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
447{ 461{
448 struct svc_xprt *xprt = rqstp->rq_xprt;
449
450 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 462 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
451 spin_lock(&xprt->xpt_lock); 463 spin_lock(&xprt->xpt_lock);
452 if (xprt->xpt_auth_cache == NULL) { 464 if (xprt->xpt_auth_cache == NULL) {
@@ -456,15 +468,26 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
456 } 468 }
457 spin_unlock(&xprt->xpt_lock); 469 spin_unlock(&xprt->xpt_lock);
458 } 470 }
459 if (ipm) 471 if (ipm) {
460 cache_put(&ipm->h, &ip_map_cache); 472 struct sunrpc_net *sn;
473
474 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
475 cache_put(&ipm->h, sn->ip_map_cache);
476 }
461} 477}
462 478
463void 479void
464svcauth_unix_info_release(void *info) 480svcauth_unix_info_release(struct svc_xprt *xpt)
465{ 481{
466 struct ip_map *ipm = info; 482 struct ip_map *ipm;
467 cache_put(&ipm->h, &ip_map_cache); 483
484 ipm = xpt->xpt_auth_cache;
485 if (ipm != NULL) {
486 struct sunrpc_net *sn;
487
488 sn = net_generic(xpt->xpt_net, sunrpc_net_id);
489 cache_put(&ipm->h, sn->ip_map_cache);
490 }
468} 491}
469 492
470/**************************************************************************** 493/****************************************************************************
@@ -674,6 +697,8 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
674 switch (ret) { 697 switch (ret) {
675 case -ENOENT: 698 case -ENOENT:
676 return ERR_PTR(-ENOENT); 699 return ERR_PTR(-ENOENT);
700 case -ETIMEDOUT:
701 return ERR_PTR(-ESHUTDOWN);
677 case 0: 702 case 0:
678 gi = get_group_info(ug->gi); 703 gi = get_group_info(ug->gi);
679 cache_put(&ug->h, &unix_gid_cache); 704 cache_put(&ug->h, &unix_gid_cache);
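The -ETIMEDOUT to -ESHUTDOWN translation above pairs with the new SVC_CLOSE cases further down. The end-to-end error plumbing, traced across the hunks in this patch:

    /*
     * cache_check()             -> -ETIMEDOUT  (cache upcall timed out)
     * unix_gid_find()           -> ERR_PTR(-ESHUTDOWN)
     * svcauth_unix_set_client() -> SVC_CLOSE   (for -ESHUTDOWN here, and
     *                              for -ETIMEDOUT from the ip_map cache
     *                              directly)
     * svc_process_common()      -> close a temporary transport, then
     *                              drop the request (SVC_CLOSE case)
     */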
@@ -691,6 +716,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
691 struct ip_map *ipm; 716 struct ip_map *ipm;
692 struct group_info *gi; 717 struct group_info *gi;
693 struct svc_cred *cred = &rqstp->rq_cred; 718 struct svc_cred *cred = &rqstp->rq_cred;
719 struct svc_xprt *xprt = rqstp->rq_xprt;
720 struct net *net = xprt->xpt_net;
721 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
694 722
695 switch (rqstp->rq_addr.ss_family) { 723 switch (rqstp->rq_addr.ss_family) {
696 case AF_INET: 724 case AF_INET:
@@ -709,26 +737,27 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
709 if (rqstp->rq_proc == 0) 737 if (rqstp->rq_proc == 0)
710 return SVC_OK; 738 return SVC_OK;
711 739
712 ipm = ip_map_cached_get(rqstp); 740 ipm = ip_map_cached_get(xprt);
713 if (ipm == NULL) 741 if (ipm == NULL)
714 ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, 742 ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
715 &sin6->sin6_addr); 743 &sin6->sin6_addr);
716 744
717 if (ipm == NULL) 745 if (ipm == NULL)
718 return SVC_DENIED; 746 return SVC_DENIED;
719 747
720 switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { 748 switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
721 default: 749 default:
722 BUG(); 750 BUG();
723 case -EAGAIN:
724 case -ETIMEDOUT: 751 case -ETIMEDOUT:
752 return SVC_CLOSE;
753 case -EAGAIN:
725 return SVC_DROP; 754 return SVC_DROP;
726 case -ENOENT: 755 case -ENOENT:
727 return SVC_DENIED; 756 return SVC_DENIED;
728 case 0: 757 case 0:
729 rqstp->rq_client = &ipm->m_client->h; 758 rqstp->rq_client = &ipm->m_client->h;
730 kref_get(&rqstp->rq_client->ref); 759 kref_get(&rqstp->rq_client->ref);
731 ip_map_cached_put(rqstp, ipm); 760 ip_map_cached_put(xprt, ipm);
732 break; 761 break;
733 } 762 }
734 763
@@ -736,6 +765,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
736 switch (PTR_ERR(gi)) { 765 switch (PTR_ERR(gi)) {
737 case -EAGAIN: 766 case -EAGAIN:
738 return SVC_DROP; 767 return SVC_DROP;
768 case -ESHUTDOWN:
769 return SVC_CLOSE;
739 case -ENOENT: 770 case -ENOENT:
740 break; 771 break;
741 default: 772 default:
@@ -776,7 +807,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
776 cred->cr_gid = (gid_t) -1; 807 cred->cr_gid = (gid_t) -1;
777 cred->cr_group_info = groups_alloc(0); 808 cred->cr_group_info = groups_alloc(0);
778 if (cred->cr_group_info == NULL) 809 if (cred->cr_group_info == NULL)
779 return SVC_DROP; /* kmalloc failure - client must retry */ 810 return SVC_CLOSE; /* kmalloc failure - client must retry */
780 811
781 /* Put NULL verifier */ 812 /* Put NULL verifier */
782 svc_putnl(resv, RPC_AUTH_NULL); 813 svc_putnl(resv, RPC_AUTH_NULL);
@@ -840,7 +871,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
840 goto badcred; 871 goto badcred;
841 cred->cr_group_info = groups_alloc(slen); 872 cred->cr_group_info = groups_alloc(slen);
842 if (cred->cr_group_info == NULL) 873 if (cred->cr_group_info == NULL)
843 return SVC_DROP; 874 return SVC_CLOSE;
844 for (i = 0; i < slen; i++) 875 for (i = 0; i < slen; i++)
845 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); 876 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
846 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { 877 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
@@ -886,3 +917,56 @@ struct auth_ops svcauth_unix = {
886 .set_client = svcauth_unix_set_client, 917 .set_client = svcauth_unix_set_client,
887}; 918};
888 919
920int ip_map_cache_create(struct net *net)
921{
922 int err = -ENOMEM;
923 struct cache_detail *cd;
924 struct cache_head **tbl;
925 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
926
927 cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
928 if (cd == NULL)
929 goto err_cd;
930
931 tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL);
932 if (tbl == NULL)
933 goto err_tbl;
934
935	cd->owner = THIS_MODULE;
936	cd->hash_size = IP_HASHMAX;
937	cd->hash_table = tbl;
938	cd->name = "auth.unix.ip";
939	cd->cache_put = ip_map_put;
940	cd->cache_upcall = ip_map_upcall;
941	cd->cache_parse = ip_map_parse;
942	cd->cache_show = ip_map_show;
943	cd->match = ip_map_match;
944	cd->init = ip_map_init;
945	cd->update = update;
946	cd->alloc = ip_map_alloc;
947
948 err = cache_register_net(cd, net);
949 if (err)
950 goto err_reg;
951
952 sn->ip_map_cache = cd;
953 return 0;
954
955err_reg:
956 kfree(tbl);
957err_tbl:
958 kfree(cd);
959err_cd:
960 return err;
961}
962
963void ip_map_cache_destroy(struct net *net)
964{
965 struct sunrpc_net *sn;
966
967 sn = net_generic(net, sunrpc_net_id);
968 cache_purge(sn->ip_map_cache);
969 cache_unregister_net(sn->ip_map_cache, net);
970 kfree(sn->ip_map_cache->hash_table);
971 kfree(sn->ip_map_cache);
972}
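With the global ip_map_cache gone, every lookup in this file goes through the namespace's sunrpc_net. The recurring idiom, factored into a sketch (the helper name is illustrative; sunrpc_net comes from the series' "netns.h"):

    #include <net/netns/generic.h>

    static struct cache_detail *ip_map_cache_of(struct net *net)
    {
            struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);

            return sn->ip_map_cache;
    }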
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7e534dd09077..07919e16be3e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -64,7 +64,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *);
64static void svc_sock_free(struct svc_xprt *); 64static void svc_sock_free(struct svc_xprt *);
65 65
66static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 66static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
67 struct sockaddr *, int, int); 67 struct net *, struct sockaddr *,
68 int, int);
68#ifdef CONFIG_DEBUG_LOCK_ALLOC 69#ifdef CONFIG_DEBUG_LOCK_ALLOC
69static struct lock_class_key svc_key[2]; 70static struct lock_class_key svc_key[2];
70static struct lock_class_key svc_slock_key[2]; 71static struct lock_class_key svc_slock_key[2];
@@ -657,10 +658,11 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
657} 658}
658 659
659static struct svc_xprt *svc_udp_create(struct svc_serv *serv, 660static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
661 struct net *net,
660 struct sockaddr *sa, int salen, 662 struct sockaddr *sa, int salen,
661 int flags) 663 int flags)
662{ 664{
663 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); 665 return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
664} 666}
665 667
666static struct svc_xprt_ops svc_udp_ops = { 668static struct svc_xprt_ops svc_udp_ops = {
@@ -1133,9 +1135,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
1133 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 1135 reclen = htonl(0x80000000|((xbufp->len ) - 4));
1134 memcpy(xbufp->head[0].iov_base, &reclen, 4); 1136 memcpy(xbufp->head[0].iov_base, &reclen, 4);
1135 1137
1136 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
1137 return -ENOTCONN;
1138
1139 sent = svc_sendto(rqstp, &rqstp->rq_res); 1138 sent = svc_sendto(rqstp, &rqstp->rq_res);
1140 if (sent != xbufp->len) { 1139 if (sent != xbufp->len) {
1141 printk(KERN_NOTICE 1140 printk(KERN_NOTICE
@@ -1178,10 +1177,11 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
1178} 1177}
1179 1178
1180static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, 1179static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1180 struct net *net,
1181 struct sockaddr *sa, int salen, 1181 struct sockaddr *sa, int salen,
1182 int flags) 1182 int flags)
1183{ 1183{
1184 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); 1184 return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
1185} 1185}
1186 1186
1187static struct svc_xprt_ops svc_tcp_ops = { 1187static struct svc_xprt_ops svc_tcp_ops = {
@@ -1258,19 +1258,13 @@ void svc_sock_update_bufs(struct svc_serv *serv)
1258 * The number of server threads has changed. Update 1258 * The number of server threads has changed. Update
1259 * rcvbuf and sndbuf accordingly on all sockets 1259 * rcvbuf and sndbuf accordingly on all sockets
1260 */ 1260 */
1261 struct list_head *le; 1261 struct svc_sock *svsk;
1262 1262
1263 spin_lock_bh(&serv->sv_lock); 1263 spin_lock_bh(&serv->sv_lock);
1264 list_for_each(le, &serv->sv_permsocks) { 1264 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
1265 struct svc_sock *svsk =
1266 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1265 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1268 } 1266 list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list)
1269 list_for_each(le, &serv->sv_tempsocks) {
1270 struct svc_sock *svsk =
1271 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1272 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1273 }
1274 spin_unlock_bh(&serv->sv_lock); 1268 spin_unlock_bh(&serv->sv_lock);
1275} 1269}
1276EXPORT_SYMBOL_GPL(svc_sock_update_bufs); 1270EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
@@ -1385,6 +1379,7 @@ EXPORT_SYMBOL_GPL(svc_addsock);
1385 */ 1379 */
1386static struct svc_xprt *svc_create_socket(struct svc_serv *serv, 1380static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1387 int protocol, 1381 int protocol,
1382 struct net *net,
1388 struct sockaddr *sin, int len, 1383 struct sockaddr *sin, int len,
1389 int flags) 1384 int flags)
1390{ 1385{
@@ -1421,7 +1416,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1421 return ERR_PTR(-EINVAL); 1416 return ERR_PTR(-EINVAL);
1422 } 1417 }
1423 1418
1424 error = sock_create_kern(family, type, protocol, &sock); 1419 error = __sock_create(net, family, type, protocol, &sock, 1);
1425 if (error < 0) 1420 if (error < 0)
1426 return ERR_PTR(error); 1421 return ERR_PTR(error);
1427 1422
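sock_create_kern() always creates the socket in init_net; the replacement passes the transport's namespace explicitly. For reference, the signature being called (the final argument flags a kernel-internal socket):

    /* int __sock_create(struct net *net, int family, int type,
     *                   int protocol, struct socket **res, int kern); */
    error = __sock_create(net, family, type, protocol, &sock, 1);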
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index a1f82a87d34d..cd9e841e7492 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -111,6 +111,23 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
111} 111}
112EXPORT_SYMBOL_GPL(xdr_decode_string_inplace); 112EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
113 113
114/**
115 * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf
116 * @buf: XDR buffer where string resides
117 * @len: length of string, in bytes
118 *
119 */
120void
121xdr_terminate_string(struct xdr_buf *buf, const u32 len)
122{
123 char *kaddr;
124
125 kaddr = kmap_atomic(buf->pages[0], KM_USER0);
126 kaddr[buf->page_base + len] = '\0';
127 kunmap_atomic(kaddr, KM_USER0);
128}
129EXPORT_SYMBOL(xdr_terminate_string);
130
114void 131void
115xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, 132xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
116 unsigned int len) 133 unsigned int len)
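xdr_terminate_string() writes the NUL through a kmap of buf->pages[0], so the string and its terminator must fall within the first page of the buffer's page data. A typical call, assuming len bytes of string were just decoded into that page data:

    static void example_terminate(struct xdr_buf *buf, u32 len)
    {
            /* len bytes of string start at buf->page_base in pages[0];
             * write the '\0' in place via a temporary atomic kmap */
            xdr_terminate_string(buf, len);
    }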
@@ -395,24 +412,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
395{ 412{
396 struct kvec *tail; 413 struct kvec *tail;
397 size_t copy; 414 size_t copy;
398 char *p;
399 unsigned int pglen = buf->page_len; 415 unsigned int pglen = buf->page_len;
416 unsigned int tailbuf_len;
400 417
401 tail = buf->tail; 418 tail = buf->tail;
402 BUG_ON (len > pglen); 419 BUG_ON (len > pglen);
403 420
421 tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
422
404 /* Shift the tail first */ 423 /* Shift the tail first */
405 if (tail->iov_len != 0) { 424 if (tailbuf_len != 0) {
406 p = (char *)tail->iov_base + len; 425 unsigned int free_space = tailbuf_len - tail->iov_len;
426
427 if (len < free_space)
428 free_space = len;
429 tail->iov_len += free_space;
430
431 copy = len;
407 if (tail->iov_len > len) { 432 if (tail->iov_len > len) {
408 copy = tail->iov_len - len; 433 char *p = (char *)tail->iov_base + len;
409 memmove(p, tail->iov_base, copy); 434 memmove(p, tail->iov_base, tail->iov_len - len);
410 } else 435 } else
411 buf->buflen -= len;
412 /* Copy from the inlined pages into the tail */
413 copy = len;
414 if (copy > tail->iov_len)
415 copy = tail->iov_len; 436 copy = tail->iov_len;
437 /* Copy from the inlined pages into the tail */
416 _copy_from_pages((char *)tail->iov_base, 438 _copy_from_pages((char *)tail->iov_base,
417 buf->pages, buf->page_base + pglen - len, 439 buf->pages, buf->page_base + pglen - len,
418 copy); 440 copy);
@@ -551,6 +573,27 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
551EXPORT_SYMBOL_GPL(xdr_init_decode); 573EXPORT_SYMBOL_GPL(xdr_init_decode);
552 574
553/** 575/**
576 * xdr_inline_peek - Allow read-ahead in the XDR data stream
577 * @xdr: pointer to xdr_stream struct
578 * @nbytes: number of bytes of data to decode
579 *
580 * Check that the input buffer is long enough to allow decoding of
581 * 'nbytes' more bytes of data starting at the current position.
582 * If so, return the current pointer without advancing the stream
583 * position.
584 */
585__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
586{
587 __be32 *p = xdr->p;
588 __be32 *q = p + XDR_QUADLEN(nbytes);
589
590 if (unlikely(q > xdr->end || q < p))
591 return NULL;
592 return p;
593}
594EXPORT_SYMBOL_GPL(xdr_inline_peek);
595
596/**
554 * xdr_inline_decode - Retrieve non-page XDR data to decode 597 * xdr_inline_decode - Retrieve non-page XDR data to decode
555 * @xdr: pointer to xdr_stream struct 598 * @xdr: pointer to xdr_stream struct
556 * @nbytes: number of bytes of data to decode 599 * @nbytes: number of bytes of data to decode
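xdr_inline_peek() enables read-ahead: on success the stream position is unchanged, so a later xdr_inline_decode() consumes the same words. A sketch of the intended pattern:

    static bool next_word_is(struct xdr_stream *xdr, u32 value)
    {
            __be32 *p = xdr_inline_peek(xdr, sizeof(*p));

            /* NULL: fewer than 4 bytes remain in the inline buffer */
            return p != NULL && ntohl(*p) == value;
    }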
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 970fb00f388c..4c8f18aff7c3 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -199,8 +199,6 @@ int xprt_reserve_xprt(struct rpc_task *task)
199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
200 if (task == xprt->snd_task) 200 if (task == xprt->snd_task)
201 return 1; 201 return 1;
202 if (task == NULL)
203 return 0;
204 goto out_sleep; 202 goto out_sleep;
205 } 203 }
206 xprt->snd_task = task; 204 xprt->snd_task = task;
@@ -757,13 +755,11 @@ static void xprt_connect_status(struct rpc_task *task)
757 */ 755 */
758struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) 756struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
759{ 757{
760 struct list_head *pos; 758 struct rpc_rqst *entry;
761 759
762 list_for_each(pos, &xprt->recv) { 760 list_for_each_entry(entry, &xprt->recv, rq_list)
763 struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
764 if (entry->rq_xid == xid) 761 if (entry->rq_xid == xid)
765 return entry; 762 return entry;
766 }
767 763
768 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", 764 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
769 ntohl(xid)); 765 ntohl(xid));
@@ -962,6 +958,37 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
962 spin_unlock(&xprt->reserve_lock); 958 spin_unlock(&xprt->reserve_lock);
963} 959}
964 960
961struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
962{
963 struct rpc_xprt *xprt;
964
965 xprt = kzalloc(size, GFP_KERNEL);
966 if (xprt == NULL)
967 goto out;
968
969 xprt->max_reqs = max_req;
970 xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
971 if (xprt->slot == NULL)
972 goto out_free;
973
974 xprt->xprt_net = get_net(net);
975 return xprt;
976
977out_free:
978 kfree(xprt);
979out:
980 return NULL;
981}
982EXPORT_SYMBOL_GPL(xprt_alloc);
983
984void xprt_free(struct rpc_xprt *xprt)
985{
986 put_net(xprt->xprt_net);
987 kfree(xprt->slot);
988 kfree(xprt);
989}
990EXPORT_SYMBOL_GPL(xprt_free);
991
965/** 992/**
966 * xprt_reserve - allocate an RPC request slot 993 * xprt_reserve - allocate an RPC request slot
967 * @task: RPC task requesting a slot allocation 994 * @task: RPC task requesting a slot allocation
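xprt_alloc()/xprt_free() centralize the transport allocation: the struct itself, the rpc_rqst slot table, and a reference on the owning netns. The xprtrdma and xprtsock conversions below use exactly this shape:

    xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
                      xprt_rdma_slot_table_entries);
    if (xprt == NULL)
            return ERR_PTR(-ENOMEM);

    /* and on teardown or error unwind: */
    xprt_free(xprt);    /* puts xprt_net, frees slot table and xprt */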
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index d718b8fa9525..09af4fab1a45 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -43,6 +43,7 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/fs.h> 44#include <linux/fs.h>
45#include <linux/sysctl.h> 45#include <linux/sysctl.h>
46#include <linux/workqueue.h>
46#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
47#include <linux/sunrpc/sched.h> 48#include <linux/sunrpc/sched.h>
48#include <linux/sunrpc/svc_rdma.h> 49#include <linux/sunrpc/svc_rdma.h>
@@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod;
74struct kmem_cache *svc_rdma_map_cachep; 75struct kmem_cache *svc_rdma_map_cachep;
75struct kmem_cache *svc_rdma_ctxt_cachep; 76struct kmem_cache *svc_rdma_ctxt_cachep;
76 77
78struct workqueue_struct *svc_rdma_wq;
79
77/* 80/*
78 * This function implements reading and resetting an atomic_t stat 81 * This function implements reading and resetting an atomic_t stat
79 * variable through read/write to a proc file. Any write to the file 82 * variable through read/write to a proc file. Any write to the file
@@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = {
231void svc_rdma_cleanup(void) 234void svc_rdma_cleanup(void)
232{ 235{
233 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 236 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
234 flush_scheduled_work(); 237 destroy_workqueue(svc_rdma_wq);
235 if (svcrdma_table_header) { 238 if (svcrdma_table_header) {
236 unregister_sysctl_table(svcrdma_table_header); 239 unregister_sysctl_table(svcrdma_table_header);
237 svcrdma_table_header = NULL; 240 svcrdma_table_header = NULL;
@@ -249,6 +252,11 @@ int svc_rdma_init(void)
249 dprintk("\tsq_depth : %d\n", 252 dprintk("\tsq_depth : %d\n",
250 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); 253 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
251 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); 254 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
255
256 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
257 if (!svc_rdma_wq)
258 return -ENOMEM;
259
252 if (!svcrdma_table_header) 260 if (!svcrdma_table_header)
253 svcrdma_table_header = 261 svcrdma_table_header =
254 register_sysctl_table(svcrdma_root_table); 262 register_sysctl_table(svcrdma_root_table);
@@ -283,6 +291,7 @@ int svc_rdma_init(void)
283 kmem_cache_destroy(svc_rdma_map_cachep); 291 kmem_cache_destroy(svc_rdma_map_cachep);
284 err0: 292 err0:
285 unregister_sysctl_table(svcrdma_table_header); 293 unregister_sysctl_table(svcrdma_table_header);
294 destroy_workqueue(svc_rdma_wq);
286 return -ENOMEM; 295 return -ENOMEM;
287} 296}
288MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 297MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0194de814933..df67211c4baf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; 263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) { 264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265 frmr->page_list->page_list[page_no] = 265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device, 266 ib_dma_map_page(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]), 267 rqstp->rq_arg.pages[page_no], 0,
268 PAGE_SIZE, DMA_FROM_DEVICE); 268 PAGE_SIZE, DMA_FROM_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no])) 270 frmr->page_list->page_list[page_no]))
271 goto fatal_err; 271 goto fatal_err;
@@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
309 int count) 309 int count)
310{ 310{
311 int i; 311 int i;
312 unsigned long off;
312 313
313 ctxt->count = count; 314 ctxt->count = count;
314 ctxt->direction = DMA_FROM_DEVICE; 315 ctxt->direction = DMA_FROM_DEVICE;
315 for (i = 0; i < count; i++) { 316 for (i = 0; i < count; i++) {
316 ctxt->sge[i].length = 0; /* in case map fails */ 317 ctxt->sge[i].length = 0; /* in case map fails */
317 if (!frmr) { 318 if (!frmr) {
319 BUG_ON(0 == virt_to_page(vec[i].iov_base));
320 off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
318 ctxt->sge[i].addr = 321 ctxt->sge[i].addr =
319 ib_dma_map_single(xprt->sc_cm_id->device, 322 ib_dma_map_page(xprt->sc_cm_id->device,
320 vec[i].iov_base, 323 virt_to_page(vec[i].iov_base),
321 vec[i].iov_len, 324 off,
322 DMA_FROM_DEVICE); 325 vec[i].iov_len,
326 DMA_FROM_DEVICE);
323 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 327 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324 ctxt->sge[i].addr)) 328 ctxt->sge[i].addr))
325 return -EINVAL; 329 return -EINVAL;
@@ -491,6 +495,7 @@ next_sge:
491 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", 495 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
492 err); 496 err);
493 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 497 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
498 svc_rdma_unmap_dma(ctxt);
494 svc_rdma_put_context(ctxt, 0); 499 svc_rdma_put_context(ctxt, 0);
495 goto out; 500 goto out;
496 } 501 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b15e1ebb2bfa..249a835b703f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -70,8 +70,8 @@
 70 * one extra page for the RPCRDMA header. 70 * one extra page for the RPCRDMA header.
71 */ 71 */
72static int fast_reg_xdr(struct svcxprt_rdma *xprt, 72static int fast_reg_xdr(struct svcxprt_rdma *xprt,
73 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
74 struct svc_rdma_req_map *vec) 74 struct svc_rdma_req_map *vec)
75{ 75{
76 int sge_no; 76 int sge_no;
77 u32 sge_bytes; 77 u32 sge_bytes;
@@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
96 vec->count = 2; 96 vec->count = 2;
97 sge_no++; 97 sge_no++;
98 98
99 /* Build the FRMR */ 99 /* Map the XDR head */
100 frmr->kva = frva; 100 frmr->kva = frva;
101 frmr->direction = DMA_TO_DEVICE; 101 frmr->direction = DMA_TO_DEVICE;
102 frmr->access_flags = 0; 102 frmr->access_flags = 0;
103 frmr->map_len = PAGE_SIZE; 103 frmr->map_len = PAGE_SIZE;
104 frmr->page_list_len = 1; 104 frmr->page_list_len = 1;
105 page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
105 frmr->page_list->page_list[page_no] = 106 frmr->page_list->page_list[page_no] =
106 ib_dma_map_single(xprt->sc_cm_id->device, 107 ib_dma_map_page(xprt->sc_cm_id->device,
107 (void *)xdr->head[0].iov_base, 108 virt_to_page(xdr->head[0].iov_base),
108 PAGE_SIZE, DMA_TO_DEVICE); 109 page_off,
110 PAGE_SIZE - page_off,
111 DMA_TO_DEVICE);
109 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 112 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110 frmr->page_list->page_list[page_no])) 113 frmr->page_list->page_list[page_no]))
111 goto fatal_err; 114 goto fatal_err;
112 atomic_inc(&xprt->sc_dma_used); 115 atomic_inc(&xprt->sc_dma_used);
113 116
117 /* Map the XDR page list */
114 page_off = xdr->page_base; 118 page_off = xdr->page_base;
115 page_bytes = xdr->page_len + page_off; 119 page_bytes = xdr->page_len + page_off;
116 if (!page_bytes) 120 if (!page_bytes)
@@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
128 page_bytes -= sge_bytes; 132 page_bytes -= sge_bytes;
129 133
130 frmr->page_list->page_list[page_no] = 134 frmr->page_list->page_list[page_no] =
131 ib_dma_map_single(xprt->sc_cm_id->device, 135 ib_dma_map_page(xprt->sc_cm_id->device,
132 page_address(page), 136 page, page_off,
133 PAGE_SIZE, DMA_TO_DEVICE); 137 sge_bytes, DMA_TO_DEVICE);
134 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 138 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
135 frmr->page_list->page_list[page_no])) 139 frmr->page_list->page_list[page_no]))
136 goto fatal_err; 140 goto fatal_err;
@@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
166 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; 170 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
167 171
168 frmr->page_list->page_list[page_no] = 172 frmr->page_list->page_list[page_no] =
169 ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, 173 ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
170 DMA_TO_DEVICE); 174 page_off,
175 PAGE_SIZE,
176 DMA_TO_DEVICE);
171 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 177 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
172 frmr->page_list->page_list[page_no])) 178 frmr->page_list->page_list[page_no]))
173 goto fatal_err; 179 goto fatal_err;
@@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt,
245 return 0; 251 return 0;
246} 252}
247 253
254static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
255 struct xdr_buf *xdr,
256 u32 xdr_off, size_t len, int dir)
257{
258 struct page *page;
259 dma_addr_t dma_addr;
260 if (xdr_off < xdr->head[0].iov_len) {
261 /* This offset is in the head */
262 xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
263 page = virt_to_page(xdr->head[0].iov_base);
264 } else {
265 xdr_off -= xdr->head[0].iov_len;
266 if (xdr_off < xdr->page_len) {
267 /* This offset is in the page list */
268 page = xdr->pages[xdr_off >> PAGE_SHIFT];
269 xdr_off &= ~PAGE_MASK;
270 } else {
271 /* This offset is in the tail */
272 xdr_off -= xdr->page_len;
273 xdr_off += (unsigned long)
274 xdr->tail[0].iov_base & ~PAGE_MASK;
275 page = virt_to_page(xdr->tail[0].iov_base);
276 }
277 }
278 dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
279 min_t(size_t, PAGE_SIZE, len), dir);
280 return dma_addr;
281}
282
248/* Assumptions: 283/* Assumptions:
249 * - We are using FRMR 284 * - We are using FRMR
250 * - or - 285 * - or -
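dma_map_xdr() resolves a linear offset into the marshalled xdr_buf to a (page, in-page offset) pair before mapping. A worked example, assuming PAGE_SIZE of 4096 and a 128-byte head:

    /*
     * xdr_off = 5000
     *   5000 >= head[0].iov_len (128)  -> not in the head
     *   xdr_off -= 128                 -> 4872, < page_len: page list
     *   page    = xdr->pages[4872 >> PAGE_SHIFT] = xdr->pages[1]
     *   offset  = 4872 & ~PAGE_MASK    = 776
     * so the mapping covers min(PAGE_SIZE, len) bytes starting 776
     * bytes into the second page-list page.
     */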
@@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
293 sge[sge_no].length = sge_bytes; 328 sge[sge_no].length = sge_bytes;
294 if (!vec->frmr) { 329 if (!vec->frmr) {
295 sge[sge_no].addr = 330 sge[sge_no].addr =
296 ib_dma_map_single(xprt->sc_cm_id->device, 331 dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
297 (void *) 332 sge_bytes, DMA_TO_DEVICE);
298 vec->sge[xdr_sge_no].iov_base + sge_off, 333 xdr_off += sge_bytes;
299 sge_bytes, DMA_TO_DEVICE);
300 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 334 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
301 sge[sge_no].addr)) 335 sge[sge_no].addr))
302 goto err; 336 goto err;
@@ -333,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
333 goto err; 367 goto err;
334 return 0; 368 return 0;
335 err: 369 err:
370 svc_rdma_unmap_dma(ctxt);
371 svc_rdma_put_frmr(xprt, vec->frmr);
336 svc_rdma_put_context(ctxt, 0); 372 svc_rdma_put_context(ctxt, 0);
337 /* Fatal error, close transport */ 373 /* Fatal error, close transport */
338 return -EIO; 374 return -EIO;
@@ -494,7 +530,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
494 * In all three cases, this function prepares the RPCRDMA header in 530 * In all three cases, this function prepares the RPCRDMA header in
495 * sge[0], the 'type' parameter indicates the type to place in the 531 * sge[0], the 'type' parameter indicates the type to place in the
496 * RPCRDMA header, and the 'byte_count' field indicates how much of 532 * RPCRDMA header, and the 'byte_count' field indicates how much of
497 * the XDR to include in this RDMA_SEND. 533 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
534 * to send is zero in the XDR.
498 */ 535 */
499static int send_reply(struct svcxprt_rdma *rdma, 536static int send_reply(struct svcxprt_rdma *rdma,
500 struct svc_rqst *rqstp, 537 struct svc_rqst *rqstp,
@@ -536,23 +573,24 @@ static int send_reply(struct svcxprt_rdma *rdma,
536 ctxt->sge[0].lkey = rdma->sc_dma_lkey; 573 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
537 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 574 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
538 ctxt->sge[0].addr = 575 ctxt->sge[0].addr =
539 ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), 576 ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
540 ctxt->sge[0].length, DMA_TO_DEVICE); 577 ctxt->sge[0].length, DMA_TO_DEVICE);
541 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 578 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
542 goto err; 579 goto err;
543 atomic_inc(&rdma->sc_dma_used); 580 atomic_inc(&rdma->sc_dma_used);
544 581
545 ctxt->direction = DMA_TO_DEVICE; 582 ctxt->direction = DMA_TO_DEVICE;
546 583
547 /* Determine how many of our SGE are to be transmitted */ 584 /* Map the payload indicated by 'byte_count' */
548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 585 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
586 int xdr_off = 0;
549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 587 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
550 byte_count -= sge_bytes; 588 byte_count -= sge_bytes;
551 if (!vec->frmr) { 589 if (!vec->frmr) {
552 ctxt->sge[sge_no].addr = 590 ctxt->sge[sge_no].addr =
553 ib_dma_map_single(rdma->sc_cm_id->device, 591 dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
554 vec->sge[sge_no].iov_base, 592 sge_bytes, DMA_TO_DEVICE);
555 sge_bytes, DMA_TO_DEVICE); 593 xdr_off += sge_bytes;
556 if (ib_dma_mapping_error(rdma->sc_cm_id->device, 594 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
557 ctxt->sge[sge_no].addr)) 595 ctxt->sge[sge_no].addr))
558 goto err; 596 goto err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index edea15a54e51..9df1eadc912a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -45,6 +45,7 @@
45#include <linux/sched.h> 45#include <linux/sched.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/spinlock.h> 47#include <linux/spinlock.h>
48#include <linux/workqueue.h>
48#include <rdma/ib_verbs.h> 49#include <rdma/ib_verbs.h>
49#include <rdma/rdma_cm.h> 50#include <rdma/rdma_cm.h>
50#include <linux/sunrpc/svc_rdma.h> 51#include <linux/sunrpc/svc_rdma.h>
@@ -52,6 +53,7 @@
52#define RPCDBG_FACILITY RPCDBG_SVCXPRT 53#define RPCDBG_FACILITY RPCDBG_SVCXPRT
53 54
54static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 55static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
56 struct net *net,
55 struct sockaddr *sa, int salen, 57 struct sockaddr *sa, int salen,
56 int flags); 58 int flags);
57static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 59static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
@@ -89,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = {
89/* WR context cache. Created in svc_rdma.c */ 91/* WR context cache. Created in svc_rdma.c */
90extern struct kmem_cache *svc_rdma_ctxt_cachep; 92extern struct kmem_cache *svc_rdma_ctxt_cachep;
91 93
94/* Workqueue created in svc_rdma.c */
95extern struct workqueue_struct *svc_rdma_wq;
96
92struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 97struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
93{ 98{
94 struct svc_rdma_op_ctxt *ctxt; 99 struct svc_rdma_op_ctxt *ctxt;
@@ -120,7 +125,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
120 */ 125 */
121 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { 126 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
122 atomic_dec(&xprt->sc_dma_used); 127 atomic_dec(&xprt->sc_dma_used);
123 ib_dma_unmap_single(xprt->sc_cm_id->device, 128 ib_dma_unmap_page(xprt->sc_cm_id->device,
124 ctxt->sge[i].addr, 129 ctxt->sge[i].addr,
125 ctxt->sge[i].length, 130 ctxt->sge[i].length,
126 ctxt->direction); 131 ctxt->direction);
@@ -502,8 +507,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
502 BUG_ON(sge_no >= xprt->sc_max_sge); 507 BUG_ON(sge_no >= xprt->sc_max_sge);
503 page = svc_rdma_get_page(); 508 page = svc_rdma_get_page();
504 ctxt->pages[sge_no] = page; 509 ctxt->pages[sge_no] = page;
505 pa = ib_dma_map_single(xprt->sc_cm_id->device, 510 pa = ib_dma_map_page(xprt->sc_cm_id->device,
506 page_address(page), PAGE_SIZE, 511 page, 0, PAGE_SIZE,
507 DMA_FROM_DEVICE); 512 DMA_FROM_DEVICE);
508 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 513 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
509 goto err_put_ctxt; 514 goto err_put_ctxt;
@@ -511,9 +516,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
511 ctxt->sge[sge_no].addr = pa; 516 ctxt->sge[sge_no].addr = pa;
512 ctxt->sge[sge_no].length = PAGE_SIZE; 517 ctxt->sge[sge_no].length = PAGE_SIZE;
513 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; 518 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
519 ctxt->count = sge_no + 1;
514 buflen += PAGE_SIZE; 520 buflen += PAGE_SIZE;
515 } 521 }
516 ctxt->count = sge_no;
517 recv_wr.next = NULL; 522 recv_wr.next = NULL;
518 recv_wr.sg_list = &ctxt->sge[0]; 523 recv_wr.sg_list = &ctxt->sge[0];
519 recv_wr.num_sge = ctxt->count; 524 recv_wr.num_sge = ctxt->count;
@@ -529,6 +534,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
529 return ret; 534 return ret;
530 535
531 err_put_ctxt: 536 err_put_ctxt:
537 svc_rdma_unmap_dma(ctxt);
532 svc_rdma_put_context(ctxt, 1); 538 svc_rdma_put_context(ctxt, 1);
533 return -ENOMEM; 539 return -ENOMEM;
534} 540}
@@ -670,6 +676,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
670 * Create a listening RDMA service endpoint. 676 * Create a listening RDMA service endpoint.
671 */ 677 */
672static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 678static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
679 struct net *net,
673 struct sockaddr *sa, int salen, 680 struct sockaddr *sa, int salen,
674 int flags) 681 int flags)
675{ 682{
@@ -798,8 +805,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
798 if (ib_dma_mapping_error(frmr->mr->device, addr)) 805 if (ib_dma_mapping_error(frmr->mr->device, addr))
799 continue; 806 continue;
800 atomic_dec(&xprt->sc_dma_used); 807 atomic_dec(&xprt->sc_dma_used);
801 ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, 808 ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
802 frmr->direction); 809 frmr->direction);
803 } 810 }
804} 811}
805 812
@@ -1184,7 +1191,7 @@ static void svc_rdma_free(struct svc_xprt *xprt)
1184 struct svcxprt_rdma *rdma = 1191 struct svcxprt_rdma *rdma =
1185 container_of(xprt, struct svcxprt_rdma, sc_xprt); 1192 container_of(xprt, struct svcxprt_rdma, sc_xprt);
1186 INIT_WORK(&rdma->sc_work, __svc_rdma_free); 1193 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
1187 schedule_work(&rdma->sc_work); 1194 queue_work(svc_rdma_wq, &rdma->sc_work);
1188} 1195}
1189 1196
1190static int svc_rdma_has_wspace(struct svc_xprt *xprt) 1197static int svc_rdma_has_wspace(struct svc_xprt *xprt)
@@ -1274,7 +1281,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1274 atomic_read(&xprt->sc_sq_count) < 1281 atomic_read(&xprt->sc_sq_count) <
1275 xprt->sc_sq_depth); 1282 xprt->sc_sq_depth);
1276 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1283 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1277 return 0; 1284 return -ENOTCONN;
1278 continue; 1285 continue;
1279 } 1286 }
1280 /* Take a transport ref for each WR posted */ 1287 /* Take a transport ref for each WR posted */
@@ -1306,7 +1313,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1306 enum rpcrdma_errcode err) 1313 enum rpcrdma_errcode err)
1307{ 1314{
1308 struct ib_send_wr err_wr; 1315 struct ib_send_wr err_wr;
1309 struct ib_sge sge;
1310 struct page *p; 1316 struct page *p;
1311 struct svc_rdma_op_ctxt *ctxt; 1317 struct svc_rdma_op_ctxt *ctxt;
1312 u32 *va; 1318 u32 *va;
@@ -1319,26 +1325,27 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1319 /* XDR encode error */ 1325 /* XDR encode error */
1320 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1326 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1321 1327
1328 ctxt = svc_rdma_get_context(xprt);
1329 ctxt->direction = DMA_FROM_DEVICE;
1330 ctxt->count = 1;
1331 ctxt->pages[0] = p;
1332
1322 /* Prepare SGE for local address */ 1333 /* Prepare SGE for local address */
1323 sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, 1334 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
1324 page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); 1335 p, 0, length, DMA_FROM_DEVICE);
1325 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { 1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1326 put_page(p); 1337 put_page(p);
1327 return; 1338 return;
1328 } 1339 }
1329 atomic_inc(&xprt->sc_dma_used); 1340 atomic_inc(&xprt->sc_dma_used);
1330 sge.lkey = xprt->sc_dma_lkey; 1341 ctxt->sge[0].lkey = xprt->sc_dma_lkey;
1331 sge.length = length; 1342 ctxt->sge[0].length = length;
1332
1333 ctxt = svc_rdma_get_context(xprt);
1334 ctxt->count = 1;
1335 ctxt->pages[0] = p;
1336 1343
1337 /* Prepare SEND WR */ 1344 /* Prepare SEND WR */
1338 memset(&err_wr, 0, sizeof err_wr); 1345 memset(&err_wr, 0, sizeof err_wr);
1339 ctxt->wr_op = IB_WR_SEND; 1346 ctxt->wr_op = IB_WR_SEND;
1340 err_wr.wr_id = (unsigned long)ctxt; 1347 err_wr.wr_id = (unsigned long)ctxt;
1341 err_wr.sg_list = &sge; 1348 err_wr.sg_list = ctxt->sge;
1342 err_wr.num_sge = 1; 1349 err_wr.num_sge = 1;
1343 err_wr.opcode = IB_WR_SEND; 1350 err_wr.opcode = IB_WR_SEND;
1344 err_wr.send_flags = IB_SEND_SIGNALED; 1351 err_wr.send_flags = IB_SEND_SIGNALED;
@@ -1348,9 +1355,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1348 if (ret) { 1355 if (ret) {
1349 dprintk("svcrdma: Error %d posting send for protocol error\n", 1356 dprintk("svcrdma: Error %d posting send for protocol error\n",
1350 ret); 1357 ret);
1351 ib_dma_unmap_single(xprt->sc_cm_id->device, 1358 svc_rdma_unmap_dma(ctxt);
1352 sge.addr, PAGE_SIZE,
1353 DMA_FROM_DEVICE);
1354 svc_rdma_put_context(ctxt, 1); 1359 svc_rdma_put_context(ctxt, 1);
1355 } 1360 }
1356} 1361}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a85e866a77f7..0867070bb5ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
 
 	dprintk("RPC: %s: called\n", __func__);
 
-	cancel_delayed_work(&r_xprt->rdma_connect);
-	flush_scheduled_work();
+	cancel_delayed_work_sync(&r_xprt->rdma_connect);
 
 	xprt_clear_connected(xprt);
 
@@ -251,9 +250,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
 
 	xprt_rdma_free_addresses(xprt);
 
-	kfree(xprt->slot);
-	xprt->slot = NULL;
-	kfree(xprt);
+	xprt_free(xprt);
 
 	dprintk("RPC: %s: returning\n", __func__);
 
@@ -285,23 +282,14 @@ xprt_setup_rdma(struct xprt_create *args)
 		return ERR_PTR(-EBADF);
 	}
 
-	xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);
+	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
+			xprt_rdma_slot_table_entries);
 	if (xprt == NULL) {
 		dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
 			__func__);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	xprt->max_reqs = xprt_rdma_slot_table_entries;
-	xprt->slot = kcalloc(xprt->max_reqs,
-			sizeof(struct rpc_rqst), GFP_KERNEL);
-	if (xprt->slot == NULL) {
-		dprintk("RPC: %s: couldn't allocate %d slots\n",
-			__func__, xprt->max_reqs);
-		kfree(xprt);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	/* 60 second timeout, no retries */
 	xprt->timeout = &xprt_rdma_default_timeout;
 	xprt->bind_timeout = (60U * HZ);
@@ -410,8 +398,7 @@ out3:
 out2:
 	rpcrdma_ia_close(&new_xprt->rx_ia);
 out1:
-	kfree(xprt->slot);
-	kfree(xprt);
+	xprt_free(xprt);
 	return ERR_PTR(rc);
 }
 
@@ -460,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task)
 	} else {
 		schedule_delayed_work(&r_xprt->rdma_connect, 0);
 		if (!RPC_IS_ASYNC(task))
-			flush_scheduled_work();
+			flush_delayed_work(&r_xprt->rdma_connect);
 	}
 }
 
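
Note on the transport.c changes above: the open-coded kzalloc of the rpcrdma_xprt plus the kcalloc'd slot table (and their mirror-image kfree pairs on every exit path) collapse into xprt_alloc()/xprt_free(), and the shutdown path now waits only on its own rdma_connect work item via cancel_delayed_work_sync()/flush_delayed_work() rather than flushing the entire global workqueue. The shape of the allocation consolidation, as a hedged user-space sketch (helper names are made up; only the structure mirrors the diff):

	#include <stdlib.h>

	struct rqst { char buf[64]; };

	struct xprt {
		unsigned int max_reqs;
		struct rqst *slot;
	};

	/* one helper owns the two-step allocation... */
	static struct xprt *xprt_alloc_like(size_t size, unsigned int num_slots)
	{
		struct xprt *x = calloc(1, size);

		if (!x)
			return NULL;
		x->slot = calloc(num_slots, sizeof(struct rqst));
		if (!x->slot) {
			free(x);
			return NULL;
		}
		x->max_reqs = num_slots;
		return x;
	}

	/* ...and one helper owns the matching teardown */
	static void xprt_free_like(struct xprt *x)
	{
		free(x->slot);
		free(x);
	}

	int main(void)
	{
		struct xprt *x = xprt_alloc_like(sizeof(*x), 16);

		if (!x)
			return 1;
		xprt_free_like(x);
		return 0;
	}

With the callers using these two helpers, a later change to the slot-table layout touches one place instead of every transport.
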
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fe9306bf10cc..dfcab5ac65af 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -774,8 +774,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
 
 	xs_close(xprt);
 	xs_free_peer_addresses(xprt);
-	kfree(xprt->slot);
-	kfree(xprt);
+	xprt_free(xprt);
 	module_put(THIS_MODULE);
 }
 
@@ -1516,7 +1515,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 	xs_update_peer_port(xprt);
 }
 
-static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock)
+static unsigned short xs_get_srcport(struct sock_xprt *transport)
 {
 	unsigned short port = transport->srcport;
 
@@ -1525,7 +1524,7 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket
 	return port;
 }
 
-static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port)
+static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
 {
 	if (transport->srcport != 0)
 		transport->srcport = 0;
@@ -1535,23 +1534,18 @@ static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket
 		return xprt_max_resvport;
 	return --port;
 }
-
-static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
+static int xs_bind(struct sock_xprt *transport, struct socket *sock)
 {
-	struct sockaddr_in myaddr = {
-		.sin_family = AF_INET,
-	};
-	struct sockaddr_in *sa;
+	struct sockaddr_storage myaddr;
 	int err, nloop = 0;
-	unsigned short port = xs_get_srcport(transport, sock);
+	unsigned short port = xs_get_srcport(transport);
 	unsigned short last;
 
-	sa = (struct sockaddr_in *)&transport->srcaddr;
-	myaddr.sin_addr = sa->sin_addr;
+	memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
 	do {
-		myaddr.sin_port = htons(port);
-		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
-				sizeof(myaddr));
+		rpc_set_port((struct sockaddr *)&myaddr, port);
+		err = kernel_bind(sock, (struct sockaddr *)&myaddr,
+				transport->xprt.addrlen);
 		if (port == 0)
 			break;
 		if (err == 0) {
@@ -1559,48 +1553,23 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
 			break;
 		}
 		last = port;
-		port = xs_next_srcport(transport, sock, port);
+		port = xs_next_srcport(transport, port);
 		if (port > last)
 			nloop++;
 	} while (err == -EADDRINUSE && nloop != 2);
-	dprintk("RPC: %s %pI4:%u: %s (%d)\n",
-			__func__, &myaddr.sin_addr,
-			port, err ? "failed" : "ok", err);
-	return err;
-}
-
-static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
-{
-	struct sockaddr_in6 myaddr = {
-		.sin6_family = AF_INET6,
-	};
-	struct sockaddr_in6 *sa;
-	int err, nloop = 0;
-	unsigned short port = xs_get_srcport(transport, sock);
-	unsigned short last;
 
-	sa = (struct sockaddr_in6 *)&transport->srcaddr;
-	myaddr.sin6_addr = sa->sin6_addr;
-	do {
-		myaddr.sin6_port = htons(port);
-		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
-				sizeof(myaddr));
-		if (port == 0)
-			break;
-		if (err == 0) {
-			transport->srcport = port;
-			break;
-		}
-		last = port;
-		port = xs_next_srcport(transport, sock, port);
-		if (port > last)
-			nloop++;
-	} while (err == -EADDRINUSE && nloop != 2);
-	dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n",
-		&myaddr.sin6_addr, port, err ? "failed" : "ok", err);
+	if (myaddr.ss_family == AF_INET)
+		dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__,
+				&((struct sockaddr_in *)&myaddr)->sin_addr,
+				port, err ? "failed" : "ok", err);
+	else
+		dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__,
+				&((struct sockaddr_in6 *)&myaddr)->sin6_addr,
+				port, err ? "failed" : "ok", err);
 	return err;
 }
 
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key xs_key[2];
 static struct lock_class_key xs_slock_key[2];
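
In the xs_bind() consolidation above, struct sockaddr_storage is large enough for either family and rpc_set_port() is the only family-aware step, so one port-walking loop replaces the parallel xs_bind4()/xs_bind6() copies. A user-space analogue of the loop (set_port() stands in for rpc_set_port(); the port range is arbitrary, everything else is plain POSIX sockets):

	#include <arpa/inet.h>
	#include <netinet/in.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <unistd.h>

	/* stand-in for rpc_set_port(): poke the right field per family */
	static void set_port(struct sockaddr *sa, unsigned short port)
	{
		if (sa->sa_family == AF_INET)
			((struct sockaddr_in *)sa)->sin_port = htons(port);
		else
			((struct sockaddr_in6 *)sa)->sin6_port = htons(port);
	}

	int main(void)
	{
		struct sockaddr_storage myaddr;
		struct sockaddr_in *sin = (struct sockaddr_in *)&myaddr;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);
		unsigned short port;
		int err = -1;

		memset(&myaddr, 0, sizeof(myaddr));
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

		/* one loop serves both families: only set_port() dispatches */
		for (port = 49152; port < 49162; port++) {
			set_port((struct sockaddr *)&myaddr, port);
			err = bind(fd, (struct sockaddr *)&myaddr, sizeof(*sin));
			if (err == 0)
				break;
		}
		printf("port %u: %s\n", port, err ? "failed" : "ok");
		close(fd);
		return err != 0;
	}

The same memcpy-then-poke-the-port structure works unchanged for AF_INET6; only the port setter looks at the family.
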
@@ -1622,6 +1591,18 @@ static inline void xs_reclassify_socket6(struct socket *sock)
 	sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
 		&xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
 }
+
+static inline void xs_reclassify_socket(int family, struct socket *sock)
+{
+	switch (family) {
+	case AF_INET:
+		xs_reclassify_socket4(sock);
+		break;
+	case AF_INET6:
+		xs_reclassify_socket6(sock);
+		break;
+	}
+}
 #else
 static inline void xs_reclassify_socket4(struct socket *sock)
 {
@@ -1630,8 +1611,36 @@ static inline void xs_reclassify_socket4(struct socket *sock)
 static inline void xs_reclassify_socket6(struct socket *sock)
 {
 }
+
+static inline void xs_reclassify_socket(int family, struct socket *sock)
+{
+}
 #endif
 
+static struct socket *xs_create_sock(struct rpc_xprt *xprt,
+		struct sock_xprt *transport, int family, int type, int protocol)
+{
+	struct socket *sock;
+	int err;
+
+	err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1);
+	if (err < 0) {
+		dprintk("RPC: can't create %d transport socket (%d).\n",
+				protocol, -err);
+		goto out;
+	}
+	xs_reclassify_socket(family, sock);
+
+	if (xs_bind(transport, sock)) {
+		sock_release(sock);
+		goto out;
+	}
+
+	return sock;
+out:
+	return ERR_PTR(err);
+}
+
 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
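
xs_create_sock() above folds socket creation, lockdep reclassification and binding into a single helper that reports failure through an error-encoded pointer, which is what the IS_ERR()/PTR_ERR() checks in the TCP and UDP workers rely on. The convention in miniature (a sketch of the idea, not the kernel's actual ERR_PTR macros):

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define MAX_ERRNO 4095

	static void *err_ptr(long error) { return (void *)error; }
	static long ptr_err(const void *ptr) { return (long)ptr; }
	static int is_err(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	/* create-or-fail helper in the style of xs_create_sock() */
	static void *create_thing(int fail)
	{
		if (fail)
			return err_ptr(-ENOMEM);	/* error rides in the pointer */
		return malloc(64);
	}

	int main(void)
	{
		void *thing = create_thing(1);

		if (is_err(thing)) {
			printf("create failed: %ld\n", ptr_err(thing));
			return 1;
		}
		free(thing);
		return 0;
	}
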
@@ -1661,82 +1670,23 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	xs_udp_do_set_buffer_size(xprt);
 }
 
-/**
- * xs_udp_connect_worker4 - set up a UDP socket
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_udp_connect_worker4(struct work_struct *work)
+static void xs_udp_setup_socket(struct work_struct *work)
 {
 	struct sock_xprt *transport =
 		container_of(work, struct sock_xprt, connect_worker.work);
 	struct rpc_xprt *xprt = &transport->xprt;
 	struct socket *sock = transport->sock;
-	int err, status = -EIO;
+	int status = -EIO;
 
 	if (xprt->shutdown)
 		goto out;
 
 	/* Start by resetting any existing state */
 	xs_reset_transport(transport);
-
-	err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
-	if (err < 0) {
-		dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
+	sock = xs_create_sock(xprt, transport,
+			xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
+	if (IS_ERR(sock))
 		goto out;
-	}
-	xs_reclassify_socket4(sock);
-
-	if (xs_bind4(transport, sock)) {
-		sock_release(sock);
-		goto out;
-	}
-
-	dprintk("RPC: worker connecting xprt %p via %s to "
-				"%s (port %s)\n", xprt,
-			xprt->address_strings[RPC_DISPLAY_PROTO],
-			xprt->address_strings[RPC_DISPLAY_ADDR],
-			xprt->address_strings[RPC_DISPLAY_PORT]);
-
-	xs_udp_finish_connecting(xprt, sock);
-	status = 0;
-out:
-	xprt_clear_connecting(xprt);
-	xprt_wake_pending_tasks(xprt, status);
-}
-
-/**
- * xs_udp_connect_worker6 - set up a UDP socket
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_udp_connect_worker6(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, connect_worker.work);
-	struct rpc_xprt *xprt = &transport->xprt;
-	struct socket *sock = transport->sock;
-	int err, status = -EIO;
-
-	if (xprt->shutdown)
-		goto out;
-
-	/* Start by resetting any existing state */
-	xs_reset_transport(transport);
-
-	err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
-	if (err < 0) {
-		dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
-		goto out;
-	}
-	xs_reclassify_socket6(sock);
-
-	if (xs_bind6(transport, sock) < 0) {
-		sock_release(sock);
-		goto out;
-	}
 
 	dprintk("RPC: worker connecting xprt %p via %s to "
 				"%s (port %s)\n", xprt,
@@ -1755,12 +1705,12 @@ out:
  * We need to preserve the port number so the reply cache on the server can
  * find our cached RPC replies when we get around to reconnecting.
  */
-static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+static void xs_abort_connection(struct sock_xprt *transport)
 {
 	int result;
 	struct sockaddr any;
 
-	dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
+	dprintk("RPC: disconnecting xprt %p to reuse port\n", transport);
 
 	/*
 	 * Disconnect the transport socket by doing a connect operation
@@ -1770,13 +1720,13 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo
 	any.sa_family = AF_UNSPEC;
 	result = kernel_connect(transport->sock, &any, sizeof(any), 0);
 	if (!result)
-		xs_sock_mark_closed(xprt);
+		xs_sock_mark_closed(&transport->xprt);
 	else
 		dprintk("RPC: AF_UNSPEC connect return code %d\n",
 				result);
 }
 
-static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+static void xs_tcp_reuse_connection(struct sock_xprt *transport)
 {
 	unsigned int state = transport->inet->sk_state;
 
@@ -1799,7 +1749,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra
1799 "sk_shutdown set to %d\n", 1749 "sk_shutdown set to %d\n",
1800 __func__, transport->inet->sk_shutdown); 1750 __func__, transport->inet->sk_shutdown);
1801 } 1751 }
1802 xs_abort_connection(xprt, transport); 1752 xs_abort_connection(transport);
1803} 1753}
1804 1754
1805static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1755static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -1852,12 +1802,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
-		struct sock_xprt *transport,
-		struct socket *(*create_sock)(struct rpc_xprt *,
-			struct sock_xprt *))
+static void xs_tcp_setup_socket(struct work_struct *work)
 {
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
 	struct socket *sock = transport->sock;
+	struct rpc_xprt *xprt = &transport->xprt;
 	int status = -EIO;
 
 	if (xprt->shutdown)
@@ -1865,7 +1815,8 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
 
 	if (!sock) {
 		clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
-		sock = create_sock(xprt, transport);
+		sock = xs_create_sock(xprt, transport,
+				xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
 		if (IS_ERR(sock)) {
 			status = PTR_ERR(sock);
 			goto out;
@@ -1876,7 +1827,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
 		abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
 				&xprt->state);
 		/* "close" the socket, preserving the local port */
-		xs_tcp_reuse_connection(xprt, transport);
+		xs_tcp_reuse_connection(transport);
 
 		if (abort_and_exit)
 			goto out_eagain;
@@ -1925,84 +1876,6 @@ out:
 	xprt_wake_pending_tasks(xprt, status);
 }
 
-static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
-		struct sock_xprt *transport)
-{
-	struct socket *sock;
-	int err;
-
-	/* start from scratch */
-	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
-	if (err < 0) {
-		dprintk("RPC: can't create TCP transport socket (%d).\n",
-				-err);
-		goto out_err;
-	}
-	xs_reclassify_socket4(sock);
-
-	if (xs_bind4(transport, sock) < 0) {
-		sock_release(sock);
-		goto out_err;
-	}
-	return sock;
-out_err:
-	return ERR_PTR(-EIO);
-}
-
-/**
- * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_tcp_connect_worker4(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, connect_worker.work);
-	struct rpc_xprt *xprt = &transport->xprt;
-
-	xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
-}
-
-static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
-		struct sock_xprt *transport)
-{
-	struct socket *sock;
-	int err;
-
-	/* start from scratch */
-	err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
-	if (err < 0) {
-		dprintk("RPC: can't create TCP transport socket (%d).\n",
-				-err);
-		goto out_err;
-	}
-	xs_reclassify_socket6(sock);
-
-	if (xs_bind6(transport, sock) < 0) {
-		sock_release(sock);
-		goto out_err;
-	}
-	return sock;
-out_err:
-	return ERR_PTR(-EIO);
-}
-
-/**
- * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_tcp_connect_worker6(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, connect_worker.work);
-	struct rpc_xprt *xprt = &transport->xprt;
-
-	xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
-}
-
 /**
  * xs_connect - connect a socket to a remote endpoint
  * @task: address of RPC task that manages state of connect request
@@ -2262,6 +2135,31 @@ static struct rpc_xprt_ops bc_tcp_ops = {
 	.print_stats = xs_tcp_print_stats,
 };
 
+static int xs_init_anyaddr(const int family, struct sockaddr *sap)
+{
+	static const struct sockaddr_in sin = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_ANY),
+	};
+	static const struct sockaddr_in6 sin6 = {
+		.sin6_family = AF_INET6,
+		.sin6_addr = IN6ADDR_ANY_INIT,
+	};
+
+	switch (family) {
+	case AF_INET:
+		memcpy(sap, &sin, sizeof(sin));
+		break;
+	case AF_INET6:
+		memcpy(sap, &sin6, sizeof(sin6));
+		break;
+	default:
+		dprintk("RPC: %s: Bad address family\n", __func__);
+		return -EAFNOSUPPORT;
+	}
+	return 0;
+}
+
 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 		unsigned int slot_table_size)
 {
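
xs_init_anyaddr() above supplies INADDR_ANY/in6addr_any templates whenever the caller passes no explicit source address, so the later xs_bind() always copies from a well-formed sockaddr. A compact user-space illustration of the template-copy approach (the error value and printout are placeholders):

	#include <netinet/in.h>
	#include <stdio.h>
	#include <string.h>

	static int init_anyaddr(int family, struct sockaddr_storage *sap)
	{
		static const struct sockaddr_in sin = {
			.sin_family = AF_INET,
			.sin_addr.s_addr = INADDR_ANY,	/* 0.0.0.0; zero, so byte order is moot */
		};
		static const struct sockaddr_in6 sin6 = {
			.sin6_family = AF_INET6,
			.sin6_addr = IN6ADDR_ANY_INIT,	/* :: */
		};

		switch (family) {
		case AF_INET:
			memcpy(sap, &sin, sizeof(sin));
			return 0;
		case AF_INET6:
			memcpy(sap, &sin6, sizeof(sin6));
			return 0;
		default:
			return -1;	/* the kernel returns -EAFNOSUPPORT here */
		}
	}

	int main(void)
	{
		struct sockaddr_storage ss;

		memset(&ss, 0, sizeof(ss));
		if (init_anyaddr(AF_INET6, &ss))
			return 1;
		printf("family initialized: %d\n", ss.ss_family);
		return 0;
	}
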
@@ -2273,27 +2171,25 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 		return ERR_PTR(-EBADF);
 	}
 
-	new = kzalloc(sizeof(*new), GFP_KERNEL);
-	if (new == NULL) {
+	xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
+	if (xprt == NULL) {
 		dprintk("RPC: xs_setup_xprt: couldn't allocate "
 				"rpc_xprt\n");
 		return ERR_PTR(-ENOMEM);
 	}
-	xprt = &new->xprt;
-
-	xprt->max_reqs = slot_table_size;
-	xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
-	if (xprt->slot == NULL) {
-		kfree(xprt);
-		dprintk("RPC: xs_setup_xprt: couldn't allocate slot "
-				"table\n");
-		return ERR_PTR(-ENOMEM);
-	}
 
+	new = container_of(xprt, struct sock_xprt, xprt);
 	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
 	xprt->addrlen = args->addrlen;
 	if (args->srcaddr)
 		memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
+	else {
+		int err;
+		err = xs_init_anyaddr(args->dstaddr->sa_family,
+					(struct sockaddr *)&new->srcaddr);
+		if (err != 0)
+			return ERR_PTR(err);
+	}
 
 	return xprt;
 }
@@ -2341,7 +2237,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 		xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_udp_connect_worker4);
+					xs_udp_setup_socket);
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
 		break;
 	case AF_INET6:
@@ -2349,7 +2245,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 		xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_udp_connect_worker6);
+					xs_udp_setup_socket);
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
 		break;
 	default:
@@ -2371,8 +2267,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 		return xprt;
 	ret = ERR_PTR(-EINVAL);
 out_err:
-	kfree(xprt->slot);
-	kfree(xprt);
+	xprt_free(xprt);
 	return ret;
 }
 
@@ -2416,7 +2311,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 		xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_tcp_connect_worker4);
+					xs_tcp_setup_socket);
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
 		break;
 	case AF_INET6:
@@ -2424,7 +2319,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 		xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_tcp_connect_worker6);
+					xs_tcp_setup_socket);
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
 		break;
 	default:
@@ -2447,8 +2342,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 		return xprt;
 	ret = ERR_PTR(-EINVAL);
 out_err:
-	kfree(xprt->slot);
-	kfree(xprt);
+	xprt_free(xprt);
 	return ret;
 }
 
@@ -2507,15 +2401,10 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 		goto out_err;
 	}
 
-	if (xprt_bound(xprt))
-		dprintk("RPC: set up xprt to %s (port %s) via %s\n",
-				xprt->address_strings[RPC_DISPLAY_ADDR],
-				xprt->address_strings[RPC_DISPLAY_PORT],
-				xprt->address_strings[RPC_DISPLAY_PROTO]);
-	else
-		dprintk("RPC: set up xprt to %s (autobind) via %s\n",
-				xprt->address_strings[RPC_DISPLAY_ADDR],
-				xprt->address_strings[RPC_DISPLAY_PROTO]);
+	dprintk("RPC: set up xprt to %s (port %s) via %s\n",
+			xprt->address_strings[RPC_DISPLAY_ADDR],
+			xprt->address_strings[RPC_DISPLAY_PORT],
+			xprt->address_strings[RPC_DISPLAY_PROTO]);
 
 	/*
 	 * Since we don't want connections for the backchannel, we set
@@ -2528,8 +2417,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 	return xprt;
 	ret = ERR_PTR(-EINVAL);
 out_err:
-	kfree(xprt->slot);
-	kfree(xprt);
+	xprt_free(xprt);
 	return ret;
 }
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 33217fc3d697..e9f0d5004483 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -396,6 +396,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
 	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 	struct tipc_sock *tsock = tipc_sk(sock->sk);
 
+	memset(addr, 0, sizeof(*addr));
 	if (peer) {
 		if ((sock->state != SS_CONNECTED) &&
 			((peer != 2) || (sock->state != SS_DISCONNECTING)))
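
The one-line memset() above is a classic infoleak fix: get_name() fills only some fields of the sockaddr it copies back to user space, and without clearing it first the remaining bytes (including compiler padding) carry stale kernel stack contents. A user-space demonstration of why the clear matters (the struct layout is made up for illustration):

	#include <stdio.h>
	#include <string.h>

	struct addr {
		unsigned short family;
		/* 2 bytes of padding on most ABIs */
		unsigned int id;
		char pad[8];	/* fields a short reply never writes */
	};

	static void fill_no_clear(struct addr *a) { a->family = 30; a->id = 42; }
	static void fill_cleared(struct addr *a)
	{
		memset(a, 0, sizeof(*a));	/* the fix: no stale bytes remain */
		a->family = 30;
		a->id = 42;
	}

	int main(void)
	{
		struct addr a;
		unsigned char *p = (unsigned char *)&a;
		size_t i;

		memset(&a, 0xAA, sizeof(a));	/* simulate stale stack data */
		fill_no_clear(&a);
		for (i = 0; i < sizeof(a); i++)
			printf("%02x", p[i]);	/* 0xAA bytes still visible */
		printf("\n");

		fill_cleared(&a);
		for (i = 0; i < sizeof(a); i++)
			printf("%02x", p[i]);	/* fully deterministic now */
		printf("\n");
		return 0;
	}
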
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0ebc777a6660..2268e6798124 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,7 +117,7 @@
 
 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 static DEFINE_SPINLOCK(unix_table_lock);
-static atomic_t unix_nr_socks = ATOMIC_INIT(0);
+static atomic_long_t unix_nr_socks;
 
 #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
 
@@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk)
 	if (u->addr)
 		unix_release_addr(u->addr);
 
-	atomic_dec(&unix_nr_socks);
+	atomic_long_dec(&unix_nr_socks);
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	local_bh_enable();
 #ifdef UNIX_REFCNT_DEBUG
-	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
-		atomic_read(&unix_nr_socks));
+	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
+		atomic_long_read(&unix_nr_socks));
 #endif
 }
 
@@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
 	struct sock *sk = NULL;
 	struct unix_sock *u;
 
-	atomic_inc(&unix_nr_socks);
-	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
+	atomic_long_inc(&unix_nr_socks);
+	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 		goto out;
 
 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
@@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
 	unix_insert_socket(unix_sockets_unbound, sk);
 out:
 	if (sk == NULL)
-		atomic_dec(&unix_nr_socks);
+		atomic_long_dec(&unix_nr_socks);
 	else {
 		local_bh_disable();
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -1343,9 +1343,25 @@ static void unix_destruct_scm(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
+#define MAX_RECURSION_LEVEL 4
+
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+	unsigned char max_level = 0;
+	int unix_sock_count = 0;
+
+	for (i = scm->fp->count - 1; i >= 0; i--) {
+		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
+
+		if (sk) {
+			unix_sock_count++;
+			max_level = max(max_level,
+					unix_sk(sk)->recursion_level);
+		}
+	}
+	if (unlikely(max_level > MAX_RECURSION_LEVEL))
+		return -ETOOMANYREFS;
 
 	/*
 	 * Need to duplicate file references for the sake of garbage
@@ -1356,9 +1372,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	if (!UNIXCB(skb).fp)
 		return -ENOMEM;
 
-	for (i = scm->fp->count-1; i >= 0; i--)
-		unix_inflight(scm->fp->fp[i]);
-	return 0;
+	if (unix_sock_count) {
+		for (i = scm->fp->count - 1; i >= 0; i--)
+			unix_inflight(scm->fp->fp[i]);
+	}
+	return max_level;
 }
 
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1393,6 +1411,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sk_buff *skb;
 	long timeo;
 	struct scm_cookie tmp_scm;
+	int max_level;
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
@@ -1431,8 +1450,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 
 	err = unix_scm_to_skb(siocb->scm, skb, true);
-	if (err)
+	if (err < 0)
 		goto out_free;
+	max_level = err + 1;
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1514,6 +1534,8 @@ restart:
 	if (sock_flag(other, SOCK_RCVTSTAMP))
 		__net_timestamp(skb);
 	skb_queue_tail(&other->sk_receive_queue, skb);
+	if (max_level > unix_sk(other)->recursion_level)
+		unix_sk(other)->recursion_level = max_level;
 	unix_state_unlock(other);
 	other->sk_data_ready(other, len);
 	sock_put(other);
@@ -1544,6 +1566,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	int sent = 0;
 	struct scm_cookie tmp_scm;
 	bool fds_sent = false;
+	int max_level;
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
@@ -1607,10 +1630,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 		/* Only send the fds in the first buffer */
 		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
-		if (err) {
+		if (err < 0) {
 			kfree_skb(skb);
 			goto out_err;
 		}
+		max_level = err + 1;
 		fds_sent = true;
 
 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
@@ -1626,6 +1650,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			goto pipe_err_free;
 
 		skb_queue_tail(&other->sk_receive_queue, skb);
+		if (max_level > unix_sk(other)->recursion_level)
+			unix_sk(other)->recursion_level = max_level;
 		unix_state_unlock(other);
 		other->sk_data_ready(other, size);
 		sent += size;
@@ -1845,6 +1871,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		unix_state_lock(sk);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb == NULL) {
+			unix_sk(sk)->recursion_level = 0;
 			if (copied >= target)
 				goto unlock;
 
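
The af_unix changes above track, per socket, how deeply SCM_RIGHTS-carried sockets are nested: unix_attach_fds() returns the highest recursion_level found among the passed fds (refusing anything above MAX_RECURSION_LEVEL with -ETOOMANYREFS), the senders store max_level = err + 1 on the receiving socket, and the receiver resets its level once the queue drains. A stripped-down model of the level bookkeeping (plain C, no sockets; the structs are illustrative):

	#include <errno.h>
	#include <stdio.h>

	#define MAX_RECURSION_LEVEL 4

	struct usock { unsigned char recursion_level; };

	/* returns the max level among attached sockets, or -ETOOMANYREFS */
	static int attach_fds(struct usock **fds, int count)
	{
		unsigned char max_level = 0;
		int i;

		for (i = 0; i < count; i++)
			if (fds[i] && fds[i]->recursion_level > max_level)
				max_level = fds[i]->recursion_level;
		if (max_level > MAX_RECURSION_LEVEL)
			return -ETOOMANYREFS;
		return max_level;
	}

	int main(void)
	{
		struct usock a = { .recursion_level = 2 };
		struct usock dst = { 0 };
		struct usock *fds[] = { &a };
		int err = attach_fds(fds, 1);

		if (err < 0)
			return 1;
		/* the receiver sits one level above the deepest passed socket */
		if (err + 1 > dst.recursion_level)
			dst.recursion_level = err + 1;
		printf("receiver level: %d\n", dst.recursion_level);
		return 0;
	}
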
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c8df6fda0b1f..f89f83bf828e 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
 unsigned int unix_tot_inflight;
 
 
-static struct sock *unix_get_socket(struct file *filp)
+struct sock *unix_get_socket(struct file *filp)
 {
 	struct sock *u_sock = NULL;
 	struct inode *inode = filp->f_path.dentry->d_inode;
@@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 }
 
 static bool gc_in_progress = false;
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
 
 void wait_for_unix_gc(void)
 {
+	/*
+	 * If number of inflight sockets is insane,
+	 * force a garbage collect right now.
+	 */
+	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+		unix_gc();
 	wait_event(unix_gc_wait, gc_in_progress == false);
 }
 
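
wait_for_unix_gc() previously only waited out a collection already in flight; the hunk above also kicks one off synchronously once unix_tot_inflight crosses 16000, so a sender flooding in-flight fds cannot outrun the garbage collector. The trigger shape in miniature (threshold and variable names follow the diff; the collection itself is a stand-in):

	#include <stdio.h>

	#define INFLIGHT_TRIGGER_GC 16000

	static unsigned int tot_inflight;
	static int gc_in_progress;

	static void run_gc(void)
	{
		gc_in_progress = 1;
		tot_inflight = 0;	/* stand-in for the real collection */
		gc_in_progress = 0;
	}

	static void wait_for_gc(void)
	{
		/* force a collection right now if the backlog is insane */
		if (tot_inflight > INFLIGHT_TRIGGER_GC && !gc_in_progress)
			run_gc();
		/* (the kernel then sleeps until gc_in_progress is false) */
	}

	int main(void)
	{
		tot_inflight = 20000;
		wait_for_gc();
		printf("inflight after trigger: %u\n", tot_inflight);
		return 0;
	}
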
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 771bab00754b..55187c8f6420 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -61,6 +61,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 	while (len > 0) {
 		switch (*p & X25_FAC_CLASS_MASK) {
 		case X25_FAC_CLASS_A:
+			if (len < 2)
+				return 0;
 			switch (*p) {
 			case X25_FAC_REVERSE:
 				if((p[1] & 0x81) == 0x81) {
@@ -104,6 +106,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			len -= 2;
 			break;
 		case X25_FAC_CLASS_B:
+			if (len < 3)
+				return 0;
 			switch (*p) {
 			case X25_FAC_PACKET_SIZE:
 				facilities->pacsize_in = p[1];
@@ -125,6 +129,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			len -= 3;
 			break;
 		case X25_FAC_CLASS_C:
+			if (len < 4)
+				return 0;
 			printk(KERN_DEBUG "X.25: unknown facility %02X, "
 			       "values %02X, %02X, %02X\n",
 			       p[0], p[1], p[2], p[3]);
@@ -132,26 +138,26 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			len -= 4;
 			break;
 		case X25_FAC_CLASS_D:
+			if (len < p[1] + 2)
+				return 0;
 			switch (*p) {
 			case X25_FAC_CALLING_AE:
-				if (p[1] > X25_MAX_DTE_FACIL_LEN)
-					break;
+				if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+					return 0;
 				dte_facs->calling_len = p[2];
 				memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
 				*vc_fac_mask |= X25_MASK_CALLING_AE;
 				break;
 			case X25_FAC_CALLED_AE:
-				if (p[1] > X25_MAX_DTE_FACIL_LEN)
-					break;
+				if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+					return 0;
 				dte_facs->called_len = p[2];
 				memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
 				*vc_fac_mask |= X25_MASK_CALLED_AE;
 				break;
 			default:
 				printk(KERN_DEBUG "X.25: unknown facility %02X,"
-					"length %d, values %02X, %02X, "
-					"%02X, %02X\n",
-					p[0], p[1], p[2], p[3], p[4], p[5]);
+					"length %d\n", p[0], p[1]);
 				break;
 			}
 			len -= p[1] + 2;
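
The x25_parse_facilities() fixes above follow one rule: before touching p[1], p[2] or p[3], prove that many bytes are actually left, and abort the whole parse (return 0) on malformed input rather than skipping by an attacker-supplied length. A self-contained bounds-checked TLV walk in the same spirit (classes and layout simplified):

	#include <stdio.h>

	/* each record: 1 type byte, 1 length byte, then 'length' value bytes */
	static int parse_tlv(const unsigned char *p, int len)
	{
		while (len > 0) {
			if (len < 2)
				return 0;	/* no room for type + length */
			if (p[1] + 2 > len)
				return 0;	/* value would overrun the buffer */
			printf("type %02x, %d value bytes\n", p[0], p[1]);
			len -= p[1] + 2;
			p += p[1] + 2;
		}
		return 1;
	}

	int main(void)
	{
		/* last record claims 9 value bytes but only 1 remains */
		const unsigned char bad[] = { 0x01, 0x02, 0xaa, 0xbb, 0x02, 0x09, 0xcc };

		printf("parse: %s\n", parse_tlv(bad, sizeof(bad)) ? "ok" : "rejected");
		return 0;
	}
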
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 63178961efac..f729f022be69 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -119,6 +119,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
119 &x25->vc_facil_mask); 119 &x25->vc_facil_mask);
120 if (len > 0) 120 if (len > 0)
121 skb_pull(skb, len); 121 skb_pull(skb, len);
122 else
123 return -1;
122 /* 124 /*
123 * Copy any Call User Data. 125 * Copy any Call User Data.
124 */ 126 */
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 73e7b954ad28..b25c6463c3e9 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -394,6 +394,7 @@ void __exit x25_link_free(void)
 	list_for_each_safe(entry, tmp, &x25_neigh_list) {
 		nb = list_entry(entry, struct x25_neigh, node);
 		__x25_remove_neigh(nb);
+		dev_put(nb->dev);
 	}
 	write_unlock_bh(&x25_neigh_list_lock);
 }
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index a2023ec52329..1e98bc0fe0a5 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)
19 if (sz <= PAGE_SIZE) 19 if (sz <= PAGE_SIZE)
20 n = kzalloc(sz, GFP_KERNEL); 20 n = kzalloc(sz, GFP_KERNEL);
21 else if (hashdist) 21 else if (hashdist)
22 n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 22 n = vzalloc(sz);
23 else 23 else
24 n = (struct hlist_head *) 24 n = (struct hlist_head *)
25 __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 25 __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
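
vzalloc(sz) is shorthand for exactly the call it replaces, __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL), so the hunk is a readability cleanup rather than a behavior change. The allocate-and-zero contract in user-space terms:

	#include <stdlib.h>

	/* calloc plays the role of vzalloc here: size plus guaranteed zeroing */
	static void *zalloc(size_t sz)
	{
		return calloc(1, sz);
	}

	int main(void)
	{
		unsigned char *p = zalloc(4096);
		int ok = p && p[0] == 0 && p[4095] == 0;

		free(p);
		return !ok;
	}
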
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index eb96ce52f178..220ebc05c7af 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1268,7 +1268,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
 
 	return xc;
 error:
-	kfree(xc);
+	xfrm_state_put(xc);
 	return NULL;
 }
 EXPORT_SYMBOL(xfrm_state_migrate);
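
The last hunk matters because xfrm_state_migrate() reaches its error label only after the clone has been set up as a refcounted object; the usual reason for such a change is that a bare kfree() would skip the state's destructor and leak whatever the clone already owns, while xfrm_state_put() drops the reference and lets the normal teardown run. The general pattern, in miniature (names are illustrative):

	#include <stdlib.h>
	#include <string.h>

	struct state {
		int refcnt;
		char *owned;	/* resource the destructor must release */
	};

	static struct state *state_new(void)
	{
		struct state *s = calloc(1, sizeof(*s));

		if (!s)
			return NULL;
		s->refcnt = 1;
		s->owned = strdup("keys");	/* acquired during init */
		return s;
	}

	/* the only correct way out once initialization has begun */
	static void state_put(struct state *s)
	{
		if (--s->refcnt == 0) {
			free(s->owned);	/* a bare free(s) would leak this */
			free(s);
		}
	}

	int main(void)
	{
		struct state *s = state_new();

		if (!s)
			return 1;
		/* error path after partial setup: put, don't free */
		state_put(s);
		return 0;
	}
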