aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c18
-rw-r--r--net/802/stp.c4
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/9p/client.c178
-rw-r--r--net/9p/protocol.c5
-rw-r--r--net/9p/trans_virtio.c76
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/bluetooth/hci_event.c6
-rw-r--r--net/bluetooth/hidp/Kconfig2
-rw-r--r--net/bluetooth/l2cap.c8
-rw-r--r--net/bluetooth/rfcomm/core.c13
-rw-r--r--net/caif/caif_config_util.c13
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/caif/caif_socket.c45
-rw-r--r--net/caif/cfcnfg.c17
-rw-r--r--net/caif/cfctrl.c3
-rw-r--r--net/caif/cfdbgl.c14
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/ceph/Makefile22
-rw-r--r--net/ceph/buffer.c2
-rw-r--r--net/ceph/messenger.c13
-rw-r--r--net/ceph/osd_client.c25
-rw-r--r--net/ceph/pagevec.c3
-rw-r--r--net/compat.c10
-rw-r--r--net/core/dev.c40
-rw-r--r--net/core/dst.c1
-rw-r--r--net/core/fib_rules.c21
-rw-r--r--net/core/filter.c70
-rw-r--r--net/core/iovec.c20
-rw-r--r--net/core/net-sysfs.c26
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/pktgen.c41
-rw-r--r--net/core/request_sock.c4
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/sysctl_net_core.c3
-rw-r--r--net/dccp/ccid.h34
-rw-r--r--net/dccp/ccids/ccid2.c23
-rw-r--r--net/dccp/ccids/ccid2.h5
-rw-r--r--net/dccp/ccids/ccid3.c12
-rw-r--r--net/dccp/dccp.h5
-rw-r--r--net/dccp/input.c3
-rw-r--r--net/dccp/output.c209
-rw-r--r--net/dccp/proto.c21
-rw-r--r--net/dccp/timer.c27
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/decnet/sysctl_net_decnet.c4
-rw-r--r--net/econet/af_econet.c91
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_hash.c54
-rw-r--r--net/ipv4/fib_lookup.h5
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/gre.c5
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_diag.c27
-rw-r--r--net/ipv4/inet_hashtables.c3
-rw-r--r--net/ipv4/inetpeer.c138
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_sockglue.c10
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c40
-rw-r--r--net/ipv4/proc.c8
-rw-r--r--net/ipv4/protocol.c8
-rw-r--r--net/ipv4/route.c75
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/tunnel4.c29
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/addrconf.c68
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/netfilter/Kconfig5
-rw-r--r--net/ipv6/netfilter/Makefile5
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c7
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/protocol.c8
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/ipv6/tunnel6.c24
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/irda/af_irda.c1
-rw-r--r--net/irda/irnet/irnet_ppp.c1
-rw-r--r--net/irda/irttp.c30
-rw-r--r--net/iucv/iucv.c3
-rw-r--r--net/l2tp/l2tp_core.c53
-rw-r--r--net/l2tp/l2tp_core.h33
-rw-r--r--net/l2tp/l2tp_debugfs.c2
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/mac80211/Kconfig2
-rw-r--r--net/mac80211/debugfs_key.c6
-rw-r--r--net/mac80211/ibss.c1
-rw-r--r--net/mac80211/iface.c6
-rw-r--r--net/mac80211/main.c13
-rw-r--r--net/mac80211/rate.c3
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/ipvs/Kconfig1
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c6
-rw-r--r--net/netfilter/xt_TPROXY.c10
-rw-r--r--net/netfilter/xt_socket.c19
-rw-r--r--net/netlink/af_netlink.c65
-rw-r--r--net/packet/af_packet.c7
-rw-r--r--net/rds/loop.c4
-rw-r--r--net/rds/message.c7
-rw-r--r--net/rds/rdma.c128
-rw-r--r--net/rds/send.c4
-rw-r--r--net/rds/tcp.c6
-rw-r--r--net/sched/cls_basic.c4
-rw-r--r--net/sched/cls_cgroup.c2
-rw-r--r--net/sched/em_text.c3
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/sctp/sysctl.c4
-rw-r--r--net/socket.c20
-rw-r--r--net/sunrpc/Kconfig19
-rw-r--r--net/sunrpc/auth.c4
-rw-r--r--net/sunrpc/auth_generic.c2
-rw-r--r--net/sunrpc/auth_gss/Makefile5
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c2
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c247
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_seal.c186
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_token.c267
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_unseal.c127
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c51
-rw-r--r--net/sunrpc/cache.c288
-rw-r--r--net/sunrpc/clnt.c27
-rw-r--r--net/sunrpc/netns.h19
-rw-r--r--net/sunrpc/rpc_pipe.c19
-rw-r--r--net/sunrpc/rpcb_clnt.c60
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/stats.c47
-rw-r--r--net/sunrpc/sunrpc_syms.c58
-rw-r--r--net/sunrpc/svc.c3
-rw-r--r--net/sunrpc/svc_xprt.c60
-rw-r--r--net/sunrpc/svcauth_unix.c194
-rw-r--r--net/sunrpc/svcsock.c27
-rw-r--r--net/sunrpc/xdr.c61
-rw-r--r--net/sunrpc/xprt.c39
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c11
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c49
-rw-r--r--net/sunrpc/xprtrdma/transport.c25
-rw-r--r--net/sunrpc/xprtsock.c358
-rw-r--r--net/tipc/socket.c1
-rw-r--r--net/unix/af_unix.c51
-rw-r--r--net/unix/garbage.c9
-rw-r--r--net/wireless/chan.c54
-rw-r--r--net/wireless/nl80211.c4
-rw-r--r--net/wireless/reg.c2
-rw-r--r--net/x25/x25_facilities.c20
-rw-r--r--net/x25/x25_in.c2
-rw-r--r--net/xfrm/xfrm_hash.c2
162 files changed, 2377 insertions, 2384 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 941f2a324d3a..c1df2dad8c6b 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -346,8 +346,8 @@ int garp_request_join(const struct net_device *dev,
346 const struct garp_application *appl, 346 const struct garp_application *appl,
347 const void *data, u8 len, u8 type) 347 const void *data, u8 len, u8 type)
348{ 348{
349 struct garp_port *port = dev->garp_port; 349 struct garp_port *port = rtnl_dereference(dev->garp_port);
350 struct garp_applicant *app = port->applicants[appl->type]; 350 struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
351 struct garp_attr *attr; 351 struct garp_attr *attr;
352 352
353 spin_lock_bh(&app->lock); 353 spin_lock_bh(&app->lock);
@@ -366,8 +366,8 @@ void garp_request_leave(const struct net_device *dev,
366 const struct garp_application *appl, 366 const struct garp_application *appl,
367 const void *data, u8 len, u8 type) 367 const void *data, u8 len, u8 type)
368{ 368{
369 struct garp_port *port = dev->garp_port; 369 struct garp_port *port = rtnl_dereference(dev->garp_port);
370 struct garp_applicant *app = port->applicants[appl->type]; 370 struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
371 struct garp_attr *attr; 371 struct garp_attr *attr;
372 372
373 spin_lock_bh(&app->lock); 373 spin_lock_bh(&app->lock);
@@ -546,11 +546,11 @@ static int garp_init_port(struct net_device *dev)
546 546
547static void garp_release_port(struct net_device *dev) 547static void garp_release_port(struct net_device *dev)
548{ 548{
549 struct garp_port *port = dev->garp_port; 549 struct garp_port *port = rtnl_dereference(dev->garp_port);
550 unsigned int i; 550 unsigned int i;
551 551
552 for (i = 0; i <= GARP_APPLICATION_MAX; i++) { 552 for (i = 0; i <= GARP_APPLICATION_MAX; i++) {
553 if (port->applicants[i]) 553 if (rtnl_dereference(port->applicants[i]))
554 return; 554 return;
555 } 555 }
556 rcu_assign_pointer(dev->garp_port, NULL); 556 rcu_assign_pointer(dev->garp_port, NULL);
@@ -565,7 +565,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
565 565
566 ASSERT_RTNL(); 566 ASSERT_RTNL();
567 567
568 if (!dev->garp_port) { 568 if (!rtnl_dereference(dev->garp_port)) {
569 err = garp_init_port(dev); 569 err = garp_init_port(dev);
570 if (err < 0) 570 if (err < 0)
571 goto err1; 571 goto err1;
@@ -601,8 +601,8 @@ EXPORT_SYMBOL_GPL(garp_init_applicant);
601 601
602void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl) 602void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl)
603{ 603{
604 struct garp_port *port = dev->garp_port; 604 struct garp_port *port = rtnl_dereference(dev->garp_port);
605 struct garp_applicant *app = port->applicants[appl->type]; 605 struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
606 606
607 ASSERT_RTNL(); 607 ASSERT_RTNL();
608 608
diff --git a/net/802/stp.c b/net/802/stp.c
index 53c8f77f0ccd..978c30b1b36b 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -21,8 +21,8 @@
21#define GARP_ADDR_MAX 0x2F 21#define GARP_ADDR_MAX 0x2F
22#define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN) 22#define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN)
23 23
24static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly; 24static const struct stp_proto __rcu *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
25static const struct stp_proto *stp_proto __read_mostly; 25static const struct stp_proto __rcu *stp_proto __read_mostly;
26 26
27static struct llc_sap *sap __read_mostly; 27static struct llc_sap *sap __read_mostly;
28static unsigned int sap_registered; 28static unsigned int sap_registered;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 05b867e43757..52077ca22072 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -112,7 +112,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
112 112
113 ASSERT_RTNL(); 113 ASSERT_RTNL();
114 114
115 grp = real_dev->vlgrp; 115 grp = rtnl_dereference(real_dev->vlgrp);
116 BUG_ON(!grp); 116 BUG_ON(!grp);
117 117
118 /* Take it out of our own structures, but be sure to interlock with 118 /* Take it out of our own structures, but be sure to interlock with
@@ -177,7 +177,7 @@ int register_vlan_dev(struct net_device *dev)
177 struct vlan_group *grp, *ngrp = NULL; 177 struct vlan_group *grp, *ngrp = NULL;
178 int err; 178 int err;
179 179
180 grp = real_dev->vlgrp; 180 grp = rtnl_dereference(real_dev->vlgrp);
181 if (!grp) { 181 if (!grp) {
182 ngrp = grp = vlan_group_alloc(real_dev); 182 ngrp = grp = vlan_group_alloc(real_dev);
183 if (!grp) 183 if (!grp)
@@ -385,7 +385,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
385 dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0); 385 dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
386 } 386 }
387 387
388 grp = dev->vlgrp; 388 grp = rtnl_dereference(dev->vlgrp);
389 if (!grp) 389 if (!grp)
390 goto out; 390 goto out;
391 391
diff --git a/net/9p/client.c b/net/9p/client.c
index 83bf0541d66f..a848bca9fbff 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -450,32 +450,43 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
450 return err; 450 return err;
451 } 451 }
452 452
453 if (type == P9_RERROR) { 453 if (type == P9_RERROR || type == P9_RLERROR) {
454 int ecode; 454 int ecode;
455 char *ename;
456 455
457 err = p9pdu_readf(req->rc, c->proto_version, "s?d", 456 if (!p9_is_proto_dotl(c)) {
458 &ename, &ecode); 457 char *ename;
459 if (err) {
460 P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
461 err);
462 return err;
463 }
464 458
465 if (p9_is_proto_dotu(c) || 459 err = p9pdu_readf(req->rc, c->proto_version, "s?d",
466 p9_is_proto_dotl(c)) 460 &ename, &ecode);
467 err = -ecode; 461 if (err)
462 goto out_err;
463
464 if (p9_is_proto_dotu(c))
465 err = -ecode;
466
467 if (!err || !IS_ERR_VALUE(err)) {
468 err = p9_errstr2errno(ename, strlen(ename));
469
470 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
468 471
469 if (!err || !IS_ERR_VALUE(err)) 472 kfree(ename);
470 err = p9_errstr2errno(ename, strlen(ename)); 473 }
474 } else {
475 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
476 err = -ecode;
471 477
472 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); 478 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
479 }
473 480
474 kfree(ename);
475 } else 481 } else
476 err = 0; 482 err = 0;
477 483
478 return err; 484 return err;
485
486out_err:
487 P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
488
489 return err;
479} 490}
480 491
481/** 492/**
@@ -568,11 +579,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
568 va_start(ap, fmt); 579 va_start(ap, fmt);
569 err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); 580 err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
570 va_end(ap); 581 va_end(ap);
582 if (err)
583 goto reterr;
571 p9pdu_finalize(req->tc); 584 p9pdu_finalize(req->tc);
572 585
573 err = c->trans_mod->request(c, req); 586 err = c->trans_mod->request(c, req);
574 if (err < 0) { 587 if (err < 0) {
575 c->status = Disconnected; 588 if (err != -ERESTARTSYS)
589 c->status = Disconnected;
576 goto reterr; 590 goto reterr;
577 } 591 }
578 592
@@ -1151,12 +1165,44 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
1151} 1165}
1152EXPORT_SYMBOL(p9_client_link); 1166EXPORT_SYMBOL(p9_client_link);
1153 1167
1168int p9_client_fsync(struct p9_fid *fid, int datasync)
1169{
1170 int err;
1171 struct p9_client *clnt;
1172 struct p9_req_t *req;
1173
1174 P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
1175 fid->fid, datasync);
1176 err = 0;
1177 clnt = fid->clnt;
1178
1179 req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
1180 if (IS_ERR(req)) {
1181 err = PTR_ERR(req);
1182 goto error;
1183 }
1184
1185 P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
1186
1187 p9_free_req(clnt, req);
1188
1189error:
1190 return err;
1191}
1192EXPORT_SYMBOL(p9_client_fsync);
1193
1154int p9_client_clunk(struct p9_fid *fid) 1194int p9_client_clunk(struct p9_fid *fid)
1155{ 1195{
1156 int err; 1196 int err;
1157 struct p9_client *clnt; 1197 struct p9_client *clnt;
1158 struct p9_req_t *req; 1198 struct p9_req_t *req;
1159 1199
1200 if (!fid) {
1201 P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n");
1202 dump_stack();
1203 return 0;
1204 }
1205
1160 P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); 1206 P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
1161 err = 0; 1207 err = 0;
1162 clnt = fid->clnt; 1208 clnt = fid->clnt;
@@ -1240,16 +1286,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1240 1286
1241 if (data) { 1287 if (data) {
1242 memmove(data, dataptr, count); 1288 memmove(data, dataptr, count);
1243 } 1289 } else {
1244
1245 if (udata) {
1246 err = copy_to_user(udata, dataptr, count); 1290 err = copy_to_user(udata, dataptr, count);
1247 if (err) { 1291 if (err) {
1248 err = -EFAULT; 1292 err = -EFAULT;
1249 goto free_and_error; 1293 goto free_and_error;
1250 } 1294 }
1251 } 1295 }
1252
1253 p9_free_req(clnt, req); 1296 p9_free_req(clnt, req);
1254 return count; 1297 return count;
1255 1298
@@ -1761,3 +1804,96 @@ error:
1761 1804
1762} 1805}
1763EXPORT_SYMBOL(p9_client_mkdir_dotl); 1806EXPORT_SYMBOL(p9_client_mkdir_dotl);
1807
1808int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
1809{
1810 int err;
1811 struct p9_client *clnt;
1812 struct p9_req_t *req;
1813
1814 err = 0;
1815 clnt = fid->clnt;
1816 P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "
1817 "start %lld length %lld proc_id %d client_id %s\n",
1818 fid->fid, flock->type, flock->flags, flock->start,
1819 flock->length, flock->proc_id, flock->client_id);
1820
1821 req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type,
1822 flock->flags, flock->start, flock->length,
1823 flock->proc_id, flock->client_id);
1824
1825 if (IS_ERR(req))
1826 return PTR_ERR(req);
1827
1828 err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
1829 if (err) {
1830 p9pdu_dump(1, req->rc);
1831 goto error;
1832 }
1833 P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
1834error:
1835 p9_free_req(clnt, req);
1836 return err;
1837
1838}
1839EXPORT_SYMBOL(p9_client_lock_dotl);
1840
1841int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
1842{
1843 int err;
1844 struct p9_client *clnt;
1845 struct p9_req_t *req;
1846
1847 err = 0;
1848 clnt = fid->clnt;
1849 P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "
1850 "length %lld proc_id %d client_id %s\n", fid->fid, glock->type,
1851 glock->start, glock->length, glock->proc_id, glock->client_id);
1852
1853 req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type,
1854 glock->start, glock->length, glock->proc_id, glock->client_id);
1855
1856 if (IS_ERR(req))
1857 return PTR_ERR(req);
1858
1859 err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
1860 &glock->start, &glock->length, &glock->proc_id,
1861 &glock->client_id);
1862 if (err) {
1863 p9pdu_dump(1, req->rc);
1864 goto error;
1865 }
1866 P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
1867 "proc_id %d client_id %s\n", glock->type, glock->start,
1868 glock->length, glock->proc_id, glock->client_id);
1869error:
1870 p9_free_req(clnt, req);
1871 return err;
1872}
1873EXPORT_SYMBOL(p9_client_getlock_dotl);
1874
1875int p9_client_readlink(struct p9_fid *fid, char **target)
1876{
1877 int err;
1878 struct p9_client *clnt;
1879 struct p9_req_t *req;
1880
1881 err = 0;
1882 clnt = fid->clnt;
1883 P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
1884
1885 req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid);
1886 if (IS_ERR(req))
1887 return PTR_ERR(req);
1888
1889 err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
1890 if (err) {
1891 p9pdu_dump(1, req->rc);
1892 goto error;
1893 }
1894 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
1895error:
1896 p9_free_req(clnt, req);
1897 return err;
1898}
1899EXPORT_SYMBOL(p9_client_readlink);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 3acd3afb20c8..45c15f491401 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -122,9 +122,8 @@ static size_t
122pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) 122pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
123{ 123{
124 size_t len = MIN(pdu->capacity - pdu->size, size); 124 size_t len = MIN(pdu->capacity - pdu->size, size);
125 int err = copy_from_user(&pdu->sdata[pdu->size], udata, len); 125 if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
126 if (err) 126 len = 0;
127 printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
128 127
129 pdu->size += len; 128 pdu->size += len;
130 return size - len; 129 return size - len;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index b88515936e4b..c8f3f72ab20e 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -75,6 +75,8 @@ struct virtio_chan {
75 struct p9_client *client; 75 struct p9_client *client;
76 struct virtio_device *vdev; 76 struct virtio_device *vdev;
77 struct virtqueue *vq; 77 struct virtqueue *vq;
78 int ring_bufs_avail;
79 wait_queue_head_t *vc_wq;
78 80
79 /* Scatterlist: can be too big for stack. */ 81 /* Scatterlist: can be too big for stack. */
80 struct scatterlist sg[VIRTQUEUE_NUM]; 82 struct scatterlist sg[VIRTQUEUE_NUM];
@@ -134,16 +136,30 @@ static void req_done(struct virtqueue *vq)
134 struct p9_fcall *rc; 136 struct p9_fcall *rc;
135 unsigned int len; 137 unsigned int len;
136 struct p9_req_t *req; 138 struct p9_req_t *req;
139 unsigned long flags;
137 140
138 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); 141 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
139 142
140 while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) { 143 do {
141 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); 144 spin_lock_irqsave(&chan->lock, flags);
142 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); 145 rc = virtqueue_get_buf(chan->vq, &len);
143 req = p9_tag_lookup(chan->client, rc->tag); 146
144 req->status = REQ_STATUS_RCVD; 147 if (rc != NULL) {
145 p9_client_cb(chan->client, req); 148 if (!chan->ring_bufs_avail) {
146 } 149 chan->ring_bufs_avail = 1;
150 wake_up(chan->vc_wq);
151 }
152 spin_unlock_irqrestore(&chan->lock, flags);
153 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
154 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
155 rc->tag);
156 req = p9_tag_lookup(chan->client, rc->tag);
157 req->status = REQ_STATUS_RCVD;
158 p9_client_cb(chan->client, req);
159 } else {
160 spin_unlock_irqrestore(&chan->lock, flags);
161 }
162 } while (rc != NULL);
147} 163}
148 164
149/** 165/**
@@ -199,23 +215,43 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
199 int in, out; 215 int in, out;
200 struct virtio_chan *chan = client->trans; 216 struct virtio_chan *chan = client->trans;
201 char *rdata = (char *)req->rc+sizeof(struct p9_fcall); 217 char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
218 unsigned long flags;
219 int err;
202 220
203 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 221 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
204 222
223req_retry:
224 req->status = REQ_STATUS_SENT;
225
226 spin_lock_irqsave(&chan->lock, flags);
205 out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, 227 out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
206 req->tc->size); 228 req->tc->size);
207 in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, 229 in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
208 client->msize); 230 client->msize);
209 231
210 req->status = REQ_STATUS_SENT; 232 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
211 233 if (err < 0) {
212 if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) { 234 if (err == -ENOSPC) {
213 P9_DPRINTK(P9_DEBUG_TRANS, 235 chan->ring_bufs_avail = 0;
214 "9p debug: virtio rpc add_buf returned failure"); 236 spin_unlock_irqrestore(&chan->lock, flags);
215 return -EIO; 237 err = wait_event_interruptible(*chan->vc_wq,
238 chan->ring_bufs_avail);
239 if (err == -ERESTARTSYS)
240 return err;
241
242 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
243 goto req_retry;
244 } else {
245 spin_unlock_irqrestore(&chan->lock, flags);
246 P9_DPRINTK(P9_DEBUG_TRANS,
247 "9p debug: "
248 "virtio rpc add_buf returned failure");
249 return -EIO;
250 }
216 } 251 }
217 252
218 virtqueue_kick(chan->vq); 253 virtqueue_kick(chan->vq);
254 spin_unlock_irqrestore(&chan->lock, flags);
219 255
220 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); 256 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
221 return 0; 257 return 0;
@@ -290,14 +326,23 @@ static int p9_virtio_probe(struct virtio_device *vdev)
290 chan->tag_len = tag_len; 326 chan->tag_len = tag_len;
291 err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 327 err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
292 if (err) { 328 if (err) {
293 kfree(tag); 329 goto out_free_tag;
294 goto out_free_vq;
295 } 330 }
331 chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
332 if (!chan->vc_wq) {
333 err = -ENOMEM;
334 goto out_free_tag;
335 }
336 init_waitqueue_head(chan->vc_wq);
337 chan->ring_bufs_avail = 1;
338
296 mutex_lock(&virtio_9p_lock); 339 mutex_lock(&virtio_9p_lock);
297 list_add_tail(&chan->chan_list, &virtio_chan_list); 340 list_add_tail(&chan->chan_list, &virtio_chan_list);
298 mutex_unlock(&virtio_9p_lock); 341 mutex_unlock(&virtio_9p_lock);
299 return 0; 342 return 0;
300 343
344out_free_tag:
345 kfree(tag);
301out_free_vq: 346out_free_vq:
302 vdev->config->del_vqs(vdev); 347 vdev->config->del_vqs(vdev);
303 kfree(chan); 348 kfree(chan);
@@ -371,6 +416,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
371 mutex_unlock(&virtio_9p_lock); 416 mutex_unlock(&virtio_9p_lock);
372 sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); 417 sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
373 kfree(chan->tag); 418 kfree(chan->tag);
419 kfree(chan->vc_wq);
374 kfree(chan); 420 kfree(chan);
375 421
376} 422}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 26eaebf4aaa9..bb86d2932394 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1392,6 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
1392 ax25_cb *ax25; 1392 ax25_cb *ax25;
1393 int err = 0; 1393 int err = 0;
1394 1394
1395 memset(fsa, 0, sizeof(fsa));
1395 lock_sock(sk); 1396 lock_sock(sk);
1396 ax25 = ax25_sk(sk); 1397 ax25 = ax25_sk(sk);
1397 1398
@@ -1403,7 +1404,6 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
1403 1404
1404 fsa->fsa_ax25.sax25_family = AF_AX25; 1405 fsa->fsa_ax25.sax25_family = AF_AX25;
1405 fsa->fsa_ax25.sax25_call = ax25->dest_addr; 1406 fsa->fsa_ax25.sax25_call = ax25->dest_addr;
1406 fsa->fsa_ax25.sax25_ndigis = 0;
1407 1407
1408 if (ax25->digipeat != NULL) { 1408 if (ax25->digipeat != NULL) {
1409 ndigi = ax25->digipeat->ndigi; 1409 ndigi = ax25->digipeat->ndigi;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index bfef5bae0b3a..84093b0000b9 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1175,6 +1175,12 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
1175 hci_send_cmd(hdev, 1175 hci_send_cmd(hdev,
1176 HCI_OP_READ_REMOTE_EXT_FEATURES, 1176 HCI_OP_READ_REMOTE_EXT_FEATURES,
1177 sizeof(cp), &cp); 1177 sizeof(cp), &cp);
1178 } else if (!ev->status && conn->out &&
1179 conn->sec_level == BT_SECURITY_HIGH) {
1180 struct hci_cp_auth_requested cp;
1181 cp.handle = ev->handle;
1182 hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
1183 sizeof(cp), &cp);
1178 } else { 1184 } else {
1179 conn->state = BT_CONNECTED; 1185 conn->state = BT_CONNECTED;
1180 hci_proto_connect_cfm(conn, ev->status); 1186 hci_proto_connect_cfm(conn, ev->status);
diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig
index 98fdfa1fbddd..86a91543172a 100644
--- a/net/bluetooth/hidp/Kconfig
+++ b/net/bluetooth/hidp/Kconfig
@@ -1,6 +1,6 @@
1config BT_HIDP 1config BT_HIDP
2 tristate "HIDP protocol support" 2 tristate "HIDP protocol support"
3 depends on BT && BT_L2CAP && INPUT 3 depends on BT && BT_L2CAP && INPUT && HID_SUPPORT
4 select HID 4 select HID
5 help 5 help
6 HIDP (Human Interface Device Protocol) is a transport layer 6 HIDP (Human Interface Device Protocol) is a transport layer
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index daa7a988d9a6..cd8f6ea03841 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2421,11 +2421,11 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned
2421 break; 2421 break;
2422 2422
2423 case 2: 2423 case 2:
2424 *val = __le16_to_cpu(*((__le16 *) opt->val)); 2424 *val = get_unaligned_le16(opt->val);
2425 break; 2425 break;
2426 2426
2427 case 4: 2427 case 4:
2428 *val = __le32_to_cpu(*((__le32 *) opt->val)); 2428 *val = get_unaligned_le32(opt->val);
2429 break; 2429 break;
2430 2430
2431 default: 2431 default:
@@ -2452,11 +2452,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
2452 break; 2452 break;
2453 2453
2454 case 2: 2454 case 2:
2455 *((__le16 *) opt->val) = cpu_to_le16(val); 2455 put_unaligned_le16(val, opt->val);
2456 break; 2456 break;
2457 2457
2458 case 4: 2458 case 4:
2459 *((__le32 *) opt->val) = cpu_to_le32(val); 2459 put_unaligned_le32(val, opt->val);
2460 break; 2460 break;
2461 2461
2462 default: 2462 default:
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 39a5d87e33b4..fa642aa652bd 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -79,7 +79,10 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr);
79 79
80static void rfcomm_process_connect(struct rfcomm_session *s); 80static void rfcomm_process_connect(struct rfcomm_session *s);
81 81
82static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err); 82static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
83 bdaddr_t *dst,
84 u8 sec_level,
85 int *err);
83static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst); 86static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst);
84static void rfcomm_session_del(struct rfcomm_session *s); 87static void rfcomm_session_del(struct rfcomm_session *s);
85 88
@@ -401,7 +404,7 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst,
401 404
402 s = rfcomm_session_get(src, dst); 405 s = rfcomm_session_get(src, dst);
403 if (!s) { 406 if (!s) {
404 s = rfcomm_session_create(src, dst, &err); 407 s = rfcomm_session_create(src, dst, d->sec_level, &err);
405 if (!s) 408 if (!s)
406 return err; 409 return err;
407 } 410 }
@@ -679,7 +682,10 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err)
679 rfcomm_session_put(s); 682 rfcomm_session_put(s);
680} 683}
681 684
682static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err) 685static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
686 bdaddr_t *dst,
687 u8 sec_level,
688 int *err)
683{ 689{
684 struct rfcomm_session *s = NULL; 690 struct rfcomm_session *s = NULL;
685 struct sockaddr_l2 addr; 691 struct sockaddr_l2 addr;
@@ -704,6 +710,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
704 sk = sock->sk; 710 sk = sock->sk;
705 lock_sock(sk); 711 lock_sock(sk);
706 l2cap_pi(sk)->imtu = l2cap_mtu; 712 l2cap_pi(sk)->imtu = l2cap_mtu;
713 l2cap_pi(sk)->sec_level = sec_level;
707 if (l2cap_ertm) 714 if (l2cap_ertm)
708 l2cap_pi(sk)->mode = L2CAP_MODE_ERTM; 715 l2cap_pi(sk)->mode = L2CAP_MODE_ERTM;
709 release_sock(sk); 716 release_sock(sk);
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 76ae68303d3a..d522d8c1703e 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -16,11 +16,18 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
16{ 16{
17 struct dev_info *dev_info; 17 struct dev_info *dev_info;
18 enum cfcnfg_phy_preference pref; 18 enum cfcnfg_phy_preference pref;
19 int res;
20
19 memset(l, 0, sizeof(*l)); 21 memset(l, 0, sizeof(*l));
20 l->priority = s->priority; 22 /* In caif protocol low value is high priority */
23 l->priority = CAIF_PRIO_MAX - s->priority + 1;
21 24
22 if (s->link_name[0] != '\0') 25 if (s->ifindex != 0){
23 l->phyid = cfcnfg_get_named(cnfg, s->link_name); 26 res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
27 if (res < 0)
28 return res;
29 l->phyid = res;
30 }
24 else { 31 else {
25 switch (s->link_selector) { 32 switch (s->link_selector) {
26 case CAIF_LINK_HIGH_BANDW: 33 case CAIF_LINK_HIGH_BANDW:
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index b99369a055d1..a42a408306e4 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -307,6 +307,8 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
307 307
308 case NETDEV_UNREGISTER: 308 case NETDEV_UNREGISTER:
309 caifd = caif_get(dev); 309 caifd = caif_get(dev);
310 if (caifd == NULL)
311 break;
310 netdev_info(dev, "unregister\n"); 312 netdev_info(dev, "unregister\n");
311 atomic_set(&caifd->state, what); 313 atomic_set(&caifd->state, what);
312 caif_device_destroy(dev); 314 caif_device_destroy(dev);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2eca2dd0000f..1bf0cf503796 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -716,8 +716,7 @@ static int setsockopt(struct socket *sock,
716{ 716{
717 struct sock *sk = sock->sk; 717 struct sock *sk = sock->sk;
718 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 718 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
719 int prio, linksel; 719 int linksel;
720 struct ifreq ifreq;
721 720
722 if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED) 721 if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
723 return -ENOPROTOOPT; 722 return -ENOPROTOOPT;
@@ -735,33 +734,6 @@ static int setsockopt(struct socket *sock,
735 release_sock(&cf_sk->sk); 734 release_sock(&cf_sk->sk);
736 return 0; 735 return 0;
737 736
738 case SO_PRIORITY:
739 if (lvl != SOL_SOCKET)
740 goto bad_sol;
741 if (ol < sizeof(int))
742 return -EINVAL;
743 if (copy_from_user(&prio, ov, sizeof(int)))
744 return -EINVAL;
745 lock_sock(&(cf_sk->sk));
746 cf_sk->conn_req.priority = prio;
747 release_sock(&cf_sk->sk);
748 return 0;
749
750 case SO_BINDTODEVICE:
751 if (lvl != SOL_SOCKET)
752 goto bad_sol;
753 if (ol < sizeof(struct ifreq))
754 return -EINVAL;
755 if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
756 return -EFAULT;
757 lock_sock(&(cf_sk->sk));
758 strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
759 sizeof(cf_sk->conn_req.link_name));
760 cf_sk->conn_req.link_name
761 [sizeof(cf_sk->conn_req.link_name)-1] = 0;
762 release_sock(&cf_sk->sk);
763 return 0;
764
765 case CAIFSO_REQ_PARAM: 737 case CAIFSO_REQ_PARAM:
766 if (lvl != SOL_CAIF) 738 if (lvl != SOL_CAIF)
767 goto bad_sol; 739 goto bad_sol;
@@ -880,6 +852,18 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
880 sock->state = SS_CONNECTING; 852 sock->state = SS_CONNECTING;
881 sk->sk_state = CAIF_CONNECTING; 853 sk->sk_state = CAIF_CONNECTING;
882 854
855 /* Check priority value comming from socket */
856 /* if priority value is out of range it will be ajusted */
857 if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
858 cf_sk->conn_req.priority = CAIF_PRIO_MAX;
859 else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN)
860 cf_sk->conn_req.priority = CAIF_PRIO_MIN;
861 else
862 cf_sk->conn_req.priority = cf_sk->sk.sk_priority;
863
864 /*ifindex = id of the interface.*/
865 cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if;
866
883 dbfs_atomic_inc(&cnt.num_connect_req); 867 dbfs_atomic_inc(&cnt.num_connect_req);
884 cf_sk->layer.receive = caif_sktrecv_cb; 868 cf_sk->layer.receive = caif_sktrecv_cb;
885 err = caif_connect_client(&cf_sk->conn_req, 869 err = caif_connect_client(&cf_sk->conn_req,
@@ -905,6 +889,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
905 cf_sk->maxframe = mtu - (headroom + tailroom); 889 cf_sk->maxframe = mtu - (headroom + tailroom);
906 if (cf_sk->maxframe < 1) { 890 if (cf_sk->maxframe < 1) {
907 pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu); 891 pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
892 err = -ENODEV;
908 goto out; 893 goto out;
909 } 894 }
910 895
@@ -1142,7 +1127,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
1142 set_rx_flow_on(cf_sk); 1127 set_rx_flow_on(cf_sk);
1143 1128
1144 /* Set default options on configuration */ 1129 /* Set default options on configuration */
1145 cf_sk->conn_req.priority = CAIF_PRIO_NORMAL; 1130 cf_sk->sk.sk_priority= CAIF_PRIO_NORMAL;
1146 cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY; 1131 cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
1147 cf_sk->conn_req.protocol = protocol; 1132 cf_sk->conn_req.protocol = protocol;
1148 /* Increase the number of sockets created. */ 1133 /* Increase the number of sockets created. */
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 41adafd18914..21ede141018a 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -173,18 +173,15 @@ static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
173 return NULL; 173 return NULL;
174} 174}
175 175
176int cfcnfg_get_named(struct cfcnfg *cnfg, char *name) 176
177int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
177{ 178{
178 int i; 179 int i;
179 180 for (i = 0; i < MAX_PHY_LAYERS; i++)
180 /* Try to match with specified name */ 181 if (cnfg->phy_layers[i].frm_layer != NULL &&
181 for (i = 0; i < MAX_PHY_LAYERS; i++) { 182 cnfg->phy_layers[i].ifindex == ifi)
182 if (cnfg->phy_layers[i].frm_layer != NULL 183 return i;
183 && strcmp(cnfg->phy_layers[i].phy_layer->name, 184 return -ENODEV;
184 name) == 0)
185 return cnfg->phy_layers[i].frm_layer->id;
186 }
187 return 0;
188} 185}
189 186
190int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) 187int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 08f267a109aa..3cd8f978e309 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -361,11 +361,10 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
361 struct cfctrl_request_info *p, *tmp; 361 struct cfctrl_request_info *p, *tmp;
362 struct cfctrl *ctrl = container_obj(layr); 362 struct cfctrl *ctrl = container_obj(layr);
363 spin_lock(&ctrl->info_list_lock); 363 spin_lock(&ctrl->info_list_lock);
364 pr_warn("enter\n");
365 364
366 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 365 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
367 if (p->client_layer == adap_layer) { 366 if (p->client_layer == adap_layer) {
368 pr_warn("cancel req :%d\n", p->sequence_no); 367 pr_debug("cancel req :%d\n", p->sequence_no);
369 list_del(&p->list); 368 list_del(&p->list);
370 kfree(p); 369 kfree(p);
371 } 370 }
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 496fda9ac66f..11a2af4c162a 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -12,6 +12,8 @@
12#include <net/caif/cfsrvl.h> 12#include <net/caif/cfsrvl.h>
13#include <net/caif/cfpkt.h> 13#include <net/caif/cfpkt.h>
14 14
15#define container_obj(layr) ((struct cfsrvl *) layr)
16
15static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt); 17static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
16static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt); 18static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
17 19
@@ -38,5 +40,17 @@ static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
38 40
39static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt) 41static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
40{ 42{
43 struct cfsrvl *service = container_obj(layr);
44 struct caif_payload_info *info;
45 int ret;
46
47 if (!cfsrvl_ready(service, &ret))
48 return ret;
49
50 /* Add info for MUX-layer to route the packet out */
51 info = cfpkt_info(pkt);
52 info->channel_id = service->layer.id;
53 info->dev_info = &service->dev_info;
54
41 return layr->dn->transmit(layr->dn, pkt); 55 return layr->dn->transmit(layr->dn, pkt);
42} 56}
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index bde8481e8d25..e2fb5fa75795 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -193,7 +193,7 @@ out:
193 193
194static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt) 194static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
195{ 195{
196 caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size); 196 caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size);
197 197
198 /* Add info for MUX-layer to route the packet out. */ 198 /* Add info for MUX-layer to route the packet out. */
199 cfpkt_info(pkt)->channel_id = rfml->serv.layer.id; 199 cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 08ffe9e4be20..6faa8256e10c 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
125 struct list_head tx_ops; 125 struct list_head tx_ops;
126 unsigned long dropped_usr_msgs; 126 unsigned long dropped_usr_msgs;
127 struct proc_dir_entry *bcm_proc_read; 127 struct proc_dir_entry *bcm_proc_read;
128 char procname [9]; /* pointer printed in ASCII with \0 */ 128 char procname [20]; /* pointer printed in ASCII with \0 */
129}; 129};
130 130
131static inline struct bcm_sock *bcm_sk(const struct sock *sk) 131static inline struct bcm_sock *bcm_sk(const struct sock *sk)
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index aab1cabb8035..5f19415ec9c0 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,9 +1,6 @@
1# 1#
2# Makefile for CEPH filesystem. 2# Makefile for CEPH filesystem.
3# 3#
4
5ifneq ($(KERNELRELEASE),)
6
7obj-$(CONFIG_CEPH_LIB) += libceph.o 4obj-$(CONFIG_CEPH_LIB) += libceph.o
8 5
9libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ 6libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
@@ -16,22 +13,3 @@ libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
16 ceph_fs.o ceph_strings.o ceph_hash.o \ 13 ceph_fs.o ceph_strings.o ceph_hash.o \
17 pagevec.o 14 pagevec.o
18 15
19else
20#Otherwise we were called directly from the command
21# line; invoke the kernel build system.
22
23KERNELDIR ?= /lib/modules/$(shell uname -r)/build
24PWD := $(shell pwd)
25
26default: all
27
28all:
29 $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
30
31modules_install:
32 $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
33
34clean:
35 $(MAKE) -C $(KERNELDIR) M=$(PWD) clean
36
37endif
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 53d8abfa25d5..bf3e6a13c215 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -19,7 +19,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
19 if (b->vec.iov_base) { 19 if (b->vec.iov_base) {
20 b->is_vmalloc = false; 20 b->is_vmalloc = false;
21 } else { 21 } else {
22 b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); 22 b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
23 if (!b->vec.iov_base) { 23 if (!b->vec.iov_base) {
24 kfree(b); 24 kfree(b);
25 return NULL; 25 return NULL;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0e8157ee5d43..1c7a2ec4f3cc 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -540,8 +540,7 @@ static void prepare_write_message(struct ceph_connection *con)
540 /* initialize page iterator */ 540 /* initialize page iterator */
541 con->out_msg_pos.page = 0; 541 con->out_msg_pos.page = 0;
542 if (m->pages) 542 if (m->pages)
543 con->out_msg_pos.page_pos = 543 con->out_msg_pos.page_pos = m->page_alignment;
544 le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
545 else 544 else
546 con->out_msg_pos.page_pos = 0; 545 con->out_msg_pos.page_pos = 0;
547 con->out_msg_pos.data_pos = 0; 546 con->out_msg_pos.data_pos = 0;
@@ -1491,7 +1490,7 @@ static int read_partial_message(struct ceph_connection *con)
1491 struct ceph_msg *m = con->in_msg; 1490 struct ceph_msg *m = con->in_msg;
1492 int ret; 1491 int ret;
1493 int to, left; 1492 int to, left;
1494 unsigned front_len, middle_len, data_len, data_off; 1493 unsigned front_len, middle_len, data_len;
1495 int datacrc = con->msgr->nocrc; 1494 int datacrc = con->msgr->nocrc;
1496 int skip; 1495 int skip;
1497 u64 seq; 1496 u64 seq;
@@ -1527,19 +1526,17 @@ static int read_partial_message(struct ceph_connection *con)
1527 data_len = le32_to_cpu(con->in_hdr.data_len); 1526 data_len = le32_to_cpu(con->in_hdr.data_len);
1528 if (data_len > CEPH_MSG_MAX_DATA_LEN) 1527 if (data_len > CEPH_MSG_MAX_DATA_LEN)
1529 return -EIO; 1528 return -EIO;
1530 data_off = le16_to_cpu(con->in_hdr.data_off);
1531 1529
1532 /* verify seq# */ 1530 /* verify seq# */
1533 seq = le64_to_cpu(con->in_hdr.seq); 1531 seq = le64_to_cpu(con->in_hdr.seq);
1534 if ((s64)seq - (s64)con->in_seq < 1) { 1532 if ((s64)seq - (s64)con->in_seq < 1) {
1535 pr_info("skipping %s%lld %s seq %lld, expected %lld\n", 1533 pr_info("skipping %s%lld %s seq %lld expected %lld\n",
1536 ENTITY_NAME(con->peer_name), 1534 ENTITY_NAME(con->peer_name),
1537 ceph_pr_addr(&con->peer_addr.in_addr), 1535 ceph_pr_addr(&con->peer_addr.in_addr),
1538 seq, con->in_seq + 1); 1536 seq, con->in_seq + 1);
1539 con->in_base_pos = -front_len - middle_len - data_len - 1537 con->in_base_pos = -front_len - middle_len - data_len -
1540 sizeof(m->footer); 1538 sizeof(m->footer);
1541 con->in_tag = CEPH_MSGR_TAG_READY; 1539 con->in_tag = CEPH_MSGR_TAG_READY;
1542 con->in_seq++;
1543 return 0; 1540 return 0;
1544 } else if ((s64)seq - (s64)con->in_seq > 1) { 1541 } else if ((s64)seq - (s64)con->in_seq > 1) {
1545 pr_err("read_partial_message bad seq %lld expected %lld\n", 1542 pr_err("read_partial_message bad seq %lld expected %lld\n",
@@ -1576,7 +1573,7 @@ static int read_partial_message(struct ceph_connection *con)
1576 1573
1577 con->in_msg_pos.page = 0; 1574 con->in_msg_pos.page = 0;
1578 if (m->pages) 1575 if (m->pages)
1579 con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; 1576 con->in_msg_pos.page_pos = m->page_alignment;
1580 else 1577 else
1581 con->in_msg_pos.page_pos = 0; 1578 con->in_msg_pos.page_pos = 0;
1582 con->in_msg_pos.data_pos = 0; 1579 con->in_msg_pos.data_pos = 0;
@@ -2301,6 +2298,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
2301 2298
2302 /* data */ 2299 /* data */
2303 m->nr_pages = 0; 2300 m->nr_pages = 0;
2301 m->page_alignment = 0;
2304 m->pages = NULL; 2302 m->pages = NULL;
2305 m->pagelist = NULL; 2303 m->pagelist = NULL;
2306 m->bio = NULL; 2304 m->bio = NULL;
@@ -2370,6 +2368,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
2370 type, front_len); 2368 type, front_len);
2371 return NULL; 2369 return NULL;
2372 } 2370 }
2371 msg->page_alignment = le16_to_cpu(hdr->data_off);
2373 } 2372 }
2374 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 2373 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
2375 2374
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 79391994b3ed..3e20a122ffa2 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -71,6 +71,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
71 op->extent.length = objlen; 71 op->extent.length = objlen;
72 } 72 }
73 req->r_num_pages = calc_pages_for(off, *plen); 73 req->r_num_pages = calc_pages_for(off, *plen);
74 req->r_page_alignment = off & ~PAGE_MASK;
74 if (op->op == CEPH_OSD_OP_WRITE) 75 if (op->op == CEPH_OSD_OP_WRITE)
75 op->payload_len = *plen; 76 op->payload_len = *plen;
76 77
@@ -390,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
390 req->r_request->hdr.data_len = cpu_to_le32(data_len); 391 req->r_request->hdr.data_len = cpu_to_le32(data_len);
391 } 392 }
392 393
394 req->r_request->page_alignment = req->r_page_alignment;
395
393 BUG_ON(p > msg->front.iov_base + msg->front.iov_len); 396 BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
394 msg_size = p - msg->front.iov_base; 397 msg_size = p - msg->front.iov_base;
395 msg->front.iov_len = msg_size; 398 msg->front.iov_len = msg_size;
@@ -419,7 +422,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
419 u32 truncate_seq, 422 u32 truncate_seq,
420 u64 truncate_size, 423 u64 truncate_size,
421 struct timespec *mtime, 424 struct timespec *mtime,
422 bool use_mempool, int num_reply) 425 bool use_mempool, int num_reply,
426 int page_align)
423{ 427{
424 struct ceph_osd_req_op ops[3]; 428 struct ceph_osd_req_op ops[3];
425 struct ceph_osd_request *req; 429 struct ceph_osd_request *req;
@@ -447,6 +451,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
447 calc_layout(osdc, vino, layout, off, plen, req, ops); 451 calc_layout(osdc, vino, layout, off, plen, req, ops);
448 req->r_file_layout = *layout; /* keep a copy */ 452 req->r_file_layout = *layout; /* keep a copy */
449 453
454 /* in case it differs from natural alignment that calc_layout
455 filled in for us */
456 req->r_page_alignment = page_align;
457
450 ceph_osdc_build_request(req, off, plen, ops, 458 ceph_osdc_build_request(req, off, plen, ops,
451 snapc, 459 snapc,
452 mtime, 460 mtime,
@@ -1489,7 +1497,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
1489 struct ceph_vino vino, struct ceph_file_layout *layout, 1497 struct ceph_vino vino, struct ceph_file_layout *layout,
1490 u64 off, u64 *plen, 1498 u64 off, u64 *plen,
1491 u32 truncate_seq, u64 truncate_size, 1499 u32 truncate_seq, u64 truncate_size,
1492 struct page **pages, int num_pages) 1500 struct page **pages, int num_pages, int page_align)
1493{ 1501{
1494 struct ceph_osd_request *req; 1502 struct ceph_osd_request *req;
1495 int rc = 0; 1503 int rc = 0;
@@ -1499,15 +1507,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
1499 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1507 req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
1500 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 1508 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
1501 NULL, 0, truncate_seq, truncate_size, NULL, 1509 NULL, 0, truncate_seq, truncate_size, NULL,
1502 false, 1); 1510 false, 1, page_align);
1503 if (!req) 1511 if (!req)
1504 return -ENOMEM; 1512 return -ENOMEM;
1505 1513
1506 /* it may be a short read due to an object boundary */ 1514 /* it may be a short read due to an object boundary */
1507 req->r_pages = pages; 1515 req->r_pages = pages;
1508 1516
1509 dout("readpages final extent is %llu~%llu (%d pages)\n", 1517 dout("readpages final extent is %llu~%llu (%d pages align %d)\n",
1510 off, *plen, req->r_num_pages); 1518 off, *plen, req->r_num_pages, page_align);
1511 1519
1512 rc = ceph_osdc_start_request(osdc, req, false); 1520 rc = ceph_osdc_start_request(osdc, req, false);
1513 if (!rc) 1521 if (!rc)
@@ -1533,6 +1541,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
1533{ 1541{
1534 struct ceph_osd_request *req; 1542 struct ceph_osd_request *req;
1535 int rc = 0; 1543 int rc = 0;
1544 int page_align = off & ~PAGE_MASK;
1536 1545
1537 BUG_ON(vino.snap != CEPH_NOSNAP); 1546 BUG_ON(vino.snap != CEPH_NOSNAP);
1538 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1547 req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
@@ -1541,7 +1550,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
1541 CEPH_OSD_FLAG_WRITE, 1550 CEPH_OSD_FLAG_WRITE,
1542 snapc, do_sync, 1551 snapc, do_sync,
1543 truncate_seq, truncate_size, mtime, 1552 truncate_seq, truncate_size, mtime,
1544 nofail, 1); 1553 nofail, 1, page_align);
1545 if (!req) 1554 if (!req)
1546 return -ENOMEM; 1555 return -ENOMEM;
1547 1556
@@ -1638,8 +1647,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
1638 m = ceph_msg_get(req->r_reply); 1647 m = ceph_msg_get(req->r_reply);
1639 1648
1640 if (data_len > 0) { 1649 if (data_len > 0) {
1641 unsigned data_off = le16_to_cpu(hdr->data_off); 1650 int want = calc_pages_for(req->r_page_alignment, data_len);
1642 int want = calc_pages_for(data_off & ~PAGE_MASK, data_len);
1643 1651
1644 if (unlikely(req->r_num_pages < want)) { 1652 if (unlikely(req->r_num_pages < want)) {
1645 pr_warning("tid %lld reply %d > expected %d pages\n", 1653 pr_warning("tid %lld reply %d > expected %d pages\n",
@@ -1651,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
1651 } 1659 }
1652 m->pages = req->r_pages; 1660 m->pages = req->r_pages;
1653 m->nr_pages = req->r_num_pages; 1661 m->nr_pages = req->r_num_pages;
1662 m->page_alignment = req->r_page_alignment;
1654#ifdef CONFIG_BLOCK 1663#ifdef CONFIG_BLOCK
1655 m->bio = req->r_bio; 1664 m->bio = req->r_bio;
1656#endif 1665#endif
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 54caf0687155..ac34feeb2b3a 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -13,8 +13,7 @@
13 * build a vector of user pages 13 * build a vector of user pages
14 */ 14 */
15struct page **ceph_get_direct_page_vector(const char __user *data, 15struct page **ceph_get_direct_page_vector(const char __user *data,
16 int num_pages, 16 int num_pages)
17 loff_t off, size_t len)
18{ 17{
19 struct page **pages; 18 struct page **pages;
20 int rc; 19 int rc;
diff --git a/net/compat.c b/net/compat.c
index 63d260e81472..3649d5895361 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -41,10 +41,12 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
41 compat_size_t len; 41 compat_size_t len;
42 42
43 if (get_user(len, &uiov32->iov_len) || 43 if (get_user(len, &uiov32->iov_len) ||
44 get_user(buf, &uiov32->iov_base)) { 44 get_user(buf, &uiov32->iov_base))
45 tot_len = -EFAULT; 45 return -EFAULT;
46 break; 46
47 } 47 if (len > INT_MAX - tot_len)
48 len = INT_MAX - tot_len;
49
48 tot_len += len; 50 tot_len += len;
49 kiov->iov_base = compat_ptr(buf); 51 kiov->iov_base = compat_ptr(buf);
50 kiov->iov_len = (__kernel_size_t) len; 52 kiov->iov_len = (__kernel_size_t) len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 78b5a89b0f40..0dd54a69dace 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1685,10 +1685,10 @@ EXPORT_SYMBOL(netif_device_attach);
1685 1685
1686static bool can_checksum_protocol(unsigned long features, __be16 protocol) 1686static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1687{ 1687{
1688 return ((features & NETIF_F_GEN_CSUM) || 1688 return ((features & NETIF_F_NO_CSUM) ||
1689 ((features & NETIF_F_IP_CSUM) && 1689 ((features & NETIF_F_V4_CSUM) &&
1690 protocol == htons(ETH_P_IP)) || 1690 protocol == htons(ETH_P_IP)) ||
1691 ((features & NETIF_F_IPV6_CSUM) && 1691 ((features & NETIF_F_V6_CSUM) &&
1692 protocol == htons(ETH_P_IPV6)) || 1692 protocol == htons(ETH_P_IPV6)) ||
1693 ((features & NETIF_F_FCOE_CRC) && 1693 ((features & NETIF_F_FCOE_CRC) &&
1694 protocol == htons(ETH_P_FCOE))); 1694 protocol == htons(ETH_P_FCOE)));
@@ -1696,22 +1696,18 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1696 1696
1697static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) 1697static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1698{ 1698{
1699 __be16 protocol = skb->protocol;
1699 int features = dev->features; 1700 int features = dev->features;
1700 1701
1701 if (vlan_tx_tag_present(skb)) 1702 if (vlan_tx_tag_present(skb)) {
1702 features &= dev->vlan_features; 1703 features &= dev->vlan_features;
1703 1704 } else if (protocol == htons(ETH_P_8021Q)) {
1704 if (can_checksum_protocol(features, skb->protocol))
1705 return true;
1706
1707 if (skb->protocol == htons(ETH_P_8021Q)) {
1708 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 1705 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1709 if (can_checksum_protocol(dev->features & dev->vlan_features, 1706 protocol = veh->h_vlan_encapsulated_proto;
1710 veh->h_vlan_encapsulated_proto)) 1707 features &= dev->vlan_features;
1711 return true;
1712 } 1708 }
1713 1709
1714 return false; 1710 return can_checksum_protocol(features, protocol);
1715} 1711}
1716 1712
1717/** 1713/**
@@ -2135,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2135 } else { 2131 } else {
2136 struct sock *sk = skb->sk; 2132 struct sock *sk = skb->sk;
2137 queue_index = sk_tx_queue_get(sk); 2133 queue_index = sk_tx_queue_get(sk);
2138 if (queue_index < 0) { 2134 if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
2139 2135
2140 queue_index = 0; 2136 queue_index = 0;
2141 if (dev->real_num_tx_queues > 1) 2137 if (dev->real_num_tx_queues > 1)
@@ -2213,7 +2209,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2213} 2209}
2214 2210
2215static DEFINE_PER_CPU(int, xmit_recursion); 2211static DEFINE_PER_CPU(int, xmit_recursion);
2216#define RECURSION_LIMIT 3 2212#define RECURSION_LIMIT 10
2217 2213
2218/** 2214/**
2219 * dev_queue_xmit - transmit a buffer 2215 * dev_queue_xmit - transmit a buffer
@@ -2413,7 +2409,7 @@ EXPORT_SYMBOL(__skb_get_rxhash);
2413#ifdef CONFIG_RPS 2409#ifdef CONFIG_RPS
2414 2410
2415/* One global table that all flow-based protocols share. */ 2411/* One global table that all flow-based protocols share. */
2416struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; 2412struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2417EXPORT_SYMBOL(rps_sock_flow_table); 2413EXPORT_SYMBOL(rps_sock_flow_table);
2418 2414
2419/* 2415/*
@@ -2425,7 +2421,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2425 struct rps_dev_flow **rflowp) 2421 struct rps_dev_flow **rflowp)
2426{ 2422{
2427 struct netdev_rx_queue *rxqueue; 2423 struct netdev_rx_queue *rxqueue;
2428 struct rps_map *map = NULL; 2424 struct rps_map *map;
2429 struct rps_dev_flow_table *flow_table; 2425 struct rps_dev_flow_table *flow_table;
2430 struct rps_sock_flow_table *sock_flow_table; 2426 struct rps_sock_flow_table *sock_flow_table;
2431 int cpu = -1; 2427 int cpu = -1;
@@ -2444,15 +2440,15 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2444 } else 2440 } else
2445 rxqueue = dev->_rx; 2441 rxqueue = dev->_rx;
2446 2442
2447 if (rxqueue->rps_map) { 2443 map = rcu_dereference(rxqueue->rps_map);
2448 map = rcu_dereference(rxqueue->rps_map); 2444 if (map) {
2449 if (map && map->len == 1) { 2445 if (map->len == 1) {
2450 tcpu = map->cpus[0]; 2446 tcpu = map->cpus[0];
2451 if (cpu_online(tcpu)) 2447 if (cpu_online(tcpu))
2452 cpu = tcpu; 2448 cpu = tcpu;
2453 goto done; 2449 goto done;
2454 } 2450 }
2455 } else if (!rxqueue->rps_flow_table) { 2451 } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
2456 goto done; 2452 goto done;
2457 } 2453 }
2458 2454
@@ -5416,7 +5412,7 @@ void netdev_run_todo(void)
5416 /* paranoia */ 5412 /* paranoia */
5417 BUG_ON(netdev_refcnt_read(dev)); 5413 BUG_ON(netdev_refcnt_read(dev));
5418 WARN_ON(rcu_dereference_raw(dev->ip_ptr)); 5414 WARN_ON(rcu_dereference_raw(dev->ip_ptr));
5419 WARN_ON(dev->ip6_ptr); 5415 WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
5420 WARN_ON(dev->dn_ptr); 5416 WARN_ON(dev->dn_ptr);
5421 5417
5422 if (dev->destructor) 5418 if (dev->destructor)
diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f1..b99c7c7ffce2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
370 370
371static struct notifier_block dst_dev_notifier = { 371static struct notifier_block dst_dev_notifier = {
372 .notifier_call = dst_dev_event, 372 .notifier_call = dst_dev_event,
373 .priority = -10, /* must be called after other network notifiers */
373}; 374};
374 375
375void __init dst_init(void) 376void __init dst_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 1bc3f253ba6c..82a4369ae150 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -351,12 +351,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
351 351
352 list_for_each_entry(r, &ops->rules_list, list) { 352 list_for_each_entry(r, &ops->rules_list, list) {
353 if (r->pref == rule->target) { 353 if (r->pref == rule->target) {
354 rule->ctarget = r; 354 RCU_INIT_POINTER(rule->ctarget, r);
355 break; 355 break;
356 } 356 }
357 } 357 }
358 358
359 if (rule->ctarget == NULL) 359 if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
360 unresolved = 1; 360 unresolved = 1;
361 } else if (rule->action == FR_ACT_GOTO) 361 } else if (rule->action == FR_ACT_GOTO)
362 goto errout_free; 362 goto errout_free;
@@ -373,6 +373,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
373 373
374 fib_rule_get(rule); 374 fib_rule_get(rule);
375 375
376 if (last)
377 list_add_rcu(&rule->list, &last->list);
378 else
379 list_add_rcu(&rule->list, &ops->rules_list);
380
376 if (ops->unresolved_rules) { 381 if (ops->unresolved_rules) {
377 /* 382 /*
378 * There are unresolved goto rules in the list, check if 383 * There are unresolved goto rules in the list, check if
@@ -381,7 +386,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
381 list_for_each_entry(r, &ops->rules_list, list) { 386 list_for_each_entry(r, &ops->rules_list, list) {
382 if (r->action == FR_ACT_GOTO && 387 if (r->action == FR_ACT_GOTO &&
383 r->target == rule->pref) { 388 r->target == rule->pref) {
384 BUG_ON(r->ctarget != NULL); 389 BUG_ON(rtnl_dereference(r->ctarget) != NULL);
385 rcu_assign_pointer(r->ctarget, rule); 390 rcu_assign_pointer(r->ctarget, rule);
386 if (--ops->unresolved_rules == 0) 391 if (--ops->unresolved_rules == 0)
387 break; 392 break;
@@ -395,11 +400,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
395 if (unresolved) 400 if (unresolved)
396 ops->unresolved_rules++; 401 ops->unresolved_rules++;
397 402
398 if (last)
399 list_add_rcu(&rule->list, &last->list);
400 else
401 list_add_rcu(&rule->list, &ops->rules_list);
402
403 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); 403 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
404 flush_route_cache(ops); 404 flush_route_cache(ops);
405 rules_ops_put(ops); 405 rules_ops_put(ops);
@@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
487 */ 487 */
488 if (ops->nr_goto_rules > 0) { 488 if (ops->nr_goto_rules > 0) {
489 list_for_each_entry(tmp, &ops->rules_list, list) { 489 list_for_each_entry(tmp, &ops->rules_list, list) {
490 if (tmp->ctarget == rule) { 490 if (rtnl_dereference(tmp->ctarget) == rule) {
491 rcu_assign_pointer(tmp->ctarget, NULL); 491 rcu_assign_pointer(tmp->ctarget, NULL);
492 ops->unresolved_rules++; 492 ops->unresolved_rules++;
493 } 493 }
@@ -545,7 +545,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
545 frh->action = rule->action; 545 frh->action = rule->action;
546 frh->flags = rule->flags; 546 frh->flags = rule->flags;
547 547
548 if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) 548 if (rule->action == FR_ACT_GOTO &&
549 rcu_dereference_raw(rule->ctarget) == NULL)
549 frh->flags |= FIB_RULE_UNRESOLVED; 550 frh->flags |= FIB_RULE_UNRESOLVED;
550 551
551 if (rule->iifname[0]) { 552 if (rule->iifname[0]) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 7adf50352918..c1ee800bc080 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -89,8 +89,8 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
89 rcu_read_lock_bh(); 89 rcu_read_lock_bh();
90 filter = rcu_dereference_bh(sk->sk_filter); 90 filter = rcu_dereference_bh(sk->sk_filter);
91 if (filter) { 91 if (filter) {
92 unsigned int pkt_len = sk_run_filter(skb, filter->insns, 92 unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
93 filter->len); 93
94 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 94 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
95 } 95 }
96 rcu_read_unlock_bh(); 96 rcu_read_unlock_bh();
@@ -112,39 +112,41 @@ EXPORT_SYMBOL(sk_filter);
112 */ 112 */
113unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) 113unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
114{ 114{
115 struct sock_filter *fentry; /* We walk down these */
116 void *ptr; 115 void *ptr;
117 u32 A = 0; /* Accumulator */ 116 u32 A = 0; /* Accumulator */
118 u32 X = 0; /* Index Register */ 117 u32 X = 0; /* Index Register */
119 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 118 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
119 unsigned long memvalid = 0;
120 u32 tmp; 120 u32 tmp;
121 int k; 121 int k;
122 int pc; 122 int pc;
123 123
124 BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
124 /* 125 /*
125 * Process array of filter instructions. 126 * Process array of filter instructions.
126 */ 127 */
127 for (pc = 0; pc < flen; pc++) { 128 for (pc = 0; pc < flen; pc++) {
128 fentry = &filter[pc]; 129 const struct sock_filter *fentry = &filter[pc];
130 u32 f_k = fentry->k;
129 131
130 switch (fentry->code) { 132 switch (fentry->code) {
131 case BPF_S_ALU_ADD_X: 133 case BPF_S_ALU_ADD_X:
132 A += X; 134 A += X;
133 continue; 135 continue;
134 case BPF_S_ALU_ADD_K: 136 case BPF_S_ALU_ADD_K:
135 A += fentry->k; 137 A += f_k;
136 continue; 138 continue;
137 case BPF_S_ALU_SUB_X: 139 case BPF_S_ALU_SUB_X:
138 A -= X; 140 A -= X;
139 continue; 141 continue;
140 case BPF_S_ALU_SUB_K: 142 case BPF_S_ALU_SUB_K:
141 A -= fentry->k; 143 A -= f_k;
142 continue; 144 continue;
143 case BPF_S_ALU_MUL_X: 145 case BPF_S_ALU_MUL_X:
144 A *= X; 146 A *= X;
145 continue; 147 continue;
146 case BPF_S_ALU_MUL_K: 148 case BPF_S_ALU_MUL_K:
147 A *= fentry->k; 149 A *= f_k;
148 continue; 150 continue;
149 case BPF_S_ALU_DIV_X: 151 case BPF_S_ALU_DIV_X:
150 if (X == 0) 152 if (X == 0)
@@ -152,49 +154,49 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
152 A /= X; 154 A /= X;
153 continue; 155 continue;
154 case BPF_S_ALU_DIV_K: 156 case BPF_S_ALU_DIV_K:
155 A /= fentry->k; 157 A /= f_k;
156 continue; 158 continue;
157 case BPF_S_ALU_AND_X: 159 case BPF_S_ALU_AND_X:
158 A &= X; 160 A &= X;
159 continue; 161 continue;
160 case BPF_S_ALU_AND_K: 162 case BPF_S_ALU_AND_K:
161 A &= fentry->k; 163 A &= f_k;
162 continue; 164 continue;
163 case BPF_S_ALU_OR_X: 165 case BPF_S_ALU_OR_X:
164 A |= X; 166 A |= X;
165 continue; 167 continue;
166 case BPF_S_ALU_OR_K: 168 case BPF_S_ALU_OR_K:
167 A |= fentry->k; 169 A |= f_k;
168 continue; 170 continue;
169 case BPF_S_ALU_LSH_X: 171 case BPF_S_ALU_LSH_X:
170 A <<= X; 172 A <<= X;
171 continue; 173 continue;
172 case BPF_S_ALU_LSH_K: 174 case BPF_S_ALU_LSH_K:
173 A <<= fentry->k; 175 A <<= f_k;
174 continue; 176 continue;
175 case BPF_S_ALU_RSH_X: 177 case BPF_S_ALU_RSH_X:
176 A >>= X; 178 A >>= X;
177 continue; 179 continue;
178 case BPF_S_ALU_RSH_K: 180 case BPF_S_ALU_RSH_K:
179 A >>= fentry->k; 181 A >>= f_k;
180 continue; 182 continue;
181 case BPF_S_ALU_NEG: 183 case BPF_S_ALU_NEG:
182 A = -A; 184 A = -A;
183 continue; 185 continue;
184 case BPF_S_JMP_JA: 186 case BPF_S_JMP_JA:
185 pc += fentry->k; 187 pc += f_k;
186 continue; 188 continue;
187 case BPF_S_JMP_JGT_K: 189 case BPF_S_JMP_JGT_K:
188 pc += (A > fentry->k) ? fentry->jt : fentry->jf; 190 pc += (A > f_k) ? fentry->jt : fentry->jf;
189 continue; 191 continue;
190 case BPF_S_JMP_JGE_K: 192 case BPF_S_JMP_JGE_K:
191 pc += (A >= fentry->k) ? fentry->jt : fentry->jf; 193 pc += (A >= f_k) ? fentry->jt : fentry->jf;
192 continue; 194 continue;
193 case BPF_S_JMP_JEQ_K: 195 case BPF_S_JMP_JEQ_K:
194 pc += (A == fentry->k) ? fentry->jt : fentry->jf; 196 pc += (A == f_k) ? fentry->jt : fentry->jf;
195 continue; 197 continue;
196 case BPF_S_JMP_JSET_K: 198 case BPF_S_JMP_JSET_K:
197 pc += (A & fentry->k) ? fentry->jt : fentry->jf; 199 pc += (A & f_k) ? fentry->jt : fentry->jf;
198 continue; 200 continue;
199 case BPF_S_JMP_JGT_X: 201 case BPF_S_JMP_JGT_X:
200 pc += (A > X) ? fentry->jt : fentry->jf; 202 pc += (A > X) ? fentry->jt : fentry->jf;
@@ -209,7 +211,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
209 pc += (A & X) ? fentry->jt : fentry->jf; 211 pc += (A & X) ? fentry->jt : fentry->jf;
210 continue; 212 continue;
211 case BPF_S_LD_W_ABS: 213 case BPF_S_LD_W_ABS:
212 k = fentry->k; 214 k = f_k;
213load_w: 215load_w:
214 ptr = load_pointer(skb, k, 4, &tmp); 216 ptr = load_pointer(skb, k, 4, &tmp);
215 if (ptr != NULL) { 217 if (ptr != NULL) {
@@ -218,7 +220,7 @@ load_w:
218 } 220 }
219 break; 221 break;
220 case BPF_S_LD_H_ABS: 222 case BPF_S_LD_H_ABS:
221 k = fentry->k; 223 k = f_k;
222load_h: 224load_h:
223 ptr = load_pointer(skb, k, 2, &tmp); 225 ptr = load_pointer(skb, k, 2, &tmp);
224 if (ptr != NULL) { 226 if (ptr != NULL) {
@@ -227,7 +229,7 @@ load_h:
227 } 229 }
228 break; 230 break;
229 case BPF_S_LD_B_ABS: 231 case BPF_S_LD_B_ABS:
230 k = fentry->k; 232 k = f_k;
231load_b: 233load_b:
232 ptr = load_pointer(skb, k, 1, &tmp); 234 ptr = load_pointer(skb, k, 1, &tmp);
233 if (ptr != NULL) { 235 if (ptr != NULL) {
@@ -242,32 +244,34 @@ load_b:
242 X = skb->len; 244 X = skb->len;
243 continue; 245 continue;
244 case BPF_S_LD_W_IND: 246 case BPF_S_LD_W_IND:
245 k = X + fentry->k; 247 k = X + f_k;
246 goto load_w; 248 goto load_w;
247 case BPF_S_LD_H_IND: 249 case BPF_S_LD_H_IND:
248 k = X + fentry->k; 250 k = X + f_k;
249 goto load_h; 251 goto load_h;
250 case BPF_S_LD_B_IND: 252 case BPF_S_LD_B_IND:
251 k = X + fentry->k; 253 k = X + f_k;
252 goto load_b; 254 goto load_b;
253 case BPF_S_LDX_B_MSH: 255 case BPF_S_LDX_B_MSH:
254 ptr = load_pointer(skb, fentry->k, 1, &tmp); 256 ptr = load_pointer(skb, f_k, 1, &tmp);
255 if (ptr != NULL) { 257 if (ptr != NULL) {
256 X = (*(u8 *)ptr & 0xf) << 2; 258 X = (*(u8 *)ptr & 0xf) << 2;
257 continue; 259 continue;
258 } 260 }
259 return 0; 261 return 0;
260 case BPF_S_LD_IMM: 262 case BPF_S_LD_IMM:
261 A = fentry->k; 263 A = f_k;
262 continue; 264 continue;
263 case BPF_S_LDX_IMM: 265 case BPF_S_LDX_IMM:
264 X = fentry->k; 266 X = f_k;
265 continue; 267 continue;
266 case BPF_S_LD_MEM: 268 case BPF_S_LD_MEM:
267 A = mem[fentry->k]; 269 A = (memvalid & (1UL << f_k)) ?
270 mem[f_k] : 0;
268 continue; 271 continue;
269 case BPF_S_LDX_MEM: 272 case BPF_S_LDX_MEM:
270 X = mem[fentry->k]; 273 X = (memvalid & (1UL << f_k)) ?
274 mem[f_k] : 0;
271 continue; 275 continue;
272 case BPF_S_MISC_TAX: 276 case BPF_S_MISC_TAX:
273 X = A; 277 X = A;
@@ -276,14 +280,16 @@ load_b:
276 A = X; 280 A = X;
277 continue; 281 continue;
278 case BPF_S_RET_K: 282 case BPF_S_RET_K:
279 return fentry->k; 283 return f_k;
280 case BPF_S_RET_A: 284 case BPF_S_RET_A:
281 return A; 285 return A;
282 case BPF_S_ST: 286 case BPF_S_ST:
283 mem[fentry->k] = A; 287 memvalid |= 1UL << f_k;
288 mem[f_k] = A;
284 continue; 289 continue;
285 case BPF_S_STX: 290 case BPF_S_STX:
286 mem[fentry->k] = X; 291 memvalid |= 1UL << f_k;
292 mem[f_k] = X;
287 continue; 293 continue;
288 default: 294 default:
289 WARN_ON(1); 295 WARN_ON(1);
@@ -583,7 +589,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
583EXPORT_SYMBOL(sk_chk_filter); 589EXPORT_SYMBOL(sk_chk_filter);
584 590
585/** 591/**
586 * sk_filter_rcu_release: Release a socket filter by rcu_head 592 * sk_filter_rcu_release - Release a socket filter by rcu_head
587 * @rcu: rcu_head that contains the sk_filter to free 593 * @rcu: rcu_head that contains the sk_filter to free
588 */ 594 */
589static void sk_filter_rcu_release(struct rcu_head *rcu) 595static void sk_filter_rcu_release(struct rcu_head *rcu)
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 72aceb1fe4fa..c40f27e7d208 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -35,10 +35,9 @@
35 * in any case. 35 * in any case.
36 */ 36 */
37 37
38long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) 38int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
39{ 39{
40 int size, ct; 40 int size, ct, err;
41 long err;
42 41
43 if (m->msg_namelen) { 42 if (m->msg_namelen) {
44 if (mode == VERIFY_READ) { 43 if (mode == VERIFY_READ) {
@@ -62,14 +61,13 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
62 err = 0; 61 err = 0;
63 62
64 for (ct = 0; ct < m->msg_iovlen; ct++) { 63 for (ct = 0; ct < m->msg_iovlen; ct++) {
65 err += iov[ct].iov_len; 64 size_t len = iov[ct].iov_len;
66 /* 65
67 * Goal is not to verify user data, but to prevent returning 66 if (len > INT_MAX - err) {
68 * negative value, which is interpreted as errno. 67 len = INT_MAX - err;
69 * Overflow is still possible, but it is harmless. 68 iov[ct].iov_len = len;
70 */ 69 }
71 if (err < 0) 70 err += len;
72 return -EMSGSIZE;
73 } 71 }
74 72
75 return err; 73 return err;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b143173e3eb2..7f902cad10f8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -598,7 +598,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
598 } 598 }
599 599
600 spin_lock(&rps_map_lock); 600 spin_lock(&rps_map_lock);
601 old_map = queue->rps_map; 601 old_map = rcu_dereference_protected(queue->rps_map,
602 lockdep_is_held(&rps_map_lock));
602 rcu_assign_pointer(queue->rps_map, map); 603 rcu_assign_pointer(queue->rps_map, map);
603 spin_unlock(&rps_map_lock); 604 spin_unlock(&rps_map_lock);
604 605
@@ -677,7 +678,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
677 table = NULL; 678 table = NULL;
678 679
679 spin_lock(&rps_dev_flow_lock); 680 spin_lock(&rps_dev_flow_lock);
680 old_table = queue->rps_flow_table; 681 old_table = rcu_dereference_protected(queue->rps_flow_table,
682 lockdep_is_held(&rps_dev_flow_lock));
681 rcu_assign_pointer(queue->rps_flow_table, table); 683 rcu_assign_pointer(queue->rps_flow_table, table);
682 spin_unlock(&rps_dev_flow_lock); 684 spin_unlock(&rps_dev_flow_lock);
683 685
@@ -705,16 +707,26 @@ static void rx_queue_release(struct kobject *kobj)
705{ 707{
706 struct netdev_rx_queue *queue = to_rx_queue(kobj); 708 struct netdev_rx_queue *queue = to_rx_queue(kobj);
707 struct netdev_rx_queue *first = queue->first; 709 struct netdev_rx_queue *first = queue->first;
710 struct rps_map *map;
711 struct rps_dev_flow_table *flow_table;
708 712
709 if (queue->rps_map)
710 call_rcu(&queue->rps_map->rcu, rps_map_release);
711 713
712 if (queue->rps_flow_table) 714 map = rcu_dereference_raw(queue->rps_map);
713 call_rcu(&queue->rps_flow_table->rcu, 715 if (map) {
714 rps_dev_flow_table_release); 716 RCU_INIT_POINTER(queue->rps_map, NULL);
717 call_rcu(&map->rcu, rps_map_release);
718 }
719
720 flow_table = rcu_dereference_raw(queue->rps_flow_table);
721 if (flow_table) {
722 RCU_INIT_POINTER(queue->rps_flow_table, NULL);
723 call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
724 }
715 725
716 if (atomic_dec_and_test(&first->count)) 726 if (atomic_dec_and_test(&first->count))
717 kfree(first); 727 kfree(first);
728 else
729 memset(kobj, 0, sizeof(*kobj));
718} 730}
719 731
720static struct kobj_type rx_queue_ktype = { 732static struct kobj_type rx_queue_ktype = {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c988e685433a..3f860261c5ee 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -42,7 +42,9 @@ static int net_assign_generic(struct net *net, int id, void *data)
42 BUG_ON(!mutex_is_locked(&net_mutex)); 42 BUG_ON(!mutex_is_locked(&net_mutex));
43 BUG_ON(id == 0); 43 BUG_ON(id == 0);
44 44
45 ng = old_ng = net->gen; 45 old_ng = rcu_dereference_protected(net->gen,
46 lockdep_is_held(&net_mutex));
47 ng = old_ng;
46 if (old_ng->len >= id) 48 if (old_ng->len >= id)
47 goto assign; 49 goto assign;
48 50
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2c0df0f95b3d..33bc3823ac6f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -771,10 +771,10 @@ done:
771static unsigned long num_arg(const char __user * user_buffer, 771static unsigned long num_arg(const char __user * user_buffer,
772 unsigned long maxlen, unsigned long *num) 772 unsigned long maxlen, unsigned long *num)
773{ 773{
774 int i = 0; 774 int i;
775 *num = 0; 775 *num = 0;
776 776
777 for (; i < maxlen; i++) { 777 for (i = 0; i < maxlen; i++) {
778 char c; 778 char c;
779 if (get_user(c, &user_buffer[i])) 779 if (get_user(c, &user_buffer[i]))
780 return -EFAULT; 780 return -EFAULT;
@@ -789,9 +789,9 @@ static unsigned long num_arg(const char __user * user_buffer,
789 789
790static int strn_len(const char __user * user_buffer, unsigned int maxlen) 790static int strn_len(const char __user * user_buffer, unsigned int maxlen)
791{ 791{
792 int i = 0; 792 int i;
793 793
794 for (; i < maxlen; i++) { 794 for (i = 0; i < maxlen; i++) {
795 char c; 795 char c;
796 if (get_user(c, &user_buffer[i])) 796 if (get_user(c, &user_buffer[i]))
797 return -EFAULT; 797 return -EFAULT;
@@ -846,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
846{ 846{
847 struct seq_file *seq = file->private_data; 847 struct seq_file *seq = file->private_data;
848 struct pktgen_dev *pkt_dev = seq->private; 848 struct pktgen_dev *pkt_dev = seq->private;
849 int i = 0, max, len; 849 int i, max, len;
850 char name[16], valstr[32]; 850 char name[16], valstr[32];
851 unsigned long value = 0; 851 unsigned long value = 0;
852 char *pg_result = NULL; 852 char *pg_result = NULL;
@@ -860,13 +860,13 @@ static ssize_t pktgen_if_write(struct file *file,
860 return -EINVAL; 860 return -EINVAL;
861 } 861 }
862 862
863 max = count - i; 863 max = count;
864 tmp = count_trail_chars(&user_buffer[i], max); 864 tmp = count_trail_chars(user_buffer, max);
865 if (tmp < 0) { 865 if (tmp < 0) {
866 pr_warning("illegal format\n"); 866 pr_warning("illegal format\n");
867 return tmp; 867 return tmp;
868 } 868 }
869 i += tmp; 869 i = tmp;
870 870
871 /* Read variable name */ 871 /* Read variable name */
872 872
@@ -887,10 +887,11 @@ static ssize_t pktgen_if_write(struct file *file,
887 i += len; 887 i += len;
888 888
889 if (debug) { 889 if (debug) {
890 char tb[count + 1]; 890 size_t copy = min_t(size_t, count, 1023);
891 if (copy_from_user(tb, user_buffer, count)) 891 char tb[copy + 1];
892 if (copy_from_user(tb, user_buffer, copy))
892 return -EFAULT; 893 return -EFAULT;
893 tb[count] = 0; 894 tb[copy] = 0;
894 printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name, 895 printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name,
895 (unsigned long)count, tb); 896 (unsigned long)count, tb);
896 } 897 }
@@ -1764,7 +1765,7 @@ static ssize_t pktgen_thread_write(struct file *file,
1764{ 1765{
1765 struct seq_file *seq = file->private_data; 1766 struct seq_file *seq = file->private_data;
1766 struct pktgen_thread *t = seq->private; 1767 struct pktgen_thread *t = seq->private;
1767 int i = 0, max, len, ret; 1768 int i, max, len, ret;
1768 char name[40]; 1769 char name[40];
1769 char *pg_result; 1770 char *pg_result;
1770 1771
@@ -1773,12 +1774,12 @@ static ssize_t pktgen_thread_write(struct file *file,
1773 return -EINVAL; 1774 return -EINVAL;
1774 } 1775 }
1775 1776
1776 max = count - i; 1777 max = count;
1777 len = count_trail_chars(&user_buffer[i], max); 1778 len = count_trail_chars(user_buffer, max);
1778 if (len < 0) 1779 if (len < 0)
1779 return len; 1780 return len;
1780 1781
1781 i += len; 1782 i = len;
1782 1783
1783 /* Read variable name */ 1784 /* Read variable name */
1784 1785
@@ -1975,7 +1976,7 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
1975 const char *ifname) 1976 const char *ifname)
1976{ 1977{
1977 char b[IFNAMSIZ+5]; 1978 char b[IFNAMSIZ+5];
1978 int i = 0; 1979 int i;
1979 1980
1980 for (i = 0; ifname[i] != '@'; i++) { 1981 for (i = 0; ifname[i] != '@'; i++) {
1981 if (i == IFNAMSIZ) 1982 if (i == IFNAMSIZ)
@@ -2519,8 +2520,8 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
2519{ 2520{
2520 if (pkt_dev->cflows) { 2521 if (pkt_dev->cflows) {
2521 /* let go of the SAs if we have them */ 2522 /* let go of the SAs if we have them */
2522 int i = 0; 2523 int i;
2523 for (; i < pkt_dev->cflows; i++) { 2524 for (i = 0; i < pkt_dev->cflows; i++) {
2524 struct xfrm_state *x = pkt_dev->flows[i].x; 2525 struct xfrm_state *x = pkt_dev->flows[i].x;
2525 if (x) { 2526 if (x) {
2526 xfrm_state_put(x); 2527 xfrm_state_put(x);
@@ -2611,8 +2612,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2611 /* Update any of the values, used when we're incrementing various 2612 /* Update any of the values, used when we're incrementing various
2612 * fields. 2613 * fields.
2613 */ 2614 */
2614 queue_map = pkt_dev->cur_queue_map;
2615 mod_cur_headers(pkt_dev); 2615 mod_cur_headers(pkt_dev);
2616 queue_map = pkt_dev->cur_queue_map;
2616 2617
2617 datalen = (odev->hard_header_len + 16) & ~0xf; 2618 datalen = (odev->hard_header_len + 16) & ~0xf;
2618 2619
@@ -2975,8 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2975 /* Update any of the values, used when we're incrementing various 2976 /* Update any of the values, used when we're incrementing various
2976 * fields. 2977 * fields.
2977 */ 2978 */
2978 queue_map = pkt_dev->cur_queue_map;
2979 mod_cur_headers(pkt_dev); 2979 mod_cur_headers(pkt_dev);
2980 queue_map = pkt_dev->cur_queue_map;
2980 2981
2981 skb = __netdev_alloc_skb(odev, 2982 skb = __netdev_alloc_skb(odev,
2982 pkt_dev->cur_pkt_size + 64 2983 pkt_dev->cur_pkt_size + 64
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7a..fceeb37d7161 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -45,9 +45,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
45 nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); 45 nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
46 lopt_size += nr_table_entries * sizeof(struct request_sock *); 46 lopt_size += nr_table_entries * sizeof(struct request_sock *);
47 if (lopt_size > PAGE_SIZE) 47 if (lopt_size > PAGE_SIZE)
48 lopt = __vmalloc(lopt_size, 48 lopt = vzalloc(lopt_size);
49 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
50 PAGE_KERNEL);
51 else 49 else
52 lopt = kzalloc(lopt_size, GFP_KERNEL); 50 lopt = kzalloc(lopt_size, GFP_KERNEL);
53 if (lopt == NULL) 51 if (lopt == NULL)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8121268ddbdd..841c287ef40a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -347,16 +347,17 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
347 if (!ops) 347 if (!ops)
348 return 0; 348 return 0;
349 349
350 size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ 350 size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
351 nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ 351 nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
352 352
353 if (ops->get_size) 353 if (ops->get_size)
354 /* IFLA_INFO_DATA + nested data */ 354 /* IFLA_INFO_DATA + nested data */
355 size += nlmsg_total_size(sizeof(struct nlattr)) + 355 size += nla_total_size(sizeof(struct nlattr)) +
356 ops->get_size(dev); 356 ops->get_size(dev);
357 357
358 if (ops->get_xstats_size) 358 if (ops->get_xstats_size)
359 size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ 359 /* IFLA_INFO_XSTATS */
360 size += nla_total_size(ops->get_xstats_size(dev));
360 361
361 return size; 362 return size;
362} 363}
diff --git a/net/core/sock.c b/net/core/sock.c
index 11db43632df8..fb6080111461 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1225,7 +1225,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1225 sock_reset_flag(newsk, SOCK_DONE); 1225 sock_reset_flag(newsk, SOCK_DONE);
1226 skb_queue_head_init(&newsk->sk_error_queue); 1226 skb_queue_head_init(&newsk->sk_error_queue);
1227 1227
1228 filter = newsk->sk_filter; 1228 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1229 if (filter != NULL) 1229 if (filter != NULL)
1230 sk_filter_charge(newsk, filter); 1230 sk_filter_charge(newsk, filter);
1231 1231
@@ -1653,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
1653{ 1653{
1654 struct proto *prot = sk->sk_prot; 1654 struct proto *prot = sk->sk_prot;
1655 int amt = sk_mem_pages(size); 1655 int amt = sk_mem_pages(size);
1656 int allocated; 1656 long allocated;
1657 1657
1658 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; 1658 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1659 allocated = atomic_add_return(amt, prot->memory_allocated); 1659 allocated = atomic_long_add_return(amt, prot->memory_allocated);
1660 1660
1661 /* Under limit. */ 1661 /* Under limit. */
1662 if (allocated <= prot->sysctl_mem[0]) { 1662 if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1714,7 @@ suppress_allocation:
1714 1714
1715 /* Alas. Undo changes. */ 1715 /* Alas. Undo changes. */
1716 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; 1716 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1717 atomic_sub(amt, prot->memory_allocated); 1717 atomic_long_sub(amt, prot->memory_allocated);
1718 return 0; 1718 return 0;
1719} 1719}
1720EXPORT_SYMBOL(__sk_mem_schedule); 1720EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
1727{ 1727{
1728 struct proto *prot = sk->sk_prot; 1728 struct proto *prot = sk->sk_prot;
1729 1729
1730 atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 1730 atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
1731 prot->memory_allocated); 1731 prot->memory_allocated);
1732 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; 1732 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
1733 1733
1734 if (prot->memory_pressure && *prot->memory_pressure && 1734 if (prot->memory_pressure && *prot->memory_pressure &&
1735 (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) 1735 (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
1736 *prot->memory_pressure = 0; 1736 *prot->memory_pressure = 0;
1737} 1737}
1738EXPORT_SYMBOL(__sk_mem_reclaim); 1738EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2452,12 @@ static char proto_method_implemented(const void *method)
2452 2452
2453static void proto_seq_printf(struct seq_file *seq, struct proto *proto) 2453static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2454{ 2454{
2455 seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s " 2455 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
2456 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", 2456 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2457 proto->name, 2457 proto->name,
2458 proto->obj_size, 2458 proto->obj_size,
2459 sock_prot_inuse_get(seq_file_net(seq), proto), 2459 sock_prot_inuse_get(seq_file_net(seq), proto),
2460 proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, 2460 proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
2461 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", 2461 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
2462 proto->max_header, 2462 proto->max_header,
2463 proto->slab == NULL ? "no" : "yes", 2463 proto->slab == NULL ? "no" : "yes",
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 01eee5d984be..385b6095fdc4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -34,7 +34,8 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
34 34
35 mutex_lock(&sock_flow_mutex); 35 mutex_lock(&sock_flow_mutex);
36 36
37 orig_sock_table = rps_sock_flow_table; 37 orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
38 lockdep_is_held(&sock_flow_mutex));
38 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; 39 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
39 40
40 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); 41 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 117fb093dcaf..75c3582a7678 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -134,13 +134,41 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
134extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); 134extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
135extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); 135extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
136 136
137/*
138 * Congestion control of queued data packets via CCID decision.
139 *
140 * The TX CCID performs its congestion-control by indicating whether and when a
141 * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
142 * The following modes are supported via the symbolic constants below:
143 * - timer-based pacing (CCID returns a delay value in milliseconds);
144 * - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
145 */
146
147enum ccid_dequeueing_decision {
148 CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */
149 CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */
150 CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */
151 CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */
152 CCID_PACKET_ERR = 0xF0000, /* error condition */
153};
154
155static inline int ccid_packet_dequeue_eval(const int return_code)
156{
157 if (return_code < 0)
158 return CCID_PACKET_ERR;
159 if (return_code == 0)
160 return CCID_PACKET_SEND_AT_ONCE;
161 if (return_code <= CCID_PACKET_DELAY_MAX)
162 return CCID_PACKET_DELAY;
163 return return_code;
164}
165
137static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, 166static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
138 struct sk_buff *skb) 167 struct sk_buff *skb)
139{ 168{
140 int rc = 0;
141 if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL) 169 if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
142 rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb); 170 return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
143 return rc; 171 return CCID_PACKET_SEND_AT_ONCE;
144} 172}
145 173
146static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, 174static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d850e291f87c..6576eae9e779 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -78,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
78 78
79static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) 79static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
80{ 80{
81 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 81 if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
82 82 return CCID_PACKET_WILL_DEQUEUE_LATER;
83 if (hc->tx_pipe < hc->tx_cwnd) 83 return CCID_PACKET_SEND_AT_ONCE;
84 return 0;
85
86 return 1; /* XXX CCID should dequeue when ready instead of polling */
87} 84}
88 85
89static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) 86static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
@@ -115,6 +112,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
115{ 112{
116 struct sock *sk = (struct sock *)data; 113 struct sock *sk = (struct sock *)data;
117 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 114 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
115 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
118 116
119 bh_lock_sock(sk); 117 bh_lock_sock(sk);
120 if (sock_owned_by_user(sk)) { 118 if (sock_owned_by_user(sk)) {
@@ -129,8 +127,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
129 if (hc->tx_rto > DCCP_RTO_MAX) 127 if (hc->tx_rto > DCCP_RTO_MAX)
130 hc->tx_rto = DCCP_RTO_MAX; 128 hc->tx_rto = DCCP_RTO_MAX;
131 129
132 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
133
134 /* adjust pipe, cwnd etc */ 130 /* adjust pipe, cwnd etc */
135 hc->tx_ssthresh = hc->tx_cwnd / 2; 131 hc->tx_ssthresh = hc->tx_cwnd / 2;
136 if (hc->tx_ssthresh < 2) 132 if (hc->tx_ssthresh < 2)
@@ -146,6 +142,12 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
146 hc->tx_rpseq = 0; 142 hc->tx_rpseq = 0;
147 hc->tx_rpdupack = -1; 143 hc->tx_rpdupack = -1;
148 ccid2_change_l_ack_ratio(sk, 1); 144 ccid2_change_l_ack_ratio(sk, 1);
145
146 /* if we were blocked before, we may now send cwnd=1 packet */
147 if (sender_was_blocked)
148 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
149 /* restart backed-off timer */
150 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
149out: 151out:
150 bh_unlock_sock(sk); 152 bh_unlock_sock(sk);
151 sock_put(sk); 153 sock_put(sk);
@@ -434,6 +436,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
434{ 436{
435 struct dccp_sock *dp = dccp_sk(sk); 437 struct dccp_sock *dp = dccp_sk(sk);
436 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 438 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
439 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
437 u64 ackno, seqno; 440 u64 ackno, seqno;
438 struct ccid2_seq *seqp; 441 struct ccid2_seq *seqp;
439 unsigned char *vector; 442 unsigned char *vector;
@@ -631,6 +634,10 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
631 sk_stop_timer(sk, &hc->tx_rtotimer); 634 sk_stop_timer(sk, &hc->tx_rtotimer);
632 else 635 else
633 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); 636 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
637
638 /* check if incoming Acks allow pending packets to be sent */
639 if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
640 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
634} 641}
635 642
636static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 643static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 9731c2dc1487..25cb6b216eda 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -81,6 +81,11 @@ struct ccid2_hc_tx_sock {
81 u64 tx_high_ack; 81 u64 tx_high_ack;
82}; 82};
83 83
84static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
85{
86 return hc->tx_pipe >= hc->tx_cwnd;
87}
88
84struct ccid2_hc_rx_sock { 89struct ccid2_hc_rx_sock {
85 int rx_data; 90 int rx_data;
86}; 91};
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3060a60ed5ab..3d604e1349c0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -268,11 +268,11 @@ out:
268 sock_put(sk); 268 sock_put(sk);
269} 269}
270 270
271/* 271/**
272 * returns 272 * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets
273 * > 0: delay (in msecs) that should pass before actually sending 273 * @skb: next packet candidate to send on @sk
274 * = 0: can send immediately 274 * This function uses the convention of ccid_packet_dequeue_eval() and
275 * < 0: error condition; do not send packet 275 * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
276 */ 276 */
277static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) 277static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
278{ 278{
@@ -348,7 +348,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
348 348
349 /* set the nominal send time for the next following packet */ 349 /* set the nominal send time for the next following packet */
350 hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi); 350 hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi);
351 return 0; 351 return CCID_PACKET_SEND_AT_ONCE;
352} 352}
353 353
354static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len) 354static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3eb264b60823..a8ed459508b2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,8 +243,9 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
243extern void dccp_send_sync(struct sock *sk, const u64 seq, 243extern void dccp_send_sync(struct sock *sk, const u64 seq,
244 const enum dccp_pkt_type pkt_type); 244 const enum dccp_pkt_type pkt_type);
245 245
246extern void dccp_write_xmit(struct sock *sk, int block); 246extern void dccp_write_xmit(struct sock *sk);
247extern void dccp_write_space(struct sock *sk); 247extern void dccp_write_space(struct sock *sk);
248extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
248 249
249extern void dccp_init_xmit_timers(struct sock *sk); 250extern void dccp_init_xmit_timers(struct sock *sk);
250static inline void dccp_clear_xmit_timers(struct sock *sk) 251static inline void dccp_clear_xmit_timers(struct sock *sk)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 265985370fa1..e424a09e83f6 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -239,7 +239,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
239 dccp_update_gsr(sk, seqno); 239 dccp_update_gsr(sk, seqno);
240 240
241 if (dh->dccph_type != DCCP_PKT_SYNC && 241 if (dh->dccph_type != DCCP_PKT_SYNC &&
242 (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) 242 ackno != DCCP_PKT_WITHOUT_ACK_SEQ &&
243 after48(ackno, dp->dccps_gar))
243 dp->dccps_gar = ackno; 244 dp->dccps_gar = ackno;
244 } else { 245 } else {
245 unsigned long now = jiffies; 246 unsigned long now = jiffies;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index a988fe9ffcba..45b91853f5ae 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -209,108 +209,150 @@ void dccp_write_space(struct sock *sk)
209} 209}
210 210
211/** 211/**
212 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet 212 * dccp_wait_for_ccid - Await CCID send permission
213 * @sk: socket to wait for 213 * @sk: socket to wait for
214 * @skb: current skb to pass on for waiting 214 * @delay: timeout in jiffies
215 * @delay: sleep timeout in milliseconds (> 0) 215 * This is used by CCIDs which need to delay the send time in process context.
216 * This function is called by default when the socket is closed, and
217 * when a non-zero linger time is set on the socket. For consistency
218 */ 216 */
219static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) 217static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
220{ 218{
221 struct dccp_sock *dp = dccp_sk(sk);
222 DEFINE_WAIT(wait); 219 DEFINE_WAIT(wait);
223 unsigned long jiffdelay; 220 long remaining;
224 int rc; 221
222 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
223 sk->sk_write_pending++;
224 release_sock(sk);
225
226 remaining = schedule_timeout(delay);
227
228 lock_sock(sk);
229 sk->sk_write_pending--;
230 finish_wait(sk_sleep(sk), &wait);
231
232 if (signal_pending(current) || sk->sk_err)
233 return -1;
234 return remaining;
235}
236
237/**
238 * dccp_xmit_packet - Send data packet under control of CCID
239 * Transmits next-queued payload and informs CCID to account for the packet.
240 */
241static void dccp_xmit_packet(struct sock *sk)
242{
243 int err, len;
244 struct dccp_sock *dp = dccp_sk(sk);
245 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
225 246
226 do { 247 if (unlikely(skb == NULL))
227 dccp_pr_debug("delayed send by %d msec\n", delay); 248 return;
228 jiffdelay = msecs_to_jiffies(delay); 249 len = skb->len;
229 250
230 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 251 if (sk->sk_state == DCCP_PARTOPEN) {
252 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
253 /*
254 * See 8.1.5 - Handshake Completion.
255 *
256 * For robustness we resend Confirm options until the client has
257 * entered OPEN. During the initial feature negotiation, the MPS
258 * is smaller than usual, reduced by the Change/Confirm options.
259 */
260 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
261 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
262 dccp_send_ack(sk);
263 dccp_feat_list_purge(&dp->dccps_featneg);
264 }
231 265
232 sk->sk_write_pending++; 266 inet_csk_schedule_ack(sk);
233 release_sock(sk); 267 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
234 schedule_timeout(jiffdelay); 268 inet_csk(sk)->icsk_rto,
235 lock_sock(sk); 269 DCCP_RTO_MAX);
236 sk->sk_write_pending--; 270 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
271 } else if (dccp_ack_pending(sk)) {
272 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
273 } else {
274 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
275 }
276
277 err = dccp_transmit_skb(sk, skb);
278 if (err)
279 dccp_pr_debug("transmit_skb() returned err=%d\n", err);
280 /*
281 * Register this one as sent even if an error occurred. To the remote
282 * end a local packet drop is indistinguishable from network loss, i.e.
283 * any local drop will eventually be reported via receiver feedback.
284 */
285 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
286}
237 287
238 if (sk->sk_err) 288/**
239 goto do_error; 289 * dccp_flush_write_queue - Drain queue at end of connection
240 if (signal_pending(current)) 290 * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
241 goto do_interrupted; 291 * happen that the TX queue is not empty at the end of a connection. We give the
292 * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
293 * returns with a non-empty write queue, it will be purged later.
294 */
295void dccp_flush_write_queue(struct sock *sk, long *time_budget)
296{
297 struct dccp_sock *dp = dccp_sk(sk);
298 struct sk_buff *skb;
299 long delay, rc;
242 300
301 while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
243 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 302 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
244 } while ((delay = rc) > 0); 303
245out: 304 switch (ccid_packet_dequeue_eval(rc)) {
246 finish_wait(sk_sleep(sk), &wait); 305 case CCID_PACKET_WILL_DEQUEUE_LATER:
247 return rc; 306 /*
248 307 * If the CCID determines when to send, the next sending
249do_error: 308 * time is unknown or the CCID may not even send again
250 rc = -EPIPE; 309 * (e.g. remote host crashes or lost Ack packets).
251 goto out; 310 */
252do_interrupted: 311 DCCP_WARN("CCID did not manage to send all packets\n");
253 rc = -EINTR; 312 return;
254 goto out; 313 case CCID_PACKET_DELAY:
314 delay = msecs_to_jiffies(rc);
315 if (delay > *time_budget)
316 return;
317 rc = dccp_wait_for_ccid(sk, delay);
318 if (rc < 0)
319 return;
320 *time_budget -= (delay - rc);
321 /* check again if we can send now */
322 break;
323 case CCID_PACKET_SEND_AT_ONCE:
324 dccp_xmit_packet(sk);
325 break;
326 case CCID_PACKET_ERR:
327 skb_dequeue(&sk->sk_write_queue);
328 kfree_skb(skb);
329 dccp_pr_debug("packet discarded due to err=%ld\n", rc);
330 }
331 }
255} 332}
256 333
257void dccp_write_xmit(struct sock *sk, int block) 334void dccp_write_xmit(struct sock *sk)
258{ 335{
259 struct dccp_sock *dp = dccp_sk(sk); 336 struct dccp_sock *dp = dccp_sk(sk);
260 struct sk_buff *skb; 337 struct sk_buff *skb;
261 338
262 while ((skb = skb_peek(&sk->sk_write_queue))) { 339 while ((skb = skb_peek(&sk->sk_write_queue))) {
263 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 340 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
264
265 if (err > 0) {
266 if (!block) {
267 sk_reset_timer(sk, &dp->dccps_xmit_timer,
268 msecs_to_jiffies(err)+jiffies);
269 break;
270 } else
271 err = dccp_wait_for_ccid(sk, skb, err);
272 if (err && err != -EINTR)
273 DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
274 }
275 341
276 skb_dequeue(&sk->sk_write_queue); 342 switch (ccid_packet_dequeue_eval(rc)) {
277 if (err == 0) { 343 case CCID_PACKET_WILL_DEQUEUE_LATER:
278 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 344 return;
279 const int len = skb->len; 345 case CCID_PACKET_DELAY:
280 346 sk_reset_timer(sk, &dp->dccps_xmit_timer,
281 if (sk->sk_state == DCCP_PARTOPEN) { 347 jiffies + msecs_to_jiffies(rc));
282 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; 348 return;
283 /* 349 case CCID_PACKET_SEND_AT_ONCE:
284 * See 8.1.5 - Handshake Completion. 350 dccp_xmit_packet(sk);
285 * 351 break;
286 * For robustness we resend Confirm options until the client has 352 case CCID_PACKET_ERR:
287 * entered OPEN. During the initial feature negotiation, the MPS 353 skb_dequeue(&sk->sk_write_queue);
288 * is smaller than usual, reduced by the Change/Confirm options.
289 */
290 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
291 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
292 dccp_send_ack(sk);
293 dccp_feat_list_purge(&dp->dccps_featneg);
294 }
295
296 inet_csk_schedule_ack(sk);
297 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
298 inet_csk(sk)->icsk_rto,
299 DCCP_RTO_MAX);
300 dcb->dccpd_type = DCCP_PKT_DATAACK;
301 } else if (dccp_ack_pending(sk))
302 dcb->dccpd_type = DCCP_PKT_DATAACK;
303 else
304 dcb->dccpd_type = DCCP_PKT_DATA;
305
306 err = dccp_transmit_skb(sk, skb);
307 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
308 if (err)
309 DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
310 err);
311 } else {
312 dccp_pr_debug("packet discarded due to err=%d\n", err);
313 kfree_skb(skb); 354 kfree_skb(skb);
355 dccp_pr_debug("packet discarded due to err=%d\n", rc);
314 } 356 }
315 } 357 }
316} 358}
@@ -622,7 +664,6 @@ void dccp_send_close(struct sock *sk, const int active)
622 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; 664 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
623 665
624 if (active) { 666 if (active) {
625 dccp_write_xmit(sk, 1);
626 dccp_skb_entail(sk, skb); 667 dccp_skb_entail(sk, skb);
627 dccp_transmit_skb(sk, skb_clone(skb, prio)); 668 dccp_transmit_skb(sk, skb_clone(skb, prio));
628 /* 669 /*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7e5fc04eb6d1..ef343d53fcea 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -726,7 +726,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
726 goto out_discard; 726 goto out_discard;
727 727
728 skb_queue_tail(&sk->sk_write_queue, skb); 728 skb_queue_tail(&sk->sk_write_queue, skb);
729 dccp_write_xmit(sk,0); 729 /*
730 * The xmit_timer is set if the TX CCID is rate-based and will expire
731 * when congestion control permits to release further packets into the
732 * network. Window-based CCIDs do not use this timer.
733 */
734 if (!timer_pending(&dp->dccps_xmit_timer))
735 dccp_write_xmit(sk);
730out_release: 736out_release:
731 release_sock(sk); 737 release_sock(sk);
732 return rc ? : len; 738 return rc ? : len;
@@ -951,9 +957,22 @@ void dccp_close(struct sock *sk, long timeout)
951 /* Check zero linger _after_ checking for unread data. */ 957 /* Check zero linger _after_ checking for unread data. */
952 sk->sk_prot->disconnect(sk, 0); 958 sk->sk_prot->disconnect(sk, 0);
953 } else if (sk->sk_state != DCCP_CLOSED) { 959 } else if (sk->sk_state != DCCP_CLOSED) {
960 /*
961 * Normal connection termination. May need to wait if there are
962 * still packets in the TX queue that are delayed by the CCID.
963 */
964 dccp_flush_write_queue(sk, &timeout);
954 dccp_terminate_connection(sk); 965 dccp_terminate_connection(sk);
955 } 966 }
956 967
968 /*
969 * Flush write queue. This may be necessary in several cases:
970 * - we have been closed by the peer but still have application data;
971 * - abortive termination (unread data or zero linger time),
972 * - normal termination but queue could not be flushed within time limit
973 */
974 __skb_queue_purge(&sk->sk_write_queue);
975
957 sk_stream_wait_close(sk, timeout); 976 sk_stream_wait_close(sk, timeout);
958 977
959adjudge_to_death: 978adjudge_to_death:
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1a9aa05d4dc4..7587870b7040 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -237,32 +237,35 @@ out:
237 sock_put(sk); 237 sock_put(sk);
238} 238}
239 239
240/* Transmit-delay timer: used by the CCIDs to delay actual send time */ 240/**
241static void dccp_write_xmit_timer(unsigned long data) 241 * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
242 * See the comments above %ccid_dequeueing_decision for supported modes.
243 */
244static void dccp_write_xmitlet(unsigned long data)
242{ 245{
243 struct sock *sk = (struct sock *)data; 246 struct sock *sk = (struct sock *)data;
244 struct dccp_sock *dp = dccp_sk(sk);
245 247
246 bh_lock_sock(sk); 248 bh_lock_sock(sk);
247 if (sock_owned_by_user(sk)) 249 if (sock_owned_by_user(sk))
248 sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); 250 sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
249 else 251 else
250 dccp_write_xmit(sk, 0); 252 dccp_write_xmit(sk);
251 bh_unlock_sock(sk); 253 bh_unlock_sock(sk);
252 sock_put(sk);
253} 254}
254 255
255static void dccp_init_write_xmit_timer(struct sock *sk) 256static void dccp_write_xmit_timer(unsigned long data)
256{ 257{
257 struct dccp_sock *dp = dccp_sk(sk); 258 dccp_write_xmitlet(data);
258 259 sock_put((struct sock *)data);
259 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
260 (unsigned long)sk);
261} 260}
262 261
263void dccp_init_xmit_timers(struct sock *sk) 262void dccp_init_xmit_timers(struct sock *sk)
264{ 263{
265 dccp_init_write_xmit_timer(sk); 264 struct dccp_sock *dp = dccp_sk(sk);
265
266 tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
267 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
268 (unsigned long)sk);
266 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, 269 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
267 &dccp_keepalive_timer); 270 &dccp_keepalive_timer);
268} 271}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790f..6f97268ed85f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_ops;
155static DEFINE_RWLOCK(dn_hash_lock); 155static DEFINE_RWLOCK(dn_hash_lock);
156static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; 156static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
157static struct hlist_head dn_wild_sk; 157static struct hlist_head dn_wild_sk;
158static atomic_t decnet_memory_allocated; 158static atomic_long_t decnet_memory_allocated;
159 159
160static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags); 160static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
161static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags); 161static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
1556 if (r_len > sizeof(struct linkinfo_dn)) 1556 if (r_len > sizeof(struct linkinfo_dn))
1557 r_len = sizeof(struct linkinfo_dn); 1557 r_len = sizeof(struct linkinfo_dn);
1558 1558
1559 memset(&link, 0, sizeof(link));
1560
1559 switch(sock->state) { 1561 switch(sock->state) {
1560 case SS_CONNECTING: 1562 case SS_CONNECTING:
1561 link.idn_linkstate = LL_CONNECTING; 1563 link.idn_linkstate = LL_CONNECTING;
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index be3eb8e23288..28f8b5e5f73b 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -38,7 +38,7 @@ int decnet_log_martians = 1;
38int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW; 38int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
39 39
40/* Reasonable defaults, I hope, based on tcp's defaults */ 40/* Reasonable defaults, I hope, based on tcp's defaults */
41int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 }; 41long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
42int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; 42int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
43int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; 43int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
44 44
@@ -324,7 +324,7 @@ static ctl_table dn_table[] = {
324 .data = &sysctl_decnet_mem, 324 .data = &sysctl_decnet_mem,
325 .maxlen = sizeof(sysctl_decnet_mem), 325 .maxlen = sizeof(sysctl_decnet_mem),
326 .mode = 0644, 326 .mode = 0644,
327 .proc_handler = proc_dointvec, 327 .proc_handler = proc_doulongvec_minmax
328 }, 328 },
329 { 329 {
330 .procname = "decnet_rmem", 330 .procname = "decnet_rmem",
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f8c1ae4b41f0..13992e1d2726 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
31#include <linux/skbuff.h> 31#include <linux/skbuff.h>
32#include <linux/udp.h> 32#include <linux/udp.h>
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <linux/vmalloc.h>
34#include <net/sock.h> 35#include <net/sock.h>
35#include <net/inet_common.h> 36#include <net/inet_common.h>
36#include <linux/stat.h> 37#include <linux/stat.h>
@@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
276#endif 277#endif
277#ifdef CONFIG_ECONET_AUNUDP 278#ifdef CONFIG_ECONET_AUNUDP
278 struct msghdr udpmsg; 279 struct msghdr udpmsg;
279 struct iovec iov[msg->msg_iovlen+1]; 280 struct iovec iov[2];
280 struct aunhdr ah; 281 struct aunhdr ah;
281 struct sockaddr_in udpdest; 282 struct sockaddr_in udpdest;
282 __kernel_size_t size; 283 __kernel_size_t size;
283 int i;
284 mm_segment_t oldfs; 284 mm_segment_t oldfs;
285 char *userbuf;
285#endif 286#endif
286 287
287 /* 288 /*
@@ -297,23 +298,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
297 298
298 mutex_lock(&econet_mutex); 299 mutex_lock(&econet_mutex);
299 300
300 if (saddr == NULL) { 301 if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) {
301 struct econet_sock *eo = ec_sk(sk); 302 mutex_unlock(&econet_mutex);
302 303 return -EINVAL;
303 addr.station = eo->station; 304 }
304 addr.net = eo->net; 305 addr.station = saddr->addr.station;
305 port = eo->port; 306 addr.net = saddr->addr.net;
306 cb = eo->cb; 307 port = saddr->port;
307 } else { 308 cb = saddr->cb;
308 if (msg->msg_namelen < sizeof(struct sockaddr_ec)) {
309 mutex_unlock(&econet_mutex);
310 return -EINVAL;
311 }
312 addr.station = saddr->addr.station;
313 addr.net = saddr->addr.net;
314 port = saddr->port;
315 cb = saddr->cb;
316 }
317 309
318 /* Look for a device with the right network number. */ 310 /* Look for a device with the right network number. */
319 dev = net2dev_map[addr.net]; 311 dev = net2dev_map[addr.net];
@@ -328,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
328 } 320 }
329 } 321 }
330 322
331 if (len + 15 > dev->mtu) {
332 mutex_unlock(&econet_mutex);
333 return -EMSGSIZE;
334 }
335
336 if (dev->type == ARPHRD_ECONET) { 323 if (dev->type == ARPHRD_ECONET) {
337 /* Real hardware Econet. We're not worthy etc. */ 324 /* Real hardware Econet. We're not worthy etc. */
338#ifdef CONFIG_ECONET_NATIVE 325#ifdef CONFIG_ECONET_NATIVE
339 unsigned short proto = 0; 326 unsigned short proto = 0;
340 int res; 327 int res;
341 328
329 if (len + 15 > dev->mtu) {
330 mutex_unlock(&econet_mutex);
331 return -EMSGSIZE;
332 }
333
342 dev_hold(dev); 334 dev_hold(dev);
343 335
344 skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), 336 skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
@@ -351,7 +343,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
351 343
352 eb = (struct ec_cb *)&skb->cb; 344 eb = (struct ec_cb *)&skb->cb;
353 345
354 /* BUG: saddr may be NULL */
355 eb->cookie = saddr->cookie; 346 eb->cookie = saddr->cookie;
356 eb->sec = *saddr; 347 eb->sec = *saddr;
357 eb->sent = ec_tx_done; 348 eb->sent = ec_tx_done;
@@ -415,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
415 return -ENETDOWN; /* No socket - can't send */ 406 return -ENETDOWN; /* No socket - can't send */
416 } 407 }
417 408
409 if (len > 32768) {
410 err = -E2BIG;
411 goto error;
412 }
413
418 /* Make up a UDP datagram and hand it off to some higher intellect. */ 414 /* Make up a UDP datagram and hand it off to some higher intellect. */
419 415
420 memset(&udpdest, 0, sizeof(udpdest)); 416 memset(&udpdest, 0, sizeof(udpdest));
@@ -446,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
446 442
447 /* tack our header on the front of the iovec */ 443 /* tack our header on the front of the iovec */
448 size = sizeof(struct aunhdr); 444 size = sizeof(struct aunhdr);
449 /*
450 * XXX: that is b0rken. We can't mix userland and kernel pointers
451 * in iovec, since on a lot of platforms copy_from_user() will
452 * *not* work with the kernel and userland ones at the same time,
453 * regardless of what we do with set_fs(). And we are talking about
454 * econet-over-ethernet here, so "it's only ARM anyway" doesn't
455 * apply. Any suggestions on fixing that code? -- AV
456 */
457 iov[0].iov_base = (void *)&ah; 445 iov[0].iov_base = (void *)&ah;
458 iov[0].iov_len = size; 446 iov[0].iov_len = size;
459 for (i = 0; i < msg->msg_iovlen; i++) { 447
460 void __user *base = msg->msg_iov[i].iov_base; 448 userbuf = vmalloc(len);
461 size_t iov_len = msg->msg_iov[i].iov_len; 449 if (userbuf == NULL) {
462 /* Check it now since we switch to KERNEL_DS later. */ 450 err = -ENOMEM;
463 if (!access_ok(VERIFY_READ, base, iov_len)) { 451 goto error;
464 mutex_unlock(&econet_mutex);
465 return -EFAULT;
466 }
467 iov[i+1].iov_base = base;
468 iov[i+1].iov_len = iov_len;
469 size += iov_len;
470 } 452 }
471 453
454 iov[1].iov_base = userbuf;
455 iov[1].iov_len = len;
456 err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
457 if (err)
458 goto error_free_buf;
459
472 /* Get a skbuff (no data, just holds our cb information) */ 460 /* Get a skbuff (no data, just holds our cb information) */
473 if ((skb = sock_alloc_send_skb(sk, 0, 461 if ((skb = sock_alloc_send_skb(sk, 0,
474 msg->msg_flags & MSG_DONTWAIT, 462 msg->msg_flags & MSG_DONTWAIT,
475 &err)) == NULL) { 463 &err)) == NULL)
476 mutex_unlock(&econet_mutex); 464 goto error_free_buf;
477 return err;
478 }
479 465
480 eb = (struct ec_cb *)&skb->cb; 466 eb = (struct ec_cb *)&skb->cb;
481 467
@@ -491,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
491 udpmsg.msg_name = (void *)&udpdest; 477 udpmsg.msg_name = (void *)&udpdest;
492 udpmsg.msg_namelen = sizeof(udpdest); 478 udpmsg.msg_namelen = sizeof(udpdest);
493 udpmsg.msg_iov = &iov[0]; 479 udpmsg.msg_iov = &iov[0];
494 udpmsg.msg_iovlen = msg->msg_iovlen + 1; 480 udpmsg.msg_iovlen = 2;
495 udpmsg.msg_control = NULL; 481 udpmsg.msg_control = NULL;
496 udpmsg.msg_controllen = 0; 482 udpmsg.msg_controllen = 0;
497 udpmsg.msg_flags=0; 483 udpmsg.msg_flags=0;
@@ -499,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
499 oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */ 485 oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */
500 err = sock_sendmsg(udpsock, &udpmsg, size); 486 err = sock_sendmsg(udpsock, &udpmsg, size);
501 set_fs(oldfs); 487 set_fs(oldfs);
488
489error_free_buf:
490 vfree(userbuf);
502#else 491#else
503 err = -EPROTOTYPE; 492 err = -EPROTOTYPE;
504#endif 493#endif
494 error:
505 mutex_unlock(&econet_mutex); 495 mutex_unlock(&econet_mutex);
506 496
507 return err; 497 return err;
@@ -671,6 +661,9 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
671 err = 0; 661 err = 0;
672 switch (cmd) { 662 switch (cmd) {
673 case SIOCSIFADDR: 663 case SIOCSIFADDR:
664 if (!capable(CAP_NET_ADMIN))
665 return -EPERM;
666
674 edev = dev->ec_ptr; 667 edev = dev->ec_ptr;
675 if (edev == NULL) { 668 if (edev == NULL) {
676 /* Magic up a new one. */ 669 /* Magic up a new one. */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 36e27c2107de..eb6f69a8f27a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net)
1052 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1052 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1053 hlist_del(node); 1053 hlist_del(node);
1054 fib_table_flush(tb); 1054 fib_table_flush(tb);
1055 kfree(tb); 1055 fib_free_table(tb);
1056 } 1056 }
1057 } 1057 }
1058 kfree(net->ipv4.fib_table_hash); 1058 kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 43e1c594ce8f..b3acb0417b21 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -120,11 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
120 struct fib_node *f; 120 struct fib_node *f;
121 121
122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) { 122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
123 struct hlist_head __rcu *new_head; 123 struct hlist_head *new_head;
124 124
125 hlist_del_rcu(&f->fn_hash); 125 hlist_del_rcu(&f->fn_hash);
126 126
127 new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)]; 127 new_head = rcu_dereference_protected(fz->fz_hash, 1) +
128 fn_hash(f->fn_key, fz);
128 hlist_add_head_rcu(&f->fn_hash, new_head); 129 hlist_add_head_rcu(&f->fn_hash, new_head);
129 } 130 }
130 } 131 }
@@ -179,8 +180,8 @@ static void fn_rehash_zone(struct fn_zone *fz)
179 memcpy(&nfz, fz, sizeof(nfz)); 180 memcpy(&nfz, fz, sizeof(nfz));
180 181
181 write_seqlock_bh(&fz->fz_lock); 182 write_seqlock_bh(&fz->fz_lock);
182 old_ht = fz->fz_hash; 183 old_ht = rcu_dereference_protected(fz->fz_hash, 1);
183 nfz.fz_hash = ht; 184 RCU_INIT_POINTER(nfz.fz_hash, ht);
184 nfz.fz_hashmask = new_hashmask; 185 nfz.fz_hashmask = new_hashmask;
185 nfz.fz_divisor = new_divisor; 186 nfz.fz_divisor = new_divisor;
186 fn_rebuild_zone(&nfz, old_ht, old_divisor); 187 fn_rebuild_zone(&nfz, old_ht, old_divisor);
@@ -236,7 +237,7 @@ fn_new_zone(struct fn_hash *table, int z)
236 seqlock_init(&fz->fz_lock); 237 seqlock_init(&fz->fz_lock);
237 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1; 238 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
238 fz->fz_hashmask = fz->fz_divisor - 1; 239 fz->fz_hashmask = fz->fz_divisor - 1;
239 fz->fz_hash = fz->fz_embedded_hash; 240 RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
240 fz->fz_order = z; 241 fz->fz_order = z;
241 fz->fz_revorder = 32 - z; 242 fz->fz_revorder = 32 - z;
242 fz->fz_mask = inet_make_mask(z); 243 fz->fz_mask = inet_make_mask(z);
@@ -272,7 +273,7 @@ int fib_table_lookup(struct fib_table *tb,
272 for (fz = rcu_dereference(t->fn_zone_list); 273 for (fz = rcu_dereference(t->fn_zone_list);
273 fz != NULL; 274 fz != NULL;
274 fz = rcu_dereference(fz->fz_next)) { 275 fz = rcu_dereference(fz->fz_next)) {
275 struct hlist_head __rcu *head; 276 struct hlist_head *head;
276 struct hlist_node *node; 277 struct hlist_node *node;
277 struct fib_node *f; 278 struct fib_node *f;
278 __be32 k; 279 __be32 k;
@@ -282,7 +283,7 @@ int fib_table_lookup(struct fib_table *tb,
282 seq = read_seqbegin(&fz->fz_lock); 283 seq = read_seqbegin(&fz->fz_lock);
283 k = fz_key(flp->fl4_dst, fz); 284 k = fz_key(flp->fl4_dst, fz);
284 285
285 head = &fz->fz_hash[fn_hash(k, fz)]; 286 head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
286 hlist_for_each_entry_rcu(f, node, head, fn_hash) { 287 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
287 if (f->fn_key != k) 288 if (f->fn_key != k)
288 continue; 289 continue;
@@ -311,6 +312,7 @@ void fib_table_select_default(struct fib_table *tb,
311 struct fib_info *last_resort; 312 struct fib_info *last_resort;
312 struct fn_hash *t = (struct fn_hash *)tb->tb_data; 313 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
313 struct fn_zone *fz = t->fn_zones[0]; 314 struct fn_zone *fz = t->fn_zones[0];
315 struct hlist_head *head;
314 316
315 if (fz == NULL) 317 if (fz == NULL)
316 return; 318 return;
@@ -320,7 +322,8 @@ void fib_table_select_default(struct fib_table *tb,
320 order = -1; 322 order = -1;
321 323
322 rcu_read_lock(); 324 rcu_read_lock();
323 hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) { 325 head = rcu_dereference(fz->fz_hash);
326 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
324 struct fib_alias *fa; 327 struct fib_alias *fa;
325 328
326 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { 329 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
@@ -374,7 +377,7 @@ out:
374/* Insert node F to FZ. */ 377/* Insert node F to FZ. */
375static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) 378static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
376{ 379{
377 struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)]; 380 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
378 381
379 hlist_add_head_rcu(&f->fn_hash, head); 382 hlist_add_head_rcu(&f->fn_hash, head);
380} 383}
@@ -382,7 +385,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
382/* Return the node in FZ matching KEY. */ 385/* Return the node in FZ matching KEY. */
383static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) 386static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
384{ 387{
385 struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)]; 388 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
386 struct hlist_node *node; 389 struct hlist_node *node;
387 struct fib_node *f; 390 struct fib_node *f;
388 391
@@ -662,7 +665,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
662 665
663static int fn_flush_list(struct fn_zone *fz, int idx) 666static int fn_flush_list(struct fn_zone *fz, int idx)
664{ 667{
665 struct hlist_head *head = &fz->fz_hash[idx]; 668 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
666 struct hlist_node *node, *n; 669 struct hlist_node *node, *n;
667 struct fib_node *f; 670 struct fib_node *f;
668 int found = 0; 671 int found = 0;
@@ -713,6 +716,24 @@ int fib_table_flush(struct fib_table *tb)
713 return found; 716 return found;
714} 717}
715 718
719void fib_free_table(struct fib_table *tb)
720{
721 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
722 struct fn_zone *fz, *next;
723
724 next = table->fn_zone_list;
725 while (next != NULL) {
726 fz = next;
727 next = fz->fz_next;
728
729 if (fz->fz_hash != fz->fz_embedded_hash)
730 fz_hash_free(fz->fz_hash, fz->fz_divisor);
731
732 kfree(fz);
733 }
734
735 kfree(tb);
736}
716 737
717static inline int 738static inline int
718fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, 739fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -761,14 +782,15 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
761 struct fn_zone *fz) 782 struct fn_zone *fz)
762{ 783{
763 int h, s_h; 784 int h, s_h;
785 struct hlist_head *head = rcu_dereference(fz->fz_hash);
764 786
765 if (fz->fz_hash == NULL) 787 if (head == NULL)
766 return skb->len; 788 return skb->len;
767 s_h = cb->args[3]; 789 s_h = cb->args[3];
768 for (h = s_h; h < fz->fz_divisor; h++) { 790 for (h = s_h; h < fz->fz_divisor; h++) {
769 if (hlist_empty(&fz->fz_hash[h])) 791 if (hlist_empty(head + h))
770 continue; 792 continue;
771 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) { 793 if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
772 cb->args[3] = h; 794 cb->args[3] = h;
773 return -1; 795 return -1;
774 } 796 }
@@ -872,7 +894,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
872 if (!iter->zone->fz_nent) 894 if (!iter->zone->fz_nent)
873 continue; 895 continue;
874 896
875 iter->hash_head = iter->zone->fz_hash; 897 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
876 maxslot = iter->zone->fz_divisor; 898 maxslot = iter->zone->fz_divisor;
877 899
878 for (iter->bucket = 0; iter->bucket < maxslot; 900 for (iter->bucket = 0; iter->bucket < maxslot;
@@ -957,7 +979,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
957 goto out; 979 goto out;
958 980
959 iter->bucket = 0; 981 iter->bucket = 0;
960 iter->hash_head = iter->zone->fz_hash; 982 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
961 983
962 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { 984 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
963 list_for_each_entry(fa, &fn->fn_alias, fa_list) { 985 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a29edf2219c8..c079cc0ec651 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -47,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
47static inline void fib_result_assign(struct fib_result *res, 47static inline void fib_result_assign(struct fib_result *res,
48 struct fib_info *fi) 48 struct fib_info *fi)
49{ 49{
50 if (res->fi != NULL) 50 /* we used to play games with refcounts, but we now use RCU */
51 fib_info_put(res->fi);
52 res->fi = fi; 51 res->fi = fi;
53 if (fi != NULL)
54 atomic_inc(&fi->fib_clntref);
55} 52}
56 53
57#endif /* _FIB_LOOKUP_H */ 54#endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b14450895102..0f280348e0fd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size)
365 if (size <= PAGE_SIZE) 365 if (size <= PAGE_SIZE)
366 return kzalloc(size, GFP_KERNEL); 366 return kzalloc(size, GFP_KERNEL);
367 else 367 else
368 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 368 return vzalloc(size);
369} 369}
370 370
371static void __tnode_vfree(struct work_struct *arg) 371static void __tnode_vfree(struct work_struct *arg)
@@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb)
1797 return found; 1797 return found;
1798} 1798}
1799 1799
1800void fib_free_table(struct fib_table *tb)
1801{
1802 kfree(tb);
1803}
1804
1800void fib_table_select_default(struct fib_table *tb, 1805void fib_table_select_default(struct fib_table *tb,
1801 const struct flowi *flp, 1806 const struct flowi *flp,
1802 struct fib_result *res) 1807 struct fib_result *res)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index caea6885fdbd..c6933f2ea310 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -22,7 +22,7 @@
22#include <net/gre.h> 22#include <net/gre.h>
23 23
24 24
25static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly; 25static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
26static DEFINE_SPINLOCK(gre_proto_lock); 26static DEFINE_SPINLOCK(gre_proto_lock);
27 27
28int gre_add_protocol(const struct gre_protocol *proto, u8 version) 28int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -51,7 +51,8 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
51 goto err_out; 51 goto err_out;
52 52
53 spin_lock(&gre_proto_lock); 53 spin_lock(&gre_proto_lock);
54 if (gre_proto[version] != proto) 54 if (rcu_dereference_protected(gre_proto[version],
55 lockdep_is_held(&gre_proto_lock)) != proto)
55 goto err_out_unlock; 56 goto err_out_unlock;
56 rcu_assign_pointer(gre_proto[version], NULL); 57 rcu_assign_pointer(gre_proto[version], NULL);
57 spin_unlock(&gre_proto_lock); 58 spin_unlock(&gre_proto_lock);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 96bc7f9475a3..e5d1a44bcbdf 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -569,6 +569,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
569 /* No need to clone since we're just using its address. */ 569 /* No need to clone since we're just using its address. */
570 rt2 = rt; 570 rt2 = rt;
571 571
572 if (!fl.nl_u.ip4_u.saddr)
573 fl.nl_u.ip4_u.saddr = rt->rt_src;
574
572 err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); 575 err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
573 switch (err) { 576 switch (err) {
574 case 0: 577 case 0:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c7216..3c53c2d89e3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2306,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk)
2306 2306
2307 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); 2307 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
2308 (void) ip_mc_leave_src(sk, iml, in_dev); 2308 (void) ip_mc_leave_src(sk, iml, in_dev);
2309 if (in_dev != NULL) { 2309 if (in_dev != NULL)
2310 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2310 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
2311 in_dev_put(in_dev);
2312 }
2313 /* decrease mem now to avoid the memleak warning */ 2311 /* decrease mem now to avoid the memleak warning */
2314 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 2312 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
2315 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 2313 call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ba8042665849..2ada17129fce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
490{ 490{
491 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 491 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
492 492
493 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 493 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
494 struct inet_diag_entry entry; 494 struct inet_diag_entry entry;
495 struct rtattr *bc = (struct rtattr *)(r + 1); 495 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
496 sizeof(*r),
497 INET_DIAG_REQ_BYTECODE);
496 struct inet_sock *inet = inet_sk(sk); 498 struct inet_sock *inet = inet_sk(sk);
497 499
498 entry.family = sk->sk_family; 500 entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
512 entry.dport = ntohs(inet->inet_dport); 514 entry.dport = ntohs(inet->inet_dport);
513 entry.userlocks = sk->sk_userlocks; 515 entry.userlocks = sk->sk_userlocks;
514 516
515 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 517 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
516 return 0; 518 return 0;
517 } 519 }
518 520
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
527{ 529{
528 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 530 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
529 531
530 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 532 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
531 struct inet_diag_entry entry; 533 struct inet_diag_entry entry;
532 struct rtattr *bc = (struct rtattr *)(r + 1); 534 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
535 sizeof(*r),
536 INET_DIAG_REQ_BYTECODE);
533 537
534 entry.family = tw->tw_family; 538 entry.family = tw->tw_family;
535#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 539#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
548 entry.dport = ntohs(tw->tw_dport); 552 entry.dport = ntohs(tw->tw_dport);
549 entry.userlocks = 0; 553 entry.userlocks = 0;
550 554
551 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 555 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
552 return 0; 556 return 0;
553 } 557 }
554 558
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
618 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 622 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
619 struct inet_connection_sock *icsk = inet_csk(sk); 623 struct inet_connection_sock *icsk = inet_csk(sk);
620 struct listen_sock *lopt; 624 struct listen_sock *lopt;
621 struct rtattr *bc = NULL; 625 const struct nlattr *bc = NULL;
622 struct inet_sock *inet = inet_sk(sk); 626 struct inet_sock *inet = inet_sk(sk);
623 int j, s_j; 627 int j, s_j;
624 int reqnum, s_reqnum; 628 int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
638 if (!lopt || !lopt->qlen) 642 if (!lopt || !lopt->qlen)
639 goto out; 643 goto out;
640 644
641 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 645 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
642 bc = (struct rtattr *)(r + 1); 646 bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
647 INET_DIAG_REQ_BYTECODE);
643 entry.sport = inet->inet_num; 648 entry.sport = inet->inet_num;
644 entry.userlocks = sk->sk_userlocks; 649 entry.userlocks = sk->sk_userlocks;
645 } 650 }
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
672 &ireq->rmt_addr; 677 &ireq->rmt_addr;
673 entry.dport = ntohs(ireq->rmt_port); 678 entry.dport = ntohs(ireq->rmt_port);
674 679
675 if (!inet_diag_bc_run(RTA_DATA(bc), 680 if (!inet_diag_bc_run(nla_data(bc),
676 RTA_PAYLOAD(bc), &entry)) 681 nla_len(bc), &entry))
677 continue; 682 continue;
678 } 683 }
679 684
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1b344f30b463..3c0369a3a663 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)
133 } 133 }
134 } 134 }
135 } 135 }
136 sk_add_bind_node(child, &tb->owners); 136 inet_bind_hash(child, tb, port);
137 inet_csk(child)->icsk_bind_hash = tb;
138 spin_unlock(&head->lock); 137 spin_unlock(&head->lock);
139 138
140 return 0; 139 return 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
72#define node_height(x) x->avl_height 72#define node_height(x) x->avl_height
73 73
74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) 74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
75#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
75static const struct inet_peer peer_fake_node = { 76static const struct inet_peer peer_fake_node = {
76 .avl_left = peer_avl_empty, 77 .avl_left = peer_avl_empty_rcu,
77 .avl_right = peer_avl_empty, 78 .avl_right = peer_avl_empty_rcu,
78 .avl_height = 0 79 .avl_height = 0
79}; 80};
80 81
81static struct { 82static struct {
82 struct inet_peer *root; 83 struct inet_peer __rcu *root;
83 spinlock_t lock; 84 spinlock_t lock;
84 int total; 85 int total;
85} peers = { 86} peers = {
86 .root = peer_avl_empty, 87 .root = peer_avl_empty_rcu,
87 .lock = __SPIN_LOCK_UNLOCKED(peers.lock), 88 .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
88 .total = 0, 89 .total = 0,
89}; 90};
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
156 */ 157 */
157#define lookup(_daddr, _stack) \ 158#define lookup(_daddr, _stack) \
158({ \ 159({ \
159 struct inet_peer *u, **v; \ 160 struct inet_peer *u; \
161 struct inet_peer __rcu **v; \
160 \ 162 \
161 stackptr = _stack; \ 163 stackptr = _stack; \
162 *stackptr++ = &peers.root; \ 164 *stackptr++ = &peers.root; \
163 for (u = peers.root; u != peer_avl_empty; ) { \ 165 for (u = rcu_dereference_protected(peers.root, \
166 lockdep_is_held(&peers.lock)); \
167 u != peer_avl_empty; ) { \
164 if (_daddr == u->v4daddr) \ 168 if (_daddr == u->v4daddr) \
165 break; \ 169 break; \
166 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ 170 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
168 else \ 172 else \
169 v = &u->avl_right; \ 173 v = &u->avl_right; \
170 *stackptr++ = v; \ 174 *stackptr++ = v; \
171 u = *v; \ 175 u = rcu_dereference_protected(*v, \
176 lockdep_is_held(&peers.lock)); \
172 } \ 177 } \
173 u; \ 178 u; \
174}) 179})
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
209/* Called with local BH disabled and the pool lock held. */ 214/* Called with local BH disabled and the pool lock held. */
210#define lookup_rightempty(start) \ 215#define lookup_rightempty(start) \
211({ \ 216({ \
212 struct inet_peer *u, **v; \ 217 struct inet_peer *u; \
218 struct inet_peer __rcu **v; \
213 *stackptr++ = &start->avl_left; \ 219 *stackptr++ = &start->avl_left; \
214 v = &start->avl_left; \ 220 v = &start->avl_left; \
215 for (u = *v; u->avl_right != peer_avl_empty; ) { \ 221 for (u = rcu_dereference_protected(*v, \
222 lockdep_is_held(&peers.lock)); \
223 u->avl_right != peer_avl_empty_rcu; ) { \
216 v = &u->avl_right; \ 224 v = &u->avl_right; \
217 *stackptr++ = v; \ 225 *stackptr++ = v; \
218 u = *v; \ 226 u = rcu_dereference_protected(*v, \
227 lockdep_is_held(&peers.lock)); \
219 } \ 228 } \
220 u; \ 229 u; \
221}) 230})
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
224 * Variable names are the proof of operation correctness. 233 * Variable names are the proof of operation correctness.
225 * Look into mm/map_avl.c for more detail description of the ideas. 234 * Look into mm/map_avl.c for more detail description of the ideas.
226 */ 235 */
227static void peer_avl_rebalance(struct inet_peer **stack[], 236static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
228 struct inet_peer ***stackend) 237 struct inet_peer __rcu ***stackend)
229{ 238{
230 struct inet_peer **nodep, *node, *l, *r; 239 struct inet_peer __rcu **nodep;
240 struct inet_peer *node, *l, *r;
231 int lh, rh; 241 int lh, rh;
232 242
233 while (stackend > stack) { 243 while (stackend > stack) {
234 nodep = *--stackend; 244 nodep = *--stackend;
235 node = *nodep; 245 node = rcu_dereference_protected(*nodep,
236 l = node->avl_left; 246 lockdep_is_held(&peers.lock));
237 r = node->avl_right; 247 l = rcu_dereference_protected(node->avl_left,
248 lockdep_is_held(&peers.lock));
249 r = rcu_dereference_protected(node->avl_right,
250 lockdep_is_held(&peers.lock));
238 lh = node_height(l); 251 lh = node_height(l);
239 rh = node_height(r); 252 rh = node_height(r);
240 if (lh > rh + 1) { /* l: RH+2 */ 253 if (lh > rh + 1) { /* l: RH+2 */
241 struct inet_peer *ll, *lr, *lrl, *lrr; 254 struct inet_peer *ll, *lr, *lrl, *lrr;
242 int lrh; 255 int lrh;
243 ll = l->avl_left; 256 ll = rcu_dereference_protected(l->avl_left,
244 lr = l->avl_right; 257 lockdep_is_held(&peers.lock));
258 lr = rcu_dereference_protected(l->avl_right,
259 lockdep_is_held(&peers.lock));
245 lrh = node_height(lr); 260 lrh = node_height(lr);
246 if (lrh <= node_height(ll)) { /* ll: RH+1 */ 261 if (lrh <= node_height(ll)) { /* ll: RH+1 */
247 node->avl_left = lr; /* lr: RH or RH+1 */ 262 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
248 node->avl_right = r; /* r: RH */ 263 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
249 node->avl_height = lrh + 1; /* RH+1 or RH+2 */ 264 node->avl_height = lrh + 1; /* RH+1 or RH+2 */
250 l->avl_left = ll; /* ll: RH+1 */ 265 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
251 l->avl_right = node; /* node: RH+1 or RH+2 */ 266 RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
252 l->avl_height = node->avl_height + 1; 267 l->avl_height = node->avl_height + 1;
253 *nodep = l; 268 RCU_INIT_POINTER(*nodep, l);
254 } else { /* ll: RH, lr: RH+1 */ 269 } else { /* ll: RH, lr: RH+1 */
255 lrl = lr->avl_left; /* lrl: RH or RH-1 */ 270 lrl = rcu_dereference_protected(lr->avl_left,
256 lrr = lr->avl_right; /* lrr: RH or RH-1 */ 271 lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
257 node->avl_left = lrr; /* lrr: RH or RH-1 */ 272 lrr = rcu_dereference_protected(lr->avl_right,
258 node->avl_right = r; /* r: RH */ 273 lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
274 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
275 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
259 node->avl_height = rh + 1; /* node: RH+1 */ 276 node->avl_height = rh + 1; /* node: RH+1 */
260 l->avl_left = ll; /* ll: RH */ 277 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
261 l->avl_right = lrl; /* lrl: RH or RH-1 */ 278 RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
262 l->avl_height = rh + 1; /* l: RH+1 */ 279 l->avl_height = rh + 1; /* l: RH+1 */
263 lr->avl_left = l; /* l: RH+1 */ 280 RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
264 lr->avl_right = node; /* node: RH+1 */ 281 RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
265 lr->avl_height = rh + 2; 282 lr->avl_height = rh + 2;
266 *nodep = lr; 283 RCU_INIT_POINTER(*nodep, lr);
267 } 284 }
268 } else if (rh > lh + 1) { /* r: LH+2 */ 285 } else if (rh > lh + 1) { /* r: LH+2 */
269 struct inet_peer *rr, *rl, *rlr, *rll; 286 struct inet_peer *rr, *rl, *rlr, *rll;
270 int rlh; 287 int rlh;
271 rr = r->avl_right; 288 rr = rcu_dereference_protected(r->avl_right,
272 rl = r->avl_left; 289 lockdep_is_held(&peers.lock));
290 rl = rcu_dereference_protected(r->avl_left,
291 lockdep_is_held(&peers.lock));
273 rlh = node_height(rl); 292 rlh = node_height(rl);
274 if (rlh <= node_height(rr)) { /* rr: LH+1 */ 293 if (rlh <= node_height(rr)) { /* rr: LH+1 */
275 node->avl_right = rl; /* rl: LH or LH+1 */ 294 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
276 node->avl_left = l; /* l: LH */ 295 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
277 node->avl_height = rlh + 1; /* LH+1 or LH+2 */ 296 node->avl_height = rlh + 1; /* LH+1 or LH+2 */
278 r->avl_right = rr; /* rr: LH+1 */ 297 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
279 r->avl_left = node; /* node: LH+1 or LH+2 */ 298 RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
280 r->avl_height = node->avl_height + 1; 299 r->avl_height = node->avl_height + 1;
281 *nodep = r; 300 RCU_INIT_POINTER(*nodep, r);
282 } else { /* rr: RH, rl: RH+1 */ 301 } else { /* rr: RH, rl: RH+1 */
283 rlr = rl->avl_right; /* rlr: LH or LH-1 */ 302 rlr = rcu_dereference_protected(rl->avl_right,
284 rll = rl->avl_left; /* rll: LH or LH-1 */ 303 lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
285 node->avl_right = rll; /* rll: LH or LH-1 */ 304 rll = rcu_dereference_protected(rl->avl_left,
286 node->avl_left = l; /* l: LH */ 305 lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
306 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
307 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
287 node->avl_height = lh + 1; /* node: LH+1 */ 308 node->avl_height = lh + 1; /* node: LH+1 */
288 r->avl_right = rr; /* rr: LH */ 309 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
289 r->avl_left = rlr; /* rlr: LH or LH-1 */ 310 RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
290 r->avl_height = lh + 1; /* r: LH+1 */ 311 r->avl_height = lh + 1; /* r: LH+1 */
291 rl->avl_right = r; /* r: LH+1 */ 312 RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
292 rl->avl_left = node; /* node: LH+1 */ 313 RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
293 rl->avl_height = lh + 2; 314 rl->avl_height = lh + 2;
294 *nodep = rl; 315 RCU_INIT_POINTER(*nodep, rl);
295 } 316 }
296 } else { 317 } else {
297 node->avl_height = (lh > rh ? lh : rh) + 1; 318 node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
303#define link_to_pool(n) \ 324#define link_to_pool(n) \
304do { \ 325do { \
305 n->avl_height = 1; \ 326 n->avl_height = 1; \
306 n->avl_left = peer_avl_empty; \ 327 n->avl_left = peer_avl_empty_rcu; \
307 n->avl_right = peer_avl_empty; \ 328 n->avl_right = peer_avl_empty_rcu; \
308 smp_wmb(); /* lockless readers can catch us now */ \ 329 /* lockless readers can catch us now */ \
309 **--stackptr = n; \ 330 rcu_assign_pointer(**--stackptr, n); \
310 peer_avl_rebalance(stack, stackptr); \ 331 peer_avl_rebalance(stack, stackptr); \
311} while (0) 332} while (0)
312 333
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
330 * We use refcnt=-1 to alert lockless readers this entry is deleted. 351 * We use refcnt=-1 to alert lockless readers this entry is deleted.
331 */ 352 */
332 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { 353 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
333 struct inet_peer **stack[PEER_MAXDEPTH]; 354 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
334 struct inet_peer ***stackptr, ***delp; 355 struct inet_peer __rcu ***stackptr, ***delp;
335 if (lookup(p->v4daddr, stack) != p) 356 if (lookup(p->v4daddr, stack) != p)
336 BUG(); 357 BUG();
337 delp = stackptr - 1; /* *delp[0] == p */ 358 delp = stackptr - 1; /* *delp[0] == p */
338 if (p->avl_left == peer_avl_empty) { 359 if (p->avl_left == peer_avl_empty_rcu) {
339 *delp[0] = p->avl_right; 360 *delp[0] = p->avl_right;
340 --stackptr; 361 --stackptr;
341 } else { 362 } else {
342 /* look for a node to insert instead of p */ 363 /* look for a node to insert instead of p */
343 struct inet_peer *t; 364 struct inet_peer *t;
344 t = lookup_rightempty(p); 365 t = lookup_rightempty(p);
345 BUG_ON(*stackptr[-1] != t); 366 BUG_ON(rcu_dereference_protected(*stackptr[-1],
367 lockdep_is_held(&peers.lock)) != t);
346 **--stackptr = t->avl_left; 368 **--stackptr = t->avl_left;
347 /* t is removed, t->v4daddr > x->v4daddr for any 369 /* t is removed, t->v4daddr > x->v4daddr for any
348 * x in p->avl_left subtree. 370 * x in p->avl_left subtree.
349 * Put t in the old place of p. */ 371 * Put t in the old place of p. */
350 *delp[0] = t; 372 RCU_INIT_POINTER(*delp[0], t);
351 t->avl_left = p->avl_left; 373 t->avl_left = p->avl_left;
352 t->avl_right = p->avl_right; 374 t->avl_right = p->avl_right;
353 t->avl_height = p->avl_height; 375 t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
414struct inet_peer *inet_getpeer(__be32 daddr, int create) 436struct inet_peer *inet_getpeer(__be32 daddr, int create)
415{ 437{
416 struct inet_peer *p; 438 struct inet_peer *p;
417 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; 439 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
418 440
419 /* Look up for the address quickly, lockless. 441 /* Look up for the address quickly, lockless.
420 * Because of a concurrent writer, we might not find an existing entry. 442 * Because of a concurrent writer, we might not find an existing entry.
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d0ffcbe369b7..70ff77f02eee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1072,6 +1072,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1072 break; 1072 break;
1073 } 1073 }
1074 ipgre_tunnel_unlink(ign, t); 1074 ipgre_tunnel_unlink(ign, t);
1075 synchronize_net();
1075 t->parms.iph.saddr = p.iph.saddr; 1076 t->parms.iph.saddr = p.iph.saddr;
1076 t->parms.iph.daddr = p.iph.daddr; 1077 t->parms.iph.daddr = p.iph.daddr;
1077 t->parms.i_key = p.i_key; 1078 t->parms.i_key = p.i_key;
@@ -1324,7 +1325,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1324{ 1325{
1325 struct ip_tunnel *tunnel = netdev_priv(dev); 1326 struct ip_tunnel *tunnel = netdev_priv(dev);
1326 struct iphdr *iph = &tunnel->parms.iph; 1327 struct iphdr *iph = &tunnel->parms.iph;
1327 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1328 1328
1329 tunnel->dev = dev; 1329 tunnel->dev = dev;
1330 strcpy(tunnel->parms.name, dev->name); 1330 strcpy(tunnel->parms.name, dev->name);
@@ -1335,7 +1335,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1335 tunnel->hlen = sizeof(struct iphdr) + 4; 1335 tunnel->hlen = sizeof(struct iphdr) + 4;
1336 1336
1337 dev_hold(dev); 1337 dev_hold(dev);
1338 rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
1339} 1338}
1340 1339
1341 1340
@@ -1382,10 +1381,12 @@ static int __net_init ipgre_init_net(struct net *net)
1382 if ((err = register_netdev(ign->fb_tunnel_dev))) 1381 if ((err = register_netdev(ign->fb_tunnel_dev)))
1383 goto err_reg_dev; 1382 goto err_reg_dev;
1384 1383
1384 rcu_assign_pointer(ign->tunnels_wc[0],
1385 netdev_priv(ign->fb_tunnel_dev));
1385 return 0; 1386 return 0;
1386 1387
1387err_reg_dev: 1388err_reg_dev:
1388 free_netdev(ign->fb_tunnel_dev); 1389 ipgre_dev_free(ign->fb_tunnel_dev);
1389err_alloc_dev: 1390err_alloc_dev:
1390 return err; 1391 return err;
1391} 1392}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64b70ad162e3..3948c86e59ca 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -238,7 +238,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
238 but receiver should be enough clever f.e. to forward mtrace requests, 238 but receiver should be enough clever f.e. to forward mtrace requests,
239 sent to multicast group to reach destination designated router. 239 sent to multicast group to reach destination designated router.
240 */ 240 */
241struct ip_ra_chain *ip_ra_chain; 241struct ip_ra_chain __rcu *ip_ra_chain;
242static DEFINE_SPINLOCK(ip_ra_lock); 242static DEFINE_SPINLOCK(ip_ra_lock);
243 243
244 244
@@ -253,7 +253,8 @@ static void ip_ra_destroy_rcu(struct rcu_head *head)
253int ip_ra_control(struct sock *sk, unsigned char on, 253int ip_ra_control(struct sock *sk, unsigned char on,
254 void (*destructor)(struct sock *)) 254 void (*destructor)(struct sock *))
255{ 255{
256 struct ip_ra_chain *ra, *new_ra, **rap; 256 struct ip_ra_chain *ra, *new_ra;
257 struct ip_ra_chain __rcu **rap;
257 258
258 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) 259 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
259 return -EINVAL; 260 return -EINVAL;
@@ -261,7 +262,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
261 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 262 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
262 263
263 spin_lock_bh(&ip_ra_lock); 264 spin_lock_bh(&ip_ra_lock);
264 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { 265 for (rap = &ip_ra_chain;
266 (ra = rcu_dereference_protected(*rap,
267 lockdep_is_held(&ip_ra_lock))) != NULL;
268 rap = &ra->next) {
265 if (ra->sk == sk) { 269 if (ra->sk == sk) {
266 if (on) { 270 if (on) {
267 spin_unlock_bh(&ip_ra_lock); 271 spin_unlock_bh(&ip_ra_lock);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e9b816e6cd73..cd300aaee78f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -676,6 +676,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
676 } 676 }
677 t = netdev_priv(dev); 677 t = netdev_priv(dev);
678 ipip_tunnel_unlink(ipn, t); 678 ipip_tunnel_unlink(ipn, t);
679 synchronize_net();
679 t->parms.iph.saddr = p.iph.saddr; 680 t->parms.iph.saddr = p.iph.saddr;
680 t->parms.iph.daddr = p.iph.daddr; 681 t->parms.iph.daddr = p.iph.daddr;
681 memcpy(dev->dev_addr, &p.iph.saddr, 4); 682 memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3cad2591ace0..3fac340a28d5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
927 private = &tmp; 927 private = &tmp;
928 } 928 }
929#endif 929#endif
930 memset(&info, 0, sizeof(info));
930 info.valid_hooks = t->valid_hooks; 931 info.valid_hooks = t->valid_hooks;
931 memcpy(info.hook_entry, private->hook_entry, 932 memcpy(info.hook_entry, private->hook_entry,
932 sizeof(info.hook_entry)); 933 sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d31b007a6d80..a846d633b3b6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
1124 private = &tmp; 1124 private = &tmp;
1125 } 1125 }
1126#endif 1126#endif
1127 memset(&info, 0, sizeof(info));
1127 info.valid_hooks = t->valid_hooks; 1128 info.valid_hooks = t->valid_hooks;
1128 memcpy(info.hook_entry, private->hook_entry, 1129 memcpy(info.hook_entry, private->hook_entry,
1129 sizeof(info.hook_entry)); 1130 sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 295c97431e43..c04787ce1a71 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,26 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
47 return rcu_dereference(nf_nat_protos[protonum]); 47 return rcu_dereference(nf_nat_protos[protonum]);
48} 48}
49 49
50static const struct nf_nat_protocol *
51nf_nat_proto_find_get(u_int8_t protonum)
52{
53 const struct nf_nat_protocol *p;
54
55 rcu_read_lock();
56 p = __nf_nat_proto_find(protonum);
57 if (!try_module_get(p->me))
58 p = &nf_nat_unknown_protocol;
59 rcu_read_unlock();
60
61 return p;
62}
63
64static void
65nf_nat_proto_put(const struct nf_nat_protocol *p)
66{
67 module_put(p->me);
68}
69
70/* We keep an extra hash for each conntrack, for fast searching. */ 50/* We keep an extra hash for each conntrack, for fast searching. */
71static inline unsigned int 51static inline unsigned int
72hash_by_src(const struct net *net, u16 zone, 52hash_by_src(const struct net *net, u16 zone,
@@ -588,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
588#include <linux/netfilter/nfnetlink.h> 568#include <linux/netfilter/nfnetlink.h>
589#include <linux/netfilter/nfnetlink_conntrack.h> 569#include <linux/netfilter/nfnetlink_conntrack.h>
590 570
571static const struct nf_nat_protocol *
572nf_nat_proto_find_get(u_int8_t protonum)
573{
574 const struct nf_nat_protocol *p;
575
576 rcu_read_lock();
577 p = __nf_nat_proto_find(protonum);
578 if (!try_module_get(p->me))
579 p = &nf_nat_unknown_protocol;
580 rcu_read_unlock();
581
582 return p;
583}
584
585static void
586nf_nat_proto_put(const struct nf_nat_protocol *p)
587{
588 module_put(p->me);
589}
590
591static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { 591static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
592 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, 592 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
593 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, 593 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7cb..1b48eb1ed453 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
59 local_bh_enable(); 59 local_bh_enable();
60 60
61 socket_seq_show(seq); 61 socket_seq_show(seq);
62 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 62 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
63 sock_prot_inuse_get(net, &tcp_prot), orphans, 63 sock_prot_inuse_get(net, &tcp_prot), orphans,
64 tcp_death_row.tw_count, sockets, 64 tcp_death_row.tw_count, sockets,
65 atomic_read(&tcp_memory_allocated)); 65 atomic_long_read(&tcp_memory_allocated));
66 seq_printf(seq, "UDP: inuse %d mem %d\n", 66 seq_printf(seq, "UDP: inuse %d mem %ld\n",
67 sock_prot_inuse_get(net, &udp_prot), 67 sock_prot_inuse_get(net, &udp_prot),
68 atomic_read(&udp_memory_allocated)); 68 atomic_long_read(&udp_memory_allocated));
69 seq_printf(seq, "UDPLITE: inuse %d\n", 69 seq_printf(seq, "UDPLITE: inuse %d\n",
70 sock_prot_inuse_get(net, &udplite_prot)); 70 sock_prot_inuse_get(net, &udplite_prot));
71 seq_printf(seq, "RAW: inuse %d\n", 71 seq_printf(seq, "RAW: inuse %d\n",
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 65699c24411c..9ae5c01cd0b2 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <net/protocol.h> 29#include <net/protocol.h>
30 30
31const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly; 31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
32 32
33/* 33/*
34 * Add a protocol handler to the hash tables 34 * Add a protocol handler to the hash tables
@@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
38{ 38{
39 int hash = protocol & (MAX_INET_PROTOS - 1); 39 int hash = protocol & (MAX_INET_PROTOS - 1);
40 40
41 return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1; 41 return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
42 NULL, prot) ? 0 : -1;
42} 43}
43EXPORT_SYMBOL(inet_add_protocol); 44EXPORT_SYMBOL(inet_add_protocol);
44 45
@@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
50{ 51{
51 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 52 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
52 53
53 ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1; 54 ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
55 prot, NULL) == prot) ? 0 : -1;
54 56
55 synchronize_net(); 57 synchronize_net();
56 58
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6cb2bfcd8e1..987bf9adb318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = {
198 */ 198 */
199 199
200struct rt_hash_bucket { 200struct rt_hash_bucket {
201 struct rtable *chain; 201 struct rtable __rcu *chain;
202}; 202};
203 203
204#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ 204#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
280 struct rtable *r = NULL; 280 struct rtable *r = NULL;
281 281
282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
283 if (!rt_hash_table[st->bucket].chain) 283 if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
284 continue; 284 continue;
285 rcu_read_lock_bh(); 285 rcu_read_lock_bh();
286 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 286 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
300{ 300{
301 struct rt_cache_iter_state *st = seq->private; 301 struct rt_cache_iter_state *st = seq->private;
302 302
303 r = r->dst.rt_next; 303 r = rcu_dereference_bh(r->dst.rt_next);
304 while (!r) { 304 while (!r) {
305 rcu_read_unlock_bh(); 305 rcu_read_unlock_bh();
306 do { 306 do {
307 if (--st->bucket < 0) 307 if (--st->bucket < 0)
308 return NULL; 308 return NULL;
309 } while (!rt_hash_table[st->bucket].chain); 309 } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
310 rcu_read_lock_bh(); 310 rcu_read_lock_bh();
311 r = rt_hash_table[st->bucket].chain; 311 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
312 } 312 }
313 return rcu_dereference_bh(r); 313 return r;
314} 314}
315 315
316static struct rtable *rt_cache_get_next(struct seq_file *seq, 316static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -721,19 +721,23 @@ static void rt_do_flush(int process_context)
721 for (i = 0; i <= rt_hash_mask; i++) { 721 for (i = 0; i <= rt_hash_mask; i++) {
722 if (process_context && need_resched()) 722 if (process_context && need_resched())
723 cond_resched(); 723 cond_resched();
724 rth = rt_hash_table[i].chain; 724 rth = rcu_dereference_raw(rt_hash_table[i].chain);
725 if (!rth) 725 if (!rth)
726 continue; 726 continue;
727 727
728 spin_lock_bh(rt_hash_lock_addr(i)); 728 spin_lock_bh(rt_hash_lock_addr(i));
729#ifdef CONFIG_NET_NS 729#ifdef CONFIG_NET_NS
730 { 730 {
731 struct rtable ** prev, * p; 731 struct rtable __rcu **prev;
732 struct rtable *p;
732 733
733 rth = rt_hash_table[i].chain; 734 rth = rcu_dereference_protected(rt_hash_table[i].chain,
735 lockdep_is_held(rt_hash_lock_addr(i)));
734 736
735 /* defer releasing the head of the list after spin_unlock */ 737 /* defer releasing the head of the list after spin_unlock */
736 for (tail = rth; tail; tail = tail->dst.rt_next) 738 for (tail = rth; tail;
739 tail = rcu_dereference_protected(tail->dst.rt_next,
740 lockdep_is_held(rt_hash_lock_addr(i))))
737 if (!rt_is_expired(tail)) 741 if (!rt_is_expired(tail))
738 break; 742 break;
739 if (rth != tail) 743 if (rth != tail)
@@ -741,8 +745,12 @@ static void rt_do_flush(int process_context)
741 745
742 /* call rt_free on entries after the tail requiring flush */ 746 /* call rt_free on entries after the tail requiring flush */
743 prev = &rt_hash_table[i].chain; 747 prev = &rt_hash_table[i].chain;
744 for (p = *prev; p; p = next) { 748 for (p = rcu_dereference_protected(*prev,
745 next = p->dst.rt_next; 749 lockdep_is_held(rt_hash_lock_addr(i)));
750 p != NULL;
751 p = next) {
752 next = rcu_dereference_protected(p->dst.rt_next,
753 lockdep_is_held(rt_hash_lock_addr(i)));
746 if (!rt_is_expired(p)) { 754 if (!rt_is_expired(p)) {
747 prev = &p->dst.rt_next; 755 prev = &p->dst.rt_next;
748 } else { 756 } else {
@@ -752,14 +760,15 @@ static void rt_do_flush(int process_context)
752 } 760 }
753 } 761 }
754#else 762#else
755 rth = rt_hash_table[i].chain; 763 rth = rcu_dereference_protected(rt_hash_table[i].chain,
756 rt_hash_table[i].chain = NULL; 764 lockdep_is_held(rt_hash_lock_addr(i)));
765 rcu_assign_pointer(rt_hash_table[i].chain, NULL);
757 tail = NULL; 766 tail = NULL;
758#endif 767#endif
759 spin_unlock_bh(rt_hash_lock_addr(i)); 768 spin_unlock_bh(rt_hash_lock_addr(i));
760 769
761 for (; rth != tail; rth = next) { 770 for (; rth != tail; rth = next) {
762 next = rth->dst.rt_next; 771 next = rcu_dereference_protected(rth->dst.rt_next, 1);
763 rt_free(rth); 772 rt_free(rth);
764 } 773 }
765 } 774 }
@@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
790 while (aux != rth) { 799 while (aux != rth) {
791 if (compare_hash_inputs(&aux->fl, &rth->fl)) 800 if (compare_hash_inputs(&aux->fl, &rth->fl))
792 return 0; 801 return 0;
793 aux = aux->dst.rt_next; 802 aux = rcu_dereference_protected(aux->dst.rt_next, 1);
794 } 803 }
795 return ONE; 804 return ONE;
796} 805}
@@ -799,7 +808,8 @@ static void rt_check_expire(void)
799{ 808{
800 static unsigned int rover; 809 static unsigned int rover;
801 unsigned int i = rover, goal; 810 unsigned int i = rover, goal;
802 struct rtable *rth, **rthp; 811 struct rtable *rth;
812 struct rtable __rcu **rthp;
803 unsigned long samples = 0; 813 unsigned long samples = 0;
804 unsigned long sum = 0, sum2 = 0; 814 unsigned long sum = 0, sum2 = 0;
805 unsigned long delta; 815 unsigned long delta;
@@ -825,11 +835,12 @@ static void rt_check_expire(void)
825 835
826 samples++; 836 samples++;
827 837
828 if (*rthp == NULL) 838 if (rcu_dereference_raw(*rthp) == NULL)
829 continue; 839 continue;
830 length = 0; 840 length = 0;
831 spin_lock_bh(rt_hash_lock_addr(i)); 841 spin_lock_bh(rt_hash_lock_addr(i));
832 while ((rth = *rthp) != NULL) { 842 while ((rth = rcu_dereference_protected(*rthp,
843 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
833 prefetch(rth->dst.rt_next); 844 prefetch(rth->dst.rt_next);
834 if (rt_is_expired(rth)) { 845 if (rt_is_expired(rth)) {
835 *rthp = rth->dst.rt_next; 846 *rthp = rth->dst.rt_next;
@@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
941 static unsigned long last_gc; 952 static unsigned long last_gc;
942 static int rover; 953 static int rover;
943 static int equilibrium; 954 static int equilibrium;
944 struct rtable *rth, **rthp; 955 struct rtable *rth;
956 struct rtable __rcu **rthp;
945 unsigned long now = jiffies; 957 unsigned long now = jiffies;
946 int goal; 958 int goal;
947 int entries = dst_entries_get_fast(&ipv4_dst_ops); 959 int entries = dst_entries_get_fast(&ipv4_dst_ops);
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
995 k = (k + 1) & rt_hash_mask; 1007 k = (k + 1) & rt_hash_mask;
996 rthp = &rt_hash_table[k].chain; 1008 rthp = &rt_hash_table[k].chain;
997 spin_lock_bh(rt_hash_lock_addr(k)); 1009 spin_lock_bh(rt_hash_lock_addr(k));
998 while ((rth = *rthp) != NULL) { 1010 while ((rth = rcu_dereference_protected(*rthp,
1011 lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
999 if (!rt_is_expired(rth) && 1012 if (!rt_is_expired(rth) &&
1000 !rt_may_expire(rth, tmo, expire)) { 1013 !rt_may_expire(rth, tmo, expire)) {
1001 tmo >>= 1; 1014 tmo >>= 1;
@@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head)
1071 1084
1072 while (rth) { 1085 while (rth) {
1073 length += has_noalias(head, rth); 1086 length += has_noalias(head, rth);
1074 rth = rth->dst.rt_next; 1087 rth = rcu_dereference_protected(rth->dst.rt_next, 1);
1075 } 1088 }
1076 return length >> FRACT_BITS; 1089 return length >> FRACT_BITS;
1077} 1090}
@@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head)
1079static int rt_intern_hash(unsigned hash, struct rtable *rt, 1092static int rt_intern_hash(unsigned hash, struct rtable *rt,
1080 struct rtable **rp, struct sk_buff *skb, int ifindex) 1093 struct rtable **rp, struct sk_buff *skb, int ifindex)
1081{ 1094{
1082 struct rtable *rth, **rthp; 1095 struct rtable *rth, *cand;
1096 struct rtable __rcu **rthp, **candp;
1083 unsigned long now; 1097 unsigned long now;
1084 struct rtable *cand, **candp;
1085 u32 min_score; 1098 u32 min_score;
1086 int chain_length; 1099 int chain_length;
1087 int attempts = !in_softirq(); 1100 int attempts = !in_softirq();
@@ -1128,7 +1141,8 @@ restart:
1128 rthp = &rt_hash_table[hash].chain; 1141 rthp = &rt_hash_table[hash].chain;
1129 1142
1130 spin_lock_bh(rt_hash_lock_addr(hash)); 1143 spin_lock_bh(rt_hash_lock_addr(hash));
1131 while ((rth = *rthp) != NULL) { 1144 while ((rth = rcu_dereference_protected(*rthp,
1145 lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
1132 if (rt_is_expired(rth)) { 1146 if (rt_is_expired(rth)) {
1133 *rthp = rth->dst.rt_next; 1147 *rthp = rth->dst.rt_next;
1134 rt_free(rth); 1148 rt_free(rth);
@@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident);
1324 1338
1325static void rt_del(unsigned hash, struct rtable *rt) 1339static void rt_del(unsigned hash, struct rtable *rt)
1326{ 1340{
1327 struct rtable **rthp, *aux; 1341 struct rtable __rcu **rthp;
1342 struct rtable *aux;
1328 1343
1329 rthp = &rt_hash_table[hash].chain; 1344 rthp = &rt_hash_table[hash].chain;
1330 spin_lock_bh(rt_hash_lock_addr(hash)); 1345 spin_lock_bh(rt_hash_lock_addr(hash));
1331 ip_rt_put(rt); 1346 ip_rt_put(rt);
1332 while ((aux = *rthp) != NULL) { 1347 while ((aux = rcu_dereference_protected(*rthp,
1348 lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
1333 if (aux == rt || rt_is_expired(aux)) { 1349 if (aux == rt || rt_is_expired(aux)) {
1334 *rthp = aux->dst.rt_next; 1350 *rthp = aux->dst.rt_next;
1335 rt_free(aux); 1351 rt_free(aux);
@@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1346{ 1362{
1347 int i, k; 1363 int i, k;
1348 struct in_device *in_dev = __in_dev_get_rcu(dev); 1364 struct in_device *in_dev = __in_dev_get_rcu(dev);
1349 struct rtable *rth, **rthp; 1365 struct rtable *rth;
1366 struct rtable __rcu **rthp;
1350 __be32 skeys[2] = { saddr, 0 }; 1367 __be32 skeys[2] = { saddr, 0 };
1351 int ikeys[2] = { dev->ifindex, 0 }; 1368 int ikeys[2] = { dev->ifindex, 0 };
1352 struct netevent_redirect netevent; 1369 struct netevent_redirect netevent;
@@ -1379,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1379 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1396 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
1380 rt_genid(net)); 1397 rt_genid(net));
1381 1398
1382 rthp=&rt_hash_table[hash].chain; 1399 rthp = &rt_hash_table[hash].chain;
1383 1400
1384 while ((rth = rcu_dereference(*rthp)) != NULL) { 1401 while ((rth = rcu_dereference(*rthp)) != NULL) {
1385 struct rtable *rt; 1402 struct rtable *rt;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17c..1b4ec21497a4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
26static int tcp_retr1_max = 255; 26static int tcp_retr1_max = 255;
27static int ip_local_port_range_min[] = { 1, 1 }; 27static int ip_local_port_range_min[] = { 1, 1 };
28static int ip_local_port_range_max[] = { 65535, 65535 }; 28static int ip_local_port_range_max[] = { 65535, 65535 };
29static int tcp_adv_win_scale_min = -31;
30static int tcp_adv_win_scale_max = 31;
29 31
30/* Update system visible IP port range */ 32/* Update system visible IP port range */
31static void set_local_port_range(int range[2]) 33static void set_local_port_range(int range[2])
@@ -398,7 +400,7 @@ static struct ctl_table ipv4_table[] = {
398 .data = &sysctl_tcp_mem, 400 .data = &sysctl_tcp_mem,
399 .maxlen = sizeof(sysctl_tcp_mem), 401 .maxlen = sizeof(sysctl_tcp_mem),
400 .mode = 0644, 402 .mode = 0644,
401 .proc_handler = proc_dointvec 403 .proc_handler = proc_doulongvec_minmax
402 }, 404 },
403 { 405 {
404 .procname = "tcp_wmem", 406 .procname = "tcp_wmem",
@@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = {
426 .data = &sysctl_tcp_adv_win_scale, 428 .data = &sysctl_tcp_adv_win_scale,
427 .maxlen = sizeof(int), 429 .maxlen = sizeof(int),
428 .mode = 0644, 430 .mode = 0644,
429 .proc_handler = proc_dointvec 431 .proc_handler = proc_dointvec_minmax,
432 .extra1 = &tcp_adv_win_scale_min,
433 .extra2 = &tcp_adv_win_scale_max,
430 }, 434 },
431 { 435 {
432 .procname = "tcp_tw_reuse", 436 .procname = "tcp_tw_reuse",
@@ -602,8 +606,7 @@ static struct ctl_table ipv4_table[] = {
602 .data = &sysctl_udp_mem, 606 .data = &sysctl_udp_mem,
603 .maxlen = sizeof(sysctl_udp_mem), 607 .maxlen = sizeof(sysctl_udp_mem),
604 .mode = 0644, 608 .mode = 0644,
605 .proc_handler = proc_dointvec_minmax, 609 .proc_handler = proc_doulongvec_minmax,
606 .extra1 = &zero
607 }, 610 },
608 { 611 {
609 .procname = "udp_rmem_min", 612 .procname = "udp_rmem_min",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb8..f15c36a706ec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
282struct percpu_counter tcp_orphan_count; 282struct percpu_counter tcp_orphan_count;
283EXPORT_SYMBOL_GPL(tcp_orphan_count); 283EXPORT_SYMBOL_GPL(tcp_orphan_count);
284 284
285int sysctl_tcp_mem[3] __read_mostly; 285long sysctl_tcp_mem[3] __read_mostly;
286int sysctl_tcp_wmem[3] __read_mostly; 286int sysctl_tcp_wmem[3] __read_mostly;
287int sysctl_tcp_rmem[3] __read_mostly; 287int sysctl_tcp_rmem[3] __read_mostly;
288 288
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
290EXPORT_SYMBOL(sysctl_tcp_rmem); 290EXPORT_SYMBOL(sysctl_tcp_rmem);
291EXPORT_SYMBOL(sysctl_tcp_wmem); 291EXPORT_SYMBOL(sysctl_tcp_wmem);
292 292
293atomic_t tcp_memory_allocated; /* Current allocated memory. */ 293atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
294EXPORT_SYMBOL(tcp_memory_allocated); 294EXPORT_SYMBOL(tcp_memory_allocated);
295 295
296/* 296/*
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2246 /* Values greater than interface MTU won't take effect. However 2246 /* Values greater than interface MTU won't take effect. However
2247 * at the point when this call is done we typically don't yet 2247 * at the point when this call is done we typically don't yet
2248 * know which interface is going to be used */ 2248 * know which interface is going to be used */
2249 if (val < 8 || val > MAX_TCP_WINDOW) { 2249 if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
2250 err = -EINVAL; 2250 err = -EINVAL;
2251 break; 2251 break;
2252 } 2252 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3357f69e353d..6d8ab1c4efc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
259 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + 259 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
260 sizeof(struct sk_buff); 260 sizeof(struct sk_buff);
261 261
262 if (sk->sk_sndbuf < 3 * sndmem) 262 if (sk->sk_sndbuf < 3 * sndmem) {
263 sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]); 263 sk->sk_sndbuf = 3 * sndmem;
264 if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
265 sk->sk_sndbuf = sysctl_tcp_wmem[2];
266 }
264} 267}
265 268
266/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) 269/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
396 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && 399 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
397 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && 400 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
398 !tcp_memory_pressure && 401 !tcp_memory_pressure &&
399 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { 402 atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
400 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), 403 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
401 sysctl_tcp_rmem[2]); 404 sysctl_tcp_rmem[2]);
402 } 405 }
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
4861 return 0; 4864 return 0;
4862 4865
4863 /* If we are under soft global TCP memory pressure, do not expand. */ 4866 /* If we are under soft global TCP memory pressure, do not expand. */
4864 if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) 4867 if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4865 return 0; 4868 return 0;
4866 4869
4867 /* If we filled the congestion window, do not expand. */ 4870 /* If we filled the congestion window, do not expand. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f8527d41682..e13da6de1fc7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
415 !icsk->icsk_backoff) 415 !icsk->icsk_backoff)
416 break; 416 break;
417 417
418 if (sock_owned_by_user(sk))
419 break;
420
418 icsk->icsk_backoff--; 421 icsk->icsk_backoff--;
419 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << 422 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
420 icsk->icsk_backoff; 423 icsk->icsk_backoff;
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
429 if (remaining) { 432 if (remaining) {
430 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 433 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
431 remaining, TCP_RTO_MAX); 434 remaining, TCP_RTO_MAX);
432 } else if (sock_owned_by_user(sk)) {
433 /* RTO revert clocked out retransmission,
434 * but socket is locked. Will defer. */
435 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
436 HZ/20, TCP_RTO_MAX);
437 } else { 435 } else {
438 /* RTO revert clocked out retransmission. 436 /* RTO revert clocked out retransmission.
439 * Will retransmit now */ 437 * Will retransmit now */
@@ -2045,7 +2043,9 @@ get_req:
2045 } 2043 }
2046get_sk: 2044get_sk:
2047 sk_nulls_for_each_from(sk, node) { 2045 sk_nulls_for_each_from(sk, node) {
2048 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { 2046 if (!net_eq(sock_net(sk), net))
2047 continue;
2048 if (sk->sk_family == st->family) {
2049 cur = sk; 2049 cur = sk;
2050 goto out; 2050 goto out;
2051 } 2051 }
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 9a17bd2a0a37..ac3b3ee4b07c 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,27 +14,32 @@
14#include <net/protocol.h> 14#include <net/protocol.h>
15#include <net/xfrm.h> 15#include <net/xfrm.h>
16 16
17static struct xfrm_tunnel *tunnel4_handlers __read_mostly; 17static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
18static struct xfrm_tunnel *tunnel64_handlers __read_mostly; 18static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
19static DEFINE_MUTEX(tunnel4_mutex); 19static DEFINE_MUTEX(tunnel4_mutex);
20 20
21static inline struct xfrm_tunnel **fam_handlers(unsigned short family) 21static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
22{ 22{
23 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; 23 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
24} 24}
25 25
26int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) 26int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
27{ 27{
28 struct xfrm_tunnel **pprev; 28 struct xfrm_tunnel __rcu **pprev;
29 struct xfrm_tunnel *t;
30
29 int ret = -EEXIST; 31 int ret = -EEXIST;
30 int priority = handler->priority; 32 int priority = handler->priority;
31 33
32 mutex_lock(&tunnel4_mutex); 34 mutex_lock(&tunnel4_mutex);
33 35
34 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { 36 for (pprev = fam_handlers(family);
35 if ((*pprev)->priority > priority) 37 (t = rcu_dereference_protected(*pprev,
38 lockdep_is_held(&tunnel4_mutex))) != NULL;
39 pprev = &t->next) {
40 if (t->priority > priority)
36 break; 41 break;
37 if ((*pprev)->priority == priority) 42 if (t->priority == priority)
38 goto err; 43 goto err;
39 } 44 }
40 45
@@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register);
52 57
53int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) 58int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
54{ 59{
55 struct xfrm_tunnel **pprev; 60 struct xfrm_tunnel __rcu **pprev;
61 struct xfrm_tunnel *t;
56 int ret = -ENOENT; 62 int ret = -ENOENT;
57 63
58 mutex_lock(&tunnel4_mutex); 64 mutex_lock(&tunnel4_mutex);
59 65
60 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { 66 for (pprev = fam_handlers(family);
61 if (*pprev == handler) { 67 (t = rcu_dereference_protected(*pprev,
68 lockdep_is_held(&tunnel4_mutex))) != NULL;
69 pprev = &t->next) {
70 if (t == handler) {
62 *pprev = handler->next; 71 *pprev = handler->next;
63 ret = 0; 72 ret = 0;
64 break; 73 break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b3f7e8cf18ac..5e0a3a582a59 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
110struct udp_table udp_table __read_mostly; 110struct udp_table udp_table __read_mostly;
111EXPORT_SYMBOL(udp_table); 111EXPORT_SYMBOL(udp_table);
112 112
113int sysctl_udp_mem[3] __read_mostly; 113long sysctl_udp_mem[3] __read_mostly;
114EXPORT_SYMBOL(sysctl_udp_mem); 114EXPORT_SYMBOL(sysctl_udp_mem);
115 115
116int sysctl_udp_rmem_min __read_mostly; 116int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
119int sysctl_udp_wmem_min __read_mostly; 119int sysctl_udp_wmem_min __read_mostly;
120EXPORT_SYMBOL(sysctl_udp_wmem_min); 120EXPORT_SYMBOL(sysctl_udp_wmem_min);
121 121
122atomic_t udp_memory_allocated; 122atomic_long_t udp_memory_allocated;
123EXPORT_SYMBOL(udp_memory_allocated); 123EXPORT_SYMBOL(udp_memory_allocated);
124 124
125#define MAX_UDP_PORTS 65536 125#define MAX_UDP_PORTS 65536
@@ -1413,7 +1413,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1413 } 1413 }
1414 } 1414 }
1415 1415
1416 if (sk->sk_filter) { 1416 if (rcu_dereference_raw(sk->sk_filter)) {
1417 if (udp_lib_checksum_complete(skb)) 1417 if (udp_lib_checksum_complete(skb))
1418 goto drop; 1418 goto drop;
1419 } 1419 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ec7a91d9e865..23cc8e1ce8d4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -98,7 +98,11 @@
98#endif 98#endif
99 99
100#define INFINITY_LIFE_TIME 0xFFFFFFFF 100#define INFINITY_LIFE_TIME 0xFFFFFFFF
101#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b))) 101
102static inline u32 cstamp_delta(unsigned long cstamp)
103{
104 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
105}
102 106
103#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) 107#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
104#define ADDRCONF_TIMER_FUZZ (HZ / 4) 108#define ADDRCONF_TIMER_FUZZ (HZ / 4)
@@ -836,7 +840,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
836{ 840{
837 struct inet6_dev *idev = ifp->idev; 841 struct inet6_dev *idev = ifp->idev;
838 struct in6_addr addr, *tmpaddr; 842 struct in6_addr addr, *tmpaddr;
839 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp; 843 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
840 unsigned long regen_advance; 844 unsigned long regen_advance;
841 int tmp_plen; 845 int tmp_plen;
842 int ret = 0; 846 int ret = 0;
@@ -886,12 +890,13 @@ retry:
886 goto out; 890 goto out;
887 } 891 }
888 memcpy(&addr.s6_addr[8], idev->rndid, 8); 892 memcpy(&addr.s6_addr[8], idev->rndid, 8);
893 age = (jiffies - ifp->tstamp) / HZ;
889 tmp_valid_lft = min_t(__u32, 894 tmp_valid_lft = min_t(__u32,
890 ifp->valid_lft, 895 ifp->valid_lft,
891 idev->cnf.temp_valid_lft); 896 idev->cnf.temp_valid_lft + age);
892 tmp_prefered_lft = min_t(__u32, 897 tmp_prefered_lft = min_t(__u32,
893 ifp->prefered_lft, 898 ifp->prefered_lft,
894 idev->cnf.temp_prefered_lft - 899 idev->cnf.temp_prefered_lft + age -
895 idev->cnf.max_desync_factor); 900 idev->cnf.max_desync_factor);
896 tmp_plen = ifp->prefix_len; 901 tmp_plen = ifp->prefix_len;
897 max_addresses = idev->cnf.max_addresses; 902 max_addresses = idev->cnf.max_addresses;
@@ -1426,8 +1431,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1426{ 1431{
1427 struct inet6_dev *idev = ifp->idev; 1432 struct inet6_dev *idev = ifp->idev;
1428 1433
1429 if (addrconf_dad_end(ifp)) 1434 if (addrconf_dad_end(ifp)) {
1435 in6_ifa_put(ifp);
1430 return; 1436 return;
1437 }
1431 1438
1432 if (net_ratelimit()) 1439 if (net_ratelimit())
1433 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n", 1440 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
@@ -2021,10 +2028,11 @@ ok:
2021 ipv6_ifa_notify(0, ift); 2028 ipv6_ifa_notify(0, ift);
2022 } 2029 }
2023 2030
2024 if (create && in6_dev->cnf.use_tempaddr > 0) { 2031 if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
2025 /* 2032 /*
2026 * When a new public address is created as described in [ADDRCONF], 2033 * When a new public address is created as described in [ADDRCONF],
2027 * also create a new temporary address. 2034 * also create a new temporary address. Also create a temporary
2035 * address if it's enabled but no temporary address currently exists.
2028 */ 2036 */
2029 read_unlock_bh(&in6_dev->lock); 2037 read_unlock_bh(&in6_dev->lock);
2030 ipv6_create_tempaddr(ifp, NULL); 2038 ipv6_create_tempaddr(ifp, NULL);
@@ -2736,10 +2744,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2736 /* Flag it for later restoration when link comes up */ 2744 /* Flag it for later restoration when link comes up */
2737 ifa->flags |= IFA_F_TENTATIVE; 2745 ifa->flags |= IFA_F_TENTATIVE;
2738 ifa->state = INET6_IFADDR_STATE_DAD; 2746 ifa->state = INET6_IFADDR_STATE_DAD;
2739
2740 write_unlock_bh(&idev->lock);
2741
2742 in6_ifa_hold(ifa);
2743 } else { 2747 } else {
2744 list_del(&ifa->if_list); 2748 list_del(&ifa->if_list);
2745 2749
@@ -2754,19 +2758,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2754 ifa->state = INET6_IFADDR_STATE_DEAD; 2758 ifa->state = INET6_IFADDR_STATE_DEAD;
2755 spin_unlock_bh(&ifa->state_lock); 2759 spin_unlock_bh(&ifa->state_lock);
2756 2760
2757 if (state == INET6_IFADDR_STATE_DEAD) 2761 if (state != INET6_IFADDR_STATE_DEAD) {
2758 goto put_ifa; 2762 __ipv6_ifa_notify(RTM_DELADDR, ifa);
2759 } 2763 atomic_notifier_call_chain(&inet6addr_chain,
2760 2764 NETDEV_DOWN, ifa);
2761 __ipv6_ifa_notify(RTM_DELADDR, ifa); 2765 }
2762 if (ifa->state == INET6_IFADDR_STATE_DEAD)
2763 atomic_notifier_call_chain(&inet6addr_chain,
2764 NETDEV_DOWN, ifa);
2765
2766put_ifa:
2767 in6_ifa_put(ifa);
2768 2766
2769 write_lock_bh(&idev->lock); 2767 in6_ifa_put(ifa);
2768 write_lock_bh(&idev->lock);
2769 }
2770 } 2770 }
2771 2771
2772 list_splice(&keep_list, &idev->addr_list); 2772 list_splice(&keep_list, &idev->addr_list);
@@ -3448,10 +3448,8 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
3448{ 3448{
3449 struct ifa_cacheinfo ci; 3449 struct ifa_cacheinfo ci;
3450 3450
3451 ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 3451 ci.cstamp = cstamp_delta(cstamp);
3452 + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); 3452 ci.tstamp = cstamp_delta(tstamp);
3453 ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
3454 + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
3455 ci.ifa_prefered = preferred; 3453 ci.ifa_prefered = preferred;
3456 ci.ifa_valid = valid; 3454 ci.ifa_valid = valid;
3457 3455
@@ -3802,8 +3800,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
3802 array[DEVCONF_AUTOCONF] = cnf->autoconf; 3800 array[DEVCONF_AUTOCONF] = cnf->autoconf;
3803 array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; 3801 array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
3804 array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; 3802 array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
3805 array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval; 3803 array[DEVCONF_RTR_SOLICIT_INTERVAL] =
3806 array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay; 3804 jiffies_to_msecs(cnf->rtr_solicit_interval);
3805 array[DEVCONF_RTR_SOLICIT_DELAY] =
3806 jiffies_to_msecs(cnf->rtr_solicit_delay);
3807 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; 3807 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
3808#ifdef CONFIG_IPV6_PRIVACY 3808#ifdef CONFIG_IPV6_PRIVACY
3809 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; 3809 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
@@ -3817,7 +3817,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
3817 array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; 3817 array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
3818#ifdef CONFIG_IPV6_ROUTER_PREF 3818#ifdef CONFIG_IPV6_ROUTER_PREF
3819 array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; 3819 array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
3820 array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; 3820 array[DEVCONF_RTR_PROBE_INTERVAL] =
3821 jiffies_to_msecs(cnf->rtr_probe_interval);
3821#ifdef CONFIG_IPV6_ROUTE_INFO 3822#ifdef CONFIG_IPV6_ROUTE_INFO
3822 array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; 3823 array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
3823#endif 3824#endif
@@ -3933,10 +3934,9 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
3933 NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); 3934 NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
3934 3935
3935 ci.max_reasm_len = IPV6_MAXPLEN; 3936 ci.max_reasm_len = IPV6_MAXPLEN;
3936 ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 3937 ci.tstamp = cstamp_delta(idev->tstamp);
3937 + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); 3938 ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
3938 ci.reachable_time = idev->nd_parms->reachable_time; 3939 ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
3939 ci.retrans_time = idev->nd_parms->retrans_time;
3940 NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); 3940 NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
3941 3941
3942 nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); 3942 nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c2c0f89397b1..2a59610c2a58 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1284,6 +1284,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1284 t = netdev_priv(dev); 1284 t = netdev_priv(dev);
1285 1285
1286 ip6_tnl_unlink(ip6n, t); 1286 ip6_tnl_unlink(ip6n, t);
1287 synchronize_net();
1287 err = ip6_tnl_change(t, &p); 1288 err = ip6_tnl_change(t, &p);
1288 ip6_tnl_link(ip6n, t); 1289 ip6_tnl_link(ip6n, t);
1289 netdev_state_change(dev); 1290 netdev_state_change(dev);
@@ -1371,6 +1372,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
1371 dev->flags |= IFF_NOARP; 1372 dev->flags |= IFF_NOARP;
1372 dev->addr_len = sizeof(struct in6_addr); 1373 dev->addr_len = sizeof(struct in6_addr);
1373 dev->features |= NETIF_F_NETNS_LOCAL; 1374 dev->features |= NETIF_F_NETNS_LOCAL;
1375 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1374} 1376}
1375 1377
1376 1378
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0553867a317f..d1770e061c08 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -343,6 +343,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
343 break; 343 break;
344 344
345 case IPV6_TRANSPARENT: 345 case IPV6_TRANSPARENT:
346 if (!capable(CAP_NET_ADMIN)) {
347 retv = -EPERM;
348 break;
349 }
346 if (optlen < sizeof(int)) 350 if (optlen < sizeof(int))
347 goto e_inval; 351 goto e_inval;
348 /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */ 352 /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 44d2eeac089b..448464844a25 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -5,10 +5,15 @@
5menu "IPv6: Netfilter Configuration" 5menu "IPv6: Netfilter Configuration"
6 depends on INET && IPV6 && NETFILTER 6 depends on INET && IPV6 && NETFILTER
7 7
8config NF_DEFRAG_IPV6
9 tristate
10 default n
11
8config NF_CONNTRACK_IPV6 12config NF_CONNTRACK_IPV6
9 tristate "IPv6 connection tracking support" 13 tristate "IPv6 connection tracking support"
10 depends on INET && IPV6 && NF_CONNTRACK 14 depends on INET && IPV6 && NF_CONNTRACK
11 default m if NETFILTER_ADVANCED=n 15 default m if NETFILTER_ADVANCED=n
16 select NF_DEFRAG_IPV6
12 ---help--- 17 ---help---
13 Connection tracking keeps a record of what packets have passed 18 Connection tracking keeps a record of what packets have passed
14 through your machine, in order to figure out how they are related 19 through your machine, in order to figure out how they are related
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 3f8e4a3d83ce..0a432c9b0795 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -12,11 +12,14 @@ obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
12 12
13# objects for l3 independent conntrack 13# objects for l3 independent conntrack
14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
16 15
17# l3 independent conntrack 16# l3 independent conntrack
18obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o 17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
19 18
19# defrag
20nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
21obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
22
20# matches 23# matches
21obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 24obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
22obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o 25obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 51df035897e7..455582384ece 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1137,6 +1137,7 @@ static int get_info(struct net *net, void __user *user,
1137 private = &tmp; 1137 private = &tmp;
1138 } 1138 }
1139#endif 1139#endif
1140 memset(&info, 0, sizeof(info));
1140 info.valid_hooks = t->valid_hooks; 1141 info.valid_hooks = t->valid_hooks;
1141 memcpy(info.hook_entry, private->hook_entry, 1142 memcpy(info.hook_entry, private->hook_entry,
1142 sizeof(info.hook_entry)); 1143 sizeof(info.hook_entry));
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 489d71b844ac..79d43aa8fa8d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -286,7 +286,7 @@ found:
286 286
287 /* Check for overlap with preceding fragment. */ 287 /* Check for overlap with preceding fragment. */
288 if (prev && 288 if (prev &&
289 (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) 289 (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
290 goto discard_fq; 290 goto discard_fq;
291 291
292 /* Look for overlap with succeeding segment. */ 292 /* Look for overlap with succeeding segment. */
@@ -625,21 +625,24 @@ int nf_ct_frag6_init(void)
625 inet_frags_init_net(&nf_init_frags); 625 inet_frags_init_net(&nf_init_frags);
626 inet_frags_init(&nf_frags); 626 inet_frags_init(&nf_frags);
627 627
628#ifdef CONFIG_SYSCTL
628 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, 629 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
629 nf_ct_frag6_sysctl_table); 630 nf_ct_frag6_sysctl_table);
630 if (!nf_ct_frag6_sysctl_header) { 631 if (!nf_ct_frag6_sysctl_header) {
631 inet_frags_fini(&nf_frags); 632 inet_frags_fini(&nf_frags);
632 return -ENOMEM; 633 return -ENOMEM;
633 } 634 }
635#endif
634 636
635 return 0; 637 return 0;
636} 638}
637 639
638void nf_ct_frag6_cleanup(void) 640void nf_ct_frag6_cleanup(void)
639{ 641{
642#ifdef CONFIG_SYSCTL
640 unregister_sysctl_table(nf_ct_frag6_sysctl_header); 643 unregister_sysctl_table(nf_ct_frag6_sysctl_header);
641 nf_ct_frag6_sysctl_header = NULL; 644 nf_ct_frag6_sysctl_header = NULL;
642 645#endif
643 inet_frags_fini(&nf_frags); 646 inet_frags_fini(&nf_frags);
644 647
645 nf_init_frags.low_thresh = 0; 648 nf_init_frags.low_thresh = 0;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index d082eaeefa25..24b3558b8e67 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -126,6 +126,8 @@ static const struct snmp_mib snmp6_udp6_list[] = {
126 SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), 126 SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
127 SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS), 127 SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
128 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS), 128 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
129 SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
130 SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
129 SNMP_MIB_SENTINEL 131 SNMP_MIB_SENTINEL
130}; 132};
131 133
@@ -134,6 +136,8 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
134 SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), 136 SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
135 SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), 137 SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
136 SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), 138 SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
139 SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
140 SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
137 SNMP_MIB_SENTINEL 141 SNMP_MIB_SENTINEL
138}; 142};
139 143
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9bb936ae2452..9a7978fdc02a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,13 +25,14 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <net/protocol.h> 26#include <net/protocol.h>
27 27
28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly; 28const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
29 29
30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
31{ 31{
32 int hash = protocol & (MAX_INET_PROTOS - 1); 32 int hash = protocol & (MAX_INET_PROTOS - 1);
33 33
34 return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1; 34 return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
35 NULL, prot) ? 0 : -1;
35} 36}
36EXPORT_SYMBOL(inet6_add_protocol); 37EXPORT_SYMBOL(inet6_add_protocol);
37 38
@@ -43,7 +44,8 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
43{ 44{
44 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 45 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
45 46
46 ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1; 47 ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
48 prot, NULL) == prot) ? 0 : -1;
47 49
48 synchronize_net(); 50 synchronize_net();
49 51
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 45e6efb7f171..86c39526ba5e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -373,7 +373,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
373 373
374static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) 374static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
375{ 375{
376 if ((raw6_sk(sk)->checksum || sk->sk_filter) && 376 if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) &&
377 skb_checksum_complete(skb)) { 377 skb_checksum_complete(skb)) {
378 atomic_inc(&sk->sk_drops); 378 atomic_inc(&sk->sk_drops);
379 kfree_skb(skb); 379 kfree_skb(skb);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index c7ba3149633f..0f2766453759 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -349,7 +349,7 @@ found:
349 349
350 /* Check for overlap with preceding fragment. */ 350 /* Check for overlap with preceding fragment. */
351 if (prev && 351 if (prev &&
352 (FRAG6_CB(prev)->offset + prev->len) - offset > 0) 352 (FRAG6_CB(prev)->offset + prev->len) > offset)
353 goto discard_fq; 353 goto discard_fq;
354 354
355 /* Look for overlap with succeeding segment. */ 355 /* Look for overlap with succeeding segment. */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 25661f968f3f..96455ffb76fb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1945,8 +1945,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1945 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1945 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1946 struct neighbour *neigh; 1946 struct neighbour *neigh;
1947 1947
1948 if (rt == NULL) 1948 if (rt == NULL) {
1949 if (net_ratelimit())
1950 pr_warning("IPv6: Maximum number of routes reached,"
1951 " consider increasing route/max_size.\n");
1949 return ERR_PTR(-ENOMEM); 1952 return ERR_PTR(-ENOMEM);
1953 }
1950 1954
1951 dev_hold(net->loopback_dev); 1955 dev_hold(net->loopback_dev);
1952 in6_dev_hold(idev); 1956 in6_dev_hold(idev);
@@ -2741,6 +2745,7 @@ static void __net_exit ip6_route_net_exit(struct net *net)
2741 kfree(net->ipv6.ip6_prohibit_entry); 2745 kfree(net->ipv6.ip6_prohibit_entry);
2742 kfree(net->ipv6.ip6_blk_hole_entry); 2746 kfree(net->ipv6.ip6_blk_hole_entry);
2743#endif 2747#endif
2748 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2744} 2749}
2745 2750
2746static struct pernet_operations ip6_route_net_ops = { 2751static struct pernet_operations ip6_route_net_ops = {
@@ -2832,5 +2837,6 @@ void ip6_route_cleanup(void)
2832 xfrm6_fini(); 2837 xfrm6_fini();
2833 fib6_gc_cleanup(); 2838 fib6_gc_cleanup();
2834 unregister_pernet_subsys(&ip6_route_net_ops); 2839 unregister_pernet_subsys(&ip6_route_net_ops);
2840 dst_entries_destroy(&ip6_dst_blackhole_ops);
2835 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2841 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2836} 2842}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 367a6cc584cc..d6bfaec3bbbf 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -963,6 +963,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
963 } 963 }
964 t = netdev_priv(dev); 964 t = netdev_priv(dev);
965 ipip6_tunnel_unlink(sitn, t); 965 ipip6_tunnel_unlink(sitn, t);
966 synchronize_net();
966 t->parms.iph.saddr = p.iph.saddr; 967 t->parms.iph.saddr = p.iph.saddr;
967 t->parms.iph.daddr = p.iph.daddr; 968 t->parms.iph.daddr = p.iph.daddr;
968 memcpy(dev->dev_addr, &p.iph.saddr, 4); 969 memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index d9864725d0c6..4f3cec12aa85 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,23 +30,26 @@
30#include <net/protocol.h> 30#include <net/protocol.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32 32
33static struct xfrm6_tunnel *tunnel6_handlers __read_mostly; 33static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
34static struct xfrm6_tunnel *tunnel46_handlers __read_mostly; 34static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
35static DEFINE_MUTEX(tunnel6_mutex); 35static DEFINE_MUTEX(tunnel6_mutex);
36 36
37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) 37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
38{ 38{
39 struct xfrm6_tunnel **pprev; 39 struct xfrm6_tunnel __rcu **pprev;
40 struct xfrm6_tunnel *t;
40 int ret = -EEXIST; 41 int ret = -EEXIST;
41 int priority = handler->priority; 42 int priority = handler->priority;
42 43
43 mutex_lock(&tunnel6_mutex); 44 mutex_lock(&tunnel6_mutex);
44 45
45 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; 46 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
46 *pprev; pprev = &(*pprev)->next) { 47 (t = rcu_dereference_protected(*pprev,
47 if ((*pprev)->priority > priority) 48 lockdep_is_held(&tunnel6_mutex))) != NULL;
49 pprev = &t->next) {
50 if (t->priority > priority)
48 break; 51 break;
49 if ((*pprev)->priority == priority) 52 if (t->priority == priority)
50 goto err; 53 goto err;
51 } 54 }
52 55
@@ -65,14 +68,17 @@ EXPORT_SYMBOL(xfrm6_tunnel_register);
65 68
66int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) 69int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
67{ 70{
68 struct xfrm6_tunnel **pprev; 71 struct xfrm6_tunnel __rcu **pprev;
72 struct xfrm6_tunnel *t;
69 int ret = -ENOENT; 73 int ret = -ENOENT;
70 74
71 mutex_lock(&tunnel6_mutex); 75 mutex_lock(&tunnel6_mutex);
72 76
73 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; 77 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
74 *pprev; pprev = &(*pprev)->next) { 78 (t = rcu_dereference_protected(*pprev,
75 if (*pprev == handler) { 79 lockdep_is_held(&tunnel6_mutex))) != NULL;
80 pprev = &t->next) {
81 if (t == handler) {
76 *pprev = handler->next; 82 *pprev = handler->next;
77 ret = 0; 83 ret = 0;
78 break; 84 break;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c84dad432114..91def93bec85 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -527,7 +527,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
527 } 527 }
528 } 528 }
529 529
530 if (sk->sk_filter) { 530 if (rcu_dereference_raw(sk->sk_filter)) {
531 if (udp_lib_checksum_complete(skb)) 531 if (udp_lib_checksum_complete(skb))
532 goto drop; 532 goto drop;
533 } 533 }
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 7f097989cde2..a6de3059746d 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -45,7 +45,6 @@
45#include <linux/capability.h> 45#include <linux/capability.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/types.h> 47#include <linux/types.h>
48#include <linux/smp_lock.h>
49#include <linux/socket.h> 48#include <linux/socket.h>
50#include <linux/sockios.h> 49#include <linux/sockios.h>
51#include <linux/slab.h> 50#include <linux/slab.h>
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7fa86373de41..7c567b8aa89a 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -15,7 +15,6 @@
15 15
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/smp_lock.h>
19#include "irnet_ppp.h" /* Private header */ 18#include "irnet_ppp.h" /* Private header */
20/* Please put other headers in irnet.h - Thanks */ 19/* Please put other headers in irnet.h - Thanks */
21 20
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 285761e77d90..f6054f9ccbe3 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -550,22 +550,30 @@ EXPORT_SYMBOL(irttp_close_tsap);
550 */ 550 */
551int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb) 551int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
552{ 552{
553 int ret;
554
553 IRDA_ASSERT(self != NULL, return -1;); 555 IRDA_ASSERT(self != NULL, return -1;);
554 IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); 556 IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
555 IRDA_ASSERT(skb != NULL, return -1;); 557 IRDA_ASSERT(skb != NULL, return -1;);
556 558
557 IRDA_DEBUG(4, "%s()\n", __func__); 559 IRDA_DEBUG(4, "%s()\n", __func__);
558 560
561 /* Take shortcut on zero byte packets */
562 if (skb->len == 0) {
563 ret = 0;
564 goto err;
565 }
566
559 /* Check that nothing bad happens */ 567 /* Check that nothing bad happens */
560 if ((skb->len == 0) || (!self->connected)) { 568 if (!self->connected) {
561 IRDA_DEBUG(1, "%s(), No data, or not connected\n", 569 IRDA_WARNING("%s(), Not connected\n", __func__);
562 __func__); 570 ret = -ENOTCONN;
563 goto err; 571 goto err;
564 } 572 }
565 573
566 if (skb->len > self->max_seg_size) { 574 if (skb->len > self->max_seg_size) {
567 IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n", 575 IRDA_ERROR("%s(), UData is too large for IrLAP!\n", __func__);
568 __func__); 576 ret = -EMSGSIZE;
569 goto err; 577 goto err;
570 } 578 }
571 579
@@ -576,7 +584,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
576 584
577err: 585err:
578 dev_kfree_skb(skb); 586 dev_kfree_skb(skb);
579 return -1; 587 return ret;
580} 588}
581EXPORT_SYMBOL(irttp_udata_request); 589EXPORT_SYMBOL(irttp_udata_request);
582 590
@@ -599,9 +607,15 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
599 IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__, 607 IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__,
600 skb_queue_len(&self->tx_queue)); 608 skb_queue_len(&self->tx_queue));
601 609
610 /* Take shortcut on zero byte packets */
611 if (skb->len == 0) {
612 ret = 0;
613 goto err;
614 }
615
602 /* Check that nothing bad happens */ 616 /* Check that nothing bad happens */
603 if ((skb->len == 0) || (!self->connected)) { 617 if (!self->connected) {
604 IRDA_WARNING("%s: No data, or not connected\n", __func__); 618 IRDA_WARNING("%s: Not connected\n", __func__);
605 ret = -ENOTCONN; 619 ret = -ENOTCONN;
606 goto err; 620 goto err;
607 } 621 }
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 499c045d6910..f7db676de77d 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1798,7 +1798,8 @@ static void iucv_work_fn(struct work_struct *work)
1798 * Handles external interrupts coming in from CP. 1798 * Handles external interrupts coming in from CP.
1799 * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn(). 1799 * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn().
1800 */ 1800 */
1801static void iucv_external_interrupt(u16 code) 1801static void iucv_external_interrupt(unsigned int ext_int_code,
1802 unsigned int param32, unsigned long param64)
1802{ 1803{
1803 struct iucv_irq_data *p; 1804 struct iucv_irq_data *p;
1804 struct iucv_irq_list *work; 1805 struct iucv_irq_list *work;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1712af1c7b3f..c64ce0a0bb03 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -111,6 +111,10 @@ struct l2tp_net {
111 spinlock_t l2tp_session_hlist_lock; 111 spinlock_t l2tp_session_hlist_lock;
112}; 112};
113 113
114static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
115static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
116static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
117
114static inline struct l2tp_net *l2tp_pernet(struct net *net) 118static inline struct l2tp_net *l2tp_pernet(struct net *net)
115{ 119{
116 BUG_ON(!net); 120 BUG_ON(!net);
@@ -118,6 +122,34 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net)
118 return net_generic(net, l2tp_net_id); 122 return net_generic(net, l2tp_net_id);
119} 123}
120 124
125
126/* Tunnel reference counts. Incremented per session that is added to
127 * the tunnel.
128 */
129static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
130{
131 atomic_inc(&tunnel->ref_count);
132}
133
134static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
135{
136 if (atomic_dec_and_test(&tunnel->ref_count))
137 l2tp_tunnel_free(tunnel);
138}
139#ifdef L2TP_REFCNT_DEBUG
140#define l2tp_tunnel_inc_refcount(_t) do { \
141 printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
142 l2tp_tunnel_inc_refcount_1(_t); \
143 } while (0)
144#define l2tp_tunnel_dec_refcount(_t) do { \
145 printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
146 l2tp_tunnel_dec_refcount_1(_t); \
147 } while (0)
148#else
149#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
150#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
151#endif
152
121/* Session hash global list for L2TPv3. 153/* Session hash global list for L2TPv3.
122 * The session_id SHOULD be random according to RFC3931, but several 154 * The session_id SHOULD be random according to RFC3931, but several
123 * L2TP implementations use incrementing session_ids. So we do a real 155 * L2TP implementations use incrementing session_ids. So we do a real
@@ -699,8 +731,8 @@ EXPORT_SYMBOL(l2tp_recv_common);
699 * Returns 1 if the packet was not a good data packet and could not be 731 * Returns 1 if the packet was not a good data packet and could not be
700 * forwarded. All such packets are passed up to userspace to deal with. 732 * forwarded. All such packets are passed up to userspace to deal with.
701 */ 733 */
702int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, 734static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
703 int (*payload_hook)(struct sk_buff *skb)) 735 int (*payload_hook)(struct sk_buff *skb))
704{ 736{
705 struct l2tp_session *session = NULL; 737 struct l2tp_session *session = NULL;
706 unsigned char *ptr, *optr; 738 unsigned char *ptr, *optr;
@@ -812,7 +844,6 @@ error:
812 844
813 return 1; 845 return 1;
814} 846}
815EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
816 847
817/* UDP encapsulation receive handler. See net/ipv4/udp.c. 848/* UDP encapsulation receive handler. See net/ipv4/udp.c.
818 * Return codes: 849 * Return codes:
@@ -922,7 +953,8 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
922 return bufp - optr; 953 return bufp - optr;
923} 954}
924 955
925int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len) 956static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
957 size_t data_len)
926{ 958{
927 struct l2tp_tunnel *tunnel = session->tunnel; 959 struct l2tp_tunnel *tunnel = session->tunnel;
928 unsigned int len = skb->len; 960 unsigned int len = skb->len;
@@ -970,7 +1002,6 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
970 1002
971 return 0; 1003 return 0;
972} 1004}
973EXPORT_SYMBOL_GPL(l2tp_xmit_core);
974 1005
975/* Automatically called when the skb is freed. 1006/* Automatically called when the skb is freed.
976 */ 1007 */
@@ -1089,7 +1120,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
1089 * The tunnel context is deleted only when all session sockets have been 1120 * The tunnel context is deleted only when all session sockets have been
1090 * closed. 1121 * closed.
1091 */ 1122 */
1092void l2tp_tunnel_destruct(struct sock *sk) 1123static void l2tp_tunnel_destruct(struct sock *sk)
1093{ 1124{
1094 struct l2tp_tunnel *tunnel; 1125 struct l2tp_tunnel *tunnel;
1095 1126
@@ -1128,11 +1159,10 @@ void l2tp_tunnel_destruct(struct sock *sk)
1128end: 1159end:
1129 return; 1160 return;
1130} 1161}
1131EXPORT_SYMBOL(l2tp_tunnel_destruct);
1132 1162
1133/* When the tunnel is closed, all the attached sessions need to go too. 1163/* When the tunnel is closed, all the attached sessions need to go too.
1134 */ 1164 */
1135void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) 1165static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
1136{ 1166{
1137 int hash; 1167 int hash;
1138 struct hlist_node *walk; 1168 struct hlist_node *walk;
@@ -1193,12 +1223,11 @@ again:
1193 } 1223 }
1194 write_unlock_bh(&tunnel->hlist_lock); 1224 write_unlock_bh(&tunnel->hlist_lock);
1195} 1225}
1196EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
1197 1226
1198/* Really kill the tunnel. 1227/* Really kill the tunnel.
1199 * Come here only when all sessions have been cleared from the tunnel. 1228 * Come here only when all sessions have been cleared from the tunnel.
1200 */ 1229 */
1201void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) 1230static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
1202{ 1231{
1203 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); 1232 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1204 1233
@@ -1217,7 +1246,6 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
1217 atomic_dec(&l2tp_tunnel_count); 1246 atomic_dec(&l2tp_tunnel_count);
1218 kfree(tunnel); 1247 kfree(tunnel);
1219} 1248}
1220EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
1221 1249
1222/* Create a socket for the tunnel, if one isn't set up by 1250/* Create a socket for the tunnel, if one isn't set up by
1223 * userspace. This is used for static tunnels where there is no 1251 * userspace. This is used for static tunnels where there is no
@@ -1512,7 +1540,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete);
1512/* We come here whenever a session's send_seq, cookie_len or 1540/* We come here whenever a session's send_seq, cookie_len or
1513 * l2specific_len parameters are set. 1541 * l2specific_len parameters are set.
1514 */ 1542 */
1515void l2tp_session_set_header_len(struct l2tp_session *session, int version) 1543static void l2tp_session_set_header_len(struct l2tp_session *session, int version)
1516{ 1544{
1517 if (version == L2TP_HDR_VER_2) { 1545 if (version == L2TP_HDR_VER_2) {
1518 session->hdr_len = 6; 1546 session->hdr_len = 6;
@@ -1525,7 +1553,6 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
1525 } 1553 }
1526 1554
1527} 1555}
1528EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
1529 1556
1530struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) 1557struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
1531{ 1558{
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index f0f318edd3f1..a16a48e79fab 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -231,48 +231,15 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i
231extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); 231extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
232extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); 232extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
233extern int l2tp_session_delete(struct l2tp_session *session); 233extern int l2tp_session_delete(struct l2tp_session *session);
234extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
235extern void l2tp_session_free(struct l2tp_session *session); 234extern void l2tp_session_free(struct l2tp_session *session);
236extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); 235extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
237extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
238extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); 236extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
239 237
240extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
241extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); 238extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
242extern void l2tp_tunnel_destruct(struct sock *sk);
243extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
244extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
245 239
246extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); 240extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
247extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); 241extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
248 242
249/* Tunnel reference counts. Incremented per session that is added to
250 * the tunnel.
251 */
252static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
253{
254 atomic_inc(&tunnel->ref_count);
255}
256
257static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
258{
259 if (atomic_dec_and_test(&tunnel->ref_count))
260 l2tp_tunnel_free(tunnel);
261}
262#ifdef L2TP_REFCNT_DEBUG
263#define l2tp_tunnel_inc_refcount(_t) do { \
264 printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
265 l2tp_tunnel_inc_refcount_1(_t); \
266 } while (0)
267#define l2tp_tunnel_dec_refcount(_t) do { \
268 printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
269 l2tp_tunnel_dec_refcount_1(_t); \
270 } while (0)
271#else
272#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
273#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
274#endif
275
276/* Session reference counts. Incremented when code obtains a reference 243/* Session reference counts. Incremented when code obtains a reference
277 * to a session. 244 * to a session.
278 */ 245 */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 104ec3b283d4..b8dbae82fab8 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -249,7 +249,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
249 struct seq_file *seq; 249 struct seq_file *seq;
250 int rc = -ENOMEM; 250 int rc = -ENOMEM;
251 251
252 pd = kzalloc(GFP_KERNEL, sizeof(*pd)); 252 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
253 if (pd == NULL) 253 if (pd == NULL)
254 goto out; 254 goto out;
255 255
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 1c770c0644d1..0bf6a59545ab 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -576,7 +576,7 @@ out:
576 return copied; 576 return copied;
577} 577}
578 578
579struct proto l2tp_ip_prot = { 579static struct proto l2tp_ip_prot = {
580 .name = "L2TP/IP", 580 .name = "L2TP/IP",
581 .owner = THIS_MODULE, 581 .owner = THIS_MODULE,
582 .init = l2tp_ip_open, 582 .init = l2tp_ip_open,
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 4d6f8653ec88..8e8ea9cb7093 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -92,7 +92,7 @@ config MAC80211_MESH
92config MAC80211_LEDS 92config MAC80211_LEDS
93 bool "Enable LED triggers" 93 bool "Enable LED triggers"
94 depends on MAC80211 94 depends on MAC80211
95 select NEW_LEDS 95 depends on LEDS_CLASS
96 select LEDS_TRIGGERS 96 select LEDS_TRIGGERS
97 ---help--- 97 ---help---
98 This option enables a few LED triggers for different 98 This option enables a few LED triggers for different
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 4aa47d074a79..1243d1db5c59 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -203,9 +203,13 @@ static ssize_t key_key_read(struct file *file, char __user *userbuf,
203 size_t count, loff_t *ppos) 203 size_t count, loff_t *ppos)
204{ 204{
205 struct ieee80211_key *key = file->private_data; 205 struct ieee80211_key *key = file->private_data;
206 int i, res, bufsize = 2 * key->conf.keylen + 2; 206 int i, bufsize = 2 * key->conf.keylen + 2;
207 char *buf = kmalloc(bufsize, GFP_KERNEL); 207 char *buf = kmalloc(bufsize, GFP_KERNEL);
208 char *p = buf; 208 char *p = buf;
209 ssize_t res;
210
211 if (!buf)
212 return -ENOMEM;
209 213
210 for (i = 0; i < key->conf.keylen; i++) 214 for (i = 0; i < key->conf.keylen; i++)
211 p += scnprintf(p, bufsize + buf - p, "%02x", key->conf.key[i]); 215 p += scnprintf(p, bufsize + buf - p, "%02x", key->conf.key[i]);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index ff60c022f51d..239c4836a946 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -456,6 +456,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
456 if (!sta) 456 if (!sta)
457 return NULL; 457 return NULL;
458 458
459 sta->last_rx = jiffies;
459 set_sta_flags(sta, WLAN_STA_AUTHORIZED); 460 set_sta_flags(sta, WLAN_STA_AUTHORIZED);
460 461
461 /* make sure mandatory rates are always added */ 462 /* make sure mandatory rates are always added */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f9163b12c7f1..7aa85591dbe7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -391,6 +391,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
391 u32 hw_reconf_flags = 0; 391 u32 hw_reconf_flags = 0;
392 int i; 392 int i;
393 393
394 if (local->scan_sdata == sdata)
395 ieee80211_scan_cancel(local);
396
394 clear_bit(SDATA_STATE_RUNNING, &sdata->state); 397 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
395 398
396 /* 399 /*
@@ -523,9 +526,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
523 synchronize_rcu(); 526 synchronize_rcu();
524 skb_queue_purge(&sdata->skb_queue); 527 skb_queue_purge(&sdata->skb_queue);
525 528
526 if (local->scan_sdata == sdata)
527 ieee80211_scan_cancel(local);
528
529 /* 529 /*
530 * Disable beaconing here for mesh only, AP and IBSS 530 * Disable beaconing here for mesh only, AP and IBSS
531 * are already taken care of. 531 * are already taken care of.
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 22bc42b18991..107a0cbe52ac 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -677,10 +677,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
677 /* 677 /*
678 * Calculate scan IE length -- we need this to alloc 678 * Calculate scan IE length -- we need this to alloc
679 * memory and to subtract from the driver limit. It 679 * memory and to subtract from the driver limit. It
680 * includes the (extended) supported rates and HT 680 * includes the DS Params, (extended) supported rates, and HT
681 * information -- SSID is the driver's responsibility. 681 * information -- SSID is the driver's responsibility.
682 */ 682 */
683 local->scan_ies_len = 4 + max_bitrates; /* (ext) supp rates */ 683 local->scan_ies_len = 4 + max_bitrates /* (ext) supp rates */ +
684 3 /* DS Params */;
684 if (supp_ht) 685 if (supp_ht)
685 local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap); 686 local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap);
686 687
@@ -748,7 +749,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
748 hw->queues = IEEE80211_MAX_QUEUES; 749 hw->queues = IEEE80211_MAX_QUEUES;
749 750
750 local->workqueue = 751 local->workqueue =
751 create_singlethread_workqueue(wiphy_name(local->hw.wiphy)); 752 alloc_ordered_workqueue(wiphy_name(local->hw.wiphy), 0);
752 if (!local->workqueue) { 753 if (!local->workqueue) {
753 result = -ENOMEM; 754 result = -ENOMEM;
754 goto fail_workqueue; 755 goto fail_workqueue;
@@ -962,12 +963,6 @@ static void __exit ieee80211_exit(void)
962 rc80211_minstrel_ht_exit(); 963 rc80211_minstrel_ht_exit();
963 rc80211_minstrel_exit(); 964 rc80211_minstrel_exit();
964 965
965 /*
966 * For key todo, it'll be empty by now but the work
967 * might still be scheduled.
968 */
969 flush_scheduled_work();
970
971 if (mesh_allocated) 966 if (mesh_allocated)
972 ieee80211s_stop(); 967 ieee80211s_stop();
973 968
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 809cf230d251..33f76993da08 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -329,6 +329,9 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
329 * if needed. 329 * if needed.
330 */ 330 */
331 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { 331 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
332 /* Skip invalid rates */
333 if (info->control.rates[i].idx < 0)
334 break;
332 /* Rate masking supports only legacy rates for now */ 335 /* Rate masking supports only legacy rates for now */
333 if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) 336 if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS)
334 continue; 337 continue;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 43288259f4a1..1534f2b44caf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -525,6 +525,7 @@ config NETFILTER_XT_TARGET_TPROXY
525 depends on NETFILTER_XTABLES 525 depends on NETFILTER_XTABLES
526 depends on NETFILTER_ADVANCED 526 depends on NETFILTER_ADVANCED
527 select NF_DEFRAG_IPV4 527 select NF_DEFRAG_IPV4
528 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
528 help 529 help
529 This option adds a `TPROXY' target, which is somewhat similar to 530 This option adds a `TPROXY' target, which is somewhat similar to
530 REDIRECT. It can only be used in the mangle table and is useful 531 REDIRECT. It can only be used in the mangle table and is useful
@@ -927,6 +928,7 @@ config NETFILTER_XT_MATCH_SOCKET
927 depends on NETFILTER_ADVANCED 928 depends on NETFILTER_ADVANCED
928 depends on !NF_CONNTRACK || NF_CONNTRACK 929 depends on !NF_CONNTRACK || NF_CONNTRACK
929 select NF_DEFRAG_IPV4 930 select NF_DEFRAG_IPV4
931 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
930 help 932 help
931 This option adds a `socket' match, which can be used to match 933 This option adds a `socket' match, which can be used to match
932 packets for which a TCP or UDP socket lookup finds a valid socket. 934 packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index a22dac227055..70bd1d0774c6 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -4,6 +4,7 @@
4menuconfig IP_VS 4menuconfig IP_VS
5 tristate "IP virtual server support" 5 tristate "IP virtual server support"
6 depends on NET && INET && NETFILTER 6 depends on NET && INET && NETFILTER
7 depends on (NF_CONNTRACK || NF_CONNTRACK=n)
7 ---help--- 8 ---help---
8 IP Virtual Server support will let you build a high-performance 9 IP Virtual Server support will let you build a high-performance
9 virtual server based on cluster of two or more real servers. This 10 virtual server based on cluster of two or more real servers. This
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1eacf8d9966a..27a5ea6b6a0f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1312,7 +1312,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
1312 if (!hash) { 1312 if (!hash) {
1313 *vmalloced = 1; 1313 *vmalloced = 1;
1314 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1314 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1315 hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 1315 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1316 PAGE_KERNEL);
1316 } 1317 }
1317 1318
1318 if (hash && nulls) 1319 if (hash && nulls)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ed6d92958023..dc7bb74110df 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -292,6 +292,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
292 292
293 for (i = 0; i < MAX_NF_CT_PROTO; i++) 293 for (i = 0; i < MAX_NF_CT_PROTO; i++)
294 proto_array[i] = &nf_conntrack_l4proto_generic; 294 proto_array[i] = &nf_conntrack_l4proto_generic;
295
296 /* Before making proto_array visible to lockless readers,
297 * we must make sure its content is committed to memory.
298 */
299 smp_wmb();
300
295 nf_ct_protos[l4proto->l3proto] = proto_array; 301 nf_ct_protos[l4proto->l3proto] = proto_array;
296 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != 302 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
297 &nf_conntrack_l4proto_generic) { 303 &nf_conntrack_l4proto_generic) {
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 19c482caf30b..640678f47a2a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -21,7 +21,9 @@
21#include <linux/netfilter_ipv4/ip_tables.h> 21#include <linux/netfilter_ipv4/ip_tables.h>
22 22
23#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 24
25#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
26#define XT_TPROXY_HAVE_IPV6 1
25#include <net/if_inet6.h> 27#include <net/if_inet6.h>
26#include <net/addrconf.h> 28#include <net/addrconf.h>
27#include <linux/netfilter_ipv6/ip6_tables.h> 29#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -172,7 +174,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
172 return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); 174 return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
173} 175}
174 176
175#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 177#ifdef XT_TPROXY_HAVE_IPV6
176 178
177static inline const struct in6_addr * 179static inline const struct in6_addr *
178tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, 180tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
@@ -372,7 +374,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
372 .hooks = 1 << NF_INET_PRE_ROUTING, 374 .hooks = 1 << NF_INET_PRE_ROUTING,
373 .me = THIS_MODULE, 375 .me = THIS_MODULE,
374 }, 376 },
375#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 377#ifdef XT_TPROXY_HAVE_IPV6
376 { 378 {
377 .name = "TPROXY", 379 .name = "TPROXY",
378 .family = NFPROTO_IPV6, 380 .family = NFPROTO_IPV6,
@@ -391,7 +393,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
391static int __init tproxy_tg_init(void) 393static int __init tproxy_tg_init(void)
392{ 394{
393 nf_defrag_ipv4_enable(); 395 nf_defrag_ipv4_enable();
394#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 396#ifdef XT_TPROXY_HAVE_IPV6
395 nf_defrag_ipv6_enable(); 397 nf_defrag_ipv6_enable();
396#endif 398#endif
397 399
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2dbd4c857735..00d6ae838303 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -14,7 +14,6 @@
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv4/ip_tables.h> 16#include <linux/netfilter_ipv4/ip_tables.h>
17#include <linux/netfilter_ipv6/ip6_tables.h>
18#include <net/tcp.h> 17#include <net/tcp.h>
19#include <net/udp.h> 18#include <net/udp.h>
20#include <net/icmp.h> 19#include <net/icmp.h>
@@ -22,7 +21,12 @@
22#include <net/inet_sock.h> 21#include <net/inet_sock.h>
23#include <net/netfilter/nf_tproxy_core.h> 22#include <net/netfilter/nf_tproxy_core.h>
24#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24
25#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
26#define XT_SOCKET_HAVE_IPV6 1
27#include <linux/netfilter_ipv6/ip6_tables.h>
25#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 28#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
29#endif
26 30
27#include <linux/netfilter/xt_socket.h> 31#include <linux/netfilter/xt_socket.h>
28 32
@@ -186,12 +190,12 @@ socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
186 return socket_match(skb, par, par->matchinfo); 190 return socket_match(skb, par, par->matchinfo);
187} 191}
188 192
189#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 193#ifdef XT_SOCKET_HAVE_IPV6
190 194
191static int 195static int
192extract_icmp6_fields(const struct sk_buff *skb, 196extract_icmp6_fields(const struct sk_buff *skb,
193 unsigned int outside_hdrlen, 197 unsigned int outside_hdrlen,
194 u8 *protocol, 198 int *protocol,
195 struct in6_addr **raddr, 199 struct in6_addr **raddr,
196 struct in6_addr **laddr, 200 struct in6_addr **laddr,
197 __be16 *rport, 201 __be16 *rport,
@@ -248,8 +252,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
248 struct sock *sk; 252 struct sock *sk;
249 struct in6_addr *daddr, *saddr; 253 struct in6_addr *daddr, *saddr;
250 __be16 dport, sport; 254 __be16 dport, sport;
251 int thoff; 255 int thoff, tproto;
252 u8 tproto;
253 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; 256 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
254 257
255 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); 258 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
@@ -301,7 +304,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
301 sk = NULL; 304 sk = NULL;
302 } 305 }
303 306
304 pr_debug("proto %hhu %pI6:%hu -> %pI6:%hu " 307 pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
305 "(orig %pI6:%hu) sock %p\n", 308 "(orig %pI6:%hu) sock %p\n",
306 tproto, saddr, ntohs(sport), 309 tproto, saddr, ntohs(sport),
307 daddr, ntohs(dport), 310 daddr, ntohs(dport),
@@ -331,7 +334,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
331 (1 << NF_INET_LOCAL_IN), 334 (1 << NF_INET_LOCAL_IN),
332 .me = THIS_MODULE, 335 .me = THIS_MODULE,
333 }, 336 },
334#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 337#ifdef XT_SOCKET_HAVE_IPV6
335 { 338 {
336 .name = "socket", 339 .name = "socket",
337 .revision = 1, 340 .revision = 1,
@@ -348,7 +351,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
348static int __init socket_mt_init(void) 351static int __init socket_mt_init(void)
349{ 352{
350 nf_defrag_ipv4_enable(); 353 nf_defrag_ipv4_enable();
351#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 354#ifdef XT_SOCKET_HAVE_IPV6
352 nf_defrag_ipv6_enable(); 355 nf_defrag_ipv6_enable();
353#endif 356#endif
354 357
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index cd96ed3ccee4..478181d53c55 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -83,9 +83,9 @@ struct netlink_sock {
83 struct module *module; 83 struct module *module;
84}; 84};
85 85
86struct listeners_rcu_head { 86struct listeners {
87 struct rcu_head rcu_head; 87 struct rcu_head rcu;
88 void *ptr; 88 unsigned long masks[0];
89}; 89};
90 90
91#define NETLINK_KERNEL_SOCKET 0x1 91#define NETLINK_KERNEL_SOCKET 0x1
@@ -119,7 +119,7 @@ struct nl_pid_hash {
119struct netlink_table { 119struct netlink_table {
120 struct nl_pid_hash hash; 120 struct nl_pid_hash hash;
121 struct hlist_head mc_list; 121 struct hlist_head mc_list;
122 unsigned long *listeners; 122 struct listeners __rcu *listeners;
123 unsigned int nl_nonroot; 123 unsigned int nl_nonroot;
124 unsigned int groups; 124 unsigned int groups;
125 struct mutex *cb_mutex; 125 struct mutex *cb_mutex;
@@ -338,7 +338,7 @@ netlink_update_listeners(struct sock *sk)
338 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 338 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
339 mask |= nlk_sk(sk)->groups[i]; 339 mask |= nlk_sk(sk)->groups[i];
340 } 340 }
341 tbl->listeners[i] = mask; 341 tbl->listeners->masks[i] = mask;
342 } 342 }
343 /* this function is only called with the netlink table "grabbed", which 343 /* this function is only called with the netlink table "grabbed", which
344 * makes sure updates are visible before bind or setsockopt return. */ 344 * makes sure updates are visible before bind or setsockopt return. */
@@ -936,7 +936,7 @@ EXPORT_SYMBOL(netlink_unicast);
936int netlink_has_listeners(struct sock *sk, unsigned int group) 936int netlink_has_listeners(struct sock *sk, unsigned int group)
937{ 937{
938 int res = 0; 938 int res = 0;
939 unsigned long *listeners; 939 struct listeners *listeners;
940 940
941 BUG_ON(!netlink_is_kernel(sk)); 941 BUG_ON(!netlink_is_kernel(sk));
942 942
@@ -944,7 +944,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
944 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 944 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
945 945
946 if (group - 1 < nl_table[sk->sk_protocol].groups) 946 if (group - 1 < nl_table[sk->sk_protocol].groups)
947 res = test_bit(group - 1, listeners); 947 res = test_bit(group - 1, listeners->masks);
948 948
949 rcu_read_unlock(); 949 rcu_read_unlock();
950 950
@@ -1498,7 +1498,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1498 struct socket *sock; 1498 struct socket *sock;
1499 struct sock *sk; 1499 struct sock *sk;
1500 struct netlink_sock *nlk; 1500 struct netlink_sock *nlk;
1501 unsigned long *listeners = NULL; 1501 struct listeners *listeners = NULL;
1502 1502
1503 BUG_ON(!nl_table); 1503 BUG_ON(!nl_table);
1504 1504
@@ -1523,8 +1523,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1523 if (groups < 32) 1523 if (groups < 32)
1524 groups = 32; 1524 groups = 32;
1525 1525
1526 listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), 1526 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1527 GFP_KERNEL);
1528 if (!listeners) 1527 if (!listeners)
1529 goto out_sock_release; 1528 goto out_sock_release;
1530 1529
@@ -1541,7 +1540,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1541 netlink_table_grab(); 1540 netlink_table_grab();
1542 if (!nl_table[unit].registered) { 1541 if (!nl_table[unit].registered) {
1543 nl_table[unit].groups = groups; 1542 nl_table[unit].groups = groups;
1544 nl_table[unit].listeners = listeners; 1543 rcu_assign_pointer(nl_table[unit].listeners, listeners);
1545 nl_table[unit].cb_mutex = cb_mutex; 1544 nl_table[unit].cb_mutex = cb_mutex;
1546 nl_table[unit].module = module; 1545 nl_table[unit].module = module;
1547 nl_table[unit].registered = 1; 1546 nl_table[unit].registered = 1;
@@ -1572,43 +1571,28 @@ netlink_kernel_release(struct sock *sk)
1572EXPORT_SYMBOL(netlink_kernel_release); 1571EXPORT_SYMBOL(netlink_kernel_release);
1573 1572
1574 1573
1575static void netlink_free_old_listeners(struct rcu_head *rcu_head) 1574static void listeners_free_rcu(struct rcu_head *head)
1576{ 1575{
1577 struct listeners_rcu_head *lrh; 1576 kfree(container_of(head, struct listeners, rcu));
1578
1579 lrh = container_of(rcu_head, struct listeners_rcu_head, rcu_head);
1580 kfree(lrh->ptr);
1581} 1577}
1582 1578
1583int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 1579int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1584{ 1580{
1585 unsigned long *listeners, *old = NULL; 1581 struct listeners *new, *old;
1586 struct listeners_rcu_head *old_rcu_head;
1587 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 1582 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1588 1583
1589 if (groups < 32) 1584 if (groups < 32)
1590 groups = 32; 1585 groups = 32;
1591 1586
1592 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 1587 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1593 listeners = kzalloc(NLGRPSZ(groups) + 1588 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1594 sizeof(struct listeners_rcu_head), 1589 if (!new)
1595 GFP_ATOMIC);
1596 if (!listeners)
1597 return -ENOMEM; 1590 return -ENOMEM;
1598 old = tbl->listeners; 1591 old = rcu_dereference_raw(tbl->listeners);
1599 memcpy(listeners, old, NLGRPSZ(tbl->groups)); 1592 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1600 rcu_assign_pointer(tbl->listeners, listeners); 1593 rcu_assign_pointer(tbl->listeners, new);
1601 /* 1594
1602 * Free the old memory after an RCU grace period so we 1595 call_rcu(&old->rcu, listeners_free_rcu);
1603 * don't leak it. We use call_rcu() here in order to be
1604 * able to call this function from atomic contexts. The
1605 * allocation of this memory will have reserved enough
1606 * space for struct listeners_rcu_head at the end.
1607 */
1608 old_rcu_head = (void *)(tbl->listeners +
1609 NLGRPLONGS(tbl->groups));
1610 old_rcu_head->ptr = old;
1611 call_rcu(&old_rcu_head->rcu_head, netlink_free_old_listeners);
1612 } 1596 }
1613 tbl->groups = groups; 1597 tbl->groups = groups;
1614 1598
@@ -2104,18 +2088,17 @@ static void __net_exit netlink_net_exit(struct net *net)
2104 2088
2105static void __init netlink_add_usersock_entry(void) 2089static void __init netlink_add_usersock_entry(void)
2106{ 2090{
2107 unsigned long *listeners; 2091 struct listeners *listeners;
2108 int groups = 32; 2092 int groups = 32;
2109 2093
2110 listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), 2094 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2111 GFP_KERNEL);
2112 if (!listeners) 2095 if (!listeners)
2113 panic("netlink_add_usersock_entry: Cannot allocate listneres\n"); 2096 panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2114 2097
2115 netlink_table_grab(); 2098 netlink_table_grab();
2116 2099
2117 nl_table[NETLINK_USERSOCK].groups = groups; 2100 nl_table[NETLINK_USERSOCK].groups = groups;
2118 nl_table[NETLINK_USERSOCK].listeners = listeners; 2101 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2119 nl_table[NETLINK_USERSOCK].module = THIS_MODULE; 2102 nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2120 nl_table[NETLINK_USERSOCK].registered = 1; 2103 nl_table[NETLINK_USERSOCK].registered = 1;
2121 2104
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27b9d46..8298e676f5a0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1610,9 +1610,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1610 1610
1611 err = -EINVAL; 1611 err = -EINVAL;
1612 vnet_hdr_len = sizeof(vnet_hdr); 1612 vnet_hdr_len = sizeof(vnet_hdr);
1613 if ((len -= vnet_hdr_len) < 0) 1613 if (len < vnet_hdr_len)
1614 goto out_free; 1614 goto out_free;
1615 1615
1616 len -= vnet_hdr_len;
1617
1616 if (skb_is_gso(skb)) { 1618 if (skb_is_gso(skb)) {
1617 struct skb_shared_info *sinfo = skb_shinfo(skb); 1619 struct skb_shared_info *sinfo = skb_shinfo(skb);
1618 1620
@@ -1719,7 +1721,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1719 rcu_read_lock(); 1721 rcu_read_lock();
1720 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); 1722 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1721 if (dev) 1723 if (dev)
1722 strlcpy(uaddr->sa_data, dev->name, 15); 1724 strncpy(uaddr->sa_data, dev->name, 14);
1723 else 1725 else
1724 memset(uaddr->sa_data, 0, 14); 1726 memset(uaddr->sa_data, 0, 14);
1725 rcu_read_unlock(); 1727 rcu_read_unlock();
@@ -1742,6 +1744,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1742 sll->sll_family = AF_PACKET; 1744 sll->sll_family = AF_PACKET;
1743 sll->sll_ifindex = po->ifindex; 1745 sll->sll_ifindex = po->ifindex;
1744 sll->sll_protocol = po->num; 1746 sll->sll_protocol = po->num;
1747 sll->sll_pkttype = 0;
1745 rcu_read_lock(); 1748 rcu_read_lock();
1746 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); 1749 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
1747 if (dev) { 1750 if (dev) {
diff --git a/net/rds/loop.c b/net/rds/loop.c
index c390156b426f..aeec1d483b17 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -134,8 +134,12 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
134static void rds_loop_conn_free(void *arg) 134static void rds_loop_conn_free(void *arg)
135{ 135{
136 struct rds_loop_connection *lc = arg; 136 struct rds_loop_connection *lc = arg;
137 unsigned long flags;
138
137 rdsdebug("lc %p\n", lc); 139 rdsdebug("lc %p\n", lc);
140 spin_lock_irqsave(&loop_conns_lock, flags);
138 list_del(&lc->loop_node); 141 list_del(&lc->loop_node);
142 spin_unlock_irqrestore(&loop_conns_lock, flags);
139 kfree(lc); 143 kfree(lc);
140} 144}
141 145
diff --git a/net/rds/message.c b/net/rds/message.c
index a84545dae370..1fd3d29023d7 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -224,6 +224,9 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
224 WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs); 224 WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
225 WARN_ON(!nents); 225 WARN_ON(!nents);
226 226
227 if (rm->m_used_sgs + nents > rm->m_total_sgs)
228 return NULL;
229
227 sg_ret = &sg_first[rm->m_used_sgs]; 230 sg_ret = &sg_first[rm->m_used_sgs];
228 sg_init_table(sg_ret, nents); 231 sg_init_table(sg_ret, nents);
229 rm->m_used_sgs += nents; 232 rm->m_used_sgs += nents;
@@ -246,6 +249,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
246 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 249 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
247 rm->data.op_nents = ceil(total_len, PAGE_SIZE); 250 rm->data.op_nents = ceil(total_len, PAGE_SIZE);
248 rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); 251 rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
252 if (!rm->data.op_sg) {
253 rds_message_put(rm);
254 return ERR_PTR(-ENOMEM);
255 }
249 256
250 for (i = 0; i < rm->data.op_nents; ++i) { 257 for (i = 0; i < rm->data.op_nents; ++i) {
251 sg_set_page(&rm->data.op_sg[i], 258 sg_set_page(&rm->data.op_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 1a41debca1ce..4e37c1cbe8b2 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
479 479
480 480
481/* 481/*
482 * Count the number of pages needed to describe an incoming iovec. 482 * Count the number of pages needed to describe an incoming iovec array.
483 */ 483 */
484static int rds_rdma_pages(struct rds_rdma_args *args) 484static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
485{
486 int tot_pages = 0;
487 unsigned int nr_pages;
488 unsigned int i;
489
490 /* figure out the number of pages in the vector */
491 for (i = 0; i < nr_iovecs; i++) {
492 nr_pages = rds_pages_in_vec(&iov[i]);
493 if (nr_pages == 0)
494 return -EINVAL;
495
496 tot_pages += nr_pages;
497
498 /*
499 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
500 * so tot_pages cannot overflow without first going negative.
501 */
502 if (tot_pages < 0)
503 return -EINVAL;
504 }
505
506 return tot_pages;
507}
508
509int rds_rdma_extra_size(struct rds_rdma_args *args)
485{ 510{
486 struct rds_iovec vec; 511 struct rds_iovec vec;
487 struct rds_iovec __user *local_vec; 512 struct rds_iovec __user *local_vec;
488 unsigned int tot_pages = 0; 513 int tot_pages = 0;
489 unsigned int nr_pages; 514 unsigned int nr_pages;
490 unsigned int i; 515 unsigned int i;
491 516
@@ -502,14 +527,16 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
502 return -EINVAL; 527 return -EINVAL;
503 528
504 tot_pages += nr_pages; 529 tot_pages += nr_pages;
505 }
506 530
507 return tot_pages; 531 /*
508} 532 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
533 * so tot_pages cannot overflow without first going negative.
534 */
535 if (tot_pages < 0)
536 return -EINVAL;
537 }
509 538
510int rds_rdma_extra_size(struct rds_rdma_args *args) 539 return tot_pages * sizeof(struct scatterlist);
511{
512 return rds_rdma_pages(args) * sizeof(struct scatterlist);
513} 540}
514 541
515/* 542/*
@@ -520,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
520 struct cmsghdr *cmsg) 547 struct cmsghdr *cmsg)
521{ 548{
522 struct rds_rdma_args *args; 549 struct rds_rdma_args *args;
523 struct rds_iovec vec;
524 struct rm_rdma_op *op = &rm->rdma; 550 struct rm_rdma_op *op = &rm->rdma;
525 int nr_pages; 551 int nr_pages;
526 unsigned int nr_bytes; 552 unsigned int nr_bytes;
527 struct page **pages = NULL; 553 struct page **pages = NULL;
528 struct rds_iovec __user *local_vec; 554 struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
529 unsigned int nr; 555 int iov_size;
530 unsigned int i, j; 556 unsigned int i, j;
531 int ret = 0; 557 int ret = 0;
532 558
@@ -541,14 +567,31 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
541 goto out; 567 goto out;
542 } 568 }
543 569
544 if (args->nr_local > (u64)UINT_MAX) { 570 if (args->nr_local > UIO_MAXIOV) {
545 ret = -EMSGSIZE; 571 ret = -EMSGSIZE;
546 goto out; 572 goto out;
547 } 573 }
548 574
549 nr_pages = rds_rdma_pages(args); 575 /* Check whether to allocate the iovec area */
550 if (nr_pages < 0) 576 iov_size = args->nr_local * sizeof(struct rds_iovec);
577 if (args->nr_local > UIO_FASTIOV) {
578 iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
579 if (!iovs) {
580 ret = -ENOMEM;
581 goto out;
582 }
583 }
584
585 if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
586 ret = -EFAULT;
587 goto out;
588 }
589
590 nr_pages = rds_rdma_pages(iovs, args->nr_local);
591 if (nr_pages < 0) {
592 ret = -EINVAL;
551 goto out; 593 goto out;
594 }
552 595
553 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 596 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
554 if (!pages) { 597 if (!pages) {
@@ -564,6 +607,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
564 op->op_recverr = rs->rs_recverr; 607 op->op_recverr = rs->rs_recverr;
565 WARN_ON(!nr_pages); 608 WARN_ON(!nr_pages);
566 op->op_sg = rds_message_alloc_sgs(rm, nr_pages); 609 op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
610 if (!op->op_sg) {
611 ret = -ENOMEM;
612 goto out;
613 }
567 614
568 if (op->op_notify || op->op_recverr) { 615 if (op->op_notify || op->op_recverr) {
569 /* We allocate an uninitialized notifier here, because 616 /* We allocate an uninitialized notifier here, because
@@ -597,50 +644,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
597 (unsigned long long)args->remote_vec.addr, 644 (unsigned long long)args->remote_vec.addr,
598 op->op_rkey); 645 op->op_rkey);
599 646
600 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
601
602 for (i = 0; i < args->nr_local; i++) { 647 for (i = 0; i < args->nr_local; i++) {
603 if (copy_from_user(&vec, &local_vec[i], 648 struct rds_iovec *iov = &iovs[i];
604 sizeof(struct rds_iovec))) { 649 /* don't need to check, rds_rdma_pages() verified nr will be +nonzero */
605 ret = -EFAULT; 650 unsigned int nr = rds_pages_in_vec(iov);
606 goto out;
607 }
608
609 nr = rds_pages_in_vec(&vec);
610 if (nr == 0) {
611 ret = -EINVAL;
612 goto out;
613 }
614 651
615 rs->rs_user_addr = vec.addr; 652 rs->rs_user_addr = iov->addr;
616 rs->rs_user_bytes = vec.bytes; 653 rs->rs_user_bytes = iov->bytes;
617 654
618 /* If it's a WRITE operation, we want to pin the pages for reading. 655 /* If it's a WRITE operation, we want to pin the pages for reading.
619 * If it's a READ operation, we need to pin the pages for writing. 656 * If it's a READ operation, we need to pin the pages for writing.
620 */ 657 */
621 ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write); 658 ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
622 if (ret < 0) 659 if (ret < 0)
623 goto out; 660 goto out;
624 661
625 rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n", 662 rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
626 nr_bytes, nr, vec.bytes, vec.addr); 663 nr_bytes, nr, iov->bytes, iov->addr);
627 664
628 nr_bytes += vec.bytes; 665 nr_bytes += iov->bytes;
629 666
630 for (j = 0; j < nr; j++) { 667 for (j = 0; j < nr; j++) {
631 unsigned int offset = vec.addr & ~PAGE_MASK; 668 unsigned int offset = iov->addr & ~PAGE_MASK;
632 struct scatterlist *sg; 669 struct scatterlist *sg;
633 670
634 sg = &op->op_sg[op->op_nents + j]; 671 sg = &op->op_sg[op->op_nents + j];
635 sg_set_page(sg, pages[j], 672 sg_set_page(sg, pages[j],
636 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), 673 min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
637 offset); 674 offset);
638 675
639 rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n", 676 rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
640 sg->offset, sg->length, vec.addr, vec.bytes); 677 sg->offset, sg->length, iov->addr, iov->bytes);
641 678
642 vec.addr += sg->length; 679 iov->addr += sg->length;
643 vec.bytes -= sg->length; 680 iov->bytes -= sg->length;
644 } 681 }
645 682
646 op->op_nents += nr; 683 op->op_nents += nr;
@@ -655,13 +692,14 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
655 } 692 }
656 op->op_bytes = nr_bytes; 693 op->op_bytes = nr_bytes;
657 694
658 ret = 0;
659out: 695out:
696 if (iovs != iovstack)
697 sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
660 kfree(pages); 698 kfree(pages);
661 if (ret) 699 if (ret)
662 rds_rdma_free_op(op); 700 rds_rdma_free_op(op);
663 701 else
664 rds_stats_inc(s_send_rdma); 702 rds_stats_inc(s_send_rdma);
665 703
666 return ret; 704 return ret;
667} 705}
@@ -773,6 +811,10 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
773 rm->atomic.op_active = 1; 811 rm->atomic.op_active = 1;
774 rm->atomic.op_recverr = rs->rs_recverr; 812 rm->atomic.op_recverr = rs->rs_recverr;
775 rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); 813 rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
814 if (!rm->atomic.op_sg) {
815 ret = -ENOMEM;
816 goto err;
817 }
776 818
777 /* verify 8 byte-aligned */ 819 /* verify 8 byte-aligned */
778 if (args->local_addr & 0x7) { 820 if (args->local_addr & 0x7) {
diff --git a/net/rds/send.c b/net/rds/send.c
index 0bc9db17a87d..35b9c2e9caf1 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -973,6 +973,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
973 /* Attach data to the rm */ 973 /* Attach data to the rm */
974 if (payload_len) { 974 if (payload_len) {
975 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE)); 975 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
976 if (!rm->data.op_sg) {
977 ret = -ENOMEM;
978 goto out;
979 }
976 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len); 980 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
977 if (ret) 981 if (ret)
978 goto out; 982 goto out;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08a8c6cf2d10..8e0a32001c90 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -221,7 +221,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
221static void rds_tcp_conn_free(void *arg) 221static void rds_tcp_conn_free(void *arg)
222{ 222{
223 struct rds_tcp_connection *tc = arg; 223 struct rds_tcp_connection *tc = arg;
224 unsigned long flags;
224 rdsdebug("freeing tc %p\n", tc); 225 rdsdebug("freeing tc %p\n", tc);
226
227 spin_lock_irqsave(&rds_tcp_conn_lock, flags);
228 list_del(&tc->t_tcp_node);
229 spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
230
225 kmem_cache_free(rds_tcp_conn_slab, tc); 231 kmem_cache_free(rds_tcp_conn_slab, tc);
226} 232}
227 233
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd050..f23d9155b1ef 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -268,6 +268,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
268 goto nla_put_failure; 268 goto nla_put_failure;
269 269
270 nla_nest_end(skb, nest); 270 nla_nest_end(skb, nest);
271
272 if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
273 goto nla_put_failure;
274
271 return skb->len; 275 return skb->len;
272 276
273nla_put_failure: 277nla_put_failure:
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 37dff78e9cb1..d49c40fb7e09 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
34 .populate = cgrp_populate, 34 .populate = cgrp_populate,
35#ifdef CONFIG_NET_CLS_CGROUP 35#ifdef CONFIG_NET_CLS_CGROUP
36 .subsys_id = net_cls_subsys_id, 36 .subsys_id = net_cls_subsys_id,
37#else
38#define net_cls_subsys_id net_cls_subsys.subsys_id
39#endif 37#endif
40 .module = THIS_MODULE, 38 .module = THIS_MODULE,
41}; 39};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 763253257411..ea8f566e720c 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -103,7 +103,8 @@ retry:
103 103
104static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) 104static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
105{ 105{
106 textsearch_destroy(EM_TEXT_PRIV(m)->config); 106 if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
107 textsearch_destroy(EM_TEXT_PRIV(m)->config);
107} 108}
108 109
109static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) 110static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ef29c74d85e..e58f9476f29c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -92,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specific;
92struct kmem_cache *sctp_chunk_cachep __read_mostly; 92struct kmem_cache *sctp_chunk_cachep __read_mostly;
93struct kmem_cache *sctp_bucket_cachep __read_mostly; 93struct kmem_cache *sctp_bucket_cachep __read_mostly;
94 94
95int sysctl_sctp_mem[3]; 95long sysctl_sctp_mem[3];
96int sysctl_sctp_rmem[3]; 96int sysctl_sctp_rmem[3];
97int sysctl_sctp_wmem[3]; 97int sysctl_sctp_wmem[3];
98 98
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e34ca9cc1167..6bd554323a34 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -111,12 +111,12 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
111static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; 111static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
112 112
113extern struct kmem_cache *sctp_bucket_cachep; 113extern struct kmem_cache *sctp_bucket_cachep;
114extern int sysctl_sctp_mem[3]; 114extern long sysctl_sctp_mem[3];
115extern int sysctl_sctp_rmem[3]; 115extern int sysctl_sctp_rmem[3];
116extern int sysctl_sctp_wmem[3]; 116extern int sysctl_sctp_wmem[3];
117 117
118static int sctp_memory_pressure; 118static int sctp_memory_pressure;
119static atomic_t sctp_memory_allocated; 119static atomic_long_t sctp_memory_allocated;
120struct percpu_counter sctp_sockets_allocated; 120struct percpu_counter sctp_sockets_allocated;
121 121
122static void sctp_enter_memory_pressure(struct sock *sk) 122static void sctp_enter_memory_pressure(struct sock *sk)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 832590bbe0c0..50cb57f0919e 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -54,7 +54,7 @@ static int sack_timer_max = 500;
54static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ 54static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
55static int rwnd_scale_max = 16; 55static int rwnd_scale_max = 16;
56 56
57extern int sysctl_sctp_mem[3]; 57extern long sysctl_sctp_mem[3];
58extern int sysctl_sctp_rmem[3]; 58extern int sysctl_sctp_rmem[3];
59extern int sysctl_sctp_wmem[3]; 59extern int sysctl_sctp_wmem[3];
60 60
@@ -203,7 +203,7 @@ static ctl_table sctp_table[] = {
203 .data = &sysctl_sctp_mem, 203 .data = &sysctl_sctp_mem,
204 .maxlen = sizeof(sysctl_sctp_mem), 204 .maxlen = sizeof(sysctl_sctp_mem),
205 .mode = 0644, 205 .mode = 0644,
206 .proc_handler = proc_dointvec, 206 .proc_handler = proc_doulongvec_minmax
207 }, 207 },
208 { 208 {
209 .procname = "sctp_rmem", 209 .procname = "sctp_rmem",
diff --git a/net/socket.c b/net/socket.c
index abf3e2561521..3ca2fd9e3720 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -305,19 +305,17 @@ static const struct super_operations sockfs_ops = {
305 .statfs = simple_statfs, 305 .statfs = simple_statfs,
306}; 306};
307 307
308static int sockfs_get_sb(struct file_system_type *fs_type, 308static struct dentry *sockfs_mount(struct file_system_type *fs_type,
309 int flags, const char *dev_name, void *data, 309 int flags, const char *dev_name, void *data)
310 struct vfsmount *mnt)
311{ 310{
312 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 311 return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
313 mnt);
314} 312}
315 313
316static struct vfsmount *sock_mnt __read_mostly; 314static struct vfsmount *sock_mnt __read_mostly;
317 315
318static struct file_system_type sock_fs_type = { 316static struct file_system_type sock_fs_type = {
319 .name = "sockfs", 317 .name = "sockfs",
320 .get_sb = sockfs_get_sb, 318 .mount = sockfs_mount,
321 .kill_sb = kill_anon_super, 319 .kill_sb = kill_anon_super,
322}; 320};
323 321
@@ -377,7 +375,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
377 &socket_file_ops); 375 &socket_file_ops);
378 if (unlikely(!file)) { 376 if (unlikely(!file)) {
379 /* drop dentry, keep inode */ 377 /* drop dentry, keep inode */
380 atomic_inc(&path.dentry->d_inode->i_count); 378 ihold(path.dentry->d_inode);
381 path_put(&path); 379 path_put(&path);
382 put_unused_fd(fd); 380 put_unused_fd(fd);
383 return -ENFILE; 381 return -ENFILE;
@@ -480,6 +478,7 @@ static struct socket *sock_alloc(void)
480 sock = SOCKET_I(inode); 478 sock = SOCKET_I(inode);
481 479
482 kmemcheck_annotate_bitfield(sock, type); 480 kmemcheck_annotate_bitfield(sock, type);
481 inode->i_ino = get_next_ino();
483 inode->i_mode = S_IFSOCK | S_IRWXUGO; 482 inode->i_mode = S_IFSOCK | S_IRWXUGO;
484 inode->i_uid = current_fsuid(); 483 inode->i_uid = current_fsuid();
485 inode->i_gid = current_fsgid(); 484 inode->i_gid = current_fsgid();
@@ -1145,7 +1144,7 @@ call_kill:
1145} 1144}
1146EXPORT_SYMBOL(sock_wake_async); 1145EXPORT_SYMBOL(sock_wake_async);
1147 1146
1148static int __sock_create(struct net *net, int family, int type, int protocol, 1147int __sock_create(struct net *net, int family, int type, int protocol,
1149 struct socket **res, int kern) 1148 struct socket **res, int kern)
1150{ 1149{
1151 int err; 1150 int err;
@@ -1257,6 +1256,7 @@ out_release:
1257 rcu_read_unlock(); 1256 rcu_read_unlock();
1258 goto out_sock_release; 1257 goto out_sock_release;
1259} 1258}
1259EXPORT_SYMBOL(__sock_create);
1260 1260
1261int sock_create(int family, int type, int protocol, struct socket **res) 1261int sock_create(int family, int type, int protocol, struct socket **res)
1262{ 1262{
@@ -1652,6 +1652,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1652 struct iovec iov; 1652 struct iovec iov;
1653 int fput_needed; 1653 int fput_needed;
1654 1654
1655 if (len > INT_MAX)
1656 len = INT_MAX;
1655 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1657 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1656 if (!sock) 1658 if (!sock)
1657 goto out; 1659 goto out;
@@ -1709,6 +1711,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1709 int err, err2; 1711 int err, err2;
1710 int fput_needed; 1712 int fput_needed;
1711 1713
1714 if (size > INT_MAX)
1715 size = INT_MAX;
1712 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1716 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1713 if (!sock) 1717 if (!sock)
1714 goto out; 1718 goto out;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 3376d7657185..8873fd8ddacd 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -36,22 +36,3 @@ config RPCSEC_GSS_KRB5
36 Kerberos support should be installed. 36 Kerberos support should be installed.
37 37
38 If unsure, say Y. 38 If unsure, say Y.
39
40config RPCSEC_GSS_SPKM3
41 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
42 depends on SUNRPC && EXPERIMENTAL
43 select SUNRPC_GSS
44 select CRYPTO
45 select CRYPTO_MD5
46 select CRYPTO_DES
47 select CRYPTO_CAST5
48 select CRYPTO_CBC
49 help
50 Choose Y here to enable Secure RPC using the SPKM3 public key
51 GSS-API mechanism (RFC 2025).
52
53 Secure RPC calls with SPKM3 require an auxiliary userspace
54 daemon which may be found in the Linux nfs-utils package
55 available from http://linux-nfs.org/.
56
57 If unsure, say N.
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index e9eaaf7d43c1..afe67849269f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -595,7 +595,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
595int 595int
596rpcauth_refreshcred(struct rpc_task *task) 596rpcauth_refreshcred(struct rpc_task *task)
597{ 597{
598 struct rpc_cred *cred = task->tk_rqstp->rq_cred; 598 struct rpc_cred *cred;
599 int err; 599 int err;
600 600
601 cred = task->tk_rqstp->rq_cred; 601 cred = task->tk_rqstp->rq_cred;
@@ -658,7 +658,7 @@ out1:
658 return err; 658 return err;
659} 659}
660 660
661void __exit rpcauth_remove_module(void) 661void rpcauth_remove_module(void)
662{ 662{
663 rpc_destroy_authunix(); 663 rpc_destroy_authunix();
664 rpc_destroy_generic_auth(); 664 rpc_destroy_generic_auth();
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 43162bb3b78f..e010a015d996 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -158,7 +158,7 @@ int __init rpc_init_generic_auth(void)
158 return rpcauth_init_credcache(&generic_auth); 158 return rpcauth_init_credcache(&generic_auth);
159} 159}
160 160
161void __exit rpc_destroy_generic_auth(void) 161void rpc_destroy_generic_auth(void)
162{ 162{
163 rpcauth_destroy_credcache(&generic_auth); 163 rpcauth_destroy_credcache(&generic_auth);
164} 164}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 74a231735f67..7350d86a32ee 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -11,8 +11,3 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ 12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o 13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
14
15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
16
17rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \
18 gss_spkm3_token.o
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 778e5dfc5144..f375decc024b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -427,7 +427,7 @@ static int
427context_derive_keys_rc4(struct krb5_ctx *ctx) 427context_derive_keys_rc4(struct krb5_ctx *ctx)
428{ 428{
429 struct crypto_hash *hmac; 429 struct crypto_hash *hmac;
430 char sigkeyconstant[] = "signaturekey"; 430 static const char sigkeyconstant[] = "signaturekey";
431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ 431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
432 struct hash_desc desc; 432 struct hash_desc desc;
433 struct scatterlist sg[1]; 433 struct scatterlist sg[1];
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
deleted file mode 100644
index adade3d313f2..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ /dev/null
@@ -1,247 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_mech.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 * J. Bruce Fields <bfields@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#include <linux/err.h>
38#include <linux/module.h>
39#include <linux/init.h>
40#include <linux/types.h>
41#include <linux/slab.h>
42#include <linux/sunrpc/auth.h>
43#include <linux/in.h>
44#include <linux/sunrpc/svcauth_gss.h>
45#include <linux/sunrpc/gss_spkm3.h>
46#include <linux/sunrpc/xdr.h>
47#include <linux/crypto.h>
48
49#ifdef RPC_DEBUG
50# define RPCDBG_FACILITY RPCDBG_AUTH
51#endif
52
53static const void *
54simple_get_bytes(const void *p, const void *end, void *res, int len)
55{
56 const void *q = (const void *)((const char *)p + len);
57 if (unlikely(q > end || q < p))
58 return ERR_PTR(-EFAULT);
59 memcpy(res, p, len);
60 return q;
61}
62
63static const void *
64simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
65{
66 const void *q;
67 unsigned int len;
68 p = simple_get_bytes(p, end, &len, sizeof(len));
69 if (IS_ERR(p))
70 return p;
71 res->len = len;
72 if (len == 0) {
73 res->data = NULL;
74 return p;
75 }
76 q = (const void *)((const char *)p + len);
77 if (unlikely(q > end || q < p))
78 return ERR_PTR(-EFAULT);
79 res->data = kmemdup(p, len, GFP_NOFS);
80 if (unlikely(res->data == NULL))
81 return ERR_PTR(-ENOMEM);
82 return q;
83}
84
85static int
86gss_import_sec_context_spkm3(const void *p, size_t len,
87 struct gss_ctx *ctx_id,
88 gfp_t gfp_mask)
89{
90 const void *end = (const void *)((const char *)p + len);
91 struct spkm3_ctx *ctx;
92 int version;
93
94 if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
95 goto out_err;
96
97 p = simple_get_bytes(p, end, &version, sizeof(version));
98 if (IS_ERR(p))
99 goto out_err_free_ctx;
100 if (version != 1) {
101 dprintk("RPC: unknown spkm3 token format: "
102 "obsolete nfs-utils?\n");
103 p = ERR_PTR(-EINVAL);
104 goto out_err_free_ctx;
105 }
106
107 p = simple_get_netobj(p, end, &ctx->ctx_id);
108 if (IS_ERR(p))
109 goto out_err_free_ctx;
110
111 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
112 if (IS_ERR(p))
113 goto out_err_free_ctx_id;
114
115 p = simple_get_netobj(p, end, &ctx->mech_used);
116 if (IS_ERR(p))
117 goto out_err_free_ctx_id;
118
119 p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
120 if (IS_ERR(p))
121 goto out_err_free_mech;
122
123 p = simple_get_netobj(p, end, &ctx->conf_alg);
124 if (IS_ERR(p))
125 goto out_err_free_mech;
126
127 p = simple_get_netobj(p, end, &ctx->derived_conf_key);
128 if (IS_ERR(p))
129 goto out_err_free_conf_alg;
130
131 p = simple_get_netobj(p, end, &ctx->intg_alg);
132 if (IS_ERR(p))
133 goto out_err_free_conf_key;
134
135 p = simple_get_netobj(p, end, &ctx->derived_integ_key);
136 if (IS_ERR(p))
137 goto out_err_free_intg_alg;
138
139 if (p != end) {
140 p = ERR_PTR(-EFAULT);
141 goto out_err_free_intg_key;
142 }
143
144 ctx_id->internal_ctx_id = ctx;
145
146 dprintk("RPC: Successfully imported new spkm context.\n");
147 return 0;
148
149out_err_free_intg_key:
150 kfree(ctx->derived_integ_key.data);
151out_err_free_intg_alg:
152 kfree(ctx->intg_alg.data);
153out_err_free_conf_key:
154 kfree(ctx->derived_conf_key.data);
155out_err_free_conf_alg:
156 kfree(ctx->conf_alg.data);
157out_err_free_mech:
158 kfree(ctx->mech_used.data);
159out_err_free_ctx_id:
160 kfree(ctx->ctx_id.data);
161out_err_free_ctx:
162 kfree(ctx);
163out_err:
164 return PTR_ERR(p);
165}
166
167static void
168gss_delete_sec_context_spkm3(void *internal_ctx)
169{
170 struct spkm3_ctx *sctx = internal_ctx;
171
172 kfree(sctx->derived_integ_key.data);
173 kfree(sctx->intg_alg.data);
174 kfree(sctx->derived_conf_key.data);
175 kfree(sctx->conf_alg.data);
176 kfree(sctx->mech_used.data);
177 kfree(sctx->ctx_id.data);
178 kfree(sctx);
179}
180
181static u32
182gss_verify_mic_spkm3(struct gss_ctx *ctx,
183 struct xdr_buf *signbuf,
184 struct xdr_netobj *checksum)
185{
186 u32 maj_stat = 0;
187 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
188
189 maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
190
191 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
192 return maj_stat;
193}
194
195static u32
196gss_get_mic_spkm3(struct gss_ctx *ctx,
197 struct xdr_buf *message_buffer,
198 struct xdr_netobj *message_token)
199{
200 u32 err = 0;
201 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
202
203 err = spkm3_make_token(sctx, message_buffer,
204 message_token, SPKM_MIC_TOK);
205 dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
206 return err;
207}
208
209static const struct gss_api_ops gss_spkm3_ops = {
210 .gss_import_sec_context = gss_import_sec_context_spkm3,
211 .gss_get_mic = gss_get_mic_spkm3,
212 .gss_verify_mic = gss_verify_mic_spkm3,
213 .gss_delete_sec_context = gss_delete_sec_context_spkm3,
214};
215
216static struct pf_desc gss_spkm3_pfs[] = {
217 {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"},
218 {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
219};
220
221static struct gss_api_mech gss_spkm3_mech = {
222 .gm_name = "spkm3",
223 .gm_owner = THIS_MODULE,
224 .gm_oid = {7, "\053\006\001\005\005\001\003"},
225 .gm_ops = &gss_spkm3_ops,
226 .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs),
227 .gm_pfs = gss_spkm3_pfs,
228};
229
230static int __init init_spkm3_module(void)
231{
232 int status;
233
234 status = gss_mech_register(&gss_spkm3_mech);
235 if (status)
236 printk("Failed to register spkm3 gss mechanism!\n");
237 return status;
238}
239
240static void __exit cleanup_spkm3_module(void)
241{
242 gss_mech_unregister(&gss_spkm3_mech);
243}
244
245MODULE_LICENSE("GPL");
246module_init(init_spkm3_module);
247module_exit(cleanup_spkm3_module);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
deleted file mode 100644
index 5a3a65a0e2b4..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ /dev/null
@@ -1,186 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_seal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/jiffies.h>
38#include <linux/sunrpc/gss_spkm3.h>
39#include <linux/random.h>
40#include <linux/crypto.h>
41#include <linux/pagemap.h>
42#include <linux/scatterlist.h>
43#include <linux/sunrpc/xdr.h>
44
45#ifdef RPC_DEBUG
46# define RPCDBG_FACILITY RPCDBG_AUTH
47#endif
48
49const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
50const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
51
52/*
53 * spkm3_make_token()
54 *
55 * Only SPKM_MIC_TOK with md5 intg-alg is supported
56 */
57
58u32
59spkm3_make_token(struct spkm3_ctx *ctx,
60 struct xdr_buf * text, struct xdr_netobj * token,
61 int toktype)
62{
63 s32 checksum_type;
64 char tokhdrbuf[25];
65 char cksumdata[16];
66 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
67 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
68 int tokenlen = 0;
69 unsigned char *ptr;
70 s32 now;
71 int ctxelen = 0, ctxzbit = 0;
72 int md5elen = 0, md5zbit = 0;
73
74 now = jiffies;
75
76 if (ctx->ctx_id.len != 16) {
77 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
78 ctx->ctx_id.len);
79 goto out_err;
80 }
81
82 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
83 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
84 "algorithm. only support hmac-md5 I-ALG.\n");
85 goto out_err;
86 } else
87 checksum_type = CKSUMTYPE_HMAC_MD5;
88
89 if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
90 dprintk("RPC: gss_spkm3_seal: unsupported C-ALG "
91 "algorithm\n");
92 goto out_err;
93 }
94
95 if (toktype == SPKM_MIC_TOK) {
96 /* Calculate checksum over the mic-header */
97 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
98 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
99 ctxelen, ctxzbit);
100 if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
101 (char *)mic_hdr.data, mic_hdr.len,
102 text, 0, &md5cksum))
103 goto out_err;
104
105 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
106 tokenlen = 10 + ctxelen + 1 + md5elen + 1;
107
108 /* Create token header using generic routines */
109 token->len = g_token_size(&ctx->mech_used, tokenlen + 2);
110
111 ptr = token->data;
112 g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr);
113
114 spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
115 } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
116 dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK "
117 "not supported\n");
118 goto out_err;
119 }
120
121 /* XXX need to implement sequence numbers, and ctx->expired */
122
123 return GSS_S_COMPLETE;
124out_err:
125 token->data = NULL;
126 token->len = 0;
127 return GSS_S_FAILURE;
128}
129
130static int
131spkm3_checksummer(struct scatterlist *sg, void *data)
132{
133 struct hash_desc *desc = data;
134
135 return crypto_hash_update(desc, sg, sg->length);
136}
137
138/* checksum the plaintext data and hdrlen bytes of the token header */
139s32
140make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
141 unsigned int hdrlen, struct xdr_buf *body,
142 unsigned int body_offset, struct xdr_netobj *cksum)
143{
144 char *cksumname;
145 struct hash_desc desc; /* XXX add to ctx? */
146 struct scatterlist sg[1];
147 int err;
148
149 switch (cksumtype) {
150 case CKSUMTYPE_HMAC_MD5:
151 cksumname = "hmac(md5)";
152 break;
153 default:
154 dprintk("RPC: spkm3_make_checksum:"
155 " unsupported checksum %d", cksumtype);
156 return GSS_S_FAILURE;
157 }
158
159 if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE;
160
161 desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
162 if (IS_ERR(desc.tfm))
163 return GSS_S_FAILURE;
164 cksum->len = crypto_hash_digestsize(desc.tfm);
165 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
166
167 err = crypto_hash_setkey(desc.tfm, key->data, key->len);
168 if (err)
169 goto out;
170
171 err = crypto_hash_init(&desc);
172 if (err)
173 goto out;
174
175 sg_init_one(sg, header, hdrlen);
176 crypto_hash_update(&desc, sg, sg->length);
177
178 xdr_process_buf(body, body_offset, body->len - body_offset,
179 spkm3_checksummer, &desc);
180 crypto_hash_final(&desc, cksum->data);
181
182out:
183 crypto_free_hash(desc.tfm);
184
185 return err ? GSS_S_FAILURE : 0;
186}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
deleted file mode 100644
index a99825d7caa0..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ /dev/null
@@ -1,267 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_token.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/random.h>
41#include <linux/crypto.h>
42
43#ifdef RPC_DEBUG
44# define RPCDBG_FACILITY RPCDBG_AUTH
45#endif
46
47/*
48 * asn1_bitstring_len()
49 *
50 * calculate the asn1 bitstring length of the xdr_netobject
51 */
52void
53asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
54{
55 int i, zbit = 0,elen = in->len;
56 char *ptr;
57
58 ptr = &in->data[in->len -1];
59
60 /* count trailing 0's */
61 for(i = in->len; i > 0; i--) {
62 if (*ptr == 0) {
63 ptr--;
64 elen--;
65 } else
66 break;
67 }
68
69 /* count number of 0 bits in final octet */
70 ptr = &in->data[elen - 1];
71 for(i = 0; i < 8; i++) {
72 short mask = 0x01;
73
74 if (!((mask << i) & *ptr))
75 zbit++;
76 else
77 break;
78 }
79 *enclen = elen;
80 *zerobits = zbit;
81}
82
83/*
84 * decode_asn1_bitstring()
85 *
86 * decode a bitstring into a buffer of the expected length.
87 * enclen = bit string length
88 * explen = expected length (define in rfc)
89 */
90int
91decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
92{
93 if (!(out->data = kzalloc(explen,GFP_NOFS)))
94 return 0;
95 out->len = explen;
96 memcpy(out->data, in, enclen);
97 return 1;
98}
99
100/*
101 * SPKMInnerContextToken choice SPKM_MIC asn1 token layout
102 *
103 * contextid is always 16 bytes plain data. max asn1 bitstring len = 17.
104 *
105 * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum)
106 *
107 * pos value
108 * ----------
109 * [0] a4 SPKM-MIC tag
110 * [1] ?? innertoken length (max 44)
111 *
112 *
113 * tok_hdr piece of checksum data starts here
114 *
115 * the maximum mic-header len = 9 + 17 = 26
116 * mic-header
117 * ----------
118 * [2] 30 SEQUENCE tag
119 * [3] ?? mic-header length: (max 23) = TokenID + ContextID
120 *
121 * TokenID - all fields constant and can be hardcoded
122 * -------
123 * [4] 02 Type 2
124 * [5] 02 Length 2
125 * [6][7] 01 01 TokenID (SPKM_MIC_TOK)
126 *
127 * ContextID - encoded length not constant, calculated
128 * ---------
129 * [8] 03 Type 3
130 * [9] ?? encoded length
131 * [10] ?? ctxzbit
132 * [11] contextid
133 *
134 * mic_header piece of checksum data ends here.
135 *
136 * int-cksum - encoded length not constant, calculated
137 * ---------
138 * [??] 03 Type 3
139 * [??] ?? encoded length
140 * [??] ?? md5zbit
141 * [??] int-cksum (NID_md5 = 16)
142 *
143 * maximum SPKM-MIC innercontext token length =
144 * 10 + encoded contextid_size(17 max) + 2 + encoded
145 * cksum_size (17 maxfor NID_md5) = 46
146 */
147
148/*
149 * spkm3_mic_header()
150 *
151 * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
152 * elen: 16 byte context id asn1 bitstring encoded length
153 */
154void
155spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
156{
157 char *hptr = *hdrbuf;
158 char *top = *hdrbuf;
159
160 *(u8 *)hptr++ = 0x30;
161 *(u8 *)hptr++ = elen + 7; /* on the wire header length */
162
163 /* tokenid */
164 *(u8 *)hptr++ = 0x02;
165 *(u8 *)hptr++ = 0x02;
166 *(u8 *)hptr++ = 0x01;
167 *(u8 *)hptr++ = 0x01;
168
169 /* coniextid */
170 *(u8 *)hptr++ = 0x03;
171 *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */
172 *(u8 *)hptr++ = zbit;
173 memcpy(hptr, ctxdata, elen);
174 hptr += elen;
175 *hdrlen = hptr - top;
176}
177
178/*
179 * spkm3_mic_innercontext_token()
180 *
181 * *tokp points to the beginning of the SPKM_MIC token described
182 * in rfc 2025, section 3.2.1:
183 *
184 * toklen is the inner token length
185 */
186void
187spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
188{
189 unsigned char *ict = *tokp;
190
191 *(u8 *)ict++ = 0xa4;
192 *(u8 *)ict++ = toklen;
193 memcpy(ict, mic_hdr->data, mic_hdr->len);
194 ict += mic_hdr->len;
195
196 *(u8 *)ict++ = 0x03;
197 *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */
198 *(u8 *)ict++ = md5zbit;
199 memcpy(ict, md5cksum->data, md5elen);
200}
201
202u32
203spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum)
204{
205 struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL};
206 unsigned char *ptr = *tokp;
207 int ctxelen;
208 u32 ret = GSS_S_DEFECTIVE_TOKEN;
209
210 /* spkm3 innercontext token preamble */
211 if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) {
212 dprintk("RPC: BAD SPKM ictoken preamble\n");
213 goto out;
214 }
215
216 *mic_hdrlen = ptr[3];
217
218 /* token type */
219 if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) {
220 dprintk("RPC: BAD asn1 SPKM3 token type\n");
221 goto out;
222 }
223
224 /* only support SPKM_MIC_TOK */
225 if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
226 dprintk("RPC: ERROR unsupported SPKM3 token\n");
227 goto out;
228 }
229
230 /* contextid */
231 if (ptr[8] != 0x03) {
232 dprintk("RPC: BAD SPKM3 asn1 context-id type\n");
233 goto out;
234 }
235
236 ctxelen = ptr[9];
237 if (ctxelen > 17) { /* length includes asn1 zbit octet */
238 dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen);
239 goto out;
240 }
241
242 /* ignore ptr[10] */
243
244 if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16))
245 goto out;
246
247 /*
248 * in the current implementation: the optional int-alg is not present
249 * so the default int-alg (md5) is used the optional snd-seq field is
250 * also not present
251 */
252
253 if (*mic_hdrlen != 6 + ctxelen) {
254 dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only "
255 "support default int-alg (should be absent) "
256 "and do not support snd-seq\n", *mic_hdrlen);
257 goto out;
258 }
259 /* checksum */
260 *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */
261
262 ret = GSS_S_COMPLETE;
263out:
264 kfree(spkm3_ctx_id.data);
265 return ret;
266}
267
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
deleted file mode 100644
index cc21ee860bb6..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_unseal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/crypto.h>
41
42#ifdef RPC_DEBUG
43# define RPCDBG_FACILITY RPCDBG_AUTH
44#endif
45
46/*
47 * spkm3_read_token()
48 *
49 * only SPKM_MIC_TOK with md5 intg-alg is supported
50 */
51u32
52spkm3_read_token(struct spkm3_ctx *ctx,
53 struct xdr_netobj *read_token, /* checksum */
54 struct xdr_buf *message_buffer, /* signbuf */
55 int toktype)
56{
57 s32 checksum_type;
58 s32 code;
59 struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
60 char cksumdata[16];
61 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
62 unsigned char *ptr = (unsigned char *)read_token->data;
63 unsigned char *cksum;
64 int bodysize, md5elen;
65 int mic_hdrlen;
66 u32 ret = GSS_S_DEFECTIVE_TOKEN;
67
68 if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
69 &bodysize, &ptr, read_token->len))
70 goto out;
71
72 /* decode the token */
73
74 if (toktype != SPKM_MIC_TOK) {
75 dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
76 goto out;
77 }
78
79 if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
80 goto out;
81
82 if (*cksum++ != 0x03) {
83 dprintk("RPC: spkm3_read_token BAD checksum type\n");
84 goto out;
85 }
86 md5elen = *cksum++;
87 cksum++; /* move past the zbit */
88
89 if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
90 goto out;
91
92 /* HARD CODED FOR MD5 */
93
94 /* compute the checksum of the message.
95 * ptr + 2 = start of header piece of checksum
96 * mic_hdrlen + 2 = length of header piece of checksum
97 */
98 ret = GSS_S_DEFECTIVE_TOKEN;
99 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
100 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
101 "algorithm\n");
102 goto out;
103 }
104
105 checksum_type = CKSUMTYPE_HMAC_MD5;
106
107 code = make_spkm3_checksum(checksum_type,
108 &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
109 message_buffer, 0, &md5cksum);
110
111 if (code)
112 goto out;
113
114 ret = GSS_S_BAD_SIG;
115 code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
116 if (code) {
117 dprintk("RPC: bad MIC checksum\n");
118 goto out;
119 }
120
121
122 /* XXX: need to add expiration and sequencing */
123 ret = GSS_S_COMPLETE;
124out:
125 kfree(wire_cksum.data);
126 return ret;
127}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index cc385b3a59c2..dec2a6fc7c12 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -964,7 +964,7 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
964 if (rqstp->rq_gssclient == NULL) 964 if (rqstp->rq_gssclient == NULL)
965 return SVC_DENIED; 965 return SVC_DENIED;
966 stat = svcauth_unix_set_client(rqstp); 966 stat = svcauth_unix_set_client(rqstp);
967 if (stat == SVC_DROP) 967 if (stat == SVC_DROP || stat == SVC_CLOSE)
968 return stat; 968 return stat;
969 return SVC_OK; 969 return SVC_OK;
970} 970}
@@ -1018,7 +1018,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1018 return SVC_DENIED; 1018 return SVC_DENIED;
1019 memset(&rsikey, 0, sizeof(rsikey)); 1019 memset(&rsikey, 0, sizeof(rsikey));
1020 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) 1020 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
1021 return SVC_DROP; 1021 return SVC_CLOSE;
1022 *authp = rpc_autherr_badverf; 1022 *authp = rpc_autherr_badverf;
1023 if (svc_safe_getnetobj(argv, &tmpobj)) { 1023 if (svc_safe_getnetobj(argv, &tmpobj)) {
1024 kfree(rsikey.in_handle.data); 1024 kfree(rsikey.in_handle.data);
@@ -1026,38 +1026,35 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1026 } 1026 }
1027 if (dup_netobj(&rsikey.in_token, &tmpobj)) { 1027 if (dup_netobj(&rsikey.in_token, &tmpobj)) {
1028 kfree(rsikey.in_handle.data); 1028 kfree(rsikey.in_handle.data);
1029 return SVC_DROP; 1029 return SVC_CLOSE;
1030 } 1030 }
1031 1031
1032 /* Perform upcall, or find upcall result: */ 1032 /* Perform upcall, or find upcall result: */
1033 rsip = rsi_lookup(&rsikey); 1033 rsip = rsi_lookup(&rsikey);
1034 rsi_free(&rsikey); 1034 rsi_free(&rsikey);
1035 if (!rsip) 1035 if (!rsip)
1036 return SVC_DROP; 1036 return SVC_CLOSE;
1037 switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { 1037 if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0)
1038 case -EAGAIN:
1039 case -ETIMEDOUT:
1040 case -ENOENT:
1041 /* No upcall result: */ 1038 /* No upcall result: */
1042 return SVC_DROP; 1039 return SVC_CLOSE;
1043 case 0: 1040
1044 ret = SVC_DROP; 1041 ret = SVC_CLOSE;
1045 /* Got an answer to the upcall; use it: */ 1042 /* Got an answer to the upcall; use it: */
1046 if (gss_write_init_verf(rqstp, rsip)) 1043 if (gss_write_init_verf(rqstp, rsip))
1047 goto out; 1044 goto out;
1048 if (resv->iov_len + 4 > PAGE_SIZE) 1045 if (resv->iov_len + 4 > PAGE_SIZE)
1049 goto out; 1046 goto out;
1050 svc_putnl(resv, RPC_SUCCESS); 1047 svc_putnl(resv, RPC_SUCCESS);
1051 if (svc_safe_putnetobj(resv, &rsip->out_handle)) 1048 if (svc_safe_putnetobj(resv, &rsip->out_handle))
1052 goto out; 1049 goto out;
1053 if (resv->iov_len + 3 * 4 > PAGE_SIZE) 1050 if (resv->iov_len + 3 * 4 > PAGE_SIZE)
1054 goto out; 1051 goto out;
1055 svc_putnl(resv, rsip->major_status); 1052 svc_putnl(resv, rsip->major_status);
1056 svc_putnl(resv, rsip->minor_status); 1053 svc_putnl(resv, rsip->minor_status);
1057 svc_putnl(resv, GSS_SEQ_WIN); 1054 svc_putnl(resv, GSS_SEQ_WIN);
1058 if (svc_safe_putnetobj(resv, &rsip->out_token)) 1055 if (svc_safe_putnetobj(resv, &rsip->out_token))
1059 goto out; 1056 goto out;
1060 } 1057
1061 ret = SVC_COMPLETE; 1058 ret = SVC_COMPLETE;
1062out: 1059out:
1063 cache_put(&rsip->h, &rsi_cache); 1060 cache_put(&rsip->h, &rsi_cache);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7dce81a926c5..e433e7580e27 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,15 +33,16 @@
33#include <linux/sunrpc/cache.h> 33#include <linux/sunrpc/cache.h>
34#include <linux/sunrpc/stats.h> 34#include <linux/sunrpc/stats.h>
35#include <linux/sunrpc/rpc_pipe_fs.h> 35#include <linux/sunrpc/rpc_pipe_fs.h>
36#include "netns.h"
36 37
37#define RPCDBG_FACILITY RPCDBG_CACHE 38#define RPCDBG_FACILITY RPCDBG_CACHE
38 39
39static int cache_defer_req(struct cache_req *req, struct cache_head *item); 40static void cache_defer_req(struct cache_req *req, struct cache_head *item);
40static void cache_revisit_request(struct cache_head *item); 41static void cache_revisit_request(struct cache_head *item);
41 42
42static void cache_init(struct cache_head *h) 43static void cache_init(struct cache_head *h)
43{ 44{
44 time_t now = get_seconds(); 45 time_t now = seconds_since_boot();
45 h->next = NULL; 46 h->next = NULL;
46 h->flags = 0; 47 h->flags = 0;
47 kref_init(&h->ref); 48 kref_init(&h->ref);
@@ -51,7 +52,7 @@ static void cache_init(struct cache_head *h)
51 52
52static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) 53static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
53{ 54{
54 return (h->expiry_time < get_seconds()) || 55 return (h->expiry_time < seconds_since_boot()) ||
55 (detail->flush_time > h->last_refresh); 56 (detail->flush_time > h->last_refresh);
56} 57}
57 58
@@ -126,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
126static void cache_fresh_locked(struct cache_head *head, time_t expiry) 127static void cache_fresh_locked(struct cache_head *head, time_t expiry)
127{ 128{
128 head->expiry_time = expiry; 129 head->expiry_time = expiry;
129 head->last_refresh = get_seconds(); 130 head->last_refresh = seconds_since_boot();
130 set_bit(CACHE_VALID, &head->flags); 131 set_bit(CACHE_VALID, &head->flags);
131} 132}
132 133
@@ -237,7 +238,7 @@ int cache_check(struct cache_detail *detail,
237 238
238 /* now see if we want to start an upcall */ 239 /* now see if we want to start an upcall */
239 refresh_age = (h->expiry_time - h->last_refresh); 240 refresh_age = (h->expiry_time - h->last_refresh);
240 age = get_seconds() - h->last_refresh; 241 age = seconds_since_boot() - h->last_refresh;
241 242
242 if (rqstp == NULL) { 243 if (rqstp == NULL) {
243 if (rv == -EAGAIN) 244 if (rv == -EAGAIN)
@@ -252,7 +253,7 @@ int cache_check(struct cache_detail *detail,
252 cache_revisit_request(h); 253 cache_revisit_request(h);
253 if (rv == -EAGAIN) { 254 if (rv == -EAGAIN) {
254 set_bit(CACHE_NEGATIVE, &h->flags); 255 set_bit(CACHE_NEGATIVE, &h->flags);
255 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); 256 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
256 cache_fresh_unlocked(h, detail); 257 cache_fresh_unlocked(h, detail);
257 rv = -ENOENT; 258 rv = -ENOENT;
258 } 259 }
@@ -267,7 +268,8 @@ int cache_check(struct cache_detail *detail,
267 } 268 }
268 269
269 if (rv == -EAGAIN) { 270 if (rv == -EAGAIN) {
270 if (cache_defer_req(rqstp, h) < 0) { 271 cache_defer_req(rqstp, h);
272 if (!test_bit(CACHE_PENDING, &h->flags)) {
271 /* Request is not deferred */ 273 /* Request is not deferred */
272 rv = cache_is_valid(detail, h); 274 rv = cache_is_valid(detail, h);
273 if (rv == -EAGAIN) 275 if (rv == -EAGAIN)
@@ -387,11 +389,11 @@ static int cache_clean(void)
387 return -1; 389 return -1;
388 } 390 }
389 current_detail = list_entry(next, struct cache_detail, others); 391 current_detail = list_entry(next, struct cache_detail, others);
390 if (current_detail->nextcheck > get_seconds()) 392 if (current_detail->nextcheck > seconds_since_boot())
391 current_index = current_detail->hash_size; 393 current_index = current_detail->hash_size;
392 else { 394 else {
393 current_index = 0; 395 current_index = 0;
394 current_detail->nextcheck = get_seconds()+30*60; 396 current_detail->nextcheck = seconds_since_boot()+30*60;
395 } 397 }
396 } 398 }
397 399
@@ -476,7 +478,7 @@ EXPORT_SYMBOL_GPL(cache_flush);
476void cache_purge(struct cache_detail *detail) 478void cache_purge(struct cache_detail *detail)
477{ 479{
478 detail->flush_time = LONG_MAX; 480 detail->flush_time = LONG_MAX;
479 detail->nextcheck = get_seconds(); 481 detail->nextcheck = seconds_since_boot();
480 cache_flush(); 482 cache_flush();
481 detail->flush_time = 1; 483 detail->flush_time = 1;
482} 484}
@@ -505,81 +507,155 @@ EXPORT_SYMBOL_GPL(cache_purge);
505 507
506static DEFINE_SPINLOCK(cache_defer_lock); 508static DEFINE_SPINLOCK(cache_defer_lock);
507static LIST_HEAD(cache_defer_list); 509static LIST_HEAD(cache_defer_list);
508static struct list_head cache_defer_hash[DFR_HASHSIZE]; 510static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
509static int cache_defer_cnt; 511static int cache_defer_cnt;
510 512
511static int cache_defer_req(struct cache_req *req, struct cache_head *item) 513static void __unhash_deferred_req(struct cache_deferred_req *dreq)
514{
515 hlist_del_init(&dreq->hash);
516 if (!list_empty(&dreq->recent)) {
517 list_del_init(&dreq->recent);
518 cache_defer_cnt--;
519 }
520}
521
522static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
512{ 523{
513 struct cache_deferred_req *dreq, *discard;
514 int hash = DFR_HASH(item); 524 int hash = DFR_HASH(item);
515 525
516 if (cache_defer_cnt >= DFR_MAX) { 526 INIT_LIST_HEAD(&dreq->recent);
517 /* too much in the cache, randomly drop this one, 527 hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
518 * or continue and drop the oldest below 528}
519 */ 529
520 if (net_random()&1) 530static void setup_deferral(struct cache_deferred_req *dreq,
521 return -ENOMEM; 531 struct cache_head *item,
522 } 532 int count_me)
523 dreq = req->defer(req); 533{
524 if (dreq == NULL)
525 return -ENOMEM;
526 534
527 dreq->item = item; 535 dreq->item = item;
528 536
529 spin_lock(&cache_defer_lock); 537 spin_lock(&cache_defer_lock);
530 538
531 list_add(&dreq->recent, &cache_defer_list); 539 __hash_deferred_req(dreq, item);
532
533 if (cache_defer_hash[hash].next == NULL)
534 INIT_LIST_HEAD(&cache_defer_hash[hash]);
535 list_add(&dreq->hash, &cache_defer_hash[hash]);
536 540
537 /* it is in, now maybe clean up */ 541 if (count_me) {
538 discard = NULL; 542 cache_defer_cnt++;
539 if (++cache_defer_cnt > DFR_MAX) { 543 list_add(&dreq->recent, &cache_defer_list);
540 discard = list_entry(cache_defer_list.prev,
541 struct cache_deferred_req, recent);
542 list_del_init(&discard->recent);
543 list_del_init(&discard->hash);
544 cache_defer_cnt--;
545 } 544 }
545
546 spin_unlock(&cache_defer_lock); 546 spin_unlock(&cache_defer_lock);
547 547
548}
549
550struct thread_deferred_req {
551 struct cache_deferred_req handle;
552 struct completion completion;
553};
554
555static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
556{
557 struct thread_deferred_req *dr =
558 container_of(dreq, struct thread_deferred_req, handle);
559 complete(&dr->completion);
560}
561
562static void cache_wait_req(struct cache_req *req, struct cache_head *item)
563{
564 struct thread_deferred_req sleeper;
565 struct cache_deferred_req *dreq = &sleeper.handle;
566
567 sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
568 dreq->revisit = cache_restart_thread;
569
570 setup_deferral(dreq, item, 0);
571
572 if (!test_bit(CACHE_PENDING, &item->flags) ||
573 wait_for_completion_interruptible_timeout(
574 &sleeper.completion, req->thread_wait) <= 0) {
575 /* The completion wasn't completed, so we need
576 * to clean up
577 */
578 spin_lock(&cache_defer_lock);
579 if (!hlist_unhashed(&sleeper.handle.hash)) {
580 __unhash_deferred_req(&sleeper.handle);
581 spin_unlock(&cache_defer_lock);
582 } else {
583 /* cache_revisit_request already removed
584 * this from the hash table, but hasn't
585 * called ->revisit yet. It will very soon
586 * and we need to wait for it.
587 */
588 spin_unlock(&cache_defer_lock);
589 wait_for_completion(&sleeper.completion);
590 }
591 }
592}
593
594static void cache_limit_defers(void)
595{
596 /* Make sure we haven't exceed the limit of allowed deferred
597 * requests.
598 */
599 struct cache_deferred_req *discard = NULL;
600
601 if (cache_defer_cnt <= DFR_MAX)
602 return;
603
604 spin_lock(&cache_defer_lock);
605
606 /* Consider removing either the first or the last */
607 if (cache_defer_cnt > DFR_MAX) {
608 if (net_random() & 1)
609 discard = list_entry(cache_defer_list.next,
610 struct cache_deferred_req, recent);
611 else
612 discard = list_entry(cache_defer_list.prev,
613 struct cache_deferred_req, recent);
614 __unhash_deferred_req(discard);
615 }
616 spin_unlock(&cache_defer_lock);
548 if (discard) 617 if (discard)
549 /* there was one too many */
550 discard->revisit(discard, 1); 618 discard->revisit(discard, 1);
619}
551 620
552 if (!test_bit(CACHE_PENDING, &item->flags)) { 621static void cache_defer_req(struct cache_req *req, struct cache_head *item)
553 /* must have just been validated... */ 622{
554 cache_revisit_request(item); 623 struct cache_deferred_req *dreq;
555 return -EAGAIN; 624
625 if (req->thread_wait) {
626 cache_wait_req(req, item);
627 if (!test_bit(CACHE_PENDING, &item->flags))
628 return;
556 } 629 }
557 return 0; 630 dreq = req->defer(req);
631 if (dreq == NULL)
632 return;
633 setup_deferral(dreq, item, 1);
634 if (!test_bit(CACHE_PENDING, &item->flags))
635 /* Bit could have been cleared before we managed to
636 * set up the deferral, so need to revisit just in case
637 */
638 cache_revisit_request(item);
639
640 cache_limit_defers();
558} 641}
559 642
560static void cache_revisit_request(struct cache_head *item) 643static void cache_revisit_request(struct cache_head *item)
561{ 644{
562 struct cache_deferred_req *dreq; 645 struct cache_deferred_req *dreq;
563 struct list_head pending; 646 struct list_head pending;
564 647 struct hlist_node *lp, *tmp;
565 struct list_head *lp;
566 int hash = DFR_HASH(item); 648 int hash = DFR_HASH(item);
567 649
568 INIT_LIST_HEAD(&pending); 650 INIT_LIST_HEAD(&pending);
569 spin_lock(&cache_defer_lock); 651 spin_lock(&cache_defer_lock);
570 652
571 lp = cache_defer_hash[hash].next; 653 hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
572 if (lp) { 654 if (dreq->item == item) {
573 while (lp != &cache_defer_hash[hash]) { 655 __unhash_deferred_req(dreq);
574 dreq = list_entry(lp, struct cache_deferred_req, hash); 656 list_add(&dreq->recent, &pending);
575 lp = lp->next;
576 if (dreq->item == item) {
577 list_del_init(&dreq->hash);
578 list_move(&dreq->recent, &pending);
579 cache_defer_cnt--;
580 }
581 } 657 }
582 } 658
583 spin_unlock(&cache_defer_lock); 659 spin_unlock(&cache_defer_lock);
584 660
585 while (!list_empty(&pending)) { 661 while (!list_empty(&pending)) {
@@ -600,9 +676,8 @@ void cache_clean_deferred(void *owner)
600 676
601 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { 677 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
602 if (dreq->owner == owner) { 678 if (dreq->owner == owner) {
603 list_del_init(&dreq->hash); 679 __unhash_deferred_req(dreq);
604 list_move(&dreq->recent, &pending); 680 list_add(&dreq->recent, &pending);
605 cache_defer_cnt--;
606 } 681 }
607 } 682 }
608 spin_unlock(&cache_defer_lock); 683 spin_unlock(&cache_defer_lock);
@@ -901,7 +976,7 @@ static int cache_release(struct inode *inode, struct file *filp,
901 filp->private_data = NULL; 976 filp->private_data = NULL;
902 kfree(rp); 977 kfree(rp);
903 978
904 cd->last_close = get_seconds(); 979 cd->last_close = seconds_since_boot();
905 atomic_dec(&cd->readers); 980 atomic_dec(&cd->readers);
906 } 981 }
907 module_put(cd->owner); 982 module_put(cd->owner);
@@ -1014,6 +1089,23 @@ static void warn_no_listener(struct cache_detail *detail)
1014 } 1089 }
1015} 1090}
1016 1091
1092static bool cache_listeners_exist(struct cache_detail *detail)
1093{
1094 if (atomic_read(&detail->readers))
1095 return true;
1096 if (detail->last_close == 0)
1097 /* This cache was never opened */
1098 return false;
1099 if (detail->last_close < seconds_since_boot() - 30)
1100 /*
1101 * We allow for the possibility that someone might
1102 * restart a userspace daemon without restarting the
1103 * server; but after 30 seconds, we give up.
1104 */
1105 return false;
1106 return true;
1107}
1108
1017/* 1109/*
1018 * register an upcall request to user-space and queue it up for read() by the 1110 * register an upcall request to user-space and queue it up for read() by the
1019 * upcall daemon. 1111 * upcall daemon.
@@ -1032,10 +1124,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
1032 char *bp; 1124 char *bp;
1033 int len; 1125 int len;
1034 1126
1035 if (atomic_read(&detail->readers) == 0 && 1127 if (!cache_listeners_exist(detail)) {
1036 detail->last_close < get_seconds() - 30) { 1128 warn_no_listener(detail);
1037 warn_no_listener(detail); 1129 return -EINVAL;
1038 return -EINVAL;
1039 } 1130 }
1040 1131
1041 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1132 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -1094,13 +1185,19 @@ int qword_get(char **bpp, char *dest, int bufsize)
1094 if (bp[0] == '\\' && bp[1] == 'x') { 1185 if (bp[0] == '\\' && bp[1] == 'x') {
1095 /* HEX STRING */ 1186 /* HEX STRING */
1096 bp += 2; 1187 bp += 2;
1097 while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) { 1188 while (len < bufsize) {
1098 int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1189 int h, l;
1099 bp++; 1190
1100 byte <<= 4; 1191 h = hex_to_bin(bp[0]);
1101 byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1192 if (h < 0)
1102 *dest++ = byte; 1193 break;
1103 bp++; 1194
1195 l = hex_to_bin(bp[1]);
1196 if (l < 0)
1197 break;
1198
1199 *dest++ = (h << 4) | l;
1200 bp += 2;
1104 len++; 1201 len++;
1105 } 1202 }
1106 } else { 1203 } else {
@@ -1218,7 +1315,8 @@ static int c_show(struct seq_file *m, void *p)
1218 1315
1219 ifdebug(CACHE) 1316 ifdebug(CACHE)
1220 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", 1317 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
1221 cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); 1318 convert_to_wallclock(cp->expiry_time),
1319 atomic_read(&cp->ref.refcount), cp->flags);
1222 cache_get(cp); 1320 cache_get(cp);
1223 if (cache_check(cd, cp, NULL)) 1321 if (cache_check(cd, cp, NULL))
1224 /* cache_check does a cache_put on failure */ 1322 /* cache_check does a cache_put on failure */
@@ -1284,7 +1382,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
1284 unsigned long p = *ppos; 1382 unsigned long p = *ppos;
1285 size_t len; 1383 size_t len;
1286 1384
1287 sprintf(tbuf, "%lu\n", cd->flush_time); 1385 sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time));
1288 len = strlen(tbuf); 1386 len = strlen(tbuf);
1289 if (p >= len) 1387 if (p >= len)
1290 return 0; 1388 return 0;
@@ -1302,19 +1400,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1302 struct cache_detail *cd) 1400 struct cache_detail *cd)
1303{ 1401{
1304 char tbuf[20]; 1402 char tbuf[20];
1305 char *ep; 1403 char *bp, *ep;
1306 long flushtime; 1404
1307 if (*ppos || count > sizeof(tbuf)-1) 1405 if (*ppos || count > sizeof(tbuf)-1)
1308 return -EINVAL; 1406 return -EINVAL;
1309 if (copy_from_user(tbuf, buf, count)) 1407 if (copy_from_user(tbuf, buf, count))
1310 return -EFAULT; 1408 return -EFAULT;
1311 tbuf[count] = 0; 1409 tbuf[count] = 0;
1312 flushtime = simple_strtoul(tbuf, &ep, 0); 1410 simple_strtoul(tbuf, &ep, 0);
1313 if (*ep && *ep != '\n') 1411 if (*ep && *ep != '\n')
1314 return -EINVAL; 1412 return -EINVAL;
1315 1413
1316 cd->flush_time = flushtime; 1414 bp = tbuf;
1317 cd->nextcheck = get_seconds(); 1415 cd->flush_time = get_expiry(&bp);
1416 cd->nextcheck = seconds_since_boot();
1318 cache_flush(); 1417 cache_flush();
1319 1418
1320 *ppos += count; 1419 *ppos += count;
@@ -1438,8 +1537,10 @@ static const struct file_operations cache_flush_operations_procfs = {
1438 .llseek = no_llseek, 1537 .llseek = no_llseek,
1439}; 1538};
1440 1539
1441static void remove_cache_proc_entries(struct cache_detail *cd) 1540static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
1442{ 1541{
1542 struct sunrpc_net *sn;
1543
1443 if (cd->u.procfs.proc_ent == NULL) 1544 if (cd->u.procfs.proc_ent == NULL)
1444 return; 1545 return;
1445 if (cd->u.procfs.flush_ent) 1546 if (cd->u.procfs.flush_ent)
@@ -1449,15 +1550,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
1449 if (cd->u.procfs.content_ent) 1550 if (cd->u.procfs.content_ent)
1450 remove_proc_entry("content", cd->u.procfs.proc_ent); 1551 remove_proc_entry("content", cd->u.procfs.proc_ent);
1451 cd->u.procfs.proc_ent = NULL; 1552 cd->u.procfs.proc_ent = NULL;
1452 remove_proc_entry(cd->name, proc_net_rpc); 1553 sn = net_generic(net, sunrpc_net_id);
1554 remove_proc_entry(cd->name, sn->proc_net_rpc);
1453} 1555}
1454 1556
1455#ifdef CONFIG_PROC_FS 1557#ifdef CONFIG_PROC_FS
1456static int create_cache_proc_entries(struct cache_detail *cd) 1558static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1457{ 1559{
1458 struct proc_dir_entry *p; 1560 struct proc_dir_entry *p;
1561 struct sunrpc_net *sn;
1459 1562
1460 cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); 1563 sn = net_generic(net, sunrpc_net_id);
1564 cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
1461 if (cd->u.procfs.proc_ent == NULL) 1565 if (cd->u.procfs.proc_ent == NULL)
1462 goto out_nomem; 1566 goto out_nomem;
1463 cd->u.procfs.channel_ent = NULL; 1567 cd->u.procfs.channel_ent = NULL;
@@ -1488,11 +1592,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
1488 } 1592 }
1489 return 0; 1593 return 0;
1490out_nomem: 1594out_nomem:
1491 remove_cache_proc_entries(cd); 1595 remove_cache_proc_entries(cd, net);
1492 return -ENOMEM; 1596 return -ENOMEM;
1493} 1597}
1494#else /* CONFIG_PROC_FS */ 1598#else /* CONFIG_PROC_FS */
1495static int create_cache_proc_entries(struct cache_detail *cd) 1599static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1496{ 1600{
1497 return 0; 1601 return 0;
1498} 1602}
@@ -1503,23 +1607,33 @@ void __init cache_initialize(void)
1503 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); 1607 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
1504} 1608}
1505 1609
1506int cache_register(struct cache_detail *cd) 1610int cache_register_net(struct cache_detail *cd, struct net *net)
1507{ 1611{
1508 int ret; 1612 int ret;
1509 1613
1510 sunrpc_init_cache_detail(cd); 1614 sunrpc_init_cache_detail(cd);
1511 ret = create_cache_proc_entries(cd); 1615 ret = create_cache_proc_entries(cd, net);
1512 if (ret) 1616 if (ret)
1513 sunrpc_destroy_cache_detail(cd); 1617 sunrpc_destroy_cache_detail(cd);
1514 return ret; 1618 return ret;
1515} 1619}
1620
1621int cache_register(struct cache_detail *cd)
1622{
1623 return cache_register_net(cd, &init_net);
1624}
1516EXPORT_SYMBOL_GPL(cache_register); 1625EXPORT_SYMBOL_GPL(cache_register);
1517 1626
1518void cache_unregister(struct cache_detail *cd) 1627void cache_unregister_net(struct cache_detail *cd, struct net *net)
1519{ 1628{
1520 remove_cache_proc_entries(cd); 1629 remove_cache_proc_entries(cd, net);
1521 sunrpc_destroy_cache_detail(cd); 1630 sunrpc_destroy_cache_detail(cd);
1522} 1631}
1632
1633void cache_unregister(struct cache_detail *cd)
1634{
1635 cache_unregister_net(cd, &init_net);
1636}
1523EXPORT_SYMBOL_GPL(cache_unregister); 1637EXPORT_SYMBOL_GPL(cache_unregister);
1524 1638
1525static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, 1639static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa5549079d79..92ce94f5146b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -284,6 +284,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
284 struct rpc_xprt *xprt; 284 struct rpc_xprt *xprt;
285 struct rpc_clnt *clnt; 285 struct rpc_clnt *clnt;
286 struct xprt_create xprtargs = { 286 struct xprt_create xprtargs = {
287 .net = args->net,
287 .ident = args->protocol, 288 .ident = args->protocol,
288 .srcaddr = args->saddress, 289 .srcaddr = args->saddress,
289 .dstaddr = args->address, 290 .dstaddr = args->address,
@@ -988,20 +989,26 @@ call_refreshresult(struct rpc_task *task)
988 dprint_status(task); 989 dprint_status(task);
989 990
990 task->tk_status = 0; 991 task->tk_status = 0;
991 task->tk_action = call_allocate; 992 task->tk_action = call_refresh;
992 if (status >= 0 && rpcauth_uptodatecred(task))
993 return;
994 switch (status) { 993 switch (status) {
995 case -EACCES: 994 case 0:
996 rpc_exit(task, -EACCES); 995 if (rpcauth_uptodatecred(task))
997 return; 996 task->tk_action = call_allocate;
998 case -ENOMEM:
999 rpc_exit(task, -ENOMEM);
1000 return; 997 return;
1001 case -ETIMEDOUT: 998 case -ETIMEDOUT:
1002 rpc_delay(task, 3*HZ); 999 rpc_delay(task, 3*HZ);
1000 case -EAGAIN:
1001 status = -EACCES;
1002 if (!task->tk_cred_retry)
1003 break;
1004 task->tk_cred_retry--;
1005 dprintk("RPC: %5u %s: retry refresh creds\n",
1006 task->tk_pid, __func__);
1007 return;
1003 } 1008 }
1004 task->tk_action = call_refresh; 1009 dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
1010 task->tk_pid, __func__, status);
1011 rpc_exit(task, status);
1005} 1012}
1006 1013
1007/* 1014/*
@@ -1675,7 +1682,7 @@ rpc_verify_header(struct rpc_task *task)
1675 rpcauth_invalcred(task); 1682 rpcauth_invalcred(task);
1676 /* Ensure we obtain a new XID! */ 1683 /* Ensure we obtain a new XID! */
1677 xprt_release(task); 1684 xprt_release(task);
1678 task->tk_action = call_refresh; 1685 task->tk_action = call_reserve;
1679 goto out_retry; 1686 goto out_retry;
1680 case RPC_AUTH_BADCRED: 1687 case RPC_AUTH_BADCRED:
1681 case RPC_AUTH_BADVERF: 1688 case RPC_AUTH_BADVERF:
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
new file mode 100644
index 000000000000..d013bf211cae
--- /dev/null
+++ b/net/sunrpc/netns.h
@@ -0,0 +1,19 @@
1#ifndef __SUNRPC_NETNS_H__
2#define __SUNRPC_NETNS_H__
3
4#include <net/net_namespace.h>
5#include <net/netns/generic.h>
6
7struct cache_detail;
8
9struct sunrpc_net {
10 struct proc_dir_entry *proc_net_rpc;
11 struct cache_detail *ip_map_cache;
12};
13
14extern int sunrpc_net_id;
15
16int ip_map_cache_create(struct net *);
17void ip_map_cache_destroy(struct net *);
18
19#endif
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 52f252432144..10a17a37ec4e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -28,7 +28,7 @@
28#include <linux/sunrpc/rpc_pipe_fs.h> 28#include <linux/sunrpc/rpc_pipe_fs.h>
29#include <linux/sunrpc/cache.h> 29#include <linux/sunrpc/cache.h>
30 30
31static struct vfsmount *rpc_mount __read_mostly; 31static struct vfsmount *rpc_mnt __read_mostly;
32static int rpc_mount_count; 32static int rpc_mount_count;
33 33
34static struct file_system_type rpc_pipe_fs_type; 34static struct file_system_type rpc_pipe_fs_type;
@@ -417,16 +417,16 @@ struct vfsmount *rpc_get_mount(void)
417{ 417{
418 int err; 418 int err;
419 419
420 err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); 420 err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count);
421 if (err != 0) 421 if (err != 0)
422 return ERR_PTR(err); 422 return ERR_PTR(err);
423 return rpc_mount; 423 return rpc_mnt;
424} 424}
425EXPORT_SYMBOL_GPL(rpc_get_mount); 425EXPORT_SYMBOL_GPL(rpc_get_mount);
426 426
427void rpc_put_mount(void) 427void rpc_put_mount(void)
428{ 428{
429 simple_release_fs(&rpc_mount, &rpc_mount_count); 429 simple_release_fs(&rpc_mnt, &rpc_mount_count);
430} 430}
431EXPORT_SYMBOL_GPL(rpc_put_mount); 431EXPORT_SYMBOL_GPL(rpc_put_mount);
432 432
@@ -445,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
445 struct inode *inode = new_inode(sb); 445 struct inode *inode = new_inode(sb);
446 if (!inode) 446 if (!inode)
447 return NULL; 447 return NULL;
448 inode->i_ino = get_next_ino();
448 inode->i_mode = mode; 449 inode->i_mode = mode;
449 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 450 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
450 switch(mode & S_IFMT) { 451 switch(mode & S_IFMT) {
@@ -1017,17 +1018,17 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1017 return 0; 1018 return 0;
1018} 1019}
1019 1020
1020static int 1021static struct dentry *
1021rpc_get_sb(struct file_system_type *fs_type, 1022rpc_mount(struct file_system_type *fs_type,
1022 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1023 int flags, const char *dev_name, void *data)
1023{ 1024{
1024 return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt); 1025 return mount_single(fs_type, flags, data, rpc_fill_super);
1025} 1026}
1026 1027
1027static struct file_system_type rpc_pipe_fs_type = { 1028static struct file_system_type rpc_pipe_fs_type = {
1028 .owner = THIS_MODULE, 1029 .owner = THIS_MODULE,
1029 .name = "rpc_pipefs", 1030 .name = "rpc_pipefs",
1030 .get_sb = rpc_get_sb, 1031 .mount = rpc_mount,
1031 .kill_sb = kill_litter_super, 1032 .kill_sb = kill_litter_super,
1032}; 1033};
1033 1034
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index dac219a56ae1..fa6d7ca2c851 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -177,6 +177,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex);
177static int rpcb_create_local(void) 177static int rpcb_create_local(void)
178{ 178{
179 struct rpc_create_args args = { 179 struct rpc_create_args args = {
180 .net = &init_net,
180 .protocol = XPRT_TRANSPORT_TCP, 181 .protocol = XPRT_TRANSPORT_TCP,
181 .address = (struct sockaddr *)&rpcb_inaddr_loopback, 182 .address = (struct sockaddr *)&rpcb_inaddr_loopback,
182 .addrsize = sizeof(rpcb_inaddr_loopback), 183 .addrsize = sizeof(rpcb_inaddr_loopback),
@@ -211,8 +212,9 @@ static int rpcb_create_local(void)
211 */ 212 */
212 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); 213 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
213 if (IS_ERR(clnt4)) { 214 if (IS_ERR(clnt4)) {
214 dprintk("RPC: failed to create local rpcbind v4 " 215 dprintk("RPC: failed to bind second program to "
215 "cleint (errno %ld).\n", PTR_ERR(clnt4)); 216 "rpcbind v4 client (errno %ld).\n",
217 PTR_ERR(clnt4));
216 clnt4 = NULL; 218 clnt4 = NULL;
217 } 219 }
218 220
@@ -228,6 +230,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
228 size_t salen, int proto, u32 version) 230 size_t salen, int proto, u32 version)
229{ 231{
230 struct rpc_create_args args = { 232 struct rpc_create_args args = {
233 .net = &init_net,
231 .protocol = proto, 234 .protocol = proto,
232 .address = srvaddr, 235 .address = srvaddr,
233 .addrsize = salen, 236 .addrsize = salen,
@@ -247,7 +250,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
247 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); 250 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
248 break; 251 break;
249 default: 252 default:
250 return NULL; 253 return ERR_PTR(-EAFNOSUPPORT);
251 } 254 }
252 255
253 return rpc_create(&args); 256 return rpc_create(&args);
@@ -475,57 +478,6 @@ int rpcb_v4_register(const u32 program, const u32 version,
475 return -EAFNOSUPPORT; 478 return -EAFNOSUPPORT;
476} 479}
477 480
478/**
479 * rpcb_getport_sync - obtain the port for an RPC service on a given host
480 * @sin: address of remote peer
481 * @prog: RPC program number to bind
482 * @vers: RPC version number to bind
483 * @prot: transport protocol to use to make this request
484 *
485 * Return value is the requested advertised port number,
486 * or a negative errno value.
487 *
488 * Called from outside the RPC client in a synchronous task context.
489 * Uses default timeout parameters specified by underlying transport.
490 *
491 * XXX: Needs to support IPv6
492 */
493int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
494{
495 struct rpcbind_args map = {
496 .r_prog = prog,
497 .r_vers = vers,
498 .r_prot = prot,
499 .r_port = 0,
500 };
501 struct rpc_message msg = {
502 .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
503 .rpc_argp = &map,
504 .rpc_resp = &map,
505 };
506 struct rpc_clnt *rpcb_clnt;
507 int status;
508
509 dprintk("RPC: %s(%pI4, %u, %u, %d)\n",
510 __func__, &sin->sin_addr.s_addr, prog, vers, prot);
511
512 rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
513 sizeof(*sin), prot, RPCBVERS_2);
514 if (IS_ERR(rpcb_clnt))
515 return PTR_ERR(rpcb_clnt);
516
517 status = rpc_call_sync(rpcb_clnt, &msg, 0);
518 rpc_shutdown_client(rpcb_clnt);
519
520 if (status >= 0) {
521 if (map.r_port != 0)
522 return map.r_port;
523 status = -EACCES;
524 }
525 return status;
526}
527EXPORT_SYMBOL_GPL(rpcb_getport_sync);
528
529static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) 481static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
530{ 482{
531 struct rpc_message msg = { 483 struct rpc_message msg = {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index aa5dbda6608c..243fc09b164e 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -908,7 +908,7 @@ static int rpciod_start(void)
908 * Create the rpciod thread and wait for it to start. 908 * Create the rpciod thread and wait for it to start.
909 */ 909 */
910 dprintk("RPC: creating workqueue rpciod\n"); 910 dprintk("RPC: creating workqueue rpciod\n");
911 wq = create_workqueue("rpciod"); 911 wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
912 rpciod_workqueue = wq; 912 rpciod_workqueue = wq;
913 return rpciod_workqueue != NULL; 913 return rpciod_workqueue != NULL;
914} 914}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index ea1046f3f9a3..80df89d957ba 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -22,11 +22,10 @@
22#include <linux/sunrpc/clnt.h> 22#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/svcsock.h> 23#include <linux/sunrpc/svcsock.h>
24#include <linux/sunrpc/metrics.h> 24#include <linux/sunrpc/metrics.h>
25#include <net/net_namespace.h>
26 25
27#define RPCDBG_FACILITY RPCDBG_MISC 26#include "netns.h"
28 27
29struct proc_dir_entry *proc_net_rpc = NULL; 28#define RPCDBG_FACILITY RPCDBG_MISC
30 29
31/* 30/*
32 * Get RPC client stats 31 * Get RPC client stats
@@ -116,9 +115,7 @@ EXPORT_SYMBOL_GPL(svc_seq_show);
116 */ 115 */
117struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) 116struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
118{ 117{
119 struct rpc_iostats *new; 118 return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
120 new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
121 return new;
122} 119}
123EXPORT_SYMBOL_GPL(rpc_alloc_iostats); 120EXPORT_SYMBOL_GPL(rpc_alloc_iostats);
124 121
@@ -218,10 +215,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats);
218static inline struct proc_dir_entry * 215static inline struct proc_dir_entry *
219do_register(const char *name, void *data, const struct file_operations *fops) 216do_register(const char *name, void *data, const struct file_operations *fops)
220{ 217{
221 rpc_proc_init(); 218 struct sunrpc_net *sn;
222 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
223 219
224 return proc_create_data(name, 0, proc_net_rpc, fops, data); 220 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
221 sn = net_generic(&init_net, sunrpc_net_id);
222 return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
225} 223}
226 224
227struct proc_dir_entry * 225struct proc_dir_entry *
@@ -234,7 +232,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register);
234void 232void
235rpc_proc_unregister(const char *name) 233rpc_proc_unregister(const char *name)
236{ 234{
237 remove_proc_entry(name, proc_net_rpc); 235 struct sunrpc_net *sn;
236
237 sn = net_generic(&init_net, sunrpc_net_id);
238 remove_proc_entry(name, sn->proc_net_rpc);
238} 239}
239EXPORT_SYMBOL_GPL(rpc_proc_unregister); 240EXPORT_SYMBOL_GPL(rpc_proc_unregister);
240 241
@@ -248,25 +249,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register);
248void 249void
249svc_proc_unregister(const char *name) 250svc_proc_unregister(const char *name)
250{ 251{
251 remove_proc_entry(name, proc_net_rpc); 252 struct sunrpc_net *sn;
253
254 sn = net_generic(&init_net, sunrpc_net_id);
255 remove_proc_entry(name, sn->proc_net_rpc);
252} 256}
253EXPORT_SYMBOL_GPL(svc_proc_unregister); 257EXPORT_SYMBOL_GPL(svc_proc_unregister);
254 258
255void 259int rpc_proc_init(struct net *net)
256rpc_proc_init(void)
257{ 260{
261 struct sunrpc_net *sn;
262
258 dprintk("RPC: registering /proc/net/rpc\n"); 263 dprintk("RPC: registering /proc/net/rpc\n");
259 if (!proc_net_rpc) 264 sn = net_generic(net, sunrpc_net_id);
260 proc_net_rpc = proc_mkdir("rpc", init_net.proc_net); 265 sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
266 if (sn->proc_net_rpc == NULL)
267 return -ENOMEM;
268
269 return 0;
261} 270}
262 271
263void 272void rpc_proc_exit(struct net *net)
264rpc_proc_exit(void)
265{ 273{
266 dprintk("RPC: unregistering /proc/net/rpc\n"); 274 dprintk("RPC: unregistering /proc/net/rpc\n");
267 if (proc_net_rpc) { 275 remove_proc_entry("rpc", net->proc_net);
268 proc_net_rpc = NULL;
269 remove_proc_entry("rpc", init_net.proc_net);
270 }
271} 276}
272 277
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c0d085013a2b..9d0809160994 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -22,7 +22,44 @@
22#include <linux/sunrpc/rpc_pipe_fs.h> 22#include <linux/sunrpc/rpc_pipe_fs.h>
23#include <linux/sunrpc/xprtsock.h> 23#include <linux/sunrpc/xprtsock.h>
24 24
25extern struct cache_detail ip_map_cache, unix_gid_cache; 25#include "netns.h"
26
27int sunrpc_net_id;
28
29static __net_init int sunrpc_init_net(struct net *net)
30{
31 int err;
32
33 err = rpc_proc_init(net);
34 if (err)
35 goto err_proc;
36
37 err = ip_map_cache_create(net);
38 if (err)
39 goto err_ipmap;
40
41 return 0;
42
43err_ipmap:
44 rpc_proc_exit(net);
45err_proc:
46 return err;
47}
48
49static __net_exit void sunrpc_exit_net(struct net *net)
50{
51 ip_map_cache_destroy(net);
52 rpc_proc_exit(net);
53}
54
55static struct pernet_operations sunrpc_net_ops = {
56 .init = sunrpc_init_net,
57 .exit = sunrpc_exit_net,
58 .id = &sunrpc_net_id,
59 .size = sizeof(struct sunrpc_net),
60};
61
62extern struct cache_detail unix_gid_cache;
26 63
27extern void cleanup_rpcb_clnt(void); 64extern void cleanup_rpcb_clnt(void);
28 65
@@ -38,18 +75,22 @@ init_sunrpc(void)
38 err = rpcauth_init_module(); 75 err = rpcauth_init_module();
39 if (err) 76 if (err)
40 goto out3; 77 goto out3;
78
79 cache_initialize();
80
81 err = register_pernet_subsys(&sunrpc_net_ops);
82 if (err)
83 goto out4;
41#ifdef RPC_DEBUG 84#ifdef RPC_DEBUG
42 rpc_register_sysctl(); 85 rpc_register_sysctl();
43#endif 86#endif
44#ifdef CONFIG_PROC_FS
45 rpc_proc_init();
46#endif
47 cache_initialize();
48 cache_register(&ip_map_cache);
49 cache_register(&unix_gid_cache); 87 cache_register(&unix_gid_cache);
50 svc_init_xprt_sock(); /* svc sock transport */ 88 svc_init_xprt_sock(); /* svc sock transport */
51 init_socket_xprt(); /* clnt sock transport */ 89 init_socket_xprt(); /* clnt sock transport */
52 return 0; 90 return 0;
91
92out4:
93 rpcauth_remove_module();
53out3: 94out3:
54 rpc_destroy_mempool(); 95 rpc_destroy_mempool();
55out2: 96out2:
@@ -67,14 +108,11 @@ cleanup_sunrpc(void)
67 svc_cleanup_xprt_sock(); 108 svc_cleanup_xprt_sock();
68 unregister_rpc_pipefs(); 109 unregister_rpc_pipefs();
69 rpc_destroy_mempool(); 110 rpc_destroy_mempool();
70 cache_unregister(&ip_map_cache);
71 cache_unregister(&unix_gid_cache); 111 cache_unregister(&unix_gid_cache);
112 unregister_pernet_subsys(&sunrpc_net_ops);
72#ifdef RPC_DEBUG 113#ifdef RPC_DEBUG
73 rpc_unregister_sysctl(); 114 rpc_unregister_sysctl();
74#endif 115#endif
75#ifdef CONFIG_PROC_FS
76 rpc_proc_exit();
77#endif
78 rcu_barrier(); /* Wait for completion of call_rcu()'s */ 116 rcu_barrier(); /* Wait for completion of call_rcu()'s */
79} 117}
80MODULE_LICENSE("GPL"); 118MODULE_LICENSE("GPL");
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d9017d64597e..6359c42c4941 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1055,6 +1055,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1055 goto err_bad; 1055 goto err_bad;
1056 case SVC_DENIED: 1056 case SVC_DENIED:
1057 goto err_bad_auth; 1057 goto err_bad_auth;
1058 case SVC_CLOSE:
1059 if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
1060 svc_close_xprt(rqstp->rq_xprt);
1058 case SVC_DROP: 1061 case SVC_DROP:
1059 goto dropit; 1062 goto dropit;
1060 case SVC_COMPLETE: 1063 case SVC_COMPLETE:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cbc084939dd8..ea2ff78dcf7b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -5,7 +5,6 @@
5 */ 5 */
6 6
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/smp_lock.h>
9#include <linux/errno.h> 8#include <linux/errno.h>
10#include <linux/freezer.h> 9#include <linux/freezer.h>
11#include <linux/kthread.h> 10#include <linux/kthread.h>
@@ -100,16 +99,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
100 */ 99 */
101int svc_print_xprts(char *buf, int maxlen) 100int svc_print_xprts(char *buf, int maxlen)
102{ 101{
103 struct list_head *le; 102 struct svc_xprt_class *xcl;
104 char tmpstr[80]; 103 char tmpstr[80];
105 int len = 0; 104 int len = 0;
106 buf[0] = '\0'; 105 buf[0] = '\0';
107 106
108 spin_lock(&svc_xprt_class_lock); 107 spin_lock(&svc_xprt_class_lock);
109 list_for_each(le, &svc_xprt_class_list) { 108 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
110 int slen; 109 int slen;
111 struct svc_xprt_class *xcl =
112 list_entry(le, struct svc_xprt_class, xcl_list);
113 110
114 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); 111 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
115 slen = strlen(tmpstr); 112 slen = strlen(tmpstr);
@@ -128,9 +125,9 @@ static void svc_xprt_free(struct kref *kref)
128 struct svc_xprt *xprt = 125 struct svc_xprt *xprt =
129 container_of(kref, struct svc_xprt, xpt_ref); 126 container_of(kref, struct svc_xprt, xpt_ref);
130 struct module *owner = xprt->xpt_class->xcl_owner; 127 struct module *owner = xprt->xpt_class->xcl_owner;
131 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && 128 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
132 xprt->xpt_auth_cache != NULL) 129 svcauth_unix_info_release(xprt);
133 svcauth_unix_info_release(xprt->xpt_auth_cache); 130 put_net(xprt->xpt_net);
134 xprt->xpt_ops->xpo_free(xprt); 131 xprt->xpt_ops->xpo_free(xprt);
135 module_put(owner); 132 module_put(owner);
136} 133}
@@ -156,15 +153,18 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
156 INIT_LIST_HEAD(&xprt->xpt_list); 153 INIT_LIST_HEAD(&xprt->xpt_list);
157 INIT_LIST_HEAD(&xprt->xpt_ready); 154 INIT_LIST_HEAD(&xprt->xpt_ready);
158 INIT_LIST_HEAD(&xprt->xpt_deferred); 155 INIT_LIST_HEAD(&xprt->xpt_deferred);
156 INIT_LIST_HEAD(&xprt->xpt_users);
159 mutex_init(&xprt->xpt_mutex); 157 mutex_init(&xprt->xpt_mutex);
160 spin_lock_init(&xprt->xpt_lock); 158 spin_lock_init(&xprt->xpt_lock);
161 set_bit(XPT_BUSY, &xprt->xpt_flags); 159 set_bit(XPT_BUSY, &xprt->xpt_flags);
162 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); 160 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
161 xprt->xpt_net = get_net(&init_net);
163} 162}
164EXPORT_SYMBOL_GPL(svc_xprt_init); 163EXPORT_SYMBOL_GPL(svc_xprt_init);
165 164
166static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, 165static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
167 struct svc_serv *serv, 166 struct svc_serv *serv,
167 struct net *net,
168 const int family, 168 const int family,
169 const unsigned short port, 169 const unsigned short port,
170 int flags) 170 int flags)
@@ -199,12 +199,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
199 return ERR_PTR(-EAFNOSUPPORT); 199 return ERR_PTR(-EAFNOSUPPORT);
200 } 200 }
201 201
202 return xcl->xcl_ops->xpo_create(serv, sap, len, flags); 202 return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
203} 203}
204 204
205int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 205int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
206 const int family, const unsigned short port, 206 struct net *net, const int family,
207 int flags) 207 const unsigned short port, int flags)
208{ 208{
209 struct svc_xprt_class *xcl; 209 struct svc_xprt_class *xcl;
210 210
@@ -220,7 +220,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
220 goto err; 220 goto err;
221 221
222 spin_unlock(&svc_xprt_class_lock); 222 spin_unlock(&svc_xprt_class_lock);
223 newxprt = __svc_xpo_create(xcl, serv, family, port, flags); 223 newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
224 if (IS_ERR(newxprt)) { 224 if (IS_ERR(newxprt)) {
225 module_put(xcl->xcl_owner); 225 module_put(xcl->xcl_owner);
226 return PTR_ERR(newxprt); 226 return PTR_ERR(newxprt);
@@ -329,12 +329,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
329 "svc_xprt_enqueue: " 329 "svc_xprt_enqueue: "
330 "threads and transports both waiting??\n"); 330 "threads and transports both waiting??\n");
331 331
332 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
333 /* Don't enqueue dead transports */
334 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
335 goto out_unlock;
336 }
337
338 pool->sp_stats.packets++; 332 pool->sp_stats.packets++;
339 333
340 /* Mark transport as busy. It will remain in this state until 334 /* Mark transport as busy. It will remain in this state until
@@ -651,6 +645,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
651 if (signalled() || kthread_should_stop()) 645 if (signalled() || kthread_should_stop())
652 return -EINTR; 646 return -EINTR;
653 647
648 /* Normally we will wait up to 5 seconds for any required
649 * cache information to be provided.
650 */
651 rqstp->rq_chandle.thread_wait = 5*HZ;
652
654 spin_lock_bh(&pool->sp_lock); 653 spin_lock_bh(&pool->sp_lock);
655 xprt = svc_xprt_dequeue(pool); 654 xprt = svc_xprt_dequeue(pool);
656 if (xprt) { 655 if (xprt) {
@@ -658,6 +657,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
658 svc_xprt_get(xprt); 657 svc_xprt_get(xprt);
659 rqstp->rq_reserved = serv->sv_max_mesg; 658 rqstp->rq_reserved = serv->sv_max_mesg;
660 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 659 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
660
661 /* As there is a shortage of threads and this request
662 * had to be queued, don't allow the thread to wait so
663 * long for cache updates.
664 */
665 rqstp->rq_chandle.thread_wait = 1*HZ;
661 } else { 666 } else {
662 /* No data pending. Go to sleep */ 667 /* No data pending. Go to sleep */
663 svc_thread_enqueue(pool, rqstp); 668 svc_thread_enqueue(pool, rqstp);
@@ -868,6 +873,19 @@ static void svc_age_temp_xprts(unsigned long closure)
868 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); 873 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
869} 874}
870 875
876static void call_xpt_users(struct svc_xprt *xprt)
877{
878 struct svc_xpt_user *u;
879
880 spin_lock(&xprt->xpt_lock);
881 while (!list_empty(&xprt->xpt_users)) {
882 u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
883 list_del(&u->list);
884 u->callback(u);
885 }
886 spin_unlock(&xprt->xpt_lock);
887}
888
871/* 889/*
872 * Remove a dead transport 890 * Remove a dead transport
873 */ 891 */
@@ -878,7 +896,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
878 896
879 /* Only do this once */ 897 /* Only do this once */
880 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) 898 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
881 return; 899 BUG();
882 900
883 dprintk("svc: svc_delete_xprt(%p)\n", xprt); 901 dprintk("svc: svc_delete_xprt(%p)\n", xprt);
884 xprt->xpt_ops->xpo_detach(xprt); 902 xprt->xpt_ops->xpo_detach(xprt);
@@ -900,6 +918,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
900 while ((dr = svc_deferred_dequeue(xprt)) != NULL) 918 while ((dr = svc_deferred_dequeue(xprt)) != NULL)
901 kfree(dr); 919 kfree(dr);
902 920
921 call_xpt_users(xprt);
903 svc_xprt_put(xprt); 922 svc_xprt_put(xprt);
904} 923}
905 924
@@ -910,10 +929,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
910 /* someone else will have to effect the close */ 929 /* someone else will have to effect the close */
911 return; 930 return;
912 931
913 svc_xprt_get(xprt);
914 svc_delete_xprt(xprt); 932 svc_delete_xprt(xprt);
915 clear_bit(XPT_BUSY, &xprt->xpt_flags);
916 svc_xprt_put(xprt);
917} 933}
918EXPORT_SYMBOL_GPL(svc_close_xprt); 934EXPORT_SYMBOL_GPL(svc_close_xprt);
919 935
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 207311610988..560677d187f1 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -18,6 +18,8 @@
18 18
19#include <linux/sunrpc/clnt.h> 19#include <linux/sunrpc/clnt.h>
20 20
21#include "netns.h"
22
21/* 23/*
22 * AUTHUNIX and AUTHNULL credentials are both handled here. 24 * AUTHUNIX and AUTHNULL credentials are both handled here.
23 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid 25 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid
@@ -92,7 +94,6 @@ struct ip_map {
92 struct unix_domain *m_client; 94 struct unix_domain *m_client;
93 int m_add_change; 95 int m_add_change;
94}; 96};
95static struct cache_head *ip_table[IP_HASHMAX];
96 97
97static void ip_map_put(struct kref *kref) 98static void ip_map_put(struct kref *kref)
98{ 99{
@@ -178,8 +179,8 @@ static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
178 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); 179 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request);
179} 180}
180 181
181static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); 182static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
182static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); 183static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
183 184
184static int ip_map_parse(struct cache_detail *cd, 185static int ip_map_parse(struct cache_detail *cd,
185 char *mesg, int mlen) 186 char *mesg, int mlen)
@@ -219,10 +220,9 @@ static int ip_map_parse(struct cache_detail *cd,
219 switch (address.sa.sa_family) { 220 switch (address.sa.sa_family) {
220 case AF_INET: 221 case AF_INET:
221 /* Form a mapped IPv4 address in sin6 */ 222 /* Form a mapped IPv4 address in sin6 */
222 memset(&sin6, 0, sizeof(sin6));
223 sin6.sin6_family = AF_INET6; 223 sin6.sin6_family = AF_INET6;
224 sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); 224 ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
225 sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; 225 &sin6.sin6_addr);
226 break; 226 break;
227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
228 case AF_INET6: 228 case AF_INET6:
@@ -249,9 +249,9 @@ static int ip_map_parse(struct cache_detail *cd,
249 dom = NULL; 249 dom = NULL;
250 250
251 /* IPv6 scope IDs are ignored for now */ 251 /* IPv6 scope IDs are ignored for now */
252 ipmp = ip_map_lookup(class, &sin6.sin6_addr); 252 ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
253 if (ipmp) { 253 if (ipmp) {
254 err = ip_map_update(ipmp, 254 err = __ip_map_update(cd, ipmp,
255 container_of(dom, struct unix_domain, h), 255 container_of(dom, struct unix_domain, h),
256 expiry); 256 expiry);
257 } else 257 } else
@@ -294,29 +294,15 @@ static int ip_map_show(struct seq_file *m,
294} 294}
295 295
296 296
297struct cache_detail ip_map_cache = { 297static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
298 .owner = THIS_MODULE, 298 struct in6_addr *addr)
299 .hash_size = IP_HASHMAX,
300 .hash_table = ip_table,
301 .name = "auth.unix.ip",
302 .cache_put = ip_map_put,
303 .cache_upcall = ip_map_upcall,
304 .cache_parse = ip_map_parse,
305 .cache_show = ip_map_show,
306 .match = ip_map_match,
307 .init = ip_map_init,
308 .update = update,
309 .alloc = ip_map_alloc,
310};
311
312static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
313{ 299{
314 struct ip_map ip; 300 struct ip_map ip;
315 struct cache_head *ch; 301 struct cache_head *ch;
316 302
317 strcpy(ip.m_class, class); 303 strcpy(ip.m_class, class);
318 ipv6_addr_copy(&ip.m_addr, addr); 304 ipv6_addr_copy(&ip.m_addr, addr);
319 ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, 305 ch = sunrpc_cache_lookup(cd, &ip.h,
320 hash_str(class, IP_HASHBITS) ^ 306 hash_str(class, IP_HASHBITS) ^
321 hash_ip6(*addr)); 307 hash_ip6(*addr));
322 308
@@ -326,7 +312,17 @@ static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
326 return NULL; 312 return NULL;
327} 313}
328 314
329static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) 315static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
316 struct in6_addr *addr)
317{
318 struct sunrpc_net *sn;
319
320 sn = net_generic(net, sunrpc_net_id);
321 return __ip_map_lookup(sn->ip_map_cache, class, addr);
322}
323
324static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
325 struct unix_domain *udom, time_t expiry)
330{ 326{
331 struct ip_map ip; 327 struct ip_map ip;
332 struct cache_head *ch; 328 struct cache_head *ch;
@@ -344,17 +340,25 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
344 ip.m_add_change++; 340 ip.m_add_change++;
345 } 341 }
346 ip.h.expiry_time = expiry; 342 ip.h.expiry_time = expiry;
347 ch = sunrpc_cache_update(&ip_map_cache, 343 ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
348 &ip.h, &ipm->h,
349 hash_str(ipm->m_class, IP_HASHBITS) ^ 344 hash_str(ipm->m_class, IP_HASHBITS) ^
350 hash_ip6(ipm->m_addr)); 345 hash_ip6(ipm->m_addr));
351 if (!ch) 346 if (!ch)
352 return -ENOMEM; 347 return -ENOMEM;
353 cache_put(ch, &ip_map_cache); 348 cache_put(ch, cd);
354 return 0; 349 return 0;
355} 350}
356 351
357int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) 352static inline int ip_map_update(struct net *net, struct ip_map *ipm,
353 struct unix_domain *udom, time_t expiry)
354{
355 struct sunrpc_net *sn;
356
357 sn = net_generic(net, sunrpc_net_id);
358 return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
359}
360
361int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom)
358{ 362{
359 struct unix_domain *udom; 363 struct unix_domain *udom;
360 struct ip_map *ipmp; 364 struct ip_map *ipmp;
@@ -362,10 +366,10 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
362 if (dom->flavour != &svcauth_unix) 366 if (dom->flavour != &svcauth_unix)
363 return -EINVAL; 367 return -EINVAL;
364 udom = container_of(dom, struct unix_domain, h); 368 udom = container_of(dom, struct unix_domain, h);
365 ipmp = ip_map_lookup("nfsd", addr); 369 ipmp = ip_map_lookup(net, "nfsd", addr);
366 370
367 if (ipmp) 371 if (ipmp)
368 return ip_map_update(ipmp, udom, NEVER); 372 return ip_map_update(net, ipmp, udom, NEVER);
369 else 373 else
370 return -ENOMEM; 374 return -ENOMEM;
371} 375}
@@ -383,16 +387,18 @@ int auth_unix_forget_old(struct auth_domain *dom)
383} 387}
384EXPORT_SYMBOL_GPL(auth_unix_forget_old); 388EXPORT_SYMBOL_GPL(auth_unix_forget_old);
385 389
386struct auth_domain *auth_unix_lookup(struct in6_addr *addr) 390struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr)
387{ 391{
388 struct ip_map *ipm; 392 struct ip_map *ipm;
389 struct auth_domain *rv; 393 struct auth_domain *rv;
394 struct sunrpc_net *sn;
390 395
391 ipm = ip_map_lookup("nfsd", addr); 396 sn = net_generic(net, sunrpc_net_id);
397 ipm = ip_map_lookup(net, "nfsd", addr);
392 398
393 if (!ipm) 399 if (!ipm)
394 return NULL; 400 return NULL;
395 if (cache_check(&ip_map_cache, &ipm->h, NULL)) 401 if (cache_check(sn->ip_map_cache, &ipm->h, NULL))
396 return NULL; 402 return NULL;
397 403
398 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { 404 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
@@ -403,22 +409,29 @@ struct auth_domain *auth_unix_lookup(struct in6_addr *addr)
403 rv = &ipm->m_client->h; 409 rv = &ipm->m_client->h;
404 kref_get(&rv->ref); 410 kref_get(&rv->ref);
405 } 411 }
406 cache_put(&ipm->h, &ip_map_cache); 412 cache_put(&ipm->h, sn->ip_map_cache);
407 return rv; 413 return rv;
408} 414}
409EXPORT_SYMBOL_GPL(auth_unix_lookup); 415EXPORT_SYMBOL_GPL(auth_unix_lookup);
410 416
411void svcauth_unix_purge(void) 417void svcauth_unix_purge(void)
412{ 418{
413 cache_purge(&ip_map_cache); 419 struct net *net;
420
421 for_each_net(net) {
422 struct sunrpc_net *sn;
423
424 sn = net_generic(net, sunrpc_net_id);
425 cache_purge(sn->ip_map_cache);
426 }
414} 427}
415EXPORT_SYMBOL_GPL(svcauth_unix_purge); 428EXPORT_SYMBOL_GPL(svcauth_unix_purge);
416 429
417static inline struct ip_map * 430static inline struct ip_map *
418ip_map_cached_get(struct svc_rqst *rqstp) 431ip_map_cached_get(struct svc_xprt *xprt)
419{ 432{
420 struct ip_map *ipm = NULL; 433 struct ip_map *ipm = NULL;
421 struct svc_xprt *xprt = rqstp->rq_xprt; 434 struct sunrpc_net *sn;
422 435
423 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 436 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
424 spin_lock(&xprt->xpt_lock); 437 spin_lock(&xprt->xpt_lock);
@@ -430,9 +443,10 @@ ip_map_cached_get(struct svc_rqst *rqstp)
430 * remembered, e.g. by a second mount from the 443 * remembered, e.g. by a second mount from the
431 * same IP address. 444 * same IP address.
432 */ 445 */
446 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
433 xprt->xpt_auth_cache = NULL; 447 xprt->xpt_auth_cache = NULL;
434 spin_unlock(&xprt->xpt_lock); 448 spin_unlock(&xprt->xpt_lock);
435 cache_put(&ipm->h, &ip_map_cache); 449 cache_put(&ipm->h, sn->ip_map_cache);
436 return NULL; 450 return NULL;
437 } 451 }
438 cache_get(&ipm->h); 452 cache_get(&ipm->h);
@@ -443,10 +457,8 @@ ip_map_cached_get(struct svc_rqst *rqstp)
443} 457}
444 458
445static inline void 459static inline void
446ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) 460ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
447{ 461{
448 struct svc_xprt *xprt = rqstp->rq_xprt;
449
450 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 462 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
451 spin_lock(&xprt->xpt_lock); 463 spin_lock(&xprt->xpt_lock);
452 if (xprt->xpt_auth_cache == NULL) { 464 if (xprt->xpt_auth_cache == NULL) {
@@ -456,15 +468,26 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
456 } 468 }
457 spin_unlock(&xprt->xpt_lock); 469 spin_unlock(&xprt->xpt_lock);
458 } 470 }
459 if (ipm) 471 if (ipm) {
460 cache_put(&ipm->h, &ip_map_cache); 472 struct sunrpc_net *sn;
473
474 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
475 cache_put(&ipm->h, sn->ip_map_cache);
476 }
461} 477}
462 478
463void 479void
464svcauth_unix_info_release(void *info) 480svcauth_unix_info_release(struct svc_xprt *xpt)
465{ 481{
466 struct ip_map *ipm = info; 482 struct ip_map *ipm;
467 cache_put(&ipm->h, &ip_map_cache); 483
484 ipm = xpt->xpt_auth_cache;
485 if (ipm != NULL) {
486 struct sunrpc_net *sn;
487
488 sn = net_generic(xpt->xpt_net, sunrpc_net_id);
489 cache_put(&ipm->h, sn->ip_map_cache);
490 }
468} 491}
469 492
470/**************************************************************************** 493/****************************************************************************
@@ -674,6 +697,8 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
674 switch (ret) { 697 switch (ret) {
675 case -ENOENT: 698 case -ENOENT:
676 return ERR_PTR(-ENOENT); 699 return ERR_PTR(-ENOENT);
700 case -ETIMEDOUT:
701 return ERR_PTR(-ESHUTDOWN);
677 case 0: 702 case 0:
678 gi = get_group_info(ug->gi); 703 gi = get_group_info(ug->gi);
679 cache_put(&ug->h, &unix_gid_cache); 704 cache_put(&ug->h, &unix_gid_cache);
@@ -691,6 +716,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
691 struct ip_map *ipm; 716 struct ip_map *ipm;
692 struct group_info *gi; 717 struct group_info *gi;
693 struct svc_cred *cred = &rqstp->rq_cred; 718 struct svc_cred *cred = &rqstp->rq_cred;
719 struct svc_xprt *xprt = rqstp->rq_xprt;
720 struct net *net = xprt->xpt_net;
721 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
694 722
695 switch (rqstp->rq_addr.ss_family) { 723 switch (rqstp->rq_addr.ss_family) {
696 case AF_INET: 724 case AF_INET:
@@ -709,26 +737,27 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
709 if (rqstp->rq_proc == 0) 737 if (rqstp->rq_proc == 0)
710 return SVC_OK; 738 return SVC_OK;
711 739
712 ipm = ip_map_cached_get(rqstp); 740 ipm = ip_map_cached_get(xprt);
713 if (ipm == NULL) 741 if (ipm == NULL)
714 ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, 742 ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
715 &sin6->sin6_addr); 743 &sin6->sin6_addr);
716 744
717 if (ipm == NULL) 745 if (ipm == NULL)
718 return SVC_DENIED; 746 return SVC_DENIED;
719 747
720 switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { 748 switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
721 default: 749 default:
722 BUG(); 750 BUG();
723 case -EAGAIN:
724 case -ETIMEDOUT: 751 case -ETIMEDOUT:
752 return SVC_CLOSE;
753 case -EAGAIN:
725 return SVC_DROP; 754 return SVC_DROP;
726 case -ENOENT: 755 case -ENOENT:
727 return SVC_DENIED; 756 return SVC_DENIED;
728 case 0: 757 case 0:
729 rqstp->rq_client = &ipm->m_client->h; 758 rqstp->rq_client = &ipm->m_client->h;
730 kref_get(&rqstp->rq_client->ref); 759 kref_get(&rqstp->rq_client->ref);
731 ip_map_cached_put(rqstp, ipm); 760 ip_map_cached_put(xprt, ipm);
732 break; 761 break;
733 } 762 }
734 763
@@ -736,6 +765,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
736 switch (PTR_ERR(gi)) { 765 switch (PTR_ERR(gi)) {
737 case -EAGAIN: 766 case -EAGAIN:
738 return SVC_DROP; 767 return SVC_DROP;
768 case -ESHUTDOWN:
769 return SVC_CLOSE;
739 case -ENOENT: 770 case -ENOENT:
740 break; 771 break;
741 default: 772 default:
@@ -776,7 +807,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
776 cred->cr_gid = (gid_t) -1; 807 cred->cr_gid = (gid_t) -1;
777 cred->cr_group_info = groups_alloc(0); 808 cred->cr_group_info = groups_alloc(0);
778 if (cred->cr_group_info == NULL) 809 if (cred->cr_group_info == NULL)
779 return SVC_DROP; /* kmalloc failure - client must retry */ 810 return SVC_CLOSE; /* kmalloc failure - client must retry */
780 811
781 /* Put NULL verifier */ 812 /* Put NULL verifier */
782 svc_putnl(resv, RPC_AUTH_NULL); 813 svc_putnl(resv, RPC_AUTH_NULL);
@@ -840,7 +871,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
840 goto badcred; 871 goto badcred;
841 cred->cr_group_info = groups_alloc(slen); 872 cred->cr_group_info = groups_alloc(slen);
842 if (cred->cr_group_info == NULL) 873 if (cred->cr_group_info == NULL)
843 return SVC_DROP; 874 return SVC_CLOSE;
844 for (i = 0; i < slen; i++) 875 for (i = 0; i < slen; i++)
845 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); 876 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
846 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { 877 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
@@ -886,3 +917,56 @@ struct auth_ops svcauth_unix = {
886 .set_client = svcauth_unix_set_client, 917 .set_client = svcauth_unix_set_client,
887}; 918};
888 919
920int ip_map_cache_create(struct net *net)
921{
922 int err = -ENOMEM;
923 struct cache_detail *cd;
924 struct cache_head **tbl;
925 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
926
927 cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
928 if (cd == NULL)
929 goto err_cd;
930
931 tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL);
932 if (tbl == NULL)
933 goto err_tbl;
934
935 cd->owner = THIS_MODULE,
936 cd->hash_size = IP_HASHMAX,
937 cd->hash_table = tbl,
938 cd->name = "auth.unix.ip",
939 cd->cache_put = ip_map_put,
940 cd->cache_upcall = ip_map_upcall,
941 cd->cache_parse = ip_map_parse,
942 cd->cache_show = ip_map_show,
943 cd->match = ip_map_match,
944 cd->init = ip_map_init,
945 cd->update = update,
946 cd->alloc = ip_map_alloc,
947
948 err = cache_register_net(cd, net);
949 if (err)
950 goto err_reg;
951
952 sn->ip_map_cache = cd;
953 return 0;
954
955err_reg:
956 kfree(tbl);
957err_tbl:
958 kfree(cd);
959err_cd:
960 return err;
961}
962
963void ip_map_cache_destroy(struct net *net)
964{
965 struct sunrpc_net *sn;
966
967 sn = net_generic(net, sunrpc_net_id);
968 cache_purge(sn->ip_map_cache);
969 cache_unregister_net(sn->ip_map_cache, net);
970 kfree(sn->ip_map_cache->hash_table);
971 kfree(sn->ip_map_cache);
972}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7e534dd09077..07919e16be3e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -64,7 +64,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *);
64static void svc_sock_free(struct svc_xprt *); 64static void svc_sock_free(struct svc_xprt *);
65 65
66static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 66static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
67 struct sockaddr *, int, int); 67 struct net *, struct sockaddr *,
68 int, int);
68#ifdef CONFIG_DEBUG_LOCK_ALLOC 69#ifdef CONFIG_DEBUG_LOCK_ALLOC
69static struct lock_class_key svc_key[2]; 70static struct lock_class_key svc_key[2];
70static struct lock_class_key svc_slock_key[2]; 71static struct lock_class_key svc_slock_key[2];
@@ -657,10 +658,11 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
657} 658}
658 659
659static struct svc_xprt *svc_udp_create(struct svc_serv *serv, 660static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
661 struct net *net,
660 struct sockaddr *sa, int salen, 662 struct sockaddr *sa, int salen,
661 int flags) 663 int flags)
662{ 664{
663 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); 665 return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
664} 666}
665 667
666static struct svc_xprt_ops svc_udp_ops = { 668static struct svc_xprt_ops svc_udp_ops = {
@@ -1133,9 +1135,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
1133 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 1135 reclen = htonl(0x80000000|((xbufp->len ) - 4));
1134 memcpy(xbufp->head[0].iov_base, &reclen, 4); 1136 memcpy(xbufp->head[0].iov_base, &reclen, 4);
1135 1137
1136 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
1137 return -ENOTCONN;
1138
1139 sent = svc_sendto(rqstp, &rqstp->rq_res); 1138 sent = svc_sendto(rqstp, &rqstp->rq_res);
1140 if (sent != xbufp->len) { 1139 if (sent != xbufp->len) {
1141 printk(KERN_NOTICE 1140 printk(KERN_NOTICE
@@ -1178,10 +1177,11 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
1178} 1177}
1179 1178
1180static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, 1179static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1180 struct net *net,
1181 struct sockaddr *sa, int salen, 1181 struct sockaddr *sa, int salen,
1182 int flags) 1182 int flags)
1183{ 1183{
1184 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); 1184 return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
1185} 1185}
1186 1186
1187static struct svc_xprt_ops svc_tcp_ops = { 1187static struct svc_xprt_ops svc_tcp_ops = {
@@ -1258,19 +1258,13 @@ void svc_sock_update_bufs(struct svc_serv *serv)
1258 * The number of server threads has changed. Update 1258 * The number of server threads has changed. Update
1259 * rcvbuf and sndbuf accordingly on all sockets 1259 * rcvbuf and sndbuf accordingly on all sockets
1260 */ 1260 */
1261 struct list_head *le; 1261 struct svc_sock *svsk;
1262 1262
1263 spin_lock_bh(&serv->sv_lock); 1263 spin_lock_bh(&serv->sv_lock);
1264 list_for_each(le, &serv->sv_permsocks) { 1264 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
1265 struct svc_sock *svsk =
1266 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1265 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1268 } 1266 list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list)
1269 list_for_each(le, &serv->sv_tempsocks) {
1270 struct svc_sock *svsk =
1271 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1272 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1273 }
1274 spin_unlock_bh(&serv->sv_lock); 1268 spin_unlock_bh(&serv->sv_lock);
1275} 1269}
1276EXPORT_SYMBOL_GPL(svc_sock_update_bufs); 1270EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
@@ -1385,6 +1379,7 @@ EXPORT_SYMBOL_GPL(svc_addsock);
1385 */ 1379 */
1386static struct svc_xprt *svc_create_socket(struct svc_serv *serv, 1380static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1387 int protocol, 1381 int protocol,
1382 struct net *net,
1388 struct sockaddr *sin, int len, 1383 struct sockaddr *sin, int len,
1389 int flags) 1384 int flags)
1390{ 1385{
@@ -1421,7 +1416,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1421 return ERR_PTR(-EINVAL); 1416 return ERR_PTR(-EINVAL);
1422 } 1417 }
1423 1418
1424 error = sock_create_kern(family, type, protocol, &sock); 1419 error = __sock_create(net, family, type, protocol, &sock, 1);
1425 if (error < 0) 1420 if (error < 0)
1426 return ERR_PTR(error); 1421 return ERR_PTR(error);
1427 1422
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index a1f82a87d34d..cd9e841e7492 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -111,6 +111,23 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
111} 111}
112EXPORT_SYMBOL_GPL(xdr_decode_string_inplace); 112EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
113 113
114/**
115 * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf
116 * @buf: XDR buffer where string resides
117 * @len: length of string, in bytes
118 *
119 */
120void
121xdr_terminate_string(struct xdr_buf *buf, const u32 len)
122{
123 char *kaddr;
124
125 kaddr = kmap_atomic(buf->pages[0], KM_USER0);
126 kaddr[buf->page_base + len] = '\0';
127 kunmap_atomic(kaddr, KM_USER0);
128}
129EXPORT_SYMBOL(xdr_terminate_string);
130
114void 131void
115xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, 132xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
116 unsigned int len) 133 unsigned int len)
@@ -395,24 +412,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
395{ 412{
396 struct kvec *tail; 413 struct kvec *tail;
397 size_t copy; 414 size_t copy;
398 char *p;
399 unsigned int pglen = buf->page_len; 415 unsigned int pglen = buf->page_len;
416 unsigned int tailbuf_len;
400 417
401 tail = buf->tail; 418 tail = buf->tail;
402 BUG_ON (len > pglen); 419 BUG_ON (len > pglen);
403 420
421 tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
422
404 /* Shift the tail first */ 423 /* Shift the tail first */
405 if (tail->iov_len != 0) { 424 if (tailbuf_len != 0) {
406 p = (char *)tail->iov_base + len; 425 unsigned int free_space = tailbuf_len - tail->iov_len;
426
427 if (len < free_space)
428 free_space = len;
429 tail->iov_len += free_space;
430
431 copy = len;
407 if (tail->iov_len > len) { 432 if (tail->iov_len > len) {
408 copy = tail->iov_len - len; 433 char *p = (char *)tail->iov_base + len;
409 memmove(p, tail->iov_base, copy); 434 memmove(p, tail->iov_base, tail->iov_len - len);
410 } else 435 } else
411 buf->buflen -= len;
412 /* Copy from the inlined pages into the tail */
413 copy = len;
414 if (copy > tail->iov_len)
415 copy = tail->iov_len; 436 copy = tail->iov_len;
437 /* Copy from the inlined pages into the tail */
416 _copy_from_pages((char *)tail->iov_base, 438 _copy_from_pages((char *)tail->iov_base,
417 buf->pages, buf->page_base + pglen - len, 439 buf->pages, buf->page_base + pglen - len,
418 copy); 440 copy);
@@ -551,6 +573,27 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
551EXPORT_SYMBOL_GPL(xdr_init_decode); 573EXPORT_SYMBOL_GPL(xdr_init_decode);
552 574
553/** 575/**
576 * xdr_inline_peek - Allow read-ahead in the XDR data stream
577 * @xdr: pointer to xdr_stream struct
578 * @nbytes: number of bytes of data to decode
579 *
580 * Check if the input buffer is long enough to enable us to decode
581 * 'nbytes' more bytes of data starting at the current position.
582 * If so return the current pointer without updating the current
583 * pointer position.
584 */
585__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
586{
587 __be32 *p = xdr->p;
588 __be32 *q = p + XDR_QUADLEN(nbytes);
589
590 if (unlikely(q > xdr->end || q < p))
591 return NULL;
592 return p;
593}
594EXPORT_SYMBOL_GPL(xdr_inline_peek);
595
596/**
554 * xdr_inline_decode - Retrieve non-page XDR data to decode 597 * xdr_inline_decode - Retrieve non-page XDR data to decode
555 * @xdr: pointer to xdr_stream struct 598 * @xdr: pointer to xdr_stream struct
556 * @nbytes: number of bytes of data to decode 599 * @nbytes: number of bytes of data to decode
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 970fb00f388c..4c8f18aff7c3 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -199,8 +199,6 @@ int xprt_reserve_xprt(struct rpc_task *task)
199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
200 if (task == xprt->snd_task) 200 if (task == xprt->snd_task)
201 return 1; 201 return 1;
202 if (task == NULL)
203 return 0;
204 goto out_sleep; 202 goto out_sleep;
205 } 203 }
206 xprt->snd_task = task; 204 xprt->snd_task = task;
@@ -757,13 +755,11 @@ static void xprt_connect_status(struct rpc_task *task)
757 */ 755 */
758struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) 756struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
759{ 757{
760 struct list_head *pos; 758 struct rpc_rqst *entry;
761 759
762 list_for_each(pos, &xprt->recv) { 760 list_for_each_entry(entry, &xprt->recv, rq_list)
763 struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
764 if (entry->rq_xid == xid) 761 if (entry->rq_xid == xid)
765 return entry; 762 return entry;
766 }
767 763
768 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", 764 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
769 ntohl(xid)); 765 ntohl(xid));
@@ -962,6 +958,37 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
962 spin_unlock(&xprt->reserve_lock); 958 spin_unlock(&xprt->reserve_lock);
963} 959}
964 960
961struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
962{
963 struct rpc_xprt *xprt;
964
965 xprt = kzalloc(size, GFP_KERNEL);
966 if (xprt == NULL)
967 goto out;
968
969 xprt->max_reqs = max_req;
970 xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
971 if (xprt->slot == NULL)
972 goto out_free;
973
974 xprt->xprt_net = get_net(net);
975 return xprt;
976
977out_free:
978 kfree(xprt);
979out:
980 return NULL;
981}
982EXPORT_SYMBOL_GPL(xprt_alloc);
983
984void xprt_free(struct rpc_xprt *xprt)
985{
986 put_net(xprt->xprt_net);
987 kfree(xprt->slot);
988 kfree(xprt);
989}
990EXPORT_SYMBOL_GPL(xprt_free);
991
965/** 992/**
966 * xprt_reserve - allocate an RPC request slot 993 * xprt_reserve - allocate an RPC request slot
967 * @task: RPC task requesting a slot allocation 994 * @task: RPC task requesting a slot allocation
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index d718b8fa9525..09af4fab1a45 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -43,6 +43,7 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/fs.h> 44#include <linux/fs.h>
45#include <linux/sysctl.h> 45#include <linux/sysctl.h>
46#include <linux/workqueue.h>
46#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
47#include <linux/sunrpc/sched.h> 48#include <linux/sunrpc/sched.h>
48#include <linux/sunrpc/svc_rdma.h> 49#include <linux/sunrpc/svc_rdma.h>
@@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod;
74struct kmem_cache *svc_rdma_map_cachep; 75struct kmem_cache *svc_rdma_map_cachep;
75struct kmem_cache *svc_rdma_ctxt_cachep; 76struct kmem_cache *svc_rdma_ctxt_cachep;
76 77
78struct workqueue_struct *svc_rdma_wq;
79
77/* 80/*
78 * This function implements reading and resetting an atomic_t stat 81 * This function implements reading and resetting an atomic_t stat
79 * variable through read/write to a proc file. Any write to the file 82 * variable through read/write to a proc file. Any write to the file
@@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = {
231void svc_rdma_cleanup(void) 234void svc_rdma_cleanup(void)
232{ 235{
233 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 236 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
234 flush_scheduled_work(); 237 destroy_workqueue(svc_rdma_wq);
235 if (svcrdma_table_header) { 238 if (svcrdma_table_header) {
236 unregister_sysctl_table(svcrdma_table_header); 239 unregister_sysctl_table(svcrdma_table_header);
237 svcrdma_table_header = NULL; 240 svcrdma_table_header = NULL;
@@ -249,6 +252,11 @@ int svc_rdma_init(void)
249 dprintk("\tsq_depth : %d\n", 252 dprintk("\tsq_depth : %d\n",
250 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); 253 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
251 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); 254 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
255
256 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
257 if (!svc_rdma_wq)
258 return -ENOMEM;
259
252 if (!svcrdma_table_header) 260 if (!svcrdma_table_header)
253 svcrdma_table_header = 261 svcrdma_table_header =
254 register_sysctl_table(svcrdma_root_table); 262 register_sysctl_table(svcrdma_root_table);
@@ -283,6 +291,7 @@ int svc_rdma_init(void)
283 kmem_cache_destroy(svc_rdma_map_cachep); 291 kmem_cache_destroy(svc_rdma_map_cachep);
284 err0: 292 err0:
285 unregister_sysctl_table(svcrdma_table_header); 293 unregister_sysctl_table(svcrdma_table_header);
294 destroy_workqueue(svc_rdma_wq);
286 return -ENOMEM; 295 return -ENOMEM;
287} 296}
288MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 297MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0194de814933..df67211c4baf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; 263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) { 264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265 frmr->page_list->page_list[page_no] = 265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device, 266 ib_dma_map_page(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]), 267 rqstp->rq_arg.pages[page_no], 0,
268 PAGE_SIZE, DMA_FROM_DEVICE); 268 PAGE_SIZE, DMA_FROM_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no])) 270 frmr->page_list->page_list[page_no]))
271 goto fatal_err; 271 goto fatal_err;
@@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
309 int count) 309 int count)
310{ 310{
311 int i; 311 int i;
312 unsigned long off;
312 313
313 ctxt->count = count; 314 ctxt->count = count;
314 ctxt->direction = DMA_FROM_DEVICE; 315 ctxt->direction = DMA_FROM_DEVICE;
315 for (i = 0; i < count; i++) { 316 for (i = 0; i < count; i++) {
316 ctxt->sge[i].length = 0; /* in case map fails */ 317 ctxt->sge[i].length = 0; /* in case map fails */
317 if (!frmr) { 318 if (!frmr) {
319 BUG_ON(0 == virt_to_page(vec[i].iov_base));
320 off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
318 ctxt->sge[i].addr = 321 ctxt->sge[i].addr =
319 ib_dma_map_single(xprt->sc_cm_id->device, 322 ib_dma_map_page(xprt->sc_cm_id->device,
320 vec[i].iov_base, 323 virt_to_page(vec[i].iov_base),
321 vec[i].iov_len, 324 off,
322 DMA_FROM_DEVICE); 325 vec[i].iov_len,
326 DMA_FROM_DEVICE);
323 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 327 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324 ctxt->sge[i].addr)) 328 ctxt->sge[i].addr))
325 return -EINVAL; 329 return -EINVAL;
@@ -491,6 +495,7 @@ next_sge:
491 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", 495 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
492 err); 496 err);
493 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 497 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
498 svc_rdma_unmap_dma(ctxt);
494 svc_rdma_put_context(ctxt, 0); 499 svc_rdma_put_context(ctxt, 0);
495 goto out; 500 goto out;
496 } 501 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b15e1ebb2bfa..249a835b703f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -70,8 +70,8 @@
70 * on extra page for the RPCRMDA header. 70 * on extra page for the RPCRMDA header.
71 */ 71 */
72static int fast_reg_xdr(struct svcxprt_rdma *xprt, 72static int fast_reg_xdr(struct svcxprt_rdma *xprt,
73 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
74 struct svc_rdma_req_map *vec) 74 struct svc_rdma_req_map *vec)
75{ 75{
76 int sge_no; 76 int sge_no;
77 u32 sge_bytes; 77 u32 sge_bytes;
@@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
96 vec->count = 2; 96 vec->count = 2;
97 sge_no++; 97 sge_no++;
98 98
99 /* Build the FRMR */ 99 /* Map the XDR head */
100 frmr->kva = frva; 100 frmr->kva = frva;
101 frmr->direction = DMA_TO_DEVICE; 101 frmr->direction = DMA_TO_DEVICE;
102 frmr->access_flags = 0; 102 frmr->access_flags = 0;
103 frmr->map_len = PAGE_SIZE; 103 frmr->map_len = PAGE_SIZE;
104 frmr->page_list_len = 1; 104 frmr->page_list_len = 1;
105 page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
105 frmr->page_list->page_list[page_no] = 106 frmr->page_list->page_list[page_no] =
106 ib_dma_map_single(xprt->sc_cm_id->device, 107 ib_dma_map_page(xprt->sc_cm_id->device,
107 (void *)xdr->head[0].iov_base, 108 virt_to_page(xdr->head[0].iov_base),
108 PAGE_SIZE, DMA_TO_DEVICE); 109 page_off,
110 PAGE_SIZE - page_off,
111 DMA_TO_DEVICE);
109 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 112 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110 frmr->page_list->page_list[page_no])) 113 frmr->page_list->page_list[page_no]))
111 goto fatal_err; 114 goto fatal_err;
112 atomic_inc(&xprt->sc_dma_used); 115 atomic_inc(&xprt->sc_dma_used);
113 116
117 /* Map the XDR page list */
114 page_off = xdr->page_base; 118 page_off = xdr->page_base;
115 page_bytes = xdr->page_len + page_off; 119 page_bytes = xdr->page_len + page_off;
116 if (!page_bytes) 120 if (!page_bytes)
@@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
128 page_bytes -= sge_bytes; 132 page_bytes -= sge_bytes;
129 133
130 frmr->page_list->page_list[page_no] = 134 frmr->page_list->page_list[page_no] =
131 ib_dma_map_single(xprt->sc_cm_id->device, 135 ib_dma_map_page(xprt->sc_cm_id->device,
132 page_address(page), 136 page, page_off,
133 PAGE_SIZE, DMA_TO_DEVICE); 137 sge_bytes, DMA_TO_DEVICE);
134 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 138 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
135 frmr->page_list->page_list[page_no])) 139 frmr->page_list->page_list[page_no]))
136 goto fatal_err; 140 goto fatal_err;
@@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
166 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; 170 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
167 171
168 frmr->page_list->page_list[page_no] = 172 frmr->page_list->page_list[page_no] =
169 ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, 173 ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
170 DMA_TO_DEVICE); 174 page_off,
175 PAGE_SIZE,
176 DMA_TO_DEVICE);
171 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 177 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
172 frmr->page_list->page_list[page_no])) 178 frmr->page_list->page_list[page_no]))
173 goto fatal_err; 179 goto fatal_err;
@@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt,
245 return 0; 251 return 0;
246} 252}
247 253
254static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
255 struct xdr_buf *xdr,
256 u32 xdr_off, size_t len, int dir)
257{
258 struct page *page;
259 dma_addr_t dma_addr;
260 if (xdr_off < xdr->head[0].iov_len) {
261 /* This offset is in the head */
262 xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
263 page = virt_to_page(xdr->head[0].iov_base);
264 } else {
265 xdr_off -= xdr->head[0].iov_len;
266 if (xdr_off < xdr->page_len) {
267 /* This offset is in the page list */
268 page = xdr->pages[xdr_off >> PAGE_SHIFT];
269 xdr_off &= ~PAGE_MASK;
270 } else {
271 /* This offset is in the tail */
272 xdr_off -= xdr->page_len;
273 xdr_off += (unsigned long)
274 xdr->tail[0].iov_base & ~PAGE_MASK;
275 page = virt_to_page(xdr->tail[0].iov_base);
276 }
277 }
278 dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
279 min_t(size_t, PAGE_SIZE, len), dir);
280 return dma_addr;
281}
282
248/* Assumptions: 283/* Assumptions:
249 * - We are using FRMR 284 * - We are using FRMR
250 * - or - 285 * - or -
@@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
293 sge[sge_no].length = sge_bytes; 328 sge[sge_no].length = sge_bytes;
294 if (!vec->frmr) { 329 if (!vec->frmr) {
295 sge[sge_no].addr = 330 sge[sge_no].addr =
296 ib_dma_map_single(xprt->sc_cm_id->device, 331 dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
297 (void *) 332 sge_bytes, DMA_TO_DEVICE);
298 vec->sge[xdr_sge_no].iov_base + sge_off, 333 xdr_off += sge_bytes;
299 sge_bytes, DMA_TO_DEVICE);
300 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 334 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
301 sge[sge_no].addr)) 335 sge[sge_no].addr))
302 goto err; 336 goto err;
@@ -333,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
333 goto err; 367 goto err;
334 return 0; 368 return 0;
335 err: 369 err:
370 svc_rdma_unmap_dma(ctxt);
371 svc_rdma_put_frmr(xprt, vec->frmr);
336 svc_rdma_put_context(ctxt, 0); 372 svc_rdma_put_context(ctxt, 0);
337 /* Fatal error, close transport */ 373 /* Fatal error, close transport */
338 return -EIO; 374 return -EIO;
@@ -494,7 +530,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
494 * In all three cases, this function prepares the RPCRDMA header in 530 * In all three cases, this function prepares the RPCRDMA header in
495 * sge[0], the 'type' parameter indicates the type to place in the 531 * sge[0], the 'type' parameter indicates the type to place in the
496 * RPCRDMA header, and the 'byte_count' field indicates how much of 532 * RPCRDMA header, and the 'byte_count' field indicates how much of
497 * the XDR to include in this RDMA_SEND. 533 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
534 * to send is zero in the XDR.
498 */ 535 */
499static int send_reply(struct svcxprt_rdma *rdma, 536static int send_reply(struct svcxprt_rdma *rdma,
500 struct svc_rqst *rqstp, 537 struct svc_rqst *rqstp,
@@ -536,23 +573,24 @@ static int send_reply(struct svcxprt_rdma *rdma,
536 ctxt->sge[0].lkey = rdma->sc_dma_lkey; 573 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
537 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 574 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
538 ctxt->sge[0].addr = 575 ctxt->sge[0].addr =
539 ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), 576 ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
540 ctxt->sge[0].length, DMA_TO_DEVICE); 577 ctxt->sge[0].length, DMA_TO_DEVICE);
541 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 578 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
542 goto err; 579 goto err;
543 atomic_inc(&rdma->sc_dma_used); 580 atomic_inc(&rdma->sc_dma_used);
544 581
545 ctxt->direction = DMA_TO_DEVICE; 582 ctxt->direction = DMA_TO_DEVICE;
546 583
547 /* Determine how many of our SGE are to be transmitted */ 584 /* Map the payload indicated by 'byte_count' */
548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 585 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
586 int xdr_off = 0;
549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 587 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
550 byte_count -= sge_bytes; 588 byte_count -= sge_bytes;
551 if (!vec->frmr) { 589 if (!vec->frmr) {
552 ctxt->sge[sge_no].addr = 590 ctxt->sge[sge_no].addr =
553 ib_dma_map_single(rdma->sc_cm_id->device, 591 dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
554 vec->sge[sge_no].iov_base, 592 sge_bytes, DMA_TO_DEVICE);
555 sge_bytes, DMA_TO_DEVICE); 593 xdr_off += sge_bytes;
556 if (ib_dma_mapping_error(rdma->sc_cm_id->device, 594 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
557 ctxt->sge[sge_no].addr)) 595 ctxt->sge[sge_no].addr))
558 goto err; 596 goto err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index edea15a54e51..9df1eadc912a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -45,6 +45,7 @@
45#include <linux/sched.h> 45#include <linux/sched.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/spinlock.h> 47#include <linux/spinlock.h>
48#include <linux/workqueue.h>
48#include <rdma/ib_verbs.h> 49#include <rdma/ib_verbs.h>
49#include <rdma/rdma_cm.h> 50#include <rdma/rdma_cm.h>
50#include <linux/sunrpc/svc_rdma.h> 51#include <linux/sunrpc/svc_rdma.h>
@@ -52,6 +53,7 @@
52#define RPCDBG_FACILITY RPCDBG_SVCXPRT 53#define RPCDBG_FACILITY RPCDBG_SVCXPRT
53 54
54static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 55static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
56 struct net *net,
55 struct sockaddr *sa, int salen, 57 struct sockaddr *sa, int salen,
56 int flags); 58 int flags);
57static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 59static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
@@ -89,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = {
89/* WR context cache. Created in svc_rdma.c */ 91/* WR context cache. Created in svc_rdma.c */
90extern struct kmem_cache *svc_rdma_ctxt_cachep; 92extern struct kmem_cache *svc_rdma_ctxt_cachep;
91 93
94/* Workqueue created in svc_rdma.c */
95extern struct workqueue_struct *svc_rdma_wq;
96
92struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 97struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
93{ 98{
94 struct svc_rdma_op_ctxt *ctxt; 99 struct svc_rdma_op_ctxt *ctxt;
@@ -120,7 +125,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
120 */ 125 */
121 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { 126 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
122 atomic_dec(&xprt->sc_dma_used); 127 atomic_dec(&xprt->sc_dma_used);
123 ib_dma_unmap_single(xprt->sc_cm_id->device, 128 ib_dma_unmap_page(xprt->sc_cm_id->device,
124 ctxt->sge[i].addr, 129 ctxt->sge[i].addr,
125 ctxt->sge[i].length, 130 ctxt->sge[i].length,
126 ctxt->direction); 131 ctxt->direction);
@@ -502,8 +507,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
502 BUG_ON(sge_no >= xprt->sc_max_sge); 507 BUG_ON(sge_no >= xprt->sc_max_sge);
503 page = svc_rdma_get_page(); 508 page = svc_rdma_get_page();
504 ctxt->pages[sge_no] = page; 509 ctxt->pages[sge_no] = page;
505 pa = ib_dma_map_single(xprt->sc_cm_id->device, 510 pa = ib_dma_map_page(xprt->sc_cm_id->device,
506 page_address(page), PAGE_SIZE, 511 page, 0, PAGE_SIZE,
507 DMA_FROM_DEVICE); 512 DMA_FROM_DEVICE);
508 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 513 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
509 goto err_put_ctxt; 514 goto err_put_ctxt;
@@ -511,9 +516,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
511 ctxt->sge[sge_no].addr = pa; 516 ctxt->sge[sge_no].addr = pa;
512 ctxt->sge[sge_no].length = PAGE_SIZE; 517 ctxt->sge[sge_no].length = PAGE_SIZE;
513 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; 518 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
519 ctxt->count = sge_no + 1;
514 buflen += PAGE_SIZE; 520 buflen += PAGE_SIZE;
515 } 521 }
516 ctxt->count = sge_no;
517 recv_wr.next = NULL; 522 recv_wr.next = NULL;
518 recv_wr.sg_list = &ctxt->sge[0]; 523 recv_wr.sg_list = &ctxt->sge[0];
519 recv_wr.num_sge = ctxt->count; 524 recv_wr.num_sge = ctxt->count;
@@ -529,6 +534,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
529 return ret; 534 return ret;
530 535
531 err_put_ctxt: 536 err_put_ctxt:
537 svc_rdma_unmap_dma(ctxt);
532 svc_rdma_put_context(ctxt, 1); 538 svc_rdma_put_context(ctxt, 1);
533 return -ENOMEM; 539 return -ENOMEM;
534} 540}
@@ -670,6 +676,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
670 * Create a listening RDMA service endpoint. 676 * Create a listening RDMA service endpoint.
671 */ 677 */
672static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 678static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
679 struct net *net,
673 struct sockaddr *sa, int salen, 680 struct sockaddr *sa, int salen,
674 int flags) 681 int flags)
675{ 682{
@@ -798,8 +805,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
798 if (ib_dma_mapping_error(frmr->mr->device, addr)) 805 if (ib_dma_mapping_error(frmr->mr->device, addr))
799 continue; 806 continue;
800 atomic_dec(&xprt->sc_dma_used); 807 atomic_dec(&xprt->sc_dma_used);
801 ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, 808 ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
802 frmr->direction); 809 frmr->direction);
803 } 810 }
804} 811}
805 812
@@ -1184,7 +1191,7 @@ static void svc_rdma_free(struct svc_xprt *xprt)
1184 struct svcxprt_rdma *rdma = 1191 struct svcxprt_rdma *rdma =
1185 container_of(xprt, struct svcxprt_rdma, sc_xprt); 1192 container_of(xprt, struct svcxprt_rdma, sc_xprt);
1186 INIT_WORK(&rdma->sc_work, __svc_rdma_free); 1193 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
1187 schedule_work(&rdma->sc_work); 1194 queue_work(svc_rdma_wq, &rdma->sc_work);
1188} 1195}
1189 1196
1190static int svc_rdma_has_wspace(struct svc_xprt *xprt) 1197static int svc_rdma_has_wspace(struct svc_xprt *xprt)
@@ -1274,7 +1281,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1274 atomic_read(&xprt->sc_sq_count) < 1281 atomic_read(&xprt->sc_sq_count) <
1275 xprt->sc_sq_depth); 1282 xprt->sc_sq_depth);
1276 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1283 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1277 return 0; 1284 return -ENOTCONN;
1278 continue; 1285 continue;
1279 } 1286 }
1280 /* Take a transport ref for each WR posted */ 1287 /* Take a transport ref for each WR posted */
@@ -1306,7 +1313,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1306 enum rpcrdma_errcode err) 1313 enum rpcrdma_errcode err)
1307{ 1314{
1308 struct ib_send_wr err_wr; 1315 struct ib_send_wr err_wr;
1309 struct ib_sge sge;
1310 struct page *p; 1316 struct page *p;
1311 struct svc_rdma_op_ctxt *ctxt; 1317 struct svc_rdma_op_ctxt *ctxt;
1312 u32 *va; 1318 u32 *va;
@@ -1319,26 +1325,27 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1319 /* XDR encode error */ 1325 /* XDR encode error */
1320 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1326 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1321 1327
1328 ctxt = svc_rdma_get_context(xprt);
1329 ctxt->direction = DMA_FROM_DEVICE;
1330 ctxt->count = 1;
1331 ctxt->pages[0] = p;
1332
1322 /* Prepare SGE for local address */ 1333 /* Prepare SGE for local address */
1323 sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, 1334 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
1324 page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); 1335 p, 0, length, DMA_FROM_DEVICE);
1325 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { 1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1326 put_page(p); 1337 put_page(p);
1327 return; 1338 return;
1328 } 1339 }
1329 atomic_inc(&xprt->sc_dma_used); 1340 atomic_inc(&xprt->sc_dma_used);
1330 sge.lkey = xprt->sc_dma_lkey; 1341 ctxt->sge[0].lkey = xprt->sc_dma_lkey;
1331 sge.length = length; 1342 ctxt->sge[0].length = length;
1332
1333 ctxt = svc_rdma_get_context(xprt);
1334 ctxt->count = 1;
1335 ctxt->pages[0] = p;
1336 1343
1337 /* Prepare SEND WR */ 1344 /* Prepare SEND WR */
1338 memset(&err_wr, 0, sizeof err_wr); 1345 memset(&err_wr, 0, sizeof err_wr);
1339 ctxt->wr_op = IB_WR_SEND; 1346 ctxt->wr_op = IB_WR_SEND;
1340 err_wr.wr_id = (unsigned long)ctxt; 1347 err_wr.wr_id = (unsigned long)ctxt;
1341 err_wr.sg_list = &sge; 1348 err_wr.sg_list = ctxt->sge;
1342 err_wr.num_sge = 1; 1349 err_wr.num_sge = 1;
1343 err_wr.opcode = IB_WR_SEND; 1350 err_wr.opcode = IB_WR_SEND;
1344 err_wr.send_flags = IB_SEND_SIGNALED; 1351 err_wr.send_flags = IB_SEND_SIGNALED;
@@ -1348,9 +1355,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1348 if (ret) { 1355 if (ret) {
1349 dprintk("svcrdma: Error %d posting send for protocol error\n", 1356 dprintk("svcrdma: Error %d posting send for protocol error\n",
1350 ret); 1357 ret);
1351 ib_dma_unmap_single(xprt->sc_cm_id->device, 1358 svc_rdma_unmap_dma(ctxt);
1352 sge.addr, PAGE_SIZE,
1353 DMA_FROM_DEVICE);
1354 svc_rdma_put_context(ctxt, 1); 1359 svc_rdma_put_context(ctxt, 1);
1355 } 1360 }
1356} 1361}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a85e866a77f7..0867070bb5ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
237 237
238 dprintk("RPC: %s: called\n", __func__); 238 dprintk("RPC: %s: called\n", __func__);
239 239
240 cancel_delayed_work(&r_xprt->rdma_connect); 240 cancel_delayed_work_sync(&r_xprt->rdma_connect);
241 flush_scheduled_work();
242 241
243 xprt_clear_connected(xprt); 242 xprt_clear_connected(xprt);
244 243
@@ -251,9 +250,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
251 250
252 xprt_rdma_free_addresses(xprt); 251 xprt_rdma_free_addresses(xprt);
253 252
254 kfree(xprt->slot); 253 xprt_free(xprt);
255 xprt->slot = NULL;
256 kfree(xprt);
257 254
258 dprintk("RPC: %s: returning\n", __func__); 255 dprintk("RPC: %s: returning\n", __func__);
259 256
@@ -285,23 +282,14 @@ xprt_setup_rdma(struct xprt_create *args)
285 return ERR_PTR(-EBADF); 282 return ERR_PTR(-EBADF);
286 } 283 }
287 284
288 xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); 285 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
286 xprt_rdma_slot_table_entries);
289 if (xprt == NULL) { 287 if (xprt == NULL) {
290 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", 288 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
291 __func__); 289 __func__);
292 return ERR_PTR(-ENOMEM); 290 return ERR_PTR(-ENOMEM);
293 } 291 }
294 292
295 xprt->max_reqs = xprt_rdma_slot_table_entries;
296 xprt->slot = kcalloc(xprt->max_reqs,
297 sizeof(struct rpc_rqst), GFP_KERNEL);
298 if (xprt->slot == NULL) {
299 dprintk("RPC: %s: couldn't allocate %d slots\n",
300 __func__, xprt->max_reqs);
301 kfree(xprt);
302 return ERR_PTR(-ENOMEM);
303 }
304
305 /* 60 second timeout, no retries */ 293 /* 60 second timeout, no retries */
306 xprt->timeout = &xprt_rdma_default_timeout; 294 xprt->timeout = &xprt_rdma_default_timeout;
307 xprt->bind_timeout = (60U * HZ); 295 xprt->bind_timeout = (60U * HZ);
@@ -410,8 +398,7 @@ out3:
410out2: 398out2:
411 rpcrdma_ia_close(&new_xprt->rx_ia); 399 rpcrdma_ia_close(&new_xprt->rx_ia);
412out1: 400out1:
413 kfree(xprt->slot); 401 xprt_free(xprt);
414 kfree(xprt);
415 return ERR_PTR(rc); 402 return ERR_PTR(rc);
416} 403}
417 404
@@ -460,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task)
460 } else { 447 } else {
461 schedule_delayed_work(&r_xprt->rdma_connect, 0); 448 schedule_delayed_work(&r_xprt->rdma_connect, 0);
462 if (!RPC_IS_ASYNC(task)) 449 if (!RPC_IS_ASYNC(task))
463 flush_scheduled_work(); 450 flush_delayed_work(&r_xprt->rdma_connect);
464 } 451 }
465} 452}
466 453
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fe9306bf10cc..dfcab5ac65af 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -774,8 +774,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
774 774
775 xs_close(xprt); 775 xs_close(xprt);
776 xs_free_peer_addresses(xprt); 776 xs_free_peer_addresses(xprt);
777 kfree(xprt->slot); 777 xprt_free(xprt);
778 kfree(xprt);
779 module_put(THIS_MODULE); 778 module_put(THIS_MODULE);
780} 779}
781 780
@@ -1516,7 +1515,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
1516 xs_update_peer_port(xprt); 1515 xs_update_peer_port(xprt);
1517} 1516}
1518 1517
1519static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) 1518static unsigned short xs_get_srcport(struct sock_xprt *transport)
1520{ 1519{
1521 unsigned short port = transport->srcport; 1520 unsigned short port = transport->srcport;
1522 1521
@@ -1525,7 +1524,7 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket
1525 return port; 1524 return port;
1526} 1525}
1527 1526
1528static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) 1527static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
1529{ 1528{
1530 if (transport->srcport != 0) 1529 if (transport->srcport != 0)
1531 transport->srcport = 0; 1530 transport->srcport = 0;
@@ -1535,23 +1534,18 @@ static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket
1535 return xprt_max_resvport; 1534 return xprt_max_resvport;
1536 return --port; 1535 return --port;
1537} 1536}
1538 1537static int xs_bind(struct sock_xprt *transport, struct socket *sock)
1539static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1540{ 1538{
1541 struct sockaddr_in myaddr = { 1539 struct sockaddr_storage myaddr;
1542 .sin_family = AF_INET,
1543 };
1544 struct sockaddr_in *sa;
1545 int err, nloop = 0; 1540 int err, nloop = 0;
1546 unsigned short port = xs_get_srcport(transport, sock); 1541 unsigned short port = xs_get_srcport(transport);
1547 unsigned short last; 1542 unsigned short last;
1548 1543
1549 sa = (struct sockaddr_in *)&transport->srcaddr; 1544 memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
1550 myaddr.sin_addr = sa->sin_addr;
1551 do { 1545 do {
1552 myaddr.sin_port = htons(port); 1546 rpc_set_port((struct sockaddr *)&myaddr, port);
1553 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1547 err = kernel_bind(sock, (struct sockaddr *)&myaddr,
1554 sizeof(myaddr)); 1548 transport->xprt.addrlen);
1555 if (port == 0) 1549 if (port == 0)
1556 break; 1550 break;
1557 if (err == 0) { 1551 if (err == 0) {
@@ -1559,48 +1553,23 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1559 break; 1553 break;
1560 } 1554 }
1561 last = port; 1555 last = port;
1562 port = xs_next_srcport(transport, sock, port); 1556 port = xs_next_srcport(transport, port);
1563 if (port > last) 1557 if (port > last)
1564 nloop++; 1558 nloop++;
1565 } while (err == -EADDRINUSE && nloop != 2); 1559 } while (err == -EADDRINUSE && nloop != 2);
1566 dprintk("RPC: %s %pI4:%u: %s (%d)\n",
1567 __func__, &myaddr.sin_addr,
1568 port, err ? "failed" : "ok", err);
1569 return err;
1570}
1571
1572static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
1573{
1574 struct sockaddr_in6 myaddr = {
1575 .sin6_family = AF_INET6,
1576 };
1577 struct sockaddr_in6 *sa;
1578 int err, nloop = 0;
1579 unsigned short port = xs_get_srcport(transport, sock);
1580 unsigned short last;
1581 1560
1582 sa = (struct sockaddr_in6 *)&transport->srcaddr; 1561 if (myaddr.ss_family == AF_INET)
1583 myaddr.sin6_addr = sa->sin6_addr; 1562 dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__,
1584 do { 1563 &((struct sockaddr_in *)&myaddr)->sin_addr,
1585 myaddr.sin6_port = htons(port); 1564 port, err ? "failed" : "ok", err);
1586 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1565 else
1587 sizeof(myaddr)); 1566 dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__,
1588 if (port == 0) 1567 &((struct sockaddr_in6 *)&myaddr)->sin6_addr,
1589 break; 1568 port, err ? "failed" : "ok", err);
1590 if (err == 0) {
1591 transport->srcport = port;
1592 break;
1593 }
1594 last = port;
1595 port = xs_next_srcport(transport, sock, port);
1596 if (port > last)
1597 nloop++;
1598 } while (err == -EADDRINUSE && nloop != 2);
1599 dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n",
1600 &myaddr.sin6_addr, port, err ? "failed" : "ok", err);
1601 return err; 1569 return err;
1602} 1570}
1603 1571
1572
1604#ifdef CONFIG_DEBUG_LOCK_ALLOC 1573#ifdef CONFIG_DEBUG_LOCK_ALLOC
1605static struct lock_class_key xs_key[2]; 1574static struct lock_class_key xs_key[2];
1606static struct lock_class_key xs_slock_key[2]; 1575static struct lock_class_key xs_slock_key[2];
@@ -1622,6 +1591,18 @@ static inline void xs_reclassify_socket6(struct socket *sock)
1622 sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", 1591 sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
1623 &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); 1592 &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
1624} 1593}
1594
1595static inline void xs_reclassify_socket(int family, struct socket *sock)
1596{
1597 switch (family) {
1598 case AF_INET:
1599 xs_reclassify_socket4(sock);
1600 break;
1601 case AF_INET6:
1602 xs_reclassify_socket6(sock);
1603 break;
1604 }
1605}
1625#else 1606#else
1626static inline void xs_reclassify_socket4(struct socket *sock) 1607static inline void xs_reclassify_socket4(struct socket *sock)
1627{ 1608{
@@ -1630,8 +1611,36 @@ static inline void xs_reclassify_socket4(struct socket *sock)
1630static inline void xs_reclassify_socket6(struct socket *sock) 1611static inline void xs_reclassify_socket6(struct socket *sock)
1631{ 1612{
1632} 1613}
1614
1615static inline void xs_reclassify_socket(int family, struct socket *sock)
1616{
1617}
1633#endif 1618#endif
1634 1619
1620static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1621 struct sock_xprt *transport, int family, int type, int protocol)
1622{
1623 struct socket *sock;
1624 int err;
1625
1626 err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1);
1627 if (err < 0) {
1628 dprintk("RPC: can't create %d transport socket (%d).\n",
1629 protocol, -err);
1630 goto out;
1631 }
1632 xs_reclassify_socket(family, sock);
1633
1634 if (xs_bind(transport, sock)) {
1635 sock_release(sock);
1636 goto out;
1637 }
1638
1639 return sock;
1640out:
1641 return ERR_PTR(err);
1642}
1643
1635static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1644static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1636{ 1645{
1637 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1646 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1661,82 +1670,23 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1661 xs_udp_do_set_buffer_size(xprt); 1670 xs_udp_do_set_buffer_size(xprt);
1662} 1671}
1663 1672
1664/** 1673static void xs_udp_setup_socket(struct work_struct *work)
1665 * xs_udp_connect_worker4 - set up a UDP socket
1666 * @work: RPC transport to connect
1667 *
1668 * Invoked by a work queue tasklet.
1669 */
1670static void xs_udp_connect_worker4(struct work_struct *work)
1671{ 1674{
1672 struct sock_xprt *transport = 1675 struct sock_xprt *transport =
1673 container_of(work, struct sock_xprt, connect_worker.work); 1676 container_of(work, struct sock_xprt, connect_worker.work);
1674 struct rpc_xprt *xprt = &transport->xprt; 1677 struct rpc_xprt *xprt = &transport->xprt;
1675 struct socket *sock = transport->sock; 1678 struct socket *sock = transport->sock;
1676 int err, status = -EIO; 1679 int status = -EIO;
1677 1680
1678 if (xprt->shutdown) 1681 if (xprt->shutdown)
1679 goto out; 1682 goto out;
1680 1683
1681 /* Start by resetting any existing state */ 1684 /* Start by resetting any existing state */
1682 xs_reset_transport(transport); 1685 xs_reset_transport(transport);
1683 1686 sock = xs_create_sock(xprt, transport,
1684 err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1687 xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
1685 if (err < 0) { 1688 if (IS_ERR(sock))
1686 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1687 goto out; 1689 goto out;
1688 }
1689 xs_reclassify_socket4(sock);
1690
1691 if (xs_bind4(transport, sock)) {
1692 sock_release(sock);
1693 goto out;
1694 }
1695
1696 dprintk("RPC: worker connecting xprt %p via %s to "
1697 "%s (port %s)\n", xprt,
1698 xprt->address_strings[RPC_DISPLAY_PROTO],
1699 xprt->address_strings[RPC_DISPLAY_ADDR],
1700 xprt->address_strings[RPC_DISPLAY_PORT]);
1701
1702 xs_udp_finish_connecting(xprt, sock);
1703 status = 0;
1704out:
1705 xprt_clear_connecting(xprt);
1706 xprt_wake_pending_tasks(xprt, status);
1707}
1708
1709/**
1710 * xs_udp_connect_worker6 - set up a UDP socket
1711 * @work: RPC transport to connect
1712 *
1713 * Invoked by a work queue tasklet.
1714 */
1715static void xs_udp_connect_worker6(struct work_struct *work)
1716{
1717 struct sock_xprt *transport =
1718 container_of(work, struct sock_xprt, connect_worker.work);
1719 struct rpc_xprt *xprt = &transport->xprt;
1720 struct socket *sock = transport->sock;
1721 int err, status = -EIO;
1722
1723 if (xprt->shutdown)
1724 goto out;
1725
1726 /* Start by resetting any existing state */
1727 xs_reset_transport(transport);
1728
1729 err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
1730 if (err < 0) {
1731 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1732 goto out;
1733 }
1734 xs_reclassify_socket6(sock);
1735
1736 if (xs_bind6(transport, sock) < 0) {
1737 sock_release(sock);
1738 goto out;
1739 }
1740 1690
1741 dprintk("RPC: worker connecting xprt %p via %s to " 1691 dprintk("RPC: worker connecting xprt %p via %s to "
1742 "%s (port %s)\n", xprt, 1692 "%s (port %s)\n", xprt,
@@ -1755,12 +1705,12 @@ out:
1755 * We need to preserve the port number so the reply cache on the server can 1705 * We need to preserve the port number so the reply cache on the server can
1756 * find our cached RPC replies when we get around to reconnecting. 1706 * find our cached RPC replies when we get around to reconnecting.
1757 */ 1707 */
1758static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) 1708static void xs_abort_connection(struct sock_xprt *transport)
1759{ 1709{
1760 int result; 1710 int result;
1761 struct sockaddr any; 1711 struct sockaddr any;
1762 1712
1763 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); 1713 dprintk("RPC: disconnecting xprt %p to reuse port\n", transport);
1764 1714
1765 /* 1715 /*
1766 * Disconnect the transport socket by doing a connect operation 1716 * Disconnect the transport socket by doing a connect operation
@@ -1770,13 +1720,13 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo
1770 any.sa_family = AF_UNSPEC; 1720 any.sa_family = AF_UNSPEC;
1771 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 1721 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1772 if (!result) 1722 if (!result)
1773 xs_sock_mark_closed(xprt); 1723 xs_sock_mark_closed(&transport->xprt);
1774 else 1724 else
1775 dprintk("RPC: AF_UNSPEC connect return code %d\n", 1725 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1776 result); 1726 result);
1777} 1727}
1778 1728
1779static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) 1729static void xs_tcp_reuse_connection(struct sock_xprt *transport)
1780{ 1730{
1781 unsigned int state = transport->inet->sk_state; 1731 unsigned int state = transport->inet->sk_state;
1782 1732
@@ -1799,7 +1749,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra
1799 "sk_shutdown set to %d\n", 1749 "sk_shutdown set to %d\n",
1800 __func__, transport->inet->sk_shutdown); 1750 __func__, transport->inet->sk_shutdown);
1801 } 1751 }
1802 xs_abort_connection(xprt, transport); 1752 xs_abort_connection(transport);
1803} 1753}
1804 1754
1805static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1755static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -1852,12 +1802,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1852 * 1802 *
1853 * Invoked by a work queue tasklet. 1803 * Invoked by a work queue tasklet.
1854 */ 1804 */
1855static void xs_tcp_setup_socket(struct rpc_xprt *xprt, 1805static void xs_tcp_setup_socket(struct work_struct *work)
1856 struct sock_xprt *transport,
1857 struct socket *(*create_sock)(struct rpc_xprt *,
1858 struct sock_xprt *))
1859{ 1806{
1807 struct sock_xprt *transport =
1808 container_of(work, struct sock_xprt, connect_worker.work);
1860 struct socket *sock = transport->sock; 1809 struct socket *sock = transport->sock;
1810 struct rpc_xprt *xprt = &transport->xprt;
1861 int status = -EIO; 1811 int status = -EIO;
1862 1812
1863 if (xprt->shutdown) 1813 if (xprt->shutdown)
@@ -1865,7 +1815,8 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1865 1815
1866 if (!sock) { 1816 if (!sock) {
1867 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 1817 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1868 sock = create_sock(xprt, transport); 1818 sock = xs_create_sock(xprt, transport,
1819 xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
1869 if (IS_ERR(sock)) { 1820 if (IS_ERR(sock)) {
1870 status = PTR_ERR(sock); 1821 status = PTR_ERR(sock);
1871 goto out; 1822 goto out;
@@ -1876,7 +1827,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1876 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, 1827 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
1877 &xprt->state); 1828 &xprt->state);
1878 /* "close" the socket, preserving the local port */ 1829 /* "close" the socket, preserving the local port */
1879 xs_tcp_reuse_connection(xprt, transport); 1830 xs_tcp_reuse_connection(transport);
1880 1831
1881 if (abort_and_exit) 1832 if (abort_and_exit)
1882 goto out_eagain; 1833 goto out_eagain;
@@ -1925,84 +1876,6 @@ out:
1925 xprt_wake_pending_tasks(xprt, status); 1876 xprt_wake_pending_tasks(xprt, status);
1926} 1877}
1927 1878
1928static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
1929 struct sock_xprt *transport)
1930{
1931 struct socket *sock;
1932 int err;
1933
1934 /* start from scratch */
1935 err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
1936 if (err < 0) {
1937 dprintk("RPC: can't create TCP transport socket (%d).\n",
1938 -err);
1939 goto out_err;
1940 }
1941 xs_reclassify_socket4(sock);
1942
1943 if (xs_bind4(transport, sock) < 0) {
1944 sock_release(sock);
1945 goto out_err;
1946 }
1947 return sock;
1948out_err:
1949 return ERR_PTR(-EIO);
1950}
1951
1952/**
1953 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
1954 * @work: RPC transport to connect
1955 *
1956 * Invoked by a work queue tasklet.
1957 */
1958static void xs_tcp_connect_worker4(struct work_struct *work)
1959{
1960 struct sock_xprt *transport =
1961 container_of(work, struct sock_xprt, connect_worker.work);
1962 struct rpc_xprt *xprt = &transport->xprt;
1963
1964 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
1965}
1966
1967static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
1968 struct sock_xprt *transport)
1969{
1970 struct socket *sock;
1971 int err;
1972
1973 /* start from scratch */
1974 err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
1975 if (err < 0) {
1976 dprintk("RPC: can't create TCP transport socket (%d).\n",
1977 -err);
1978 goto out_err;
1979 }
1980 xs_reclassify_socket6(sock);
1981
1982 if (xs_bind6(transport, sock) < 0) {
1983 sock_release(sock);
1984 goto out_err;
1985 }
1986 return sock;
1987out_err:
1988 return ERR_PTR(-EIO);
1989}
1990
1991/**
1992 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
1993 * @work: RPC transport to connect
1994 *
1995 * Invoked by a work queue tasklet.
1996 */
1997static void xs_tcp_connect_worker6(struct work_struct *work)
1998{
1999 struct sock_xprt *transport =
2000 container_of(work, struct sock_xprt, connect_worker.work);
2001 struct rpc_xprt *xprt = &transport->xprt;
2002
2003 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
2004}
2005
2006/** 1879/**
2007 * xs_connect - connect a socket to a remote endpoint 1880 * xs_connect - connect a socket to a remote endpoint
2008 * @task: address of RPC task that manages state of connect request 1881 * @task: address of RPC task that manages state of connect request
@@ -2262,6 +2135,31 @@ static struct rpc_xprt_ops bc_tcp_ops = {
2262 .print_stats = xs_tcp_print_stats, 2135 .print_stats = xs_tcp_print_stats,
2263}; 2136};
2264 2137
2138static int xs_init_anyaddr(const int family, struct sockaddr *sap)
2139{
2140 static const struct sockaddr_in sin = {
2141 .sin_family = AF_INET,
2142 .sin_addr.s_addr = htonl(INADDR_ANY),
2143 };
2144 static const struct sockaddr_in6 sin6 = {
2145 .sin6_family = AF_INET6,
2146 .sin6_addr = IN6ADDR_ANY_INIT,
2147 };
2148
2149 switch (family) {
2150 case AF_INET:
2151 memcpy(sap, &sin, sizeof(sin));
2152 break;
2153 case AF_INET6:
2154 memcpy(sap, &sin6, sizeof(sin6));
2155 break;
2156 default:
2157 dprintk("RPC: %s: Bad address family\n", __func__);
2158 return -EAFNOSUPPORT;
2159 }
2160 return 0;
2161}
2162
2265static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, 2163static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2266 unsigned int slot_table_size) 2164 unsigned int slot_table_size)
2267{ 2165{
@@ -2273,27 +2171,25 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2273 return ERR_PTR(-EBADF); 2171 return ERR_PTR(-EBADF);
2274 } 2172 }
2275 2173
2276 new = kzalloc(sizeof(*new), GFP_KERNEL); 2174 xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
2277 if (new == NULL) { 2175 if (xprt == NULL) {
2278 dprintk("RPC: xs_setup_xprt: couldn't allocate " 2176 dprintk("RPC: xs_setup_xprt: couldn't allocate "
2279 "rpc_xprt\n"); 2177 "rpc_xprt\n");
2280 return ERR_PTR(-ENOMEM); 2178 return ERR_PTR(-ENOMEM);
2281 } 2179 }
2282 xprt = &new->xprt;
2283
2284 xprt->max_reqs = slot_table_size;
2285 xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
2286 if (xprt->slot == NULL) {
2287 kfree(xprt);
2288 dprintk("RPC: xs_setup_xprt: couldn't allocate slot "
2289 "table\n");
2290 return ERR_PTR(-ENOMEM);
2291 }
2292 2180
2181 new = container_of(xprt, struct sock_xprt, xprt);
2293 memcpy(&xprt->addr, args->dstaddr, args->addrlen); 2182 memcpy(&xprt->addr, args->dstaddr, args->addrlen);
2294 xprt->addrlen = args->addrlen; 2183 xprt->addrlen = args->addrlen;
2295 if (args->srcaddr) 2184 if (args->srcaddr)
2296 memcpy(&new->srcaddr, args->srcaddr, args->addrlen); 2185 memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
2186 else {
2187 int err;
2188 err = xs_init_anyaddr(args->dstaddr->sa_family,
2189 (struct sockaddr *)&new->srcaddr);
2190 if (err != 0)
2191 return ERR_PTR(err);
2192 }
2297 2193
2298 return xprt; 2194 return xprt;
2299} 2195}
@@ -2341,7 +2237,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2341 xprt_set_bound(xprt); 2237 xprt_set_bound(xprt);
2342 2238
2343 INIT_DELAYED_WORK(&transport->connect_worker, 2239 INIT_DELAYED_WORK(&transport->connect_worker,
2344 xs_udp_connect_worker4); 2240 xs_udp_setup_socket);
2345 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); 2241 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
2346 break; 2242 break;
2347 case AF_INET6: 2243 case AF_INET6:
@@ -2349,7 +2245,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2349 xprt_set_bound(xprt); 2245 xprt_set_bound(xprt);
2350 2246
2351 INIT_DELAYED_WORK(&transport->connect_worker, 2247 INIT_DELAYED_WORK(&transport->connect_worker,
2352 xs_udp_connect_worker6); 2248 xs_udp_setup_socket);
2353 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); 2249 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
2354 break; 2250 break;
2355 default: 2251 default:
@@ -2371,8 +2267,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2371 return xprt; 2267 return xprt;
2372 ret = ERR_PTR(-EINVAL); 2268 ret = ERR_PTR(-EINVAL);
2373out_err: 2269out_err:
2374 kfree(xprt->slot); 2270 xprt_free(xprt);
2375 kfree(xprt);
2376 return ret; 2271 return ret;
2377} 2272}
2378 2273
@@ -2416,7 +2311,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2416 xprt_set_bound(xprt); 2311 xprt_set_bound(xprt);
2417 2312
2418 INIT_DELAYED_WORK(&transport->connect_worker, 2313 INIT_DELAYED_WORK(&transport->connect_worker,
2419 xs_tcp_connect_worker4); 2314 xs_tcp_setup_socket);
2420 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); 2315 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
2421 break; 2316 break;
2422 case AF_INET6: 2317 case AF_INET6:
@@ -2424,7 +2319,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2424 xprt_set_bound(xprt); 2319 xprt_set_bound(xprt);
2425 2320
2426 INIT_DELAYED_WORK(&transport->connect_worker, 2321 INIT_DELAYED_WORK(&transport->connect_worker,
2427 xs_tcp_connect_worker6); 2322 xs_tcp_setup_socket);
2428 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 2323 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
2429 break; 2324 break;
2430 default: 2325 default:
@@ -2447,8 +2342,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2447 return xprt; 2342 return xprt;
2448 ret = ERR_PTR(-EINVAL); 2343 ret = ERR_PTR(-EINVAL);
2449out_err: 2344out_err:
2450 kfree(xprt->slot); 2345 xprt_free(xprt);
2451 kfree(xprt);
2452 return ret; 2346 return ret;
2453} 2347}
2454 2348
@@ -2507,15 +2401,10 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2507 goto out_err; 2401 goto out_err;
2508 } 2402 }
2509 2403
2510 if (xprt_bound(xprt)) 2404 dprintk("RPC: set up xprt to %s (port %s) via %s\n",
2511 dprintk("RPC: set up xprt to %s (port %s) via %s\n", 2405 xprt->address_strings[RPC_DISPLAY_ADDR],
2512 xprt->address_strings[RPC_DISPLAY_ADDR], 2406 xprt->address_strings[RPC_DISPLAY_PORT],
2513 xprt->address_strings[RPC_DISPLAY_PORT], 2407 xprt->address_strings[RPC_DISPLAY_PROTO]);
2514 xprt->address_strings[RPC_DISPLAY_PROTO]);
2515 else
2516 dprintk("RPC: set up xprt to %s (autobind) via %s\n",
2517 xprt->address_strings[RPC_DISPLAY_ADDR],
2518 xprt->address_strings[RPC_DISPLAY_PROTO]);
2519 2408
2520 /* 2409 /*
2521 * Since we don't want connections for the backchannel, we set 2410 * Since we don't want connections for the backchannel, we set
@@ -2528,8 +2417,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2528 return xprt; 2417 return xprt;
2529 ret = ERR_PTR(-EINVAL); 2418 ret = ERR_PTR(-EINVAL);
2530out_err: 2419out_err:
2531 kfree(xprt->slot); 2420 xprt_free(xprt);
2532 kfree(xprt);
2533 return ret; 2421 return ret;
2534} 2422}
2535 2423
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 33217fc3d697..e9f0d5004483 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -396,6 +396,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
396 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 396 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
397 struct tipc_sock *tsock = tipc_sk(sock->sk); 397 struct tipc_sock *tsock = tipc_sk(sock->sk);
398 398
399 memset(addr, 0, sizeof(*addr));
399 if (peer) { 400 if (peer) {
400 if ((sock->state != SS_CONNECTED) && 401 if ((sock->state != SS_CONNECTED) &&
401 ((peer != 2) || (sock->state != SS_DISCONNECTING))) 402 ((peer != 2) || (sock->state != SS_DISCONNECTING)))
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0ebc777a6660..2268e6798124 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,7 +117,7 @@
117 117
118static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; 118static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119static DEFINE_SPINLOCK(unix_table_lock); 119static DEFINE_SPINLOCK(unix_table_lock);
120static atomic_t unix_nr_socks = ATOMIC_INIT(0); 120static atomic_long_t unix_nr_socks;
121 121
122#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) 122#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
123 123
@@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk)
360 if (u->addr) 360 if (u->addr)
361 unix_release_addr(u->addr); 361 unix_release_addr(u->addr);
362 362
363 atomic_dec(&unix_nr_socks); 363 atomic_long_dec(&unix_nr_socks);
364 local_bh_disable(); 364 local_bh_disable();
365 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 365 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366 local_bh_enable(); 366 local_bh_enable();
367#ifdef UNIX_REFCNT_DEBUG 367#ifdef UNIX_REFCNT_DEBUG
368 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, 368 printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
369 atomic_read(&unix_nr_socks)); 369 atomic_long_read(&unix_nr_socks));
370#endif 370#endif
371} 371}
372 372
@@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
606 struct sock *sk = NULL; 606 struct sock *sk = NULL;
607 struct unix_sock *u; 607 struct unix_sock *u;
608 608
609 atomic_inc(&unix_nr_socks); 609 atomic_long_inc(&unix_nr_socks);
610 if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) 610 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
611 goto out; 611 goto out;
612 612
613 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); 613 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
@@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
632 unix_insert_socket(unix_sockets_unbound, sk); 632 unix_insert_socket(unix_sockets_unbound, sk);
633out: 633out:
634 if (sk == NULL) 634 if (sk == NULL)
635 atomic_dec(&unix_nr_socks); 635 atomic_long_dec(&unix_nr_socks);
636 else { 636 else {
637 local_bh_disable(); 637 local_bh_disable();
638 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 638 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -1343,9 +1343,25 @@ static void unix_destruct_scm(struct sk_buff *skb)
1343 sock_wfree(skb); 1343 sock_wfree(skb);
1344} 1344}
1345 1345
1346#define MAX_RECURSION_LEVEL 4
1347
1346static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1348static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1347{ 1349{
1348 int i; 1350 int i;
1351 unsigned char max_level = 0;
1352 int unix_sock_count = 0;
1353
1354 for (i = scm->fp->count - 1; i >= 0; i--) {
1355 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1356
1357 if (sk) {
1358 unix_sock_count++;
1359 max_level = max(max_level,
1360 unix_sk(sk)->recursion_level);
1361 }
1362 }
1363 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1364 return -ETOOMANYREFS;
1349 1365
1350 /* 1366 /*
1351 * Need to duplicate file references for the sake of garbage 1367 * Need to duplicate file references for the sake of garbage
@@ -1356,9 +1372,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1356 if (!UNIXCB(skb).fp) 1372 if (!UNIXCB(skb).fp)
1357 return -ENOMEM; 1373 return -ENOMEM;
1358 1374
1359 for (i = scm->fp->count-1; i >= 0; i--) 1375 if (unix_sock_count) {
1360 unix_inflight(scm->fp->fp[i]); 1376 for (i = scm->fp->count - 1; i >= 0; i--)
1361 return 0; 1377 unix_inflight(scm->fp->fp[i]);
1378 }
1379 return max_level;
1362} 1380}
1363 1381
1364static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1382static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1393,6 +1411,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1393 struct sk_buff *skb; 1411 struct sk_buff *skb;
1394 long timeo; 1412 long timeo;
1395 struct scm_cookie tmp_scm; 1413 struct scm_cookie tmp_scm;
1414 int max_level;
1396 1415
1397 if (NULL == siocb->scm) 1416 if (NULL == siocb->scm)
1398 siocb->scm = &tmp_scm; 1417 siocb->scm = &tmp_scm;
@@ -1431,8 +1450,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1431 goto out; 1450 goto out;
1432 1451
1433 err = unix_scm_to_skb(siocb->scm, skb, true); 1452 err = unix_scm_to_skb(siocb->scm, skb, true);
1434 if (err) 1453 if (err < 0)
1435 goto out_free; 1454 goto out_free;
1455 max_level = err + 1;
1436 unix_get_secdata(siocb->scm, skb); 1456 unix_get_secdata(siocb->scm, skb);
1437 1457
1438 skb_reset_transport_header(skb); 1458 skb_reset_transport_header(skb);
@@ -1514,6 +1534,8 @@ restart:
1514 if (sock_flag(other, SOCK_RCVTSTAMP)) 1534 if (sock_flag(other, SOCK_RCVTSTAMP))
1515 __net_timestamp(skb); 1535 __net_timestamp(skb);
1516 skb_queue_tail(&other->sk_receive_queue, skb); 1536 skb_queue_tail(&other->sk_receive_queue, skb);
1537 if (max_level > unix_sk(other)->recursion_level)
1538 unix_sk(other)->recursion_level = max_level;
1517 unix_state_unlock(other); 1539 unix_state_unlock(other);
1518 other->sk_data_ready(other, len); 1540 other->sk_data_ready(other, len);
1519 sock_put(other); 1541 sock_put(other);
@@ -1544,6 +1566,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1544 int sent = 0; 1566 int sent = 0;
1545 struct scm_cookie tmp_scm; 1567 struct scm_cookie tmp_scm;
1546 bool fds_sent = false; 1568 bool fds_sent = false;
1569 int max_level;
1547 1570
1548 if (NULL == siocb->scm) 1571 if (NULL == siocb->scm)
1549 siocb->scm = &tmp_scm; 1572 siocb->scm = &tmp_scm;
@@ -1607,10 +1630,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1607 1630
1608 /* Only send the fds in the first buffer */ 1631 /* Only send the fds in the first buffer */
1609 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); 1632 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1610 if (err) { 1633 if (err < 0) {
1611 kfree_skb(skb); 1634 kfree_skb(skb);
1612 goto out_err; 1635 goto out_err;
1613 } 1636 }
1637 max_level = err + 1;
1614 fds_sent = true; 1638 fds_sent = true;
1615 1639
1616 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 1640 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
@@ -1626,6 +1650,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1626 goto pipe_err_free; 1650 goto pipe_err_free;
1627 1651
1628 skb_queue_tail(&other->sk_receive_queue, skb); 1652 skb_queue_tail(&other->sk_receive_queue, skb);
1653 if (max_level > unix_sk(other)->recursion_level)
1654 unix_sk(other)->recursion_level = max_level;
1629 unix_state_unlock(other); 1655 unix_state_unlock(other);
1630 other->sk_data_ready(other, size); 1656 other->sk_data_ready(other, size);
1631 sent += size; 1657 sent += size;
@@ -1845,6 +1871,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1845 unix_state_lock(sk); 1871 unix_state_lock(sk);
1846 skb = skb_dequeue(&sk->sk_receive_queue); 1872 skb = skb_dequeue(&sk->sk_receive_queue);
1847 if (skb == NULL) { 1873 if (skb == NULL) {
1874 unix_sk(sk)->recursion_level = 0;
1848 if (copied >= target) 1875 if (copied >= target)
1849 goto unlock; 1876 goto unlock;
1850 1877
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c8df6fda0b1f..f89f83bf828e 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
96unsigned int unix_tot_inflight; 96unsigned int unix_tot_inflight;
97 97
98 98
99static struct sock *unix_get_socket(struct file *filp) 99struct sock *unix_get_socket(struct file *filp)
100{ 100{
101 struct sock *u_sock = NULL; 101 struct sock *u_sock = NULL;
102 struct inode *inode = filp->f_path.dentry->d_inode; 102 struct inode *inode = filp->f_path.dentry->d_inode;
@@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
259} 259}
260 260
261static bool gc_in_progress = false; 261static bool gc_in_progress = false;
262#define UNIX_INFLIGHT_TRIGGER_GC 16000
262 263
263void wait_for_unix_gc(void) 264void wait_for_unix_gc(void)
264{ 265{
266 /*
267 * If number of inflight sockets is insane,
268 * force a garbage collect right now.
269 */
270 if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
271 unix_gc();
265 wait_event(unix_gc_wait, gc_in_progress == false); 272 wait_event(unix_gc_wait, gc_in_progress == false);
266} 273}
267 274
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index d0c92dddb26b..17cd0c04d139 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -44,6 +44,38 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
44 return chan; 44 return chan;
45} 45}
46 46
47static bool can_beacon_sec_chan(struct wiphy *wiphy,
48 struct ieee80211_channel *chan,
49 enum nl80211_channel_type channel_type)
50{
51 struct ieee80211_channel *sec_chan;
52 int diff;
53
54 switch (channel_type) {
55 case NL80211_CHAN_HT40PLUS:
56 diff = 20;
57 break;
58 case NL80211_CHAN_HT40MINUS:
59 diff = -20;
60 break;
61 default:
62 return false;
63 }
64
65 sec_chan = ieee80211_get_channel(wiphy, chan->center_freq + diff);
66 if (!sec_chan)
67 return false;
68
69 /* we'll need a DFS capability later */
70 if (sec_chan->flags & (IEEE80211_CHAN_DISABLED |
71 IEEE80211_CHAN_PASSIVE_SCAN |
72 IEEE80211_CHAN_NO_IBSS |
73 IEEE80211_CHAN_RADAR))
74 return false;
75
76 return true;
77}
78
47int cfg80211_set_freq(struct cfg80211_registered_device *rdev, 79int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
48 struct wireless_dev *wdev, int freq, 80 struct wireless_dev *wdev, int freq,
49 enum nl80211_channel_type channel_type) 81 enum nl80211_channel_type channel_type)
@@ -68,6 +100,28 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
68 if (!chan) 100 if (!chan)
69 return -EINVAL; 101 return -EINVAL;
70 102
103 /* Both channels should be able to initiate communication */
104 if (wdev && (wdev->iftype == NL80211_IFTYPE_ADHOC ||
105 wdev->iftype == NL80211_IFTYPE_AP ||
106 wdev->iftype == NL80211_IFTYPE_AP_VLAN ||
107 wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
108 wdev->iftype == NL80211_IFTYPE_P2P_GO)) {
109 switch (channel_type) {
110 case NL80211_CHAN_HT40PLUS:
111 case NL80211_CHAN_HT40MINUS:
112 if (!can_beacon_sec_chan(&rdev->wiphy, chan,
113 channel_type)) {
114 printk(KERN_DEBUG
115 "cfg80211: Secondary channel not "
116 "allowed to initiate communication\n");
117 return -EINVAL;
118 }
119 break;
120 default:
121 break;
122 }
123 }
124
71 result = rdev->ops->set_channel(&rdev->wiphy, 125 result = rdev->ops->set_channel(&rdev->wiphy,
72 wdev ? wdev->netdev : NULL, 126 wdev ? wdev->netdev : NULL,
73 chan, channel_type); 127 chan, channel_type);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c506241f8637..4e78e3f26798 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -224,8 +224,8 @@ static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
224 } 224 }
225 225
226 *rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); 226 *rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
227 if (IS_ERR(dev)) { 227 if (IS_ERR(*rdev)) {
228 err = PTR_ERR(dev); 228 err = PTR_ERR(*rdev);
229 goto out_rtnl; 229 goto out_rtnl;
230 } 230 }
231 231
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index d14bbf960c18..4b9f8912526c 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1167,7 +1167,7 @@ static int ignore_request(struct wiphy *wiphy,
1167 return 0; 1167 return 0;
1168 return -EALREADY; 1168 return -EALREADY;
1169 } 1169 }
1170 return REG_INTERSECT; 1170 return 0;
1171 case NL80211_REGDOM_SET_BY_DRIVER: 1171 case NL80211_REGDOM_SET_BY_DRIVER:
1172 if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { 1172 if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) {
1173 if (regdom_changes(pending_request->alpha2)) 1173 if (regdom_changes(pending_request->alpha2))
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 771bab00754b..55187c8f6420 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -61,6 +61,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
61 while (len > 0) { 61 while (len > 0) {
62 switch (*p & X25_FAC_CLASS_MASK) { 62 switch (*p & X25_FAC_CLASS_MASK) {
63 case X25_FAC_CLASS_A: 63 case X25_FAC_CLASS_A:
64 if (len < 2)
65 return 0;
64 switch (*p) { 66 switch (*p) {
65 case X25_FAC_REVERSE: 67 case X25_FAC_REVERSE:
66 if((p[1] & 0x81) == 0x81) { 68 if((p[1] & 0x81) == 0x81) {
@@ -104,6 +106,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
104 len -= 2; 106 len -= 2;
105 break; 107 break;
106 case X25_FAC_CLASS_B: 108 case X25_FAC_CLASS_B:
109 if (len < 3)
110 return 0;
107 switch (*p) { 111 switch (*p) {
108 case X25_FAC_PACKET_SIZE: 112 case X25_FAC_PACKET_SIZE:
109 facilities->pacsize_in = p[1]; 113 facilities->pacsize_in = p[1];
@@ -125,6 +129,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
125 len -= 3; 129 len -= 3;
126 break; 130 break;
127 case X25_FAC_CLASS_C: 131 case X25_FAC_CLASS_C:
132 if (len < 4)
133 return 0;
128 printk(KERN_DEBUG "X.25: unknown facility %02X, " 134 printk(KERN_DEBUG "X.25: unknown facility %02X, "
129 "values %02X, %02X, %02X\n", 135 "values %02X, %02X, %02X\n",
130 p[0], p[1], p[2], p[3]); 136 p[0], p[1], p[2], p[3]);
@@ -132,26 +138,26 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
132 len -= 4; 138 len -= 4;
133 break; 139 break;
134 case X25_FAC_CLASS_D: 140 case X25_FAC_CLASS_D:
141 if (len < p[1] + 2)
142 return 0;
135 switch (*p) { 143 switch (*p) {
136 case X25_FAC_CALLING_AE: 144 case X25_FAC_CALLING_AE:
137 if (p[1] > X25_MAX_DTE_FACIL_LEN) 145 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
138 break; 146 return 0;
139 dte_facs->calling_len = p[2]; 147 dte_facs->calling_len = p[2];
140 memcpy(dte_facs->calling_ae, &p[3], p[1] - 1); 148 memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
141 *vc_fac_mask |= X25_MASK_CALLING_AE; 149 *vc_fac_mask |= X25_MASK_CALLING_AE;
142 break; 150 break;
143 case X25_FAC_CALLED_AE: 151 case X25_FAC_CALLED_AE:
144 if (p[1] > X25_MAX_DTE_FACIL_LEN) 152 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
145 break; 153 return 0;
146 dte_facs->called_len = p[2]; 154 dte_facs->called_len = p[2];
147 memcpy(dte_facs->called_ae, &p[3], p[1] - 1); 155 memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
148 *vc_fac_mask |= X25_MASK_CALLED_AE; 156 *vc_fac_mask |= X25_MASK_CALLED_AE;
149 break; 157 break;
150 default: 158 default:
151 printk(KERN_DEBUG "X.25: unknown facility %02X," 159 printk(KERN_DEBUG "X.25: unknown facility %02X,"
152 "length %d, values %02X, %02X, " 160 "length %d\n", p[0], p[1]);
153 "%02X, %02X\n",
154 p[0], p[1], p[2], p[3], p[4], p[5]);
155 break; 161 break;
156 } 162 }
157 len -= p[1] + 2; 163 len -= p[1] + 2;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 63178961efac..f729f022be69 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -119,6 +119,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
119 &x25->vc_facil_mask); 119 &x25->vc_facil_mask);
120 if (len > 0) 120 if (len > 0)
121 skb_pull(skb, len); 121 skb_pull(skb, len);
122 else
123 return -1;
122 /* 124 /*
123 * Copy any Call User Data. 125 * Copy any Call User Data.
124 */ 126 */
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index a2023ec52329..1e98bc0fe0a5 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)
19 if (sz <= PAGE_SIZE) 19 if (sz <= PAGE_SIZE)
20 n = kzalloc(sz, GFP_KERNEL); 20 n = kzalloc(sz, GFP_KERNEL);
21 else if (hashdist) 21 else if (hashdist)
22 n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 22 n = vzalloc(sz);
23 else 23 else
24 n = (struct hlist_head *) 24 n = (struct hlist_head *)
25 __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 25 __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,