Diffstat (limited to 'net')
-rw-r--r--  net/802/fddi.c | 2
-rw-r--r--  net/802/tr.c | 3
-rw-r--r--  net/8021q/vlan.c | 2
-rw-r--r--  net/8021q/vlan_core.c | 4
-rw-r--r--  net/8021q/vlan_dev.c | 8
-rw-r--r--  net/9p/client.c | 114
-rw-r--r--  net/9p/trans_fd.c | 14
-rw-r--r--  net/9p/trans_rdma.c | 1
-rw-r--r--  net/9p/trans_virtio.c | 1
-rw-r--r--  net/Kconfig | 8
-rw-r--r--  net/atm/br2684.c | 1
-rw-r--r--  net/ax25/ax25_uid.c | 12
-rw-r--r--  net/bluetooth/hci_conn.c | 18
-rw-r--r--  net/bluetooth/hci_event.c | 74
-rw-r--r--  net/bluetooth/hci_sysfs.c | 81
-rw-r--r--  net/bluetooth/rfcomm/core.c | 2
-rw-r--r--  net/bluetooth/rfcomm/tty.c | 6
-rw-r--r--  net/bridge/br_input.c | 5
-rw-r--r--  net/bridge/br_netfilter.c | 10
-rw-r--r--  net/bridge/br_stp.c | 3
-rw-r--r--  net/can/af_can.c | 4
-rw-r--r--  net/core/datagram.c | 14
-rw-r--r--  net/core/dev.c | 36
-rw-r--r--  net/core/ethtool.c | 3
-rw-r--r--  net/core/gen_estimator.c | 13
-rw-r--r--  net/core/netpoll.c | 8
-rw-r--r--  net/core/pktgen.c | 2
-rw-r--r--  net/core/skbuff.c | 31
-rw-r--r--  net/core/sock.c | 8
-rw-r--r--  net/ipv4/Kconfig | 4
-rw-r--r--  net/ipv4/fib_trie.c | 6
-rw-r--r--  net/ipv4/ipconfig.c | 12
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 125
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 126
-rw-r--r--  net/ipv4/netfilter/nf_nat_core.c | 3
-rw-r--r--  net/ipv4/route.c | 62
-rw-r--r--  net/ipv4/tcp.c | 16
-rw-r--r--  net/ipv4/tcp_input.c | 13
-rw-r--r--  net/ipv4/tcp_output.c | 73
-rw-r--r--  net/ipv4/tcp_vegas.c | 11
-rw-r--r--  net/ipv4/udp.c | 3
-rw-r--r--  net/ipv6/Kconfig | 18
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 123
-rw-r--r--  net/ipv6/netfilter/ip6t_ipv6header.c | 6
-rw-r--r--  net/ipv6/route.c | 3
-rw-r--r--  net/ipv6/udp.c | 6
-rw-r--r--  net/ipv6/xfrm6_output.c | 1
-rw-r--r--  net/irda/ircomm/ircomm_tty.c | 256
-rw-r--r--  net/iucv/af_iucv.c | 24
-rw-r--r--  net/mac80211/Kconfig | 9
-rw-r--r--  net/mac80211/main.c | 22
-rw-r--r--  net/mac80211/mlme.c | 38
-rw-r--r--  net/mac80211/pm.c | 15
-rw-r--r--  net/mac80211/rc80211_minstrel.c | 4
-rw-r--r--  net/mac80211/rc80211_pid_algo.c | 73
-rw-r--r--  net/mac80211/rx.c | 15
-rw-r--r--  net/mac80211/tx.c | 2
-rw-r--r--  net/mac80211/wext.c | 43
-rw-r--r--  net/netfilter/Kconfig | 9
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 9
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 30
-rw-r--r--  net/netfilter/nf_conntrack_helper.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 62
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c | 20
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c | 18
-rw-r--r--  net/netfilter/nf_conntrack_proto_udplite.c | 1
-rw-r--r--  net/netfilter/nf_log.c | 4
-rw-r--r--  net/netfilter/nfnetlink.c | 2
-rw-r--r--  net/netfilter/nfnetlink_log.c | 6
-rw-r--r--  net/netfilter/x_tables.c | 53
-rw-r--r--  net/netfilter/xt_cluster.c | 8
-rw-r--r--  net/netfilter/xt_hashlimit.c | 2
-rw-r--r--  net/netfilter/xt_recent.c | 9
-rw-r--r--  net/netlabel/netlabel_addrlist.c | 26
-rw-r--r--  net/netrom/af_netrom.c | 6
-rw-r--r--  net/packet/af_packet.c | 5
-rw-r--r--  net/phonet/Kconfig | 2
-rw-r--r--  net/rds/ib.c | 5
-rw-r--r--  net/rds/ib.h | 28
-rw-r--r--  net/rds/ib_cm.c | 43
-rw-r--r--  net/rds/ib_rdma.c | 43
-rw-r--r--  net/rds/ib_recv.c | 37
-rw-r--r--  net/rds/iw.c | 5
-rw-r--r--  net/rds/iw.h | 28
-rw-r--r--  net/rds/iw_cm.c | 44
-rw-r--r--  net/rds/iw_rdma.c | 44
-rw-r--r--  net/rds/iw_recv.c | 37
-rw-r--r--  net/rds/rds.h | 6
-rw-r--r--  net/rds/send.c | 6
-rw-r--r--  net/rose/af_rose.c | 10
-rw-r--r--  net/rxrpc/ar-connection.c | 12
-rw-r--r--  net/sched/cls_api.c | 25
-rw-r--r--  net/sched/cls_cgroup.c | 25
-rw-r--r--  net/sched/em_meta.c | 6
-rw-r--r--  net/sched/sch_fifo.c | 2
-rw-r--r--  net/sched/sch_netem.c | 8
-rw-r--r--  net/sched/sch_teql.c | 5
-rw-r--r--  net/socket.c | 6
-rw-r--r--  net/sunrpc/Kconfig | 24
-rw-r--r--  net/sunrpc/clnt.c | 48
-rw-r--r--  net/sunrpc/rpcb_clnt.c | 103
-rw-r--r--  net/sunrpc/svc.c | 164
-rw-r--r--  net/sunrpc/svc_xprt.c | 158
-rw-r--r--  net/sunrpc/svcsock.c | 40
-rw-r--r--  net/sunrpc/xprt.c | 95
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 26
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c | 23
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 13
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 3
-rw-r--r--  net/sunrpc/xprtsock.c | 381
-rw-r--r--  net/unix/af_unix.c | 2
-rw-r--r--  net/wimax/Kconfig | 2
-rw-r--r--  net/wimax/op-msg.c | 11
-rw-r--r--  net/wimax/stack.c | 17
-rw-r--r--  net/wireless/core.h | 2
-rw-r--r--  net/wireless/nl80211.c | 4
-rw-r--r--  net/wireless/reg.c | 29
-rw-r--r--  net/wireless/scan.c | 41
-rw-r--r--  net/wireless/wext.c | 7
-rw-r--r--  net/xfrm/xfrm_state.c | 6
123 files changed, 2012 insertions(+), 1496 deletions(-)
diff --git a/net/802/fddi.c b/net/802/fddi.c
index f1611a1e06a7..539e6064e6d4 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -215,3 +215,5 @@ struct net_device *alloc_fddidev(int sizeof_priv)
 	return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup);
 }
 EXPORT_SYMBOL(alloc_fddidev);
+
+MODULE_LICENSE("GPL");
diff --git a/net/802/tr.c b/net/802/tr.c
index e7eb13084d71..e874447ad144 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -561,6 +561,9 @@ static int rif_seq_show(struct seq_file *seq, void *v)
 			}
 			seq_putc(seq, '\n');
 		}
+
+		if (dev)
+			dev_put(dev);
 	}
 	return 0;
 }
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2b7390e377b3..d1e10546eb85 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -492,6 +492,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 				continue;
 
 			dev_change_flags(vlandev, flgs & ~IFF_UP);
+			vlan_transfer_operstate(dev, vlandev);
 		}
 		break;
 
@@ -507,6 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 				continue;
 
 			dev_change_flags(vlandev, flgs | IFF_UP);
+			vlan_transfer_operstate(dev, vlandev);
 		}
 		break;
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 654e45f5719d..c67fe6f75653 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -121,8 +121,10 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 	if (!skb)
 		return NET_RX_DROP;
 
-	if (netpoll_rx_on(skb))
+	if (netpoll_rx_on(skb)) {
+		skb->protocol = eth_type_trans(skb, skb->dev);
 		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci);
+	}
 
 	return napi_frags_finish(napi, skb,
 				 vlan_gro_common(napi, grp, vlan_tci, skb));
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 1b34135cf990..b4b9068e55a7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -462,6 +462,7 @@ static int vlan_dev_open(struct net_device *dev)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_join(dev);
 
+	netif_carrier_on(dev);
 	return 0;
 
 clear_allmulti:
@@ -471,6 +472,7 @@ del_unicast:
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
 		dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN);
 out:
+	netif_carrier_off(dev);
 	return err;
 }
 
@@ -492,6 +494,7 @@ static int vlan_dev_stop(struct net_device *dev)
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
 		dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len);
 
+	netif_carrier_off(dev);
 	return 0;
 }
 
@@ -612,6 +615,8 @@ static int vlan_dev_init(struct net_device *dev)
 	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	int subclass = 0;
 
+	netif_carrier_off(dev);
+
 	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
 	dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI);
 	dev->iflink = real_dev->ifindex;
@@ -668,7 +673,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev,
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
 
-	if (!real_dev->ethtool_ops->get_settings)
+	if (!real_dev->ethtool_ops ||
+	    !real_dev->ethtool_ops->get_settings)
 		return -EOPNOTSUPP;
 
 	return real_dev->ethtool_ops->get_settings(real_dev, cmd);
diff --git a/net/9p/client.c b/net/9p/client.c
index 1eb580c38fbb..dd43a8289b0d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -203,7 +203,6 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 	p9pdu_reset(req->tc);
 	p9pdu_reset(req->rc);
 
-	req->flush_tag = 0;
 	req->tc->tag = tag-1;
 	req->status = REQ_STATUS_ALLOC;
 
@@ -324,35 +323,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
  */
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
 {
-	struct p9_req_t *other_req;
-	unsigned long flags;
-
 	P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
-
-	if (req->status == REQ_STATUS_ERROR)
-		wake_up(req->wq);
-
-	if (req->flush_tag) { /* flush receive path */
-		P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag);
-		spin_lock_irqsave(&c->lock, flags);
-		other_req = p9_tag_lookup(c, req->flush_tag);
-		if (other_req->status != REQ_STATUS_FLSH) /* stale flush */
-			spin_unlock_irqrestore(&c->lock, flags);
-		else {
-			other_req->status = REQ_STATUS_FLSHD;
-			spin_unlock_irqrestore(&c->lock, flags);
-			wake_up(other_req->wq);
-		}
-		p9_free_req(c, req);
-	} else { /* normal receive path */
-		P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag);
-		spin_lock_irqsave(&c->lock, flags);
-		if (req->status != REQ_STATUS_FLSHD)
-			req->status = REQ_STATUS_RCVD;
-		spin_unlock_irqrestore(&c->lock, flags);
-		wake_up(req->wq);
-		P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
-	}
+	wake_up(req->wq);
+	P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
 }
 EXPORT_SYMBOL(p9_client_cb);
 
@@ -486,9 +459,15 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req->flush_tag = oldtag;
 
-	/* we don't free anything here because RPC isn't complete */
+	/* if we haven't received a response for oldreq,
+	   remove it from the list. */
+	spin_lock(&c->lock);
+	if (oldreq->status == REQ_STATUS_FLSH)
+		list_del(&oldreq->req_list);
+	spin_unlock(&c->lock);
+
+	p9_free_req(c, req);
 	return 0;
 }
 
@@ -509,7 +488,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	struct p9_req_t *req;
 	unsigned long flags;
 	int sigpending;
-	int flushed = 0;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
 
@@ -546,42 +524,28 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 		goto reterr;
 	}
 
-	/* if it was a flush we just transmitted, return our tag */
-	if (type == P9_TFLUSH)
-		return req;
-again:
 	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
 	err = wait_event_interruptible(*req->wq,
 						req->status >= REQ_STATUS_RCVD);
-	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n",
-						req->wq, tag, err, flushed);
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n",
+						req->wq, tag, err);
 
 	if (req->status == REQ_STATUS_ERROR) {
 		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
 		err = req->t_err;
-	} else if (err == -ERESTARTSYS && flushed) {
-		P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n");
-		goto again;
-	} else if (req->status == REQ_STATUS_FLSHD) {
-		P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n");
-		err = -ERESTARTSYS;
 	}
 
-	if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) {
+	if ((err == -ERESTARTSYS) && (c->status == Connected)) {
 		P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
-		spin_lock_irqsave(&c->lock, flags);
-		if (req->status == REQ_STATUS_SENT)
-			req->status = REQ_STATUS_FLSH;
-		spin_unlock_irqrestore(&c->lock, flags);
 		sigpending = 1;
-		flushed = 1;
 		clear_thread_flag(TIF_SIGPENDING);
 
-		if (c->trans_mod->cancel(c, req)) {
-			err = p9_client_flush(c, req);
-			if (err == 0)
-				goto again;
-		}
+		if (c->trans_mod->cancel(c, req))
+			p9_client_flush(c, req);
+
+		/* if we received the response anyway, don't signal error */
+		if (req->status == REQ_STATUS_RCVD)
+			err = 0;
 	}
 
 	if (sigpending) {
@@ -1244,19 +1208,53 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 		ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
 		ret->n_uid, ret->n_gid, ret->n_muid);
 
+	p9_free_req(clnt, req);
+	return ret;
+
 free_and_error:
 	p9_free_req(clnt, req);
 error:
-	return ret;
+	kfree(ret);
+	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_stat);
 
+static int p9_client_statsize(struct p9_wstat *wst, int optional)
+{
+	int ret;
+
+	/* size[2] type[2] dev[4] qid[13] */
+	/* mode[4] atime[4] mtime[4] length[8]*/
+	/* name[s] uid[s] gid[s] muid[s] */
+	ret = 2+2+4+13+4+4+4+8+2+2+2+2;
+
+	if (wst->name)
+		ret += strlen(wst->name);
+	if (wst->uid)
+		ret += strlen(wst->uid);
+	if (wst->gid)
+		ret += strlen(wst->gid);
+	if (wst->muid)
+		ret += strlen(wst->muid);
+
+	if (optional) {
+		ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */
+		if (wst->extension)
+			ret += strlen(wst->extension);
+	}
+
+	return ret;
+}
+
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 {
 	int err;
 	struct p9_req_t *req;
 	struct p9_client *clnt;
 
+	err = 0;
+	clnt = fid->clnt;
+	wst->size = p9_client_statsize(wst, clnt->dotu);
 	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
 	P9_DPRINTK(P9_DEBUG_9P,
 		" sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
@@ -1268,10 +1266,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 		wst->atime, wst->mtime, (unsigned long long)wst->length,
 		wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
 		wst->n_uid, wst->n_gid, wst->n_muid);
-	err = 0;
-	clnt = fid->clnt;
 
-	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst);
+	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
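
The new p9_client_statsize() helper sizes the TWSTAT payload from the 9P wire format instead of sending 0: a fixed block of integer fields (size[2] type[2] dev[4] qid[13] mode[4] atime[4] mtime[4] length[8]) plus a 2-byte length prefix per string. A minimal userspace sketch of the same arithmetic; the sample field values are made up for illustration and this is not kernel code:

    #include <stdio.h>
    #include <string.h>

    /* fixed fields + four 2-byte string-length prefixes, as in the patch */
    static int statsize(const char *name, const char *uid,
                        const char *gid, const char *muid)
    {
            int ret = 2+2+4+13+4+4+4+8 + 2+2+2+2;

            ret += strlen(name) + strlen(uid) + strlen(gid) + strlen(muid);
            return ret;
    }

    int main(void)
    {
            /* sample values chosen only for illustration */
            printf("TWSTAT stat size: %d bytes\n",
                   statsize("file.txt", "root", "root", "root"));
            return 0;
    }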
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c613ed08a5ee..a2a1814c7a8d 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -213,8 +213,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	spin_unlock_irqrestore(&m->client->lock, flags);
 
 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-		list_del(&req->req_list);
 		P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req);
+		list_del(&req->req_list);
 		p9_client_cb(m->client, req);
 	}
 }
@@ -336,7 +336,8 @@ static void p9_read_work(struct work_struct *work)
336 "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); 336 "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
337 337
338 m->req = p9_tag_lookup(m->client, tag); 338 m->req = p9_tag_lookup(m->client, tag);
339 if (!m->req) { 339 if (!m->req || (m->req->status != REQ_STATUS_SENT &&
340 m->req->status != REQ_STATUS_FLSH)) {
340 P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", 341 P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
341 tag); 342 tag);
342 err = -EIO; 343 err = -EIO;
@@ -361,10 +362,11 @@ static void p9_read_work(struct work_struct *work)
 	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
 		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n");
 		spin_lock(&m->client->lock);
+		if (m->req->status != REQ_STATUS_ERROR)
+			m->req->status = REQ_STATUS_RCVD;
 		list_del(&m->req->req_list);
 		spin_unlock(&m->client->lock);
 		p9_client_cb(m->client, m->req);
-
 		m->rbuf = NULL;
 		m->rpos = 0;
 		m->rsize = 0;
@@ -454,6 +456,7 @@ static void p9_write_work(struct work_struct *work)
 		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
 			       req_list);
 		req->status = REQ_STATUS_SENT;
+		P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req);
 		list_move_tail(&req->req_list, &m->req_list);
 
 		m->wbuf = req->tc->sdata;
@@ -683,12 +686,13 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 	P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
 
 	spin_lock(&client->lock);
-	list_del(&req->req_list);
 
 	if (req->status == REQ_STATUS_UNSENT) {
+		list_del(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
 		ret = 0;
-	}
+	} else if (req->status == REQ_STATUS_SENT)
+		req->status = REQ_STATUS_FLSH;
 
 	spin_unlock(&client->lock);
 
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 7fa0eb20b2f6..ac4990041ebb 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -295,6 +295,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 		goto err_out;
 
 	req->rc = c->rc;
+	req->status = REQ_STATUS_RCVD;
 	p9_client_cb(client, req);
 
 	return;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 2d7781ec663b..bb8579a141a8 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -134,6 +134,7 @@ static void req_done(struct virtqueue *vq)
 		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
 		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
 		req = p9_tag_lookup(chan->client, rc->tag);
+		req->status = REQ_STATUS_RCVD;
 		p9_client_cb(chan->client, req);
 	}
 }
diff --git a/net/Kconfig b/net/Kconfig
index ec93e7e38b38..c19f549c8e74 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -119,12 +119,6 @@ menuconfig NETFILTER
 	  <file:Documentation/Changes> under "iptables" for the location of
 	  these packages.
 
-	  Make sure to say N to "Fast switching" below if you intend to say Y
-	  here, as Fast switching currently bypasses netfilter.
-
-	  Chances are that you should say Y here if you compile a kernel which
-	  will run as a router and N for regular hosts. If unsure, say N.
-
 if NETFILTER
 
 config NETFILTER_DEBUG
@@ -140,7 +134,7 @@ config NETFILTER_ADVANCED
 	default y
 	help
 	  If you say Y here you can select between all the netfilter modules.
-	  If you say N the more ununsual ones will not be shown and the
+	  If you say N the more unusual ones will not be shown and the
 	  basic ones needed by most people will default to 'M'.
 
 	  If unsure, say Y.
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 334fcd4a4ea4..3100a8940afc 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -549,6 +549,7 @@ static void br2684_setup(struct net_device *netdev)
 	struct br2684_dev *brdev = BRPRIV(netdev);
 
 	ether_setup(netdev);
+	brdev->net_dev = netdev;
 
 	netdev->netdev_ops = &br2684_netdev_ops;
 
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 57aeba729bae..832bcf092a01 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -148,9 +148,13 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct ax25_uid_assoc *pt;
 	struct hlist_node *node;
-	int i = 0;
+	int i = 1;
 
 	read_lock(&ax25_uid_lock);
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
 	ax25_uid_for_each(pt, node, &ax25_uid_list) {
 		if (i == *pos)
 			return pt;
@@ -162,8 +166,10 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
 static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	++*pos;
-
-	return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next,
+	if (v == SEQ_START_TOKEN)
+		return ax25_uid_list.first;
+	else
+		return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next,
 			ax25_uid_assoc, uid_node);
 }
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 1181db08d9de..fa47d5d84f5c 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -171,10 +171,8 @@ static void hci_conn_timeout(unsigned long arg)
 	switch (conn->state) {
 	case BT_CONNECT:
 	case BT_CONNECT2:
-		if (conn->type == ACL_LINK)
+		if (conn->type == ACL_LINK && conn->out)
 			hci_acl_connect_cancel(conn);
-		else
-			hci_acl_disconn(conn, 0x13);
 		break;
 	case BT_CONFIG:
 	case BT_CONNECTED:
@@ -215,6 +213,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	conn->state = BT_OPEN;
 
 	conn->power_save = 1;
+	conn->disc_timeout = HCI_DISCONN_TIMEOUT;
 
 	switch (type) {
 	case ACL_LINK:
@@ -247,6 +246,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	if (hdev->notify)
 		hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
 
+	hci_conn_init_sysfs(conn);
+
 	tasklet_enable(&hdev->tx_task);
 
 	return conn;
@@ -289,6 +290,8 @@ int hci_conn_del(struct hci_conn *conn)
 
 	hci_conn_del_sysfs(conn);
 
+	hci_dev_put(hdev);
+
 	return 0;
 }
 
@@ -424,12 +427,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 	if (sec_level == BT_SECURITY_SDP)
 		return 1;
 
-	if (sec_level == BT_SECURITY_LOW) {
-		if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0)
-			return hci_conn_auth(conn, sec_level, auth_type);
-		else
-			return 1;
-	}
+	if (sec_level == BT_SECURITY_LOW &&
+				(!conn->ssp_mode || !conn->hdev->ssp_mode))
+		return 1;
 
 	if (conn->link_mode & HCI_LM_ENCRYPT)
 		return hci_conn_auth(conn, sec_level, auth_type);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 55534244c3a0..184ba0a88ec0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -866,8 +866,16 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
-	if (!conn)
-		goto unlock;
+	if (!conn) {
+		if (ev->link_type != SCO_LINK)
+			goto unlock;
+
+		conn = hci_conn_hash_lookup_ba(hdev, ESCO_LINK, &ev->bdaddr);
+		if (!conn)
+			goto unlock;
+
+		conn->type = SCO_LINK;
+	}
 
 	if (!ev->status) {
 		conn->handle = __le16_to_cpu(ev->handle);
@@ -875,6 +883,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (conn->type == ACL_LINK) {
 			conn->state = BT_CONFIG;
 			hci_conn_hold(conn);
+			conn->disc_timeout = HCI_DISCONN_TIMEOUT;
 		} else
 			conn->state = BT_CONNECTED;
 
@@ -1055,9 +1064,14 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 			hci_proto_connect_cfm(conn, ev->status);
 			hci_conn_put(conn);
 		}
-	} else
+	} else {
 		hci_auth_cfm(conn, ev->status);
 
+		hci_conn_hold(conn);
+		conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+		hci_conn_put(conn);
+	}
+
 	if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
 		if (!ev->status) {
 			struct hci_cp_set_conn_encrypt cp;
@@ -1471,7 +1485,21 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb
 
 static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
+	struct hci_ev_pin_code_req *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
 	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (conn && conn->state == BT_CONNECTED) {
+		hci_conn_hold(conn);
+		conn->disc_timeout = HCI_PAIRING_TIMEOUT;
+		hci_conn_put(conn);
+	}
+
+	hci_dev_unlock(hdev);
 }
 
 static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1481,7 +1509,21 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff
 
 static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
+	struct hci_ev_link_key_notify *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
 	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (conn) {
+		hci_conn_hold(conn);
+		conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+		hci_conn_put(conn);
+	}
+
+	hci_dev_unlock(hdev);
 }
 
 static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1646,20 +1688,28 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu
 		conn->type = SCO_LINK;
 	}
 
-	if (conn->out && ev->status == 0x1c && conn->attempt < 2) {
-		conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
-			(hdev->esco_type & EDR_ESCO_MASK);
-		hci_setup_sync(conn, conn->link->handle);
-		goto unlock;
-	}
-
-	if (!ev->status) {
+	switch (ev->status) {
+	case 0x00:
 		conn->handle = __le16_to_cpu(ev->handle);
 		conn->state = BT_CONNECTED;
 
 		hci_conn_add_sysfs(conn);
-	} else
+		break;
+
+	case 0x1c:	/* SCO interval rejected */
+	case 0x1f:	/* Unspecified error */
+		if (conn->out && conn->attempt < 2) {
+			conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
+					(hdev->esco_type & EDR_ESCO_MASK);
+			hci_setup_sync(conn, conn->link->handle);
+			goto unlock;
+		}
+		/* fall through */
+
+	default:
 		conn->state = BT_CLOSED;
+		break;
+	}
 
 	hci_proto_connect_cfm(conn, ev->status);
 	if (ev->status)
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index ed82796d4a0f..95f7a7a544b4 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -9,8 +9,7 @@
 struct class *bt_class = NULL;
 EXPORT_SYMBOL_GPL(bt_class);
 
-static struct workqueue_struct *btaddconn;
-static struct workqueue_struct *btdelconn;
+static struct workqueue_struct *bt_workq;
 
 static inline char *link_typetostr(int type)
 {
@@ -88,35 +87,17 @@ static struct device_type bt_link = {
 
 static void add_conn(struct work_struct *work)
 {
-	struct hci_conn *conn = container_of(work, struct hci_conn, work);
+	struct hci_conn *conn = container_of(work, struct hci_conn, work_add);
+	struct hci_dev *hdev = conn->hdev;
 
-	flush_workqueue(btdelconn);
+	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
 
 	if (device_add(&conn->dev) < 0) {
 		BT_ERR("Failed to register connection device");
 		return;
 	}
-}
-
-void hci_conn_add_sysfs(struct hci_conn *conn)
-{
-	struct hci_dev *hdev = conn->hdev;
-
-	BT_DBG("conn %p", conn);
-
-	conn->dev.type = &bt_link;
-	conn->dev.class = bt_class;
-	conn->dev.parent = &hdev->dev;
 
-	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
-
-	dev_set_drvdata(&conn->dev, conn);
-
-	device_initialize(&conn->dev);
-
-	INIT_WORK(&conn->work, add_conn);
-
-	queue_work(btaddconn, &conn->work);
+	hci_dev_hold(hdev);
 }
 
 /*
@@ -131,9 +112,12 @@ static int __match_tty(struct device *dev, void *data)
 
 static void del_conn(struct work_struct *work)
 {
-	struct hci_conn *conn = container_of(work, struct hci_conn, work);
+	struct hci_conn *conn = container_of(work, struct hci_conn, work_del);
 	struct hci_dev *hdev = conn->hdev;
 
+	if (!device_is_registered(&conn->dev))
+		return;
+
 	while (1) {
 		struct device *dev;
 
@@ -146,19 +130,40 @@ static void del_conn(struct work_struct *work)
 
 	device_del(&conn->dev);
 	put_device(&conn->dev);
+
 	hci_dev_put(hdev);
 }
 
-void hci_conn_del_sysfs(struct hci_conn *conn)
+void hci_conn_init_sysfs(struct hci_conn *conn)
 {
+	struct hci_dev *hdev = conn->hdev;
+
 	BT_DBG("conn %p", conn);
 
-	if (!device_is_registered(&conn->dev))
-		return;
+	conn->dev.type = &bt_link;
+	conn->dev.class = bt_class;
+	conn->dev.parent = &hdev->dev;
 
-	INIT_WORK(&conn->work, del_conn);
+	dev_set_drvdata(&conn->dev, conn);
+
+	device_initialize(&conn->dev);
 
-	queue_work(btdelconn, &conn->work);
+	INIT_WORK(&conn->work_add, add_conn);
+	INIT_WORK(&conn->work_del, del_conn);
+}
+
+void hci_conn_add_sysfs(struct hci_conn *conn)
+{
+	BT_DBG("conn %p", conn);
+
+	queue_work(bt_workq, &conn->work_add);
+}
+
+void hci_conn_del_sysfs(struct hci_conn *conn)
+{
+	BT_DBG("conn %p", conn);
+
+	queue_work(bt_workq, &conn->work_del);
 }
 
 static inline char *host_typetostr(int type)
@@ -435,20 +440,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
 
 int __init bt_sysfs_init(void)
 {
-	btaddconn = create_singlethread_workqueue("btaddconn");
-	if (!btaddconn)
+	bt_workq = create_singlethread_workqueue("bluetooth");
+	if (!bt_workq)
 		return -ENOMEM;
 
-	btdelconn = create_singlethread_workqueue("btdelconn");
-	if (!btdelconn) {
-		destroy_workqueue(btaddconn);
-		return -ENOMEM;
-	}
-
 	bt_class = class_create(THIS_MODULE, "bluetooth");
 	if (IS_ERR(bt_class)) {
-		destroy_workqueue(btdelconn);
-		destroy_workqueue(btaddconn);
+		destroy_workqueue(bt_workq);
 		return PTR_ERR(bt_class);
 	}
 
@@ -457,8 +455,7 @@ int __init bt_sysfs_init(void)
 
 void bt_sysfs_cleanup(void)
 {
-	destroy_workqueue(btaddconn);
-	destroy_workqueue(btdelconn);
+	destroy_workqueue(bt_workq);
 
 	class_destroy(bt_class);
 }
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 1d0fb0f23c63..374536e050aa 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1194,6 +1194,8 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d)
 
 	rfcomm_send_ua(d->session, d->dlci);
 
+	rfcomm_dlc_clear_timer(d);
+
 	rfcomm_dlc_lock(d);
 	d->state = BT_CONNECTED;
 	d->state_change(d, 0);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index abdc703a11d2..cab71ea2796d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1093,11 +1093,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
 	}
 }
 
-static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused)
-{
-	return 0;
-}
-
 static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
 {
 	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
@@ -1156,7 +1151,6 @@ static const struct tty_operations rfcomm_ops = {
 	.send_xchar = rfcomm_tty_send_xchar,
 	.hangup = rfcomm_tty_hangup,
 	.wait_until_sent = rfcomm_tty_wait_until_sent,
-	.read_proc = rfcomm_tty_read_proc,
 	.tiocmget = rfcomm_tty_tiocmget,
 	.tiocmset = rfcomm_tty_tiocmset,
 };
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 30b88777c3df..5ee1a3682bf2 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -134,6 +134,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (skb->protocol == htons(ETH_P_PAUSE))
 		goto drop;
 
+	/* If STP is turned off, then forward */
+	if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
+		goto forward;
+
 	if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
 		    NULL, br_handle_local_finish))
 		return NULL;	/* frame consumed by filter */
@@ -141,6 +145,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 		return skb;	/* continue processing */
 	}
 
+forward:
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
 		rhook = rcu_dereference(br_should_route_hook);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 3953ac4214c8..e4a418fcb35b 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -788,15 +788,23 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
-	if (skb->protocol == htons(ETH_P_IP) &&
+	if (skb->nfct != NULL &&
+	    (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) &&
 	    skb->len > skb->dev->mtu &&
 	    !skb_is_gso(skb))
 		return ip_fragment(skb, br_dev_queue_push_xmit);
 	else
 		return br_dev_queue_push_xmit(skb);
 }
+#else
+static int br_nf_dev_queue_xmit(struct sk_buff *skb)
+{
+	return br_dev_queue_push_xmit(skb);
+}
+#endif
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
 static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 6e63ec3f1fcf..0660515f3992 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -297,6 +297,9 @@ void br_topology_change_detection(struct net_bridge *br)
 {
 	int isroot = br_is_root_bridge(br);
 
+	if (br->stp_enabled != BR_KERNEL_STP)
+		return;
+
 	pr_info("%s: topology change detected, %s\n", br->dev->name,
 		isroot ? "propagating" : "sending tcn bpdu");
 
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 547bafc79e28..10f0528c3bf5 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -674,8 +674,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	rcu_read_unlock();
 
-	/* free the skbuff allocated by the netdevice driver */
-	kfree_skb(skb);
+	/* consume the skbuff allocated by the netdevice driver */
+	consume_skb(skb);
 
 	if (matches > 0) {
 		can_stats.matches++;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index d0de644b378d..b01a76abe1d2 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk)
 	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
 }
 
+static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync,
+				  void *key)
+{
+	unsigned long bits = (unsigned long)key;
+
+	/*
+	 * Avoid a wakeup if event not interesting for us
+	 */
+	if (bits && !(bits & (POLLIN | POLLERR)))
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
 /*
  *	Wait for a packet..
  */
 static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 {
 	int error;
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
 
 	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
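
The receiver_wake_function() added above filters wakeups by the poll-event bitmask handed to the wake callback: a datagram receiver only cares about POLLIN and POLLERR, so a POLLOUT-only wakeup is skipped. A small userspace sketch of just that predicate (the surrounding wait-queue machinery is kernel-only):

    #include <stdio.h>
    #include <poll.h>

    /* mirrors the patch's test: skip wakeups whose key carries
     * neither POLLIN nor POLLERR; a zero key always wakes */
    static int interesting(unsigned long bits)
    {
            if (bits && !(bits & (POLLIN | POLLERR)))
                    return 0;
            return 1;
    }

    int main(void)
    {
            printf("POLLOUT only : %d\n", interesting(POLLOUT));
            printf("POLLIN       : %d\n", interesting(POLLIN));
            printf("0 (no key)   : %d\n", interesting(0));
            return 0;
    }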
diff --git a/net/core/dev.c b/net/core/dev.c
index 52fea5b28ca6..e2e9e4af3ace 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1336,7 +1336,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct packet_type *ptype;
 
+#ifdef CONFIG_NET_CLS_ACT
+	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
+		net_timestamp(skb);
+#else
 	net_timestamp(skb);
+#endif
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1430,7 +1435,7 @@ void netif_device_detach(struct net_device *dev)
 {
 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
 	    netif_running(dev)) {
-		netif_stop_queue(dev);
+		netif_tx_stop_all_queues(dev);
 	}
 }
 EXPORT_SYMBOL(netif_device_detach);
@@ -1445,7 +1450,7 @@ void netif_device_attach(struct net_device *dev)
 {
 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
 	    netif_running(dev)) {
-		netif_wake_queue(dev);
+		netif_tx_wake_all_queues(dev);
 		__netdev_watchdog_up(dev);
 	}
 }
@@ -1730,11 +1735,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
 	u32 hash;
 
-	if (skb_rx_queue_recorded(skb)) {
-		hash = skb_get_rx_queue(skb);
-	} else if (skb->sk && skb->sk->sk_hash) {
+	if (skb_rx_queue_recorded(skb))
+		return skb_get_rx_queue(skb) % dev->real_num_tx_queues;
+
+	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
-	} else
+	else
 		hash = skb->protocol;
 
 	hash = jhash_1word(hash, skb_tx_hashrnd);
@@ -2328,8 +2334,10 @@ static int napi_gro_complete(struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int err = -ENOENT;
 
-	if (NAPI_GRO_CB(skb)->count == 1)
+	if (NAPI_GRO_CB(skb)->count == 1) {
+		skb_shinfo(skb)->gso_size = 0;
 		goto out;
+	}
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -2348,7 +2356,6 @@ static int napi_gro_complete(struct sk_buff *skb)
 	}
 
 out:
-	skb_shinfo(skb)->gso_size = 0;
 	return netif_receive_skb(skb);
 }
 
@@ -2472,8 +2479,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		return GRO_NORMAL;
 
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
-			skb_mac_header(p), skb_gro_mac_header(skb));
+		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
+			&& !compare_ether_header(skb_mac_header(p),
+						 skb_gro_mac_header(skb));
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
@@ -2538,9 +2546,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 	}
 
 	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
-	frag = &info->frags[info->nr_frags - 1];
+	frag = info->frags;
 
-	for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
+	for (i = 0; i < info->nr_frags; i++) {
 		skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
 				   frag->size);
 		frag++;
@@ -4398,7 +4406,7 @@ int register_netdevice(struct net_device *dev)
 	dev->iflink = -1;
 
 #ifdef CONFIG_COMPAT_NET_DEV_OPS
-	/* Netdevice_ops API compatiability support.
+	/* Netdevice_ops API compatibility support.
 	 * This is temporary until all network devices are converted.
 	 */
 	if (dev->netdev_ops) {
@@ -4409,7 +4417,7 @@ int register_netdevice(struct net_device *dev)
 		       dev->name, netdev_drivername(dev, drivername, 64));
 
 		/* This works only because net_device_ops and the
-		   compatiablity structure are the same. */
+		   compatibility structure are the same. */
 		dev->netdev_ops = (void *) &(dev->init);
 	}
 #endif
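
Among the changes above, skb_tx_hash() now folds a recorded rx queue index into the device's tx queue range with a modulo rather than using it verbatim, since a NIC can expose more rx queues than tx queues. A hypothetical standalone sketch of that mapping (the names are illustrative, not kernel API):

    #include <stdio.h>
    #include <stdint.h>

    /* fold an rx queue index into [0, real_num_tx_queues) */
    static uint16_t tx_queue_for(uint16_t rx_queue, uint16_t real_num_tx_queues)
    {
            return rx_queue % real_num_tx_queues;
    }

    int main(void)
    {
            /* e.g. 16 rx queues feeding a device with only 4 tx queues */
            for (uint16_t rxq = 0; rxq < 16; rxq++)
                    printf("rx %2u -> tx %u\n", rxq, tx_queue_for(rxq, 4));
            return 0;
    }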
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 244ca56dffac..d9d5160610d5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -261,8 +261,7 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
 	ret = 0;
 
 err_out:
-	if (rule_buf)
-		kfree(rule_buf);
+	kfree(rule_buf);
 
 	return ret;
 }
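
The guard dropped here is redundant because kfree(NULL) is defined to be a no-op, exactly like userspace free(NULL):

    #include <stdlib.h>

    int main(void)
    {
            char *rule_buf = NULL;

            free(rule_buf);	/* no-op on NULL, like kfree(NULL) */
            return 0;
    }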
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9cc9f95b109e..6d62d4618cfc 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,9 +66,9 @@
 
    NOTES.
 
-   * The stored value for avbps is scaled by 2^5, so that maximal
-     rate is ~1Gbit, avpps is scaled by 2^10.
-
+   * avbps is scaled by 2^5, avpps is scaled by 2^10.
+   * both values are reported as 32 bit unsigned values. bps can
+     overflow for fast links : max speed being 34360Mbit/sec
    * Minimal interval is HZ/4=250msec (it is the greatest common divisor
      for HZ=100 and HZ=1024 8)), maximal interval
      is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
@@ -86,9 +86,9 @@ struct gen_estimator
 	spinlock_t		*stats_lock;
 	int			ewma_log;
 	u64			last_bytes;
+	u64			avbps;
 	u32			last_packets;
 	u32			avpps;
-	u32			avbps;
 	struct rcu_head		e_rcu;
 	struct rb_node		node;
 };
@@ -115,6 +115,7 @@ static void est_timer(unsigned long arg)
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &elist[idx].list, list) {
 		u64 nbytes;
+		u64 brate;
 		u32 npackets;
 		u32 rate;
 
@@ -125,9 +126,9 @@ static void est_timer(unsigned long arg)
 
 		nbytes = e->bstats->bytes;
 		npackets = e->bstats->packets;
-		rate = (nbytes - e->last_bytes)<<(7 - idx);
+		brate = (nbytes - e->last_bytes)<<(7 - idx);
 		e->last_bytes = nbytes;
-		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
+		e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log;
 		e->rate_est->bps = (e->avbps+0xF)>>5;
 
 		rate = (npackets - e->last_packets)<<(12 - idx);
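
The estimator now keeps avbps in a u64 (scaled by 2^5) and updates it as avbps += (s64)(brate - avbps) >> ewma_log, so byte rates on fast links no longer wrap a 32-bit intermediate. A rough userspace sketch of the update loop under an assumed ~40 Gbit/s input, not the kernel code itself:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t avbps = 0;	/* scaled by 2^5, as in the patch */
            int ewma_log = 5;
            /* bytes per 250 ms interval at ~40 Gbit/s, pre-scaled <<7
             * the way est_timer() does for idx = 0 */
            uint64_t brate = 1250000000ULL << 7;

            for (int i = 0; i < 256; i++)
                    avbps += (int64_t)(brate - avbps) >> ewma_log;

            /* ~5e9 bytes/sec: already past what a u32 could report */
            printf("estimated bytes/sec: %llu\n",
                   (unsigned long long)((avbps + 0xF) >> 5));
            return 0;
    }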
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b5873bdff612..64f51eec6576 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -175,9 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi)
 void netpoll_poll(struct netpoll *np)
 {
 	struct net_device *dev = np->dev;
-	const struct net_device_ops *ops = dev->netdev_ops;
+	const struct net_device_ops *ops;
+
+	if (!dev || !netif_running(dev))
+		return;
 
-	if (!dev || !netif_running(dev) || !ops->ndo_poll_controller)
+	ops = dev->netdev_ops;
+	if (!ops->ndo_poll_controller)
 		return;
 
 	/* Process pending work on NIC */
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3779c1438c11..0666a827bc62 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->cflows) {
 		/* let go of the SAs if we have them */
 		int i = 0;
-		for (; i < pkt_dev->nflows; i++){
+		for (; i < pkt_dev->cflows; i++) {
 			struct xfrm_state *x = pkt_dev->flows[i].x;
 			if (x) {
 				xfrm_state_put(x);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ce6356cd9f71..e505b5392e1e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -502,7 +502,9 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
 	shinfo->gso_segs = 0;
 	shinfo->gso_type = 0;
 	shinfo->ip6_frag_id = 0;
+	shinfo->tx_flags.flags = 0;
 	shinfo->frag_list = NULL;
+	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;
@@ -1365,9 +1367,8 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 
 static inline struct page *linear_to_page(struct page *page, unsigned int *len,
 					  unsigned int *offset,
-					  struct sk_buff *skb)
+					  struct sk_buff *skb, struct sock *sk)
 {
-	struct sock *sk = skb->sk;
 	struct page *p = sk->sk_sndmsg_page;
 	unsigned int off;
 
@@ -1405,13 +1406,14 @@ new_page:
  */
 static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
 				unsigned int *len, unsigned int offset,
-				struct sk_buff *skb, int linear)
+				struct sk_buff *skb, int linear,
+				struct sock *sk)
 {
 	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
 		return 1;
 
 	if (linear) {
-		page = linear_to_page(page, len, &offset, skb);
+		page = linear_to_page(page, len, &offset, skb, sk);
 		if (!page)
 			return 1;
 	} else
@@ -1442,7 +1444,8 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
 static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
-				   struct splice_pipe_desc *spd, int linear)
+				   struct splice_pipe_desc *spd, int linear,
+				   struct sock *sk)
 {
 	if (!*len)
 		return 1;
@@ -1465,7 +1468,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 	/* the linear region may spread across several pages */
 	flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-	if (spd_fill_page(spd, page, &flen, poff, skb, linear))
+	if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk))
 		return 1;
 
 	__segment_seek(&page, &poff, &plen, flen);
@@ -1481,8 +1484,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
  * pipe is full or if we already spliced the requested length.
  */
 static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-			     unsigned int *len,
-			     struct splice_pipe_desc *spd)
+			     unsigned int *len, struct splice_pipe_desc *spd,
+			     struct sock *sk)
 {
 	int seg;
 
@@ -1492,7 +1495,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1))
+			     offset, len, skb, spd, 1, sk))
 		return 1;
 
 	/*
@@ -1502,7 +1505,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1502 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; 1505 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1503 1506
1504 if (__splice_segment(f->page, f->page_offset, f->size, 1507 if (__splice_segment(f->page, f->page_offset, f->size,
1505 offset, len, skb, spd, 0)) 1508 offset, len, skb, spd, 0, sk))
1506 return 1; 1509 return 1;
1507 } 1510 }
1508 1511
@@ -1528,12 +1531,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1528 .ops = &sock_pipe_buf_ops, 1531 .ops = &sock_pipe_buf_ops,
1529 .spd_release = sock_spd_release, 1532 .spd_release = sock_spd_release,
1530 }; 1533 };
1534 struct sock *sk = skb->sk;
1531 1535
1532 /* 1536 /*
1533 * __skb_splice_bits() only fails if the output has no room left, 1537 * __skb_splice_bits() only fails if the output has no room left,
1534 * so no point in going over the frag_list for the error case. 1538 * so no point in going over the frag_list for the error case.
1535 */ 1539 */
1536 if (__skb_splice_bits(skb, &offset, &tlen, &spd)) 1540 if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
1537 goto done; 1541 goto done;
1538 else if (!tlen) 1542 else if (!tlen)
1539 goto done; 1543 goto done;
@@ -1545,14 +1549,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1545 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1549 struct sk_buff *list = skb_shinfo(skb)->frag_list;
1546 1550
1547 for (; list && tlen; list = list->next) { 1551 for (; list && tlen; list = list->next) {
1548 if (__skb_splice_bits(list, &offset, &tlen, &spd)) 1552 if (__skb_splice_bits(list, &offset, &tlen, &spd, sk))
1549 break; 1553 break;
1550 } 1554 }
1551 } 1555 }
1552 1556
1553done: 1557done:
1554 if (spd.nr_pages) { 1558 if (spd.nr_pages) {
1555 struct sock *sk = skb->sk;
1556 int ret; 1559 int ret;
1557 1560
1558 /* 1561 /*
@@ -2285,7 +2288,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
2285next_skb: 2288next_skb:
2286 block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; 2289 block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
2287 2290
2288 if (abs_offset < block_limit) { 2291 if (abs_offset < block_limit && !st->frag_data) {
2289 *data = st->cur_skb->data + (abs_offset - st->stepped_offset); 2292 *data = st->cur_skb->data + (abs_offset - st->stepped_offset);
2290 return block_limit - abs_offset; 2293 return block_limit - abs_offset;
2291 } 2294 }
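The added !st->frag_data test keeps the linear-data shortcut from firing while a fragment mapping from an earlier call is still live, which could hand back linear-area bytes while paged data was being walked. A minimal sketch of the consumer pattern skb_seq_read() serves, assuming a caller that simply walks the whole skb:

    struct skb_seq_state st;
    const u8 *data;
    unsigned int len, consumed = 0;

    skb_prepare_seq_read(skb, 0, skb->len, &st);
    while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
            /* 'data' is valid for 'len' bytes, linear or kmapped frag */
            consumed += len;
    }
    /* skb_abort_seq_read(&st) is needed only when stopping early */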
diff --git a/net/core/sock.c b/net/core/sock.c
index 0620046e4eba..7dbf3ffb35cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1677,7 +1677,7 @@ static void sock_def_error_report(struct sock *sk)
1677{ 1677{
1678 read_lock(&sk->sk_callback_lock); 1678 read_lock(&sk->sk_callback_lock);
1679 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1679 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1680 wake_up_interruptible(sk->sk_sleep); 1680 wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
1681 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 1681 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1682 read_unlock(&sk->sk_callback_lock); 1682 read_unlock(&sk->sk_callback_lock);
1683} 1683}
@@ -1686,7 +1686,8 @@ static void sock_def_readable(struct sock *sk, int len)
1686{ 1686{
1687 read_lock(&sk->sk_callback_lock); 1687 read_lock(&sk->sk_callback_lock);
1688 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1688 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1689 wake_up_interruptible_sync(sk->sk_sleep); 1689 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
1690 POLLRDNORM | POLLRDBAND);
1690 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 1691 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1691 read_unlock(&sk->sk_callback_lock); 1692 read_unlock(&sk->sk_callback_lock);
1692} 1693}
@@ -1700,7 +1701,8 @@ static void sock_def_write_space(struct sock *sk)
1700 */ 1701 */
1701 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 1702 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1702 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1703 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1703 wake_up_interruptible_sync(sk->sk_sleep); 1704 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
1705 POLLWRNORM | POLLWRBAND);
1704 1706
1705 /* Should agree with poll, otherwise some programs break */ 1707 /* Should agree with poll, otherwise some programs break */
1706 if (sock_writeable(sk)) 1708 if (sock_writeable(sk))
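All three default callbacks now use the keyed-wakeup variants, so the poll mask travels with the wakeup and epoll waiters registered for unrelated events stay asleep. The same idiom in a hypothetical driver (mydev_wq and mydev_data_ready are illustrative names, not from this patch):

    static DECLARE_WAIT_QUEUE_HEAD(mydev_wq);

    static void mydev_data_ready(void)
    {
            if (waitqueue_active(&mydev_wq))
                    /* only waiters whose key intersects POLLIN | POLLRDNORM run */
                    wake_up_interruptible_poll(&mydev_wq, POLLIN | POLLRDNORM);
    }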
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b2cf91e4ccaa..5b919f7b45db 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -407,8 +407,8 @@ config INET_XFRM_MODE_BEET
407 If unsure, say Y. 407 If unsure, say Y.
408 408
409config INET_LRO 409config INET_LRO
410 tristate "Large Receive Offload (ipv4/tcp)" 410 bool "Large Receive Offload (ipv4/tcp)"
411 411 default y
412 ---help--- 412 ---help---
413 Support for Large Receive Offload (ipv4/tcp). 413 Support for Large Receive Offload (ipv4/tcp).
414 414
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ec0ae490f0b6..33c7c85dfe40 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key)
986static struct node *trie_rebalance(struct trie *t, struct tnode *tn) 986static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
987{ 987{
988 int wasfull; 988 int wasfull;
989 t_key cindex, key = tn->key; 989 t_key cindex, key;
990 struct tnode *tp; 990 struct tnode *tp;
991 991
992 preempt_disable();
993 key = tn->key;
994
992 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 995 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
993 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 996 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
994 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 997 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
@@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
1007 if (IS_TNODE(tn)) 1010 if (IS_TNODE(tn))
1008 tn = (struct tnode *)resize(t, (struct tnode *)tn); 1011 tn = (struct tnode *)resize(t, (struct tnode *)tn);
1009 1012
1013 preempt_enable();
1010 return (struct node *)tn; 1014 return (struct node *)tn;
1011} 1015}
1012 1016
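A sketch of the pattern, loop body elided: the key is sampled and the whole rebalance runs without migrating CPUs, and nothing inside the section may sleep once preemption is off:

    preempt_disable();
    key = tn->key;          /* stable snapshot, same CPU throughout */
    /* ... walk toward the root, resize() each tnode on the way ... */
    preempt_enable();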
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 90d22ae0a419..88bf051d0cbb 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */
139__be32 root_server_addr = NONE; /* Address of NFS server */ 139__be32 root_server_addr = NONE; /* Address of NFS server */
140u8 root_server_path[256] = { 0, }; /* Path to mount as root */ 140u8 root_server_path[256] = { 0, }; /* Path to mount as root */
141 141
142u32 ic_dev_xid; /* Device under configuration */
143
142/* vendor class identifier */ 144/* vendor class identifier */
143static char vendor_class_identifier[253] __initdata; 145static char vendor_class_identifier[253] __initdata;
144 146
@@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
932 goto drop_unlock; 934 goto drop_unlock;
933 } 935 }
934 936
937 /* Is it a reply for the device we are configuring? */
938 if (b->xid != ic_dev_xid) {
939 if (net_ratelimit())
 940 printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n");
941 goto drop_unlock;
942 }
943
935 /* Parse extensions */ 944 /* Parse extensions */
936 if (ext_len >= 4 && 945 if (ext_len >= 4 &&
937 !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ 946 !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */
@@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void)
1115 get_random_bytes(&timeout, sizeof(timeout)); 1124 get_random_bytes(&timeout, sizeof(timeout));
1116 timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); 1125 timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
1117 for (;;) { 1126 for (;;) {
1127 /* Track the device we are configuring */
1128 ic_dev_xid = d->xid;
1129
1118#ifdef IPCONFIG_BOOTP 1130#ifdef IPCONFIG_BOOTP
1119 if (do_bootp && (d->able & IC_BOOTP)) 1131 if (do_bootp && (d->able & IC_BOOTP))
1120 ic_bootp_send_if(d, jiffies - start_jiffies); 1132 ic_bootp_send_if(d, jiffies - start_jiffies);
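The two hunks cooperate: ic_dev_xid records which transaction is in flight just before each send, and the receive path drops any reply carrying a different xid (a late answer to an earlier request, possibly for another interface). Condensed:

    /* send side, once per loop iteration */
    ic_dev_xid = d->xid;
    ic_bootp_send_if(d, jiffies - start_jiffies);

    /* receive side */
    if (b->xid != ic_dev_xid)
            goto drop_unlock;       /* stale reply, ignore it */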
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 35c5f6a5cb7c..831fe1879dc0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
253 indev = in ? in->name : nulldevname; 253 indev = in ? in->name : nulldevname;
254 outdev = out ? out->name : nulldevname; 254 outdev = out ? out->name : nulldevname;
255 255
256 rcu_read_lock(); 256 xt_info_rdlock_bh();
257 private = rcu_dereference(table->private); 257 private = table->private;
258 table_base = rcu_dereference(private->entries[smp_processor_id()]); 258 table_base = private->entries[smp_processor_id()];
259 259
260 e = get_entry(table_base, private->hook_entry[hook]); 260 e = get_entry(table_base, private->hook_entry[hook]);
261 back = get_entry(table_base, private->underflow[hook]); 261 back = get_entry(table_base, private->underflow[hook]);
@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
273 273
274 hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + 274 hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
275 (2 * skb->dev->addr_len); 275 (2 * skb->dev->addr_len);
276
276 ADD_COUNTER(e->counters, hdr_len, 1); 277 ADD_COUNTER(e->counters, hdr_len, 1);
277 278
278 t = arpt_get_target(e); 279 t = arpt_get_target(e);
@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
328 e = (void *)e + e->next_offset; 329 e = (void *)e + e->next_offset;
329 } 330 }
330 } while (!hotdrop); 331 } while (!hotdrop);
331 332 xt_info_rdunlock_bh();
332 rcu_read_unlock();
333 333
334 if (hotdrop) 334 if (hotdrop)
335 return NF_DROP; 335 return NF_DROP;
@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t,
711 /* Instead of clearing (by a previous call to memset()) 711 /* Instead of clearing (by a previous call to memset())
712 * the counters and using adds, we set the counters 712 * the counters and using adds, we set the counters
713 * with data used by 'current' CPU 713 * with data used by 'current' CPU
714 * We dont care about preemption here. 714 *
715 * Bottom half has to be disabled to prevent deadlock
716 * if new softirq were to run and call ipt_do_table
715 */ 717 */
716 curcpu = raw_smp_processor_id(); 718 local_bh_disable();
719 curcpu = smp_processor_id();
717 720
718 i = 0; 721 i = 0;
719 ARPT_ENTRY_ITERATE(t->entries[curcpu], 722 ARPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t,
726 if (cpu == curcpu) 729 if (cpu == curcpu)
727 continue; 730 continue;
728 i = 0; 731 i = 0;
732 xt_info_wrlock(cpu);
729 ARPT_ENTRY_ITERATE(t->entries[cpu], 733 ARPT_ENTRY_ITERATE(t->entries[cpu],
730 t->size, 734 t->size,
731 add_entry_to_counter, 735 add_entry_to_counter,
732 counters, 736 counters,
733 &i); 737 &i);
738 xt_info_wrunlock(cpu);
734 } 739 }
735}
736
737
738/* We're lazy, and add to the first CPU; overflow works its fey magic
739 * and everything is OK. */
740static int
741add_counter_to_entry(struct arpt_entry *e,
742 const struct xt_counters addme[],
743 unsigned int *i)
744{
745 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
746
747 (*i)++;
748 return 0;
749}
750
751/* Take values from counters and add them back onto the current cpu */
752static void put_counters(struct xt_table_info *t,
753 const struct xt_counters counters[])
754{
755 unsigned int i, cpu;
756
757 local_bh_disable();
758 cpu = smp_processor_id();
759 i = 0;
760 ARPT_ENTRY_ITERATE(t->entries[cpu],
761 t->size,
762 add_counter_to_entry,
763 counters,
764 &i);
765 local_bh_enable(); 740 local_bh_enable();
766} 741}
767 742
768static inline int
769zero_entry_counter(struct arpt_entry *e, void *arg)
770{
771 e->counters.bcnt = 0;
772 e->counters.pcnt = 0;
773 return 0;
774}
775
776static void
777clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
778{
779 unsigned int cpu;
780 const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
781
782 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
783 for_each_possible_cpu(cpu) {
784 memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
785 ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
786 zero_entry_counter, NULL);
787 }
788}
789
790static struct xt_counters *alloc_counters(struct xt_table *table) 743static struct xt_counters *alloc_counters(struct xt_table *table)
791{ 744{
792 unsigned int countersize; 745 unsigned int countersize;
793 struct xt_counters *counters; 746 struct xt_counters *counters;
794 struct xt_table_info *private = table->private; 747 struct xt_table_info *private = table->private;
795 struct xt_table_info *info;
796 748
797 /* We need atomic snapshot of counters: rest doesn't change 749 /* We need atomic snapshot of counters: rest doesn't change
798 * (other than comefrom, which userspace doesn't care 750 * (other than comefrom, which userspace doesn't care
@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
802 counters = vmalloc_node(countersize, numa_node_id()); 754 counters = vmalloc_node(countersize, numa_node_id());
803 755
804 if (counters == NULL) 756 if (counters == NULL)
805 goto nomem; 757 return ERR_PTR(-ENOMEM);
806
807 info = xt_alloc_table_info(private->size);
808 if (!info)
809 goto free_counters;
810
811 clone_counters(info, private);
812
813 mutex_lock(&table->lock);
814 xt_table_entry_swap_rcu(private, info);
815 synchronize_net(); /* Wait until smoke has cleared */
816 758
817 get_counters(info, counters); 759 get_counters(private, counters);
818 put_counters(private, counters);
819 mutex_unlock(&table->lock);
820
821 xt_free_table_info(info);
822 760
823 return counters; 761 return counters;
824
825 free_counters:
826 vfree(counters);
827 nomem:
828 return ERR_PTR(-ENOMEM);
829} 762}
830 763
831static int copy_entries_to_user(unsigned int total_size, 764static int copy_entries_to_user(unsigned int total_size,
@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name,
1094 (newinfo->number <= oldinfo->initial_entries)) 1027 (newinfo->number <= oldinfo->initial_entries))
1095 module_put(t->me); 1028 module_put(t->me);
1096 1029
1097 /* Get the old counters. */ 1030 /* Get the old counters, and synchronize with replace */
1098 get_counters(oldinfo, counters); 1031 get_counters(oldinfo, counters);
1032
1099 /* Decrease module usage counts and free resource */ 1033 /* Decrease module usage counts and free resource */
1100 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1034 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1101 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, 1035 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
1165 return ret; 1099 return ret;
1166} 1100}
1167 1101
1102/* We're lazy, and add to the first CPU; overflow works its fey magic
1103 * and everything is OK. */
1104static int
1105add_counter_to_entry(struct arpt_entry *e,
1106 const struct xt_counters addme[],
1107 unsigned int *i)
1108{
1109 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1110
1111 (*i)++;
1112 return 0;
1113}
1114
1168static int do_add_counters(struct net *net, void __user *user, unsigned int len, 1115static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1169 int compat) 1116 int compat)
1170{ 1117{
1171 unsigned int i; 1118 unsigned int i, curcpu;
1172 struct xt_counters_info tmp; 1119 struct xt_counters_info tmp;
1173 struct xt_counters *paddc; 1120 struct xt_counters *paddc;
1174 unsigned int num_counters; 1121 unsigned int num_counters;
@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1224 goto free; 1171 goto free;
1225 } 1172 }
1226 1173
1227 mutex_lock(&t->lock); 1174 local_bh_disable();
1228 private = t->private; 1175 private = t->private;
1229 if (private->number != num_counters) { 1176 if (private->number != num_counters) {
1230 ret = -EINVAL; 1177 ret = -EINVAL;
1231 goto unlock_up_free; 1178 goto unlock_up_free;
1232 } 1179 }
1233 1180
1234 preempt_disable();
1235 i = 0; 1181 i = 0;
1236 /* Choose the copy that is on our node */ 1182 /* Choose the copy that is on our node */
1237 loc_cpu_entry = private->entries[smp_processor_id()]; 1183 curcpu = smp_processor_id();
1184 loc_cpu_entry = private->entries[curcpu];
1185 xt_info_wrlock(curcpu);
1238 ARPT_ENTRY_ITERATE(loc_cpu_entry, 1186 ARPT_ENTRY_ITERATE(loc_cpu_entry,
1239 private->size, 1187 private->size,
1240 add_counter_to_entry, 1188 add_counter_to_entry,
1241 paddc, 1189 paddc,
1242 &i); 1190 &i);
1243 preempt_enable(); 1191 xt_info_wrunlock(curcpu);
1244 unlock_up_free: 1192 unlock_up_free:
1245 mutex_unlock(&t->lock); 1193 local_bh_enable();
1246
1247 xt_table_unlock(t); 1194 xt_table_unlock(t);
1248 module_put(t->me); 1195 module_put(t->me);
1249 free: 1196 free:
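This is the heart of the series, repeated below for ip_tables and ip6_tables: the RCU snapshot-and-swap counter machinery is replaced by a per-CPU reader/writer discipline. A condensed sketch of both sides, using the primitives this patch introduces in x_tables:

    /* packet path: lock this CPU's copy, bump counters in place */
    xt_info_rdlock_bh();
    private = table->private;
    table_base = private->entries[smp_processor_id()];
    /* ... traverse rules, ADD_COUNTER() on local entries ... */
    xt_info_rdunlock_bh();

    /* control path: fold every other CPU's counters into a snapshot */
    local_bh_disable();
    for_each_possible_cpu(cpu) {
            if (cpu == smp_processor_id())
                    continue;
            xt_info_wrlock(cpu);
            /* ... add that CPU's counters to the user-visible sum ... */
            xt_info_wrunlock(cpu);
    }
    local_bh_enable();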
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 82ee7c9049ff..2ec8d7290c40 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb,
338 tgpar.hooknum = hook; 338 tgpar.hooknum = hook;
339 339
340 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 340 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
341 341 xt_info_rdlock_bh();
342 rcu_read_lock(); 342 private = table->private;
343 private = rcu_dereference(table->private); 343 table_base = private->entries[smp_processor_id()];
344 table_base = rcu_dereference(private->entries[smp_processor_id()]);
345 344
346 e = get_entry(table_base, private->hook_entry[hook]); 345 e = get_entry(table_base, private->hook_entry[hook]);
347 346
@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb,
436 e = (void *)e + e->next_offset; 435 e = (void *)e + e->next_offset;
437 } 436 }
438 } while (!hotdrop); 437 } while (!hotdrop);
439 438 xt_info_rdunlock_bh();
440 rcu_read_unlock();
441 439
442#ifdef DEBUG_ALLOW_ALL 440#ifdef DEBUG_ALLOW_ALL
443 return NF_ACCEPT; 441 return NF_ACCEPT;
@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t,
896 894
897 /* Instead of clearing (by a previous call to memset()) 895 /* Instead of clearing (by a previous call to memset())
898 * the counters and using adds, we set the counters 896 * the counters and using adds, we set the counters
899 * with data used by 'current' CPU 897 * with data used by 'current' CPU.
900 * We dont care about preemption here. 898 *
899 * Bottom half has to be disabled to prevent deadlock
900 * if new softirq were to run and call ipt_do_table
901 */ 901 */
902 curcpu = raw_smp_processor_id(); 902 local_bh_disable();
903 curcpu = smp_processor_id();
903 904
904 i = 0; 905 i = 0;
905 IPT_ENTRY_ITERATE(t->entries[curcpu], 906 IPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t,
912 if (cpu == curcpu) 913 if (cpu == curcpu)
913 continue; 914 continue;
914 i = 0; 915 i = 0;
916 xt_info_wrlock(cpu);
915 IPT_ENTRY_ITERATE(t->entries[cpu], 917 IPT_ENTRY_ITERATE(t->entries[cpu],
916 t->size, 918 t->size,
917 add_entry_to_counter, 919 add_entry_to_counter,
918 counters, 920 counters,
919 &i); 921 &i);
922 xt_info_wrunlock(cpu);
920 } 923 }
921
922}
923
924/* We're lazy, and add to the first CPU; overflow works its fey magic
925 * and everything is OK. */
926static int
927add_counter_to_entry(struct ipt_entry *e,
928 const struct xt_counters addme[],
929 unsigned int *i)
930{
931 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
932
933 (*i)++;
934 return 0;
935}
936
937/* Take values from counters and add them back onto the current cpu */
938static void put_counters(struct xt_table_info *t,
939 const struct xt_counters counters[])
940{
941 unsigned int i, cpu;
942
943 local_bh_disable();
944 cpu = smp_processor_id();
945 i = 0;
946 IPT_ENTRY_ITERATE(t->entries[cpu],
947 t->size,
948 add_counter_to_entry,
949 counters,
950 &i);
951 local_bh_enable(); 924 local_bh_enable();
952} 925}
953 926
954
955static inline int
956zero_entry_counter(struct ipt_entry *e, void *arg)
957{
958 e->counters.bcnt = 0;
959 e->counters.pcnt = 0;
960 return 0;
961}
962
963static void
964clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
965{
966 unsigned int cpu;
967 const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
968
969 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
970 for_each_possible_cpu(cpu) {
971 memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
972 IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
973 zero_entry_counter, NULL);
974 }
975}
976
977static struct xt_counters * alloc_counters(struct xt_table *table) 927static struct xt_counters * alloc_counters(struct xt_table *table)
978{ 928{
979 unsigned int countersize; 929 unsigned int countersize;
980 struct xt_counters *counters; 930 struct xt_counters *counters;
981 struct xt_table_info *private = table->private; 931 struct xt_table_info *private = table->private;
982 struct xt_table_info *info;
983 932
984 /* We need atomic snapshot of counters: rest doesn't change 933 /* We need atomic snapshot of counters: rest doesn't change
985 (other than comefrom, which userspace doesn't care 934 (other than comefrom, which userspace doesn't care
@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
988 counters = vmalloc_node(countersize, numa_node_id()); 937 counters = vmalloc_node(countersize, numa_node_id());
989 938
990 if (counters == NULL) 939 if (counters == NULL)
991 goto nomem; 940 return ERR_PTR(-ENOMEM);
992 941
993 info = xt_alloc_table_info(private->size); 942 get_counters(private, counters);
994 if (!info)
995 goto free_counters;
996
997 clone_counters(info, private);
998
999 mutex_lock(&table->lock);
1000 xt_table_entry_swap_rcu(private, info);
1001 synchronize_net(); /* Wait until smoke has cleared */
1002
1003 get_counters(info, counters);
1004 put_counters(private, counters);
1005 mutex_unlock(&table->lock);
1006
1007 xt_free_table_info(info);
1008 943
1009 return counters; 944 return counters;
1010
1011 free_counters:
1012 vfree(counters);
1013 nomem:
1014 return ERR_PTR(-ENOMEM);
1015} 945}
1016 946
1017static int 947static int
@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1306 (newinfo->number <= oldinfo->initial_entries)) 1236 (newinfo->number <= oldinfo->initial_entries))
1307 module_put(t->me); 1237 module_put(t->me);
1308 1238
1309 /* Get the old counters. */ 1239 /* Get the old counters, and synchronize with replace */
1310 get_counters(oldinfo, counters); 1240 get_counters(oldinfo, counters);
1241
1311 /* Decrease module usage counts and free resource */ 1242 /* Decrease module usage counts and free resource */
1312 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1243 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1313 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, 1244 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len)
1377 return ret; 1308 return ret;
1378} 1309}
1379 1310
1311/* We're lazy, and add to the first CPU; overflow works its fey magic
1312 * and everything is OK. */
1313static int
1314add_counter_to_entry(struct ipt_entry *e,
1315 const struct xt_counters addme[],
1316 unsigned int *i)
1317{
1318 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1319
1320 (*i)++;
1321 return 0;
1322}
1380 1323
1381static int 1324static int
1382do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) 1325do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1383{ 1326{
1384 unsigned int i; 1327 unsigned int i, curcpu;
1385 struct xt_counters_info tmp; 1328 struct xt_counters_info tmp;
1386 struct xt_counters *paddc; 1329 struct xt_counters *paddc;
1387 unsigned int num_counters; 1330 unsigned int num_counters;
@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
1437 goto free; 1380 goto free;
1438 } 1381 }
1439 1382
1440 mutex_lock(&t->lock); 1383 local_bh_disable();
1441 private = t->private; 1384 private = t->private;
1442 if (private->number != num_counters) { 1385 if (private->number != num_counters) {
1443 ret = -EINVAL; 1386 ret = -EINVAL;
1444 goto unlock_up_free; 1387 goto unlock_up_free;
1445 } 1388 }
1446 1389
1447 preempt_disable();
1448 i = 0; 1390 i = 0;
1449 /* Choose the copy that is on our node */ 1391 /* Choose the copy that is on our node */
1450 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1392 curcpu = smp_processor_id();
1393 loc_cpu_entry = private->entries[curcpu];
1394 xt_info_wrlock(curcpu);
1451 IPT_ENTRY_ITERATE(loc_cpu_entry, 1395 IPT_ENTRY_ITERATE(loc_cpu_entry,
1452 private->size, 1396 private->size,
1453 add_counter_to_entry, 1397 add_counter_to_entry,
1454 paddc, 1398 paddc,
1455 &i); 1399 &i);
1456 preempt_enable(); 1400 xt_info_wrunlock(curcpu);
1457 unlock_up_free: 1401 unlock_up_free:
1458 mutex_unlock(&t->lock); 1402 local_bh_enable();
1459 xt_table_unlock(t); 1403 xt_table_unlock(t);
1460 module_put(t->me); 1404 module_put(t->me);
1461 free: 1405 free:
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index fe65187810f0..3229e0a81ba6 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
211 minip = ntohl(range->min_ip); 211 minip = ntohl(range->min_ip);
212 maxip = ntohl(range->max_ip); 212 maxip = ntohl(range->max_ip);
213 j = jhash_2words((__force u32)tuple->src.u3.ip, 213 j = jhash_2words((__force u32)tuple->src.u3.ip,
214 (__force u32)tuple->dst.u3.ip, 0); 214 range->flags & IP_NAT_RANGE_PERSISTENT ?
 215 0 : (__force u32)tuple->dst.u3.ip, 0);
215 j = ((u64)j * (maxip - minip + 1)) >> 32; 216 j = ((u64)j * (maxip - minip + 1)) >> 32;
216 *var_ipp = htonl(minip + j); 217 *var_ipp = htonl(minip + j);
217} 218}
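With IP_NAT_RANGE_PERSISTENT the destination no longer feeds the hash, so a given source is always mapped to the same address in the range regardless of whom it talks to. The fold into the range is a fixed-point multiply rather than a modulo; a worked example with hypothetical addresses:

    /* minip = 10.0.0.1, maxip = 10.0.0.4 -> 4 candidate addresses.
     * For j = 0x80000000 (halfway through the 32-bit hash space):
     *   ((u64)j * 4) >> 32 = 2, so *var_ipp = 10.0.0.1 + 2 = 10.0.0.3.
     * Every j maps uniformly onto [minip, maxip] with no bias. */
    j = ((u64)j * (maxip - minip + 1)) >> 32;
    *var_ipp = htonl(minip + j);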
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c40debe51b38..28205e5bfa9b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -784,8 +784,8 @@ static void rt_check_expire(void)
784{ 784{
785 static unsigned int rover; 785 static unsigned int rover;
786 unsigned int i = rover, goal; 786 unsigned int i = rover, goal;
787 struct rtable *rth, **rthp; 787 struct rtable *rth, *aux, **rthp;
788 unsigned long length = 0, samples = 0; 788 unsigned long samples = 0;
789 unsigned long sum = 0, sum2 = 0; 789 unsigned long sum = 0, sum2 = 0;
790 u64 mult; 790 u64 mult;
791 791
@@ -795,9 +795,9 @@ static void rt_check_expire(void)
795 goal = (unsigned int)mult; 795 goal = (unsigned int)mult;
796 if (goal > rt_hash_mask) 796 if (goal > rt_hash_mask)
797 goal = rt_hash_mask + 1; 797 goal = rt_hash_mask + 1;
798 length = 0;
799 for (; goal > 0; goal--) { 798 for (; goal > 0; goal--) {
800 unsigned long tmo = ip_rt_gc_timeout; 799 unsigned long tmo = ip_rt_gc_timeout;
800 unsigned long length;
801 801
802 i = (i + 1) & rt_hash_mask; 802 i = (i + 1) & rt_hash_mask;
803 rthp = &rt_hash_table[i].chain; 803 rthp = &rt_hash_table[i].chain;
@@ -809,8 +809,10 @@ static void rt_check_expire(void)
809 809
810 if (*rthp == NULL) 810 if (*rthp == NULL)
811 continue; 811 continue;
812 length = 0;
812 spin_lock_bh(rt_hash_lock_addr(i)); 813 spin_lock_bh(rt_hash_lock_addr(i));
813 while ((rth = *rthp) != NULL) { 814 while ((rth = *rthp) != NULL) {
815 prefetch(rth->u.dst.rt_next);
814 if (rt_is_expired(rth)) { 816 if (rt_is_expired(rth)) {
815 *rthp = rth->u.dst.rt_next; 817 *rthp = rth->u.dst.rt_next;
816 rt_free(rth); 818 rt_free(rth);
@@ -819,33 +821,30 @@ static void rt_check_expire(void)
819 if (rth->u.dst.expires) { 821 if (rth->u.dst.expires) {
820 /* Entry is expired even if it is in use */ 822 /* Entry is expired even if it is in use */
821 if (time_before_eq(jiffies, rth->u.dst.expires)) { 823 if (time_before_eq(jiffies, rth->u.dst.expires)) {
824nofree:
822 tmo >>= 1; 825 tmo >>= 1;
823 rthp = &rth->u.dst.rt_next; 826 rthp = &rth->u.dst.rt_next;
824 /* 827 /*
825 * Only bump our length if the hash 828 * We only count entries on
826 * inputs on entries n and n+1 are not
827 * the same, we only count entries on
828 * a chain with equal hash inputs once 829 * a chain with equal hash inputs once
829 * so that entries for different QOS 830 * so that entries for different QOS
830 * levels, and other non-hash input 831 * levels, and other non-hash input
831 * attributes don't unfairly skew 832 * attributes don't unfairly skew
832 * the length computation 833 * the length computation
833 */ 834 */
834 if ((*rthp == NULL) || 835 for (aux = rt_hash_table[i].chain;;) {
835 !compare_hash_inputs(&(*rthp)->fl, 836 if (aux == rth) {
836 &rth->fl)) 837 length += ONE;
837 length += ONE; 838 break;
839 }
840 if (compare_hash_inputs(&aux->fl, &rth->fl))
841 break;
842 aux = aux->u.dst.rt_next;
843 }
838 continue; 844 continue;
839 } 845 }
840 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { 846 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
841 tmo >>= 1; 847 goto nofree;
842 rthp = &rth->u.dst.rt_next;
843 if ((*rthp == NULL) ||
844 !compare_hash_inputs(&(*rthp)->fl,
845 &rth->fl))
846 length += ONE;
847 continue;
848 }
849 848
850 /* Cleanup aged off entries. */ 849 /* Cleanup aged off entries. */
851 *rthp = rth->u.dst.rt_next; 850 *rthp = rth->u.dst.rt_next;
@@ -1068,7 +1067,6 @@ out: return 0;
1068static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) 1067static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
1069{ 1068{
1070 struct rtable *rth, **rthp; 1069 struct rtable *rth, **rthp;
1071 struct rtable *rthi;
1072 unsigned long now; 1070 unsigned long now;
1073 struct rtable *cand, **candp; 1071 struct rtable *cand, **candp;
1074 u32 min_score; 1072 u32 min_score;
@@ -1088,7 +1086,6 @@ restart:
1088 } 1086 }
1089 1087
1090 rthp = &rt_hash_table[hash].chain; 1088 rthp = &rt_hash_table[hash].chain;
1091 rthi = NULL;
1092 1089
1093 spin_lock_bh(rt_hash_lock_addr(hash)); 1090 spin_lock_bh(rt_hash_lock_addr(hash));
1094 while ((rth = *rthp) != NULL) { 1091 while ((rth = *rthp) != NULL) {
@@ -1134,17 +1131,6 @@ restart:
1134 chain_length++; 1131 chain_length++;
1135 1132
1136 rthp = &rth->u.dst.rt_next; 1133 rthp = &rth->u.dst.rt_next;
1137
1138 /*
1139 * check to see if the next entry in the chain
1140 * contains the same hash input values as rt. If it does
1141 * This is where we will insert into the list, instead of
1142 * at the head. This groups entries that differ by aspects not
1143 * relvant to the hash function together, which we use to adjust
1144 * our chain length
1145 */
1146 if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl))
1147 rthi = rth;
1148 } 1134 }
1149 1135
1150 if (cand) { 1136 if (cand) {
@@ -1205,10 +1191,7 @@ restart:
1205 } 1191 }
1206 } 1192 }
1207 1193
1208 if (rthi) 1194 rt->u.dst.rt_next = rt_hash_table[hash].chain;
1209 rt->u.dst.rt_next = rthi->u.dst.rt_next;
1210 else
1211 rt->u.dst.rt_next = rt_hash_table[hash].chain;
1212 1195
1213#if RT_CACHE_DEBUG >= 2 1196#if RT_CACHE_DEBUG >= 2
1214 if (rt->u.dst.rt_next) { 1197 if (rt->u.dst.rt_next) {
@@ -1224,10 +1207,7 @@ restart:
1224 * previous writes to rt are comitted to memory 1207 * previous writes to rt are comitted to memory
1225 * before making rt visible to other CPUS. 1208 * before making rt visible to other CPUS.
1226 */ 1209 */
1227 if (rthi) 1210 rcu_assign_pointer(rt_hash_table[hash].chain, rt);
1228 rcu_assign_pointer(rthi->u.dst.rt_next, rt);
1229 else
1230 rcu_assign_pointer(rt_hash_table[hash].chain, rt);
1231 1211
1232 spin_unlock_bh(rt_hash_lock_addr(hash)); 1212 spin_unlock_bh(rt_hash_lock_addr(hash));
1233 *rp = rt; 1213 *rp = rt;
@@ -3397,7 +3377,7 @@ int __init ip_rt_init(void)
3397 0, 3377 0,
3398 &rt_hash_log, 3378 &rt_hash_log,
3399 &rt_hash_mask, 3379 &rt_hash_mask,
3400 0); 3380 rhash_entries ? 0 : 512 * 1024);
3401 memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); 3381 memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
3402 rt_hash_lock_init(); 3382 rt_hash_lock_init();
3403 3383
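The aux scan in rt_check_expire() replaces the old neighbour-pair test, which only worked while same-input entries were kept adjacent by the rthi insertion logic removed further down: an entry now contributes to the chain length only if no earlier entry shares its hash inputs. A worked example with a hypothetical chain:

    /* head -> A -> B -> C, where A and C share hash inputs:
     *   rth = A: scan meets aux == A first            -> length += ONE
     *   rth = B: A's inputs differ, scan reaches B    -> length += ONE
     *   rth = C: aux = A matches C's inputs first     -> not counted
     * Effective length is 2, so QOS-only duplicates don't skew GC. */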
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2451aeb5ac23..7a0f0b27bf1f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1081,8 +1081,7 @@ out_err:
1081 * this, no blocking and very strange errors 8) 1081 * this, no blocking and very strange errors 8)
1082 */ 1082 */
1083 1083
1084static int tcp_recv_urg(struct sock *sk, long timeo, 1084static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
1085 struct msghdr *msg, int len, int flags)
1086{ 1085{
1087 struct tcp_sock *tp = tcp_sk(sk); 1086 struct tcp_sock *tp = tcp_sk(sk);
1088 1087
@@ -1322,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1322 struct task_struct *user_recv = NULL; 1321 struct task_struct *user_recv = NULL;
1323 int copied_early = 0; 1322 int copied_early = 0;
1324 struct sk_buff *skb; 1323 struct sk_buff *skb;
1324 u32 urg_hole = 0;
1325 1325
1326 lock_sock(sk); 1326 lock_sock(sk);
1327 1327
@@ -1533,7 +1533,8 @@ do_prequeue:
1533 } 1533 }
1534 } 1534 }
1535 } 1535 }
1536 if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { 1536 if ((flags & MSG_PEEK) &&
1537 (peek_seq - copied - urg_hole != tp->copied_seq)) {
1537 if (net_ratelimit()) 1538 if (net_ratelimit())
1538 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", 1539 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
1539 current->comm, task_pid_nr(current)); 1540 current->comm, task_pid_nr(current));
@@ -1554,6 +1555,7 @@ do_prequeue:
1554 if (!urg_offset) { 1555 if (!urg_offset) {
1555 if (!sock_flag(sk, SOCK_URGINLINE)) { 1556 if (!sock_flag(sk, SOCK_URGINLINE)) {
1556 ++*seq; 1557 ++*seq;
1558 urg_hole++;
1557 offset++; 1559 offset++;
1558 used--; 1560 used--;
1559 if (!used) 1561 if (!used)
@@ -1697,7 +1699,7 @@ out:
1697 return err; 1699 return err;
1698 1700
1699recv_urg: 1701recv_urg:
1700 err = tcp_recv_urg(sk, timeo, msg, len, flags); 1702 err = tcp_recv_urg(sk, msg, len, flags);
1701 goto out; 1703 goto out;
1702} 1704}
1703 1705
@@ -2512,6 +2514,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2512 struct sk_buff *p; 2514 struct sk_buff *p;
2513 struct tcphdr *th; 2515 struct tcphdr *th;
2514 struct tcphdr *th2; 2516 struct tcphdr *th2;
2517 unsigned int len;
2515 unsigned int thlen; 2518 unsigned int thlen;
2516 unsigned int flags; 2519 unsigned int flags;
2517 unsigned int mss = 1; 2520 unsigned int mss = 1;
@@ -2532,6 +2535,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2532 2535
2533 skb_gro_pull(skb, thlen); 2536 skb_gro_pull(skb, thlen);
2534 2537
2538 len = skb_gro_len(skb);
2535 flags = tcp_flag_word(th); 2539 flags = tcp_flag_word(th);
2536 2540
2537 for (; (p = *head); head = &p->next) { 2541 for (; (p = *head); head = &p->next) {
@@ -2562,7 +2566,7 @@ found:
2562 2566
2563 mss = skb_shinfo(p)->gso_size; 2567 mss = skb_shinfo(p)->gso_size;
2564 2568
2565 flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); 2569 flush |= (len > mss) | !len;
2566 flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); 2570 flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
2567 2571
2568 if (flush || skb_gro_receive(head, skb)) { 2572 if (flush || skb_gro_receive(head, skb)) {
@@ -2575,7 +2579,7 @@ found:
2575 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); 2579 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
2576 2580
2577out_check_final: 2581out_check_final:
2578 flush = skb_gro_len(skb) < mss; 2582 flush = len < mss;
2579 flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | 2583 flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
2580 TCP_FLAG_SYN | TCP_FLAG_FIN); 2584 TCP_FLAG_SYN | TCP_FLAG_FIN);
2581 2585
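Under MSG_PEEK, copied_seq must stay put, but skipping an out-of-band urgent byte (!SOCK_URGINLINE) still advances *seq; urg_hole counts those skips so the race check does not fire on a legitimate sequence gap. A hypothetical helper restating the invariant being tested:

    static inline bool tcp_peek_raced(u32 peek_seq, int copied, u32 urg_hole,
                                      u32 copied_seq)
    {
            /* true only if another thread consumed data beneath us */
            return peek_seq - copied - urg_hole != copied_seq;
    }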
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bc8e27a163d..eec3e6f9956c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
597 tcp_grow_window(sk, skb); 597 tcp_grow_window(sk, skb);
598} 598}
599 599
600static u32 tcp_rto_min(struct sock *sk)
601{
602 struct dst_entry *dst = __sk_dst_get(sk);
603 u32 rto_min = TCP_RTO_MIN;
604
605 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
606 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
607 return rto_min;
608}
609
610/* Called to compute a smoothed rtt estimate. The data fed to this 600/* Called to compute a smoothed rtt estimate. The data fed to this
611 * routine either comes from timestamps, or from segments that were 601 * routine either comes from timestamps, or from segments that were
612 * known _not_ to have been retransmitted [see Karn/Partridge 602 * known _not_ to have been retransmitted [see Karn/Partridge
@@ -928,6 +918,8 @@ static void tcp_init_metrics(struct sock *sk)
928 tcp_set_rto(sk); 918 tcp_set_rto(sk);
929 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) 919 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
930 goto reset; 920 goto reset;
921
922cwnd:
931 tp->snd_cwnd = tcp_init_cwnd(tp, dst); 923 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
932 tp->snd_cwnd_stamp = tcp_time_stamp; 924 tp->snd_cwnd_stamp = tcp_time_stamp;
933 return; 925 return;
@@ -942,6 +934,7 @@ reset:
942 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; 934 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
943 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; 935 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
944 } 936 }
937 goto cwnd;
945} 938}
946 939
947static void tcp_update_reordering(struct sock *sk, const int metric, 940static void tcp_update_reordering(struct sock *sk, const int metric,
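The net effect is that tcp_init_metrics() now sets the initial cwnd on both exits instead of returning early from the reset path. A condensed control-flow sketch (the condition name is illustrative):

    if (rtt_metrics_unusable)
            goto reset;
    cwnd:
            tp->snd_cwnd = tcp_init_cwnd(tp, dst);
            tp->snd_cwnd_stamp = tcp_time_stamp;
            return;
    reset:
            /* fall back to TCP_TIMEOUT_INIT defaults ... */
            goto cwnd;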
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c1f259d2d33b..59aec609cec6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -754,6 +754,36 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
754 tp->fackets_out -= decr; 754 tp->fackets_out -= decr;
755} 755}
756 756
757/* Pcount in the middle of the write queue got changed, we need to do various
758 * tweaks to fix counters
759 */
760static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
761{
762 struct tcp_sock *tp = tcp_sk(sk);
763
764 tp->packets_out -= decr;
765
766 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
767 tp->sacked_out -= decr;
768 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
769 tp->retrans_out -= decr;
770 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
771 tp->lost_out -= decr;
772
773 /* Reno case is special. Sigh... */
774 if (tcp_is_reno(tp) && decr > 0)
775 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
776
777 tcp_adjust_fackets_out(sk, skb, decr);
778
779 if (tp->lost_skb_hint &&
780 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
781 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
782 tp->lost_cnt_hint -= decr;
783
784 tcp_verify_left_out(tp);
785}
786
757/* Function to create two new TCP segments. Shrinks the given segment 787/* Function to create two new TCP segments. Shrinks the given segment
758 * to the specified size and appends a new segment with the rest of the 788 * to the specified size and appends a new segment with the rest of the
759 * packet to the list. This won't be called frequently, I hope. 789 * packet to the list. This won't be called frequently, I hope.
@@ -836,28 +866,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
836 int diff = old_factor - tcp_skb_pcount(skb) - 866 int diff = old_factor - tcp_skb_pcount(skb) -
837 tcp_skb_pcount(buff); 867 tcp_skb_pcount(buff);
838 868
839 tp->packets_out -= diff; 869 if (diff)
840 870 tcp_adjust_pcount(sk, skb, diff);
841 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
842 tp->sacked_out -= diff;
843 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
844 tp->retrans_out -= diff;
845
846 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
847 tp->lost_out -= diff;
848
849 /* Adjust Reno SACK estimate. */
850 if (tcp_is_reno(tp) && diff > 0) {
851 tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
852 tcp_verify_left_out(tp);
853 }
854 tcp_adjust_fackets_out(sk, skb, diff);
855
856 if (tp->lost_skb_hint &&
857 before(TCP_SKB_CB(skb)->seq,
858 TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
859 (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
860 tp->lost_cnt_hint -= diff;
861 } 871 }
862 872
863 /* Link BUFF into the send queue. */ 873 /* Link BUFF into the send queue. */
@@ -1768,22 +1778,14 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1768 * packet counting does not break. 1778 * packet counting does not break.
1769 */ 1779 */
1770 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; 1780 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
1771 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
1772 tp->retrans_out -= tcp_skb_pcount(next_skb);
1773 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
1774 tp->lost_out -= tcp_skb_pcount(next_skb);
1775 /* Reno case is special. Sigh... */
1776 if (tcp_is_reno(tp) && tp->sacked_out)
1777 tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
1778
1779 tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
1780 tp->packets_out -= tcp_skb_pcount(next_skb);
1781 1781
1782 /* changed transmit queue under us so clear hints */ 1782 /* changed transmit queue under us so clear hints */
1783 tcp_clear_retrans_hints_partial(tp); 1783 tcp_clear_retrans_hints_partial(tp);
1784 if (next_skb == tp->retransmit_skb_hint) 1784 if (next_skb == tp->retransmit_skb_hint)
1785 tp->retransmit_skb_hint = skb; 1785 tp->retransmit_skb_hint = skb;
1786 1786
1787 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
1788
1787 sk_wmem_free_skb(sk, next_skb); 1789 sk_wmem_free_skb(sk, next_skb);
1788} 1790}
1789 1791
@@ -1891,7 +1893,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1891 if (tcp_fragment(sk, skb, cur_mss, cur_mss)) 1893 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
1892 return -ENOMEM; /* We'll try again later. */ 1894 return -ENOMEM; /* We'll try again later. */
1893 } else { 1895 } else {
1894 tcp_init_tso_segs(sk, skb, cur_mss); 1896 int oldpcount = tcp_skb_pcount(skb);
1897
1898 if (unlikely(oldpcount > 1)) {
1899 tcp_init_tso_segs(sk, skb, cur_mss);
1900 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
1901 }
1895 } 1902 }
1896 1903
1897 tcp_retrans_try_collapse(sk, skb, cur_mss); 1904 tcp_retrans_try_collapse(sk, skb, cur_mss);
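tcp_adjust_pcount() folds the bookkeeping that tcp_fragment() and tcp_collapse_retrans() used to open-code, and picks up a third caller: a retransmit that re-segments for a smaller MSS now repairs the counters the same way. The contract, in one call:

    /* after tcp_skb_pcount(skb) shrank by 'decr': */
    tcp_adjust_pcount(sk, skb, decr);  /* repairs packets_out, sacked_out,
                                        * retrans_out, lost_out, fackets_out
                                        * and lost_cnt_hint, then re-checks
                                        * the left-out invariant */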
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a453aac91bd3..c6743eec9b7d 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
158} 158}
159EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); 159EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
160 160
161static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
162{
163 return min(tp->snd_ssthresh, tp->snd_cwnd-1);
164}
165
161static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) 166static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
162{ 167{
163 struct tcp_sock *tp = tcp_sk(sk); 168 struct tcp_sock *tp = tcp_sk(sk);
@@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
221 */ 226 */
222 diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; 227 diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
223 228
224 if (diff > gamma && tp->snd_ssthresh > 2 ) { 229 if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
225 /* Going too fast. Time to slow down 230 /* Going too fast. Time to slow down
226 * and switch to congestion avoidance. 231 * and switch to congestion avoidance.
227 */ 232 */
228 tp->snd_ssthresh = 2;
229 233
230 /* Set cwnd to match the actual rate 234 /* Set cwnd to match the actual rate
231 * exactly: 235 * exactly:
@@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
235 * utilization. 239 * utilization.
236 */ 240 */
237 tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); 241 tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
242 tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
238 243
239 } else if (tp->snd_cwnd <= tp->snd_ssthresh) { 244 } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
240 /* Slow start. */ 245 /* Slow start. */
@@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
250 * we slow down. 255 * we slow down.
251 */ 256 */
252 tp->snd_cwnd--; 257 tp->snd_cwnd--;
258 tp->snd_ssthresh
259 = tcp_vegas_ssthresh(tp);
253 } else if (diff < alpha) { 260 } else if (diff < alpha) {
254 /* We don't have enough extra packets 261 /* We don't have enough extra packets
255 * in the network, so speed up. 262 * in the network, so speed up.
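Rather than collapsing ssthresh to 2 (which locked the flow into congestion avoidance for good), the backoff now clamps ssthresh just under the window Vegas is reducing to. A worked example:

    /* cwnd = 20, old ssthresh = 100, Vegas sees diff > gamma:
     *   tcp_vegas_ssthresh(tp) = min(100, 20 - 1) = 19
     * so a later slow start cannot climb back past the rate just
     * measured; an already-lower ssthresh (say 10) is left alone. */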
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bda08a09357d..7a1d1ce22e66 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -222,7 +222,7 @@ fail:
222 return error; 222 return error;
223} 223}
224 224
225int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 225static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
226{ 226{
227 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 227 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
228 228
@@ -1823,7 +1823,6 @@ EXPORT_SYMBOL(udp_lib_getsockopt);
1823EXPORT_SYMBOL(udp_lib_setsockopt); 1823EXPORT_SYMBOL(udp_lib_setsockopt);
1824EXPORT_SYMBOL(udp_poll); 1824EXPORT_SYMBOL(udp_poll);
1825EXPORT_SYMBOL(udp_lib_get_port); 1825EXPORT_SYMBOL(udp_lib_get_port);
1826EXPORT_SYMBOL(ipv4_rcv_saddr_equal);
1827 1826
1828#ifdef CONFIG_PROC_FS 1827#ifdef CONFIG_PROC_FS
1829EXPORT_SYMBOL(udp_proc_register); 1828EXPORT_SYMBOL(udp_proc_register);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ec992159b5f8..ca8cb326d1d2 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -22,17 +22,17 @@ menuconfig IPV6
22if IPV6 22if IPV6
23 23
24config IPV6_PRIVACY 24config IPV6_PRIVACY
25 bool "IPv6: Privacy Extensions support" 25 bool "IPv6: Privacy Extensions (RFC 3041) support"
26 ---help--- 26 ---help---
27 Privacy Extensions for Stateless Address Autoconfiguration in IPv6 27 Privacy Extensions for Stateless Address Autoconfiguration in IPv6
28 support. With this option, additional periodically-alter 28 support. With this option, additional periodically-altered
29 pseudo-random global-scope unicast address(es) will assigned to 29 pseudo-random global-scope unicast address(es) will be assigned to
30 your interface(s). 30 your interface(s).
31 31
32 We use our standard pseudo random algorithm to generate randomized 32 We use our standard pseudo-random algorithm to generate the
33 interface identifier, instead of one described in RFC 3041. 33 randomized interface identifier, instead of one described in RFC 3041.
34 34
35 By default, kernel do not generate temporary addresses. 35 By default the kernel does not generate temporary addresses.
36 To use temporary addresses, do 36 To use temporary addresses, do
37 37
38 echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr 38 echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr
@@ -43,9 +43,9 @@ config IPV6_ROUTER_PREF
43 bool "IPv6: Router Preference (RFC 4191) support" 43 bool "IPv6: Router Preference (RFC 4191) support"
44 ---help--- 44 ---help---
45 Router Preference is an optional extension to the Router 45 Router Preference is an optional extension to the Router
46 Advertisement message to improve the ability of hosts 46 Advertisement message which improves the ability of hosts
47 to pick more appropriate router, especially when the hosts 47 to pick an appropriate router, especially when the hosts
48 is placed in a multi-homed network. 48 are placed in a multi-homed network.
49 49
50 If unsure, say N. 50 If unsure, say N.
51 51
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d31df0f4bc9a..a7fdf9a27f15 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -380,10 +380,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
380 default: 380 default:
381 goto sticky_done; 381 goto sticky_done;
382 } 382 }
383
384 if ((rthdr->hdrlen & 1) ||
385 (rthdr->hdrlen >> 1) != rthdr->segments_left)
386 goto sticky_done;
387 } 383 }
388 384
389 retv = 0; 385 retv = 0;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e89cfa3a8f25..219e165aea10 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb,
365 365
366 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 366 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
367 367
368 rcu_read_lock(); 368 xt_info_rdlock_bh();
369 private = rcu_dereference(table->private); 369 private = table->private;
370 table_base = rcu_dereference(private->entries[smp_processor_id()]); 370 table_base = private->entries[smp_processor_id()];
371 371
372 e = get_entry(table_base, private->hook_entry[hook]); 372 e = get_entry(table_base, private->hook_entry[hook]);
373 373
@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
466#ifdef CONFIG_NETFILTER_DEBUG 466#ifdef CONFIG_NETFILTER_DEBUG
467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; 467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
468#endif 468#endif
469 rcu_read_unlock(); 469 xt_info_rdunlock_bh();
470 470
471#ifdef DEBUG_ALLOW_ALL 471#ifdef DEBUG_ALLOW_ALL
472 return NF_ACCEPT; 472 return NF_ACCEPT;
@@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t,
926 /* Instead of clearing (by a previous call to memset()) 926 /* Instead of clearing (by a previous call to memset())
927 * the counters and using adds, we set the counters 927 * the counters and using adds, we set the counters
928 * with data used by 'current' CPU 928 * with data used by 'current' CPU
929 * We dont care about preemption here. 929 *
930 * Bottom half has to be disabled to prevent deadlock
931 * if new softirq were to run and call ipt_do_table
930 */ 932 */
931 curcpu = raw_smp_processor_id(); 933 local_bh_disable();
934 curcpu = smp_processor_id();
932 935
933 i = 0; 936 i = 0;
934 IP6T_ENTRY_ITERATE(t->entries[curcpu], 937 IP6T_ENTRY_ITERATE(t->entries[curcpu],
@@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t,
941 if (cpu == curcpu) 944 if (cpu == curcpu)
942 continue; 945 continue;
943 i = 0; 946 i = 0;
947 xt_info_wrlock(cpu);
944 IP6T_ENTRY_ITERATE(t->entries[cpu], 948 IP6T_ENTRY_ITERATE(t->entries[cpu],
945 t->size, 949 t->size,
946 add_entry_to_counter, 950 add_entry_to_counter,
947 counters, 951 counters,
948 &i); 952 &i);
953 xt_info_wrunlock(cpu);
949 } 954 }
950}
951
952/* We're lazy, and add to the first CPU; overflow works its fey magic
953 * and everything is OK. */
954static int
955add_counter_to_entry(struct ip6t_entry *e,
956 const struct xt_counters addme[],
957 unsigned int *i)
958{
959 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
960
961 (*i)++;
962 return 0;
963}
964
965/* Take values from counters and add them back onto the current cpu */
966static void put_counters(struct xt_table_info *t,
967 const struct xt_counters counters[])
968{
969 unsigned int i, cpu;
970
971 local_bh_disable();
972 cpu = smp_processor_id();
973 i = 0;
974 IP6T_ENTRY_ITERATE(t->entries[cpu],
975 t->size,
976 add_counter_to_entry,
977 counters,
978 &i);
979 local_bh_enable(); 955 local_bh_enable();
980} 956}
981 957
982static inline int
983zero_entry_counter(struct ip6t_entry *e, void *arg)
984{
985 e->counters.bcnt = 0;
986 e->counters.pcnt = 0;
987 return 0;
988}
989
990static void
991clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
992{
993 unsigned int cpu;
994 const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
995
996 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
997 for_each_possible_cpu(cpu) {
998 memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
999 IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
1000 zero_entry_counter, NULL);
1001 }
1002}
1003
1004static struct xt_counters *alloc_counters(struct xt_table *table) 958static struct xt_counters *alloc_counters(struct xt_table *table)
1005{ 959{
1006 unsigned int countersize; 960 unsigned int countersize;
1007 struct xt_counters *counters; 961 struct xt_counters *counters;
1008 struct xt_table_info *private = table->private; 962 struct xt_table_info *private = table->private;
1009 struct xt_table_info *info;
1010 963
1011 /* We need atomic snapshot of counters: rest doesn't change 964 /* We need atomic snapshot of counters: rest doesn't change
1012 (other than comefrom, which userspace doesn't care 965 (other than comefrom, which userspace doesn't care
@@ -1015,28 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
1015 counters = vmalloc_node(countersize, numa_node_id()); 968 counters = vmalloc_node(countersize, numa_node_id());
1016 969
1017 if (counters == NULL) 970 if (counters == NULL)
1018 goto nomem; 971 return ERR_PTR(-ENOMEM);
1019
1020 info = xt_alloc_table_info(private->size);
1021 if (!info)
1022 goto free_counters;
1023
1024 clone_counters(info, private);
1025
1026 mutex_lock(&table->lock);
1027 xt_table_entry_swap_rcu(private, info);
1028 synchronize_net(); /* Wait until smoke has cleared */
1029 972
1030 get_counters(info, counters); 973 get_counters(private, counters);
1031 put_counters(private, counters);
1032 mutex_unlock(&table->lock);
1033 974
1034 xt_free_table_info(info); 975 return counters;
1035
1036 free_counters:
1037 vfree(counters);
1038 nomem:
1039 return ERR_PTR(-ENOMEM);
1040} 976}
1041 977
1042static int 978static int
@@ -1332,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1332 (newinfo->number <= oldinfo->initial_entries)) 1268 (newinfo->number <= oldinfo->initial_entries))
1333 module_put(t->me); 1269 module_put(t->me);
1334 1270
1335 /* Get the old counters. */ 1271 /* Get the old counters, and synchronize with replace */
1336 get_counters(oldinfo, counters); 1272 get_counters(oldinfo, counters);
1273
1337 /* Decrease module usage counts and free resource */ 1274 /* Decrease module usage counts and free resource */
1338 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1275 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1339 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, 1276 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1403,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len)
1403 return ret; 1340 return ret;
1404} 1341}
1405 1342
1343/* We're lazy, and add to the first CPU; overflow works its fey magic
1344 * and everything is OK. */
1345static int
1346add_counter_to_entry(struct ip6t_entry *e,
1347 const struct xt_counters addme[],
1348 unsigned int *i)
1349{
1350 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1351
1352 (*i)++;
1353 return 0;
1354}
1355
1406static int 1356static int
1407do_add_counters(struct net *net, void __user *user, unsigned int len, 1357do_add_counters(struct net *net, void __user *user, unsigned int len,
1408 int compat) 1358 int compat)
1409{ 1359{
1410 unsigned int i; 1360 unsigned int i, curcpu;
1411 struct xt_counters_info tmp; 1361 struct xt_counters_info tmp;
1412 struct xt_counters *paddc; 1362 struct xt_counters *paddc;
1413 unsigned int num_counters; 1363 unsigned int num_counters;
@@ -1463,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
1463 goto free; 1413 goto free;
1464 } 1414 }
1465 1415
1466 mutex_lock(&t->lock); 1416
1417 local_bh_disable();
1467 private = t->private; 1418 private = t->private;
1468 if (private->number != num_counters) { 1419 if (private->number != num_counters) {
1469 ret = -EINVAL; 1420 ret = -EINVAL;
1470 goto unlock_up_free; 1421 goto unlock_up_free;
1471 } 1422 }
1472 1423
1473 preempt_disable();
1474 i = 0; 1424 i = 0;
1475 /* Choose the copy that is on our node */ 1425 /* Choose the copy that is on our node */
1476 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1426 curcpu = smp_processor_id();
1427 xt_info_wrlock(curcpu);
1428 loc_cpu_entry = private->entries[curcpu];
1477 IP6T_ENTRY_ITERATE(loc_cpu_entry, 1429 IP6T_ENTRY_ITERATE(loc_cpu_entry,
1478 private->size, 1430 private->size,
1479 add_counter_to_entry, 1431 add_counter_to_entry,
1480 paddc, 1432 paddc,
1481 &i); 1433 &i);
1482 preempt_enable(); 1434 xt_info_wrunlock(curcpu);
1435
1483 unlock_up_free: 1436 unlock_up_free:
1484 mutex_unlock(&t->lock); 1437 local_bh_enable();
1485 xt_table_unlock(t); 1438 xt_table_unlock(t);
1486 module_put(t->me); 1439 module_put(t->me);
1487 free: 1440 free:
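
The do_add_counters() hunk above replaces the per-table mutex and preempt_disable() with a softirq-safe, per-CPU write lock. Reduced to its core, the new update path looks roughly like this (a sketch of the sequence shown in the hunk, not a complete function; i, paddc and private are as declared there):

    local_bh_disable();              /* keep softirq packet processing off this CPU */
    curcpu = smp_processor_id();     /* stable while bottom halves are disabled */
    xt_info_wrlock(curcpu);          /* exclude readers on this CPU only */
    loc_cpu_entry = private->entries[curcpu];
    IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size,
                       add_counter_to_entry, paddc, &i);
    xt_info_wrunlock(curcpu);
    local_bh_enable();

The userspace-supplied counters are folded into a single CPU's copy only; as the "overflow works its fey magic" comment notes, get_counters() later sums every CPU's copy, so the totals still come out right without any cross-CPU locking on the update side.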
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 14e6724d5672..91490ad9302c 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -50,14 +50,14 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
50 struct ipv6_opt_hdr _hdr; 50 struct ipv6_opt_hdr _hdr;
51 int hdrlen; 51 int hdrlen;
52 52
53 /* Is there enough space for the next ext header? */
54 if (len < (int)sizeof(struct ipv6_opt_hdr))
55 return false;
56 /* No more exthdr -> evaluate */ 53 /* No more exthdr -> evaluate */
57 if (nexthdr == NEXTHDR_NONE) { 54 if (nexthdr == NEXTHDR_NONE) {
58 temp |= MASK_NONE; 55 temp |= MASK_NONE;
59 break; 56 break;
60 } 57 }
58 /* Is there enough space for the next ext header? */
59 if (len < (int)sizeof(struct ipv6_opt_hdr))
60 return false;
61 /* ESP -> evaluate */ 61 /* ESP -> evaluate */
62 if (nexthdr == NEXTHDR_ESP) { 62 if (nexthdr == NEXTHDR_ESP) {
63 temp |= MASK_ESP; 63 temp |= MASK_ESP;
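
The reordering in ip6t_ipv6header.c matters because NEXTHDR_NONE terminates the extension-header chain without any header body following it: testing the remaining length first would wrongly reject a packet whose chain ends exactly at the end of the data. A minimal sketch of the corrected walk order (illustrative C, not the kernel loop verbatim):

    while (nexthdr != NEXTHDR_NONE) {
            /* only non-terminal headers need sizeof(struct ipv6_opt_hdr) bytes */
            if (len < (int)sizeof(struct ipv6_opt_hdr))
                    return false;           /* genuinely truncated */
            /* ... read the header, advance nexthdr and len ... */
    }
    /* reaching NEXTHDR_NONE with len == 0 is a valid, complete chain */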
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1394ddb6e35c..032a5ec391c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -137,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = {
137 } 137 }
138 }, 138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
140 .rt6i_protocol = RTPROT_KERNEL,
140 .rt6i_metric = ~(u32) 0, 141 .rt6i_metric = ~(u32) 0,
141 .rt6i_ref = ATOMIC_INIT(1), 142 .rt6i_ref = ATOMIC_INIT(1),
142}; 143};
@@ -159,6 +160,7 @@ static struct rt6_info ip6_prohibit_entry_template = {
159 } 160 }
160 }, 161 },
161 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 162 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
163 .rt6i_protocol = RTPROT_KERNEL,
162 .rt6i_metric = ~(u32) 0, 164 .rt6i_metric = ~(u32) 0,
163 .rt6i_ref = ATOMIC_INIT(1), 165 .rt6i_ref = ATOMIC_INIT(1),
164}; 166};
@@ -176,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = {
176 } 178 }
177 }, 179 },
178 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_protocol = RTPROT_KERNEL,
179 .rt6i_metric = ~(u32) 0, 182 .rt6i_metric = ~(u32) 0,
180 .rt6i_ref = ATOMIC_INIT(1), 183 .rt6i_ref = ATOMIC_INIT(1),
181}; 184};
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6842dd2edd5b..8905712cfbb8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,6 +53,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
53{ 53{
54 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; 54 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
55 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); 55 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
56 __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
57 __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
56 int sk_ipv6only = ipv6_only_sock(sk); 58 int sk_ipv6only = ipv6_only_sock(sk);
57 int sk2_ipv6only = inet_v6_ipv6only(sk2); 59 int sk2_ipv6only = inet_v6_ipv6only(sk2);
58 int addr_type = ipv6_addr_type(sk_rcv_saddr6); 60 int addr_type = ipv6_addr_type(sk_rcv_saddr6);
@@ -60,7 +62,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
60 62
61 /* if both are mapped, treat as IPv4 */ 63 /* if both are mapped, treat as IPv4 */
62 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) 64 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
63 return ipv4_rcv_saddr_equal(sk, sk2); 65 return (!sk2_ipv6only &&
66 (!sk_rcv_saddr || !sk2_rcv_saddr ||
67 sk_rcv_saddr == sk2_rcv_saddr));
64 68
65 if (addr_type2 == IPV6_ADDR_ANY && 69 if (addr_type2 == IPV6_ADDR_ANY &&
66 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 70 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
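
When both sockets are bound to IPv4-mapped addresses, ipv6_rcv_saddr_equal() now open-codes the IPv4 comparison rather than reusing ipv4_rcv_saddr_equal(), which is written for pairs of plain IPv4 sockets. The predicate itself is simple; lifted into a standalone helper for readability (v4_mapped_conflict is a hypothetical name, not kernel code):

    #include <stdint.h>

    /* sockets conflict if sk2 accepts IPv4 at all and either side is
     * bound to the wildcard (0.0.0.0) or both share the same address */
    static int v4_mapped_conflict(uint32_t sk_rcv_saddr, uint32_t sk2_rcv_saddr,
                                  int sk2_ipv6only)
    {
            return !sk2_ipv6only &&
                   (!sk_rcv_saddr || !sk2_rcv_saddr ||
                    sk_rcv_saddr == sk2_rcv_saddr);
    }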
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 0af823cf7f1f..5ee5a031bc93 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -72,6 +72,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
72#endif 72#endif
73 73
74 skb->protocol = htons(ETH_P_IPV6); 74 skb->protocol = htons(ETH_P_IPV6);
75 skb->local_df = 1;
75 76
76 return x->outer_mode->output2(x, skb); 77 return x->outer_mode->output2(x, skb);
77} 78}
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 086d5ef098fd..811984d9324b 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -34,6 +34,7 @@
34#include <linux/module.h> 34#include <linux/module.h>
35#include <linux/fs.h> 35#include <linux/fs.h>
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <linux/seq_file.h>
37#include <linux/termios.h> 38#include <linux/termios.h>
38#include <linux/tty.h> 39#include <linux/tty.h>
39#include <linux/interrupt.h> 40#include <linux/interrupt.h>
@@ -72,8 +73,7 @@ static int ircomm_tty_control_indication(void *instance, void *sap,
72static void ircomm_tty_flow_indication(void *instance, void *sap, 73static void ircomm_tty_flow_indication(void *instance, void *sap,
73 LOCAL_FLOW cmd); 74 LOCAL_FLOW cmd);
74#ifdef CONFIG_PROC_FS 75#ifdef CONFIG_PROC_FS
75static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, 76static const struct file_operations ircomm_tty_proc_fops;
76 int *eof, void *unused);
77#endif /* CONFIG_PROC_FS */ 77#endif /* CONFIG_PROC_FS */
78static struct tty_driver *driver; 78static struct tty_driver *driver;
79 79
@@ -98,7 +98,7 @@ static const struct tty_operations ops = {
98 .hangup = ircomm_tty_hangup, 98 .hangup = ircomm_tty_hangup,
99 .wait_until_sent = ircomm_tty_wait_until_sent, 99 .wait_until_sent = ircomm_tty_wait_until_sent,
100#ifdef CONFIG_PROC_FS 100#ifdef CONFIG_PROC_FS
101 .read_proc = ircomm_tty_read_proc, 101 .proc_fops = &ircomm_tty_proc_fops,
102#endif /* CONFIG_PROC_FS */ 102#endif /* CONFIG_PROC_FS */
103}; 103};
104 104
@@ -1245,150 +1245,170 @@ static void ircomm_tty_flow_indication(void *instance, void *sap,
1245} 1245}
1246 1246
1247#ifdef CONFIG_PROC_FS 1247#ifdef CONFIG_PROC_FS
1248static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf) 1248static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
1249{ 1249{
1250 int ret=0; 1250 char sep;
1251 1251
1252 ret += sprintf(buf+ret, "State: %s\n", ircomm_tty_state[self->state]); 1252 seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]);
1253 1253
1254 ret += sprintf(buf+ret, "Service type: "); 1254 seq_puts(m, "Service type: ");
1255 if (self->service_type & IRCOMM_9_WIRE) 1255 if (self->service_type & IRCOMM_9_WIRE)
1256 ret += sprintf(buf+ret, "9_WIRE"); 1256 seq_puts(m, "9_WIRE");
1257 else if (self->service_type & IRCOMM_3_WIRE) 1257 else if (self->service_type & IRCOMM_3_WIRE)
1258 ret += sprintf(buf+ret, "3_WIRE"); 1258 seq_puts(m, "3_WIRE");
1259 else if (self->service_type & IRCOMM_3_WIRE_RAW) 1259 else if (self->service_type & IRCOMM_3_WIRE_RAW)
1260 ret += sprintf(buf+ret, "3_WIRE_RAW"); 1260 seq_puts(m, "3_WIRE_RAW");
1261 else 1261 else
1262 ret += sprintf(buf+ret, "No common service type!\n"); 1262 seq_puts(m, "No common service type!\n");
1263 ret += sprintf(buf+ret, "\n"); 1263 seq_putc(m, '\n');
1264 1264
1265 ret += sprintf(buf+ret, "Port name: %s\n", self->settings.port_name); 1265 seq_printf(m, "Port name: %s\n", self->settings.port_name);
1266 1266
1267 ret += sprintf(buf+ret, "DTE status: "); 1267 seq_printf(m, "DTE status:");
1268 if (self->settings.dte & IRCOMM_RTS) 1268 sep = ' ';
1269 ret += sprintf(buf+ret, "RTS|"); 1269 if (self->settings.dte & IRCOMM_RTS) {
1270 if (self->settings.dte & IRCOMM_DTR) 1270 seq_printf(m, "%cRTS", sep);
1271 ret += sprintf(buf+ret, "DTR|"); 1271 sep = '|';
1272 if (self->settings.dte) 1272 }
1273 ret--; /* remove the last | */ 1273 if (self->settings.dte & IRCOMM_DTR) {
1274 ret += sprintf(buf+ret, "\n"); 1274 seq_printf(m, "%cDTR", sep);
1275 1275 sep = '|';
1276 ret += sprintf(buf+ret, "DCE status: "); 1276 }
1277 if (self->settings.dce & IRCOMM_CTS) 1277 seq_putc(m, '\n');
1278 ret += sprintf(buf+ret, "CTS|"); 1278
1279 if (self->settings.dce & IRCOMM_DSR) 1279 seq_puts(m, "DCE status:");
1280 ret += sprintf(buf+ret, "DSR|"); 1280 sep = ' ';
1281 if (self->settings.dce & IRCOMM_CD) 1281 if (self->settings.dce & IRCOMM_CTS) {
1282 ret += sprintf(buf+ret, "CD|"); 1282 seq_printf(m, "%cCTS", sep);
1283 if (self->settings.dce & IRCOMM_RI) 1283 sep = '|';
1284 ret += sprintf(buf+ret, "RI|"); 1284 }
1285 if (self->settings.dce) 1285 if (self->settings.dce & IRCOMM_DSR) {
1286 ret--; /* remove the last | */ 1286 seq_printf(m, "%cDSR", sep);
1287 ret += sprintf(buf+ret, "\n"); 1287 sep = '|';
1288 1288 }
1289 ret += sprintf(buf+ret, "Configuration: "); 1289 if (self->settings.dce & IRCOMM_CD) {
1290 seq_printf(m, "%cCD", sep);
1291 sep = '|';
1292 }
1293 if (self->settings.dce & IRCOMM_RI) {
1294 seq_printf(m, "%cRI", sep);
1295 sep = '|';
1296 }
1297 seq_putc(m, '\n');
1298
1299 seq_puts(m, "Configuration: ");
1290 if (!self->settings.null_modem) 1300 if (!self->settings.null_modem)
1291 ret += sprintf(buf+ret, "DTE <-> DCE\n"); 1301 seq_puts(m, "DTE <-> DCE\n");
1292 else 1302 else
1293 ret += sprintf(buf+ret, 1303 seq_puts(m, "DTE <-> DTE (null modem emulation)\n");
1294 "DTE <-> DTE (null modem emulation)\n"); 1304
1295 1305 seq_printf(m, "Data rate: %d\n", self->settings.data_rate);
1296 ret += sprintf(buf+ret, "Data rate: %d\n", self->settings.data_rate); 1306
1297 1307 seq_puts(m, "Flow control:");
1298 ret += sprintf(buf+ret, "Flow control: "); 1308 sep = ' ';
1299 if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) 1309 if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) {
1300 ret += sprintf(buf+ret, "XON_XOFF_IN|"); 1310 seq_printf(m, "%cXON_XOFF_IN", sep);
1301 if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) 1311 sep = '|';
1302 ret += sprintf(buf+ret, "XON_XOFF_OUT|"); 1312 }
1303 if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) 1313 if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) {
1304 ret += sprintf(buf+ret, "RTS_CTS_IN|"); 1314 seq_printf(m, "%cXON_XOFF_OUT", sep);
1305 if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) 1315 sep = '|';
1306 ret += sprintf(buf+ret, "RTS_CTS_OUT|"); 1316 }
1307 if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) 1317 if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) {
1308 ret += sprintf(buf+ret, "DSR_DTR_IN|"); 1318 seq_printf(m, "%cRTS_CTS_IN", sep);
1309 if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) 1319 sep = '|';
1310 ret += sprintf(buf+ret, "DSR_DTR_OUT|"); 1320 }
1311 if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) 1321 if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) {
1312 ret += sprintf(buf+ret, "ENQ_ACK_IN|"); 1322 seq_printf(m, "%cRTS_CTS_OUT", sep);
1313 if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) 1323 sep = '|';
1314 ret += sprintf(buf+ret, "ENQ_ACK_OUT|"); 1324 }
1315 if (self->settings.flow_control) 1325 if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) {
1316 ret--; /* remove the last | */ 1326 seq_printf(m, "%cDSR_DTR_IN", sep);
1317 ret += sprintf(buf+ret, "\n"); 1327 sep = '|';
1318 1328 }
1319 ret += sprintf(buf+ret, "Flags: "); 1329 if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) {
1320 if (self->flags & ASYNC_CTS_FLOW) 1330 seq_printf(m, "%cDSR_DTR_OUT", sep);
1321 ret += sprintf(buf+ret, "ASYNC_CTS_FLOW|"); 1331 sep = '|';
1322 if (self->flags & ASYNC_CHECK_CD) 1332 }
1323 ret += sprintf(buf+ret, "ASYNC_CHECK_CD|"); 1333 if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) {
1324 if (self->flags & ASYNC_INITIALIZED) 1334 seq_printf(m, "%cENQ_ACK_IN", sep);
1325 ret += sprintf(buf+ret, "ASYNC_INITIALIZED|"); 1335 sep = '|';
1326 if (self->flags & ASYNC_LOW_LATENCY) 1336 }
1327 ret += sprintf(buf+ret, "ASYNC_LOW_LATENCY|"); 1337 if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) {
1328 if (self->flags & ASYNC_CLOSING) 1338 seq_printf(m, "%cENQ_ACK_OUT", sep);
1329 ret += sprintf(buf+ret, "ASYNC_CLOSING|"); 1339 sep = '|';
1330 if (self->flags & ASYNC_NORMAL_ACTIVE) 1340 }
1331 ret += sprintf(buf+ret, "ASYNC_NORMAL_ACTIVE|"); 1341 seq_putc(m, '\n');
1332 if (self->flags) 1342
1333 ret--; /* remove the last | */ 1343 seq_puts(m, "Flags:");
1334 ret += sprintf(buf+ret, "\n"); 1344 sep = ' ';
1335 1345 if (self->flags & ASYNC_CTS_FLOW) {
1336 ret += sprintf(buf+ret, "Role: %s\n", self->client ? 1346 seq_printf(m, "%cASYNC_CTS_FLOW", sep);
1337 "client" : "server"); 1347 sep = '|';
1338 ret += sprintf(buf+ret, "Open count: %d\n", self->open_count); 1348 }
1339 ret += sprintf(buf+ret, "Max data size: %d\n", self->max_data_size); 1349 if (self->flags & ASYNC_CHECK_CD) {
1340 ret += sprintf(buf+ret, "Max header size: %d\n", self->max_header_size); 1350 seq_printf(m, "%cASYNC_CHECK_CD", sep);
1351 sep = '|';
1352 }
1353 if (self->flags & ASYNC_INITIALIZED) {
1354 seq_printf(m, "%cASYNC_INITIALIZED", sep);
1355 sep = '|';
1356 }
1357 if (self->flags & ASYNC_LOW_LATENCY) {
1358 seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
1359 sep = '|';
1360 }
1361 if (self->flags & ASYNC_CLOSING) {
1362 seq_printf(m, "%cASYNC_CLOSING", sep);
1363 sep = '|';
1364 }
1365 if (self->flags & ASYNC_NORMAL_ACTIVE) {
1366 seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
1367 sep = '|';
1368 }
1369 seq_putc(m, '\n');
1370
1371 seq_printf(m, "Role: %s\n", self->client ? "client" : "server");
1372 seq_printf(m, "Open count: %d\n", self->open_count);
1373 seq_printf(m, "Max data size: %d\n", self->max_data_size);
1374 seq_printf(m, "Max header size: %d\n", self->max_header_size);
1341 1375
1342 if (self->tty) 1376 if (self->tty)
1343 ret += sprintf(buf+ret, "Hardware: %s\n", 1377 seq_printf(m, "Hardware: %s\n",
1344 self->tty->hw_stopped ? "Stopped" : "Running"); 1378 self->tty->hw_stopped ? "Stopped" : "Running");
1345
1346 ret += sprintf(buf+ret, "\n");
1347 return ret;
1348} 1379}
1349 1380
1350 1381static int ircomm_tty_proc_show(struct seq_file *m, void *v)
1351/*
1352 * Function ircomm_tty_read_proc (buf, start, offset, len, eof, unused)
1353 *
1354 *
1355 *
1356 */
1357static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len,
1358 int *eof, void *unused)
1359{ 1382{
1360 struct ircomm_tty_cb *self; 1383 struct ircomm_tty_cb *self;
1361 int count = 0, l;
1362 off_t begin = 0;
1363 unsigned long flags; 1384 unsigned long flags;
1364 1385
1365 spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags); 1386 spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags);
1366 1387
1367 self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty); 1388 self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty);
1368 while ((self != NULL) && (count < 4000)) { 1389 while (self != NULL) {
1369 if (self->magic != IRCOMM_TTY_MAGIC) 1390 if (self->magic != IRCOMM_TTY_MAGIC)
1370 break; 1391 break;
1371 1392
1372 l = ircomm_tty_line_info(self, buf + count); 1393 ircomm_tty_line_info(self, m);
1373 count += l;
1374 if (count+begin > offset+len)
1375 goto done;
1376 if (count+begin < offset) {
1377 begin += count;
1378 count = 0;
1379 }
1380
1381 self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty); 1394 self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty);
1382 } 1395 }
1383 *eof = 1;
1384done:
1385 spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags); 1396 spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags);
1397 return 0;
1398}
1386 1399
1387 if (offset >= count+begin) 1400static int ircomm_tty_proc_open(struct inode *inode, struct file *file)
1388 return 0; 1401{
1389 *start = buf + (offset-begin); 1402 return single_open(file, ircomm_tty_proc_show, NULL);
1390 return ((len < begin+count-offset) ? len : begin+count-offset);
1391} 1403}
1404
1405static const struct file_operations ircomm_tty_proc_fops = {
1406 .owner = THIS_MODULE,
1407 .open = ircomm_tty_proc_open,
1408 .read = seq_read,
1409 .llseek = seq_lseek,
1410 .release = single_release,
1411};
1392#endif /* CONFIG_PROC_FS */ 1412#endif /* CONFIG_PROC_FS */
1393 1413
1394MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>"); 1414MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
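
The ircomm_tty conversion follows the standard recipe for replacing a read_proc handler with the seq_file single_open pattern: the show callback just emits text and returns 0, while seq_read/seq_lseek take over all the offset, length and EOF bookkeeping the old code managed by hand. The boilerplate, generalized (the foo_* names are placeholders):

    static int foo_proc_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "...\n");  /* no buf+ret offset arithmetic needed */
            return 0;
    }

    static int foo_proc_open(struct inode *inode, struct file *file)
    {
            return single_open(file, foo_proc_show, NULL);
    }

    static const struct file_operations foo_proc_fops = {
            .owner   = THIS_MODULE,
            .open    = foo_proc_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

The rewrite also drops the old "ret--; /* remove the last | */" trick: a sep variable starts as a space and becomes '|' after the first flag is printed, so separators are only ever emitted between items.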
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 49e786535dc8..b51c9187c347 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -172,6 +172,7 @@ static void iucv_sock_close(struct sock *sk)
172 err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); 172 err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo);
173 } 173 }
174 174
175 case IUCV_CLOSING: /* fall through */
175 sk->sk_state = IUCV_CLOSED; 176 sk->sk_state = IUCV_CLOSED;
176 sk->sk_state_change(sk); 177 sk->sk_state_change(sk);
177 178
@@ -224,6 +225,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
224 spin_lock_init(&iucv_sk(sk)->message_q.lock); 225 spin_lock_init(&iucv_sk(sk)->message_q.lock);
225 skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); 226 skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
226 iucv_sk(sk)->send_tag = 0; 227 iucv_sk(sk)->send_tag = 0;
228 iucv_sk(sk)->path = NULL;
229 memset(&iucv_sk(sk)->src_user_id , 0, 32);
227 230
228 sk->sk_destruct = iucv_sock_destruct; 231 sk->sk_destruct = iucv_sock_destruct;
229 sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; 232 sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
@@ -811,6 +814,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
811 814
812 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 815 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
813 816
817 /* receive/dequeue next skb:
818 * the function understands MSG_PEEK and, thus, does not dequeue skb */
814 skb = skb_recv_datagram(sk, flags, noblock, &err); 819 skb = skb_recv_datagram(sk, flags, noblock, &err);
815 if (!skb) { 820 if (!skb) {
816 if (sk->sk_shutdown & RCV_SHUTDOWN) 821 if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -858,9 +863,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
858 iucv_process_message_q(sk); 863 iucv_process_message_q(sk);
859 spin_unlock_bh(&iucv->message_q.lock); 864 spin_unlock_bh(&iucv->message_q.lock);
860 } 865 }
861 866 }
862 } else
863 skb_queue_head(&sk->sk_receive_queue, skb);
864 867
865done: 868done:
866 return err ? : copied; 869 return err ? : copied;
@@ -934,6 +937,9 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
934 937
935 lock_sock(sk); 938 lock_sock(sk);
936 switch (sk->sk_state) { 939 switch (sk->sk_state) {
940 case IUCV_DISCONN:
941 case IUCV_CLOSING:
942 case IUCV_SEVERED:
937 case IUCV_CLOSED: 943 case IUCV_CLOSED:
938 err = -ENOTCONN; 944 err = -ENOTCONN;
939 goto fail; 945 goto fail;
@@ -1113,8 +1119,12 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
1113 struct sock_msg_q *save_msg; 1119 struct sock_msg_q *save_msg;
1114 int len; 1120 int len;
1115 1121
1116 if (sk->sk_shutdown & RCV_SHUTDOWN) 1122 if (sk->sk_shutdown & RCV_SHUTDOWN) {
1123 iucv_message_reject(path, msg);
1117 return; 1124 return;
1125 }
1126
1127 spin_lock(&iucv->message_q.lock);
1118 1128
1119 if (!list_empty(&iucv->message_q.list) || 1129 if (!list_empty(&iucv->message_q.list) ||
1120 !skb_queue_empty(&iucv->backlog_skb_q)) 1130 !skb_queue_empty(&iucv->backlog_skb_q))
@@ -1129,9 +1139,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
1129 if (!skb) 1139 if (!skb)
1130 goto save_message; 1140 goto save_message;
1131 1141
1132 spin_lock(&iucv->message_q.lock);
1133 iucv_process_message(sk, skb, path, msg); 1142 iucv_process_message(sk, skb, path, msg);
1134 spin_unlock(&iucv->message_q.lock); 1143 goto out_unlock;
1135 1144
1136 return; 1145 return;
1137 1146
@@ -1142,8 +1151,9 @@ save_message:
1142 save_msg->path = path; 1151 save_msg->path = path;
1143 save_msg->msg = *msg; 1152 save_msg->msg = *msg;
1144 1153
1145 spin_lock(&iucv->message_q.lock);
1146 list_add_tail(&save_msg->list, &iucv->message_q.list); 1154 list_add_tail(&save_msg->list, &iucv->message_q.list);
1155
1156out_unlock:
1147 spin_unlock(&iucv->message_q.lock); 1157 spin_unlock(&iucv->message_q.lock);
1148} 1158}
1149 1159
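
In iucv_callback_rx() the message_q.lock is now taken once, before the "is there already a backlog?" test, instead of separately around each queue operation; the wider lock closes the window in which a newly arrived message could be delivered directly while earlier ones still sat queued. Shutdown handling also changes: a message arriving after RCV_SHUTDOWN is explicitly rejected rather than silently dropped. The resulting control flow, in outline (a sketch of the hunk, details elided):

    if (sk->sk_shutdown & RCV_SHUTDOWN) {
            iucv_message_reject(path, msg);  /* tell the peer, don't just drop */
            return;
    }

    spin_lock(&iucv->message_q.lock);        /* covers test *and* queueing */

    if (!list_empty(&iucv->message_q.list) ||
        !skb_queue_empty(&iucv->backlog_skb_q))
            goto save_message;               /* keep ordering behind the backlog */

    /* ... allocate skb, process the message directly ... */
    goto out_unlock;

    save_message:
            /* ... append to iucv->message_q.list ... */
    out_unlock:
            spin_unlock(&iucv->message_q.lock);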
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 60c16162474c..ecc3faf9f11a 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -33,7 +33,7 @@ choice
33 ---help--- 33 ---help---
34 This option selects the default rate control algorithm 34 This option selects the default rate control algorithm
35 mac80211 will use. Note that this default can still be 35 mac80211 will use. Note that this default can still be
36 overriden through the ieee80211_default_rc_algo module 36 overridden through the ieee80211_default_rc_algo module
37 parameter if different algorithms are available. 37 parameter if different algorithms are available.
38 38
39config MAC80211_RC_DEFAULT_PID 39config MAC80211_RC_DEFAULT_PID
@@ -202,10 +202,3 @@ config MAC80211_DEBUG_COUNTERS
202 and show them in debugfs. 202 and show them in debugfs.
203 203
204 If unsure, say N. 204 If unsure, say N.
205
206config MAC80211_VERBOSE_SPECT_MGMT_DEBUG
207 bool "Verbose Spectrum Management (IEEE 802.11h)debugging"
208 depends on MAC80211_DEBUG_MENU
209 ---help---
210 Say Y here to print out verbose Spectrum Management (IEEE 802.11h)
211 debug messages.
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a6f1d8a869bc..14134193cd17 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -258,7 +258,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
258 (chan->max_power - local->power_constr_level) : 258 (chan->max_power - local->power_constr_level) :
259 chan->max_power; 259 chan->max_power;
260 260
261 if (local->user_power_level) 261 if (local->user_power_level >= 0)
262 power = min(power, local->user_power_level); 262 power = min(power, local->user_power_level);
263 263
264 if (local->hw.conf.power_level != power) { 264 if (local->hw.conf.power_level != power) {
@@ -757,6 +757,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
757 local->hw.conf.long_frame_max_tx_count = 4; 757 local->hw.conf.long_frame_max_tx_count = 4;
758 local->hw.conf.short_frame_max_tx_count = 7; 758 local->hw.conf.short_frame_max_tx_count = 7;
759 local->hw.conf.radio_enabled = true; 759 local->hw.conf.radio_enabled = true;
760 local->user_power_level = -1;
760 761
761 INIT_LIST_HEAD(&local->interfaces); 762 INIT_LIST_HEAD(&local->interfaces);
762 mutex_init(&local->iflist_mtx); 763 mutex_init(&local->iflist_mtx);
@@ -909,6 +910,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
909 if (result < 0) 910 if (result < 0)
910 goto fail_sta_info; 911 goto fail_sta_info;
911 912
913 result = ieee80211_wep_init(local);
914 if (result < 0) {
915 printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n",
916 wiphy_name(local->hw.wiphy), result);
917 goto fail_wep;
918 }
919
912 rtnl_lock(); 920 rtnl_lock();
913 result = dev_alloc_name(local->mdev, local->mdev->name); 921 result = dev_alloc_name(local->mdev, local->mdev->name);
914 if (result < 0) 922 if (result < 0)
@@ -930,14 +938,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
930 goto fail_rate; 938 goto fail_rate;
931 } 939 }
932 940
933 result = ieee80211_wep_init(local);
934
935 if (result < 0) {
936 printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n",
937 wiphy_name(local->hw.wiphy), result);
938 goto fail_wep;
939 }
940
941 /* add one default STA interface if supported */ 941 /* add one default STA interface if supported */
942 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { 942 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) {
943 result = ieee80211_if_add(local, "wlan%d", NULL, 943 result = ieee80211_if_add(local, "wlan%d", NULL,
@@ -967,13 +967,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
967 967
968 return 0; 968 return 0;
969 969
970fail_wep:
971 rate_control_deinitialize(local);
972fail_rate: 970fail_rate:
973 unregister_netdevice(local->mdev); 971 unregister_netdevice(local->mdev);
974 local->mdev = NULL; 972 local->mdev = NULL;
975fail_dev: 973fail_dev:
976 rtnl_unlock(); 974 rtnl_unlock();
975 ieee80211_wep_free(local);
976fail_wep:
977 sta_info_stop(local); 977 sta_info_stop(local);
978fail_sta_info: 978fail_sta_info:
979 debugfs_hw_del(local); 979 debugfs_hw_del(local);
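
Moving ieee80211_wep_init() earlier in ieee80211_register_hw() also forces the error labels to be reshuffled: with goto-based cleanup, labels must undo work in exactly the reverse order it was done, and each goto must skip the teardown of anything not yet set up. The idiom in generic form (a sketch, not the mac80211 code):

    err = setup_a();
    if (err)
            goto fail_a;

    err = setup_b();        /* moved earlier? its label moves too */
    if (err)
            goto fail_b;

    return 0;

    fail_b:
            teardown_a();
    fail_a:
            return err;

That is why the hunk both relocates the wep_init block above dev_alloc_name() and moves the fail_wep: label below fail_dev:, adding an ieee80211_wep_free(local) call on the paths that now run after WEP has been initialized.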
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7ecda9d59d8a..132938b073dc 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -441,6 +441,9 @@ static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid)
441 u8 index, indexn1, indexn2; 441 u8 index, indexn1, indexn2;
442 struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim; 442 struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim;
443 443
444 if (unlikely(!tim || elems->tim_len < 4))
445 return false;
446
444 aid &= 0x3fff; 447 aid &= 0x3fff;
445 index = aid / 8; 448 index = aid / 8;
446 mask = 1 << (aid & 7); 449 mask = 1 << (aid & 7);
@@ -945,9 +948,13 @@ void ieee80211_beacon_loss_work(struct work_struct *work)
945 u.mgd.beacon_loss_work); 948 u.mgd.beacon_loss_work);
946 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 949 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
947 950
948 printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " 951#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
949 "- sending probe request\n", sdata->dev->name, 952 if (net_ratelimit()) {
950 sdata->u.mgd.bssid); 953 printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM "
954 "- sending probe request\n", sdata->dev->name,
955 sdata->u.mgd.bssid);
956 }
957#endif
951 958
952 ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; 959 ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL;
953 ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, 960 ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid,
@@ -1007,9 +1014,13 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata)
1007 (local->hw.conf.flags & IEEE80211_CONF_PS)) && 1014 (local->hw.conf.flags & IEEE80211_CONF_PS)) &&
1008 time_after(jiffies, 1015 time_after(jiffies,
1009 ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) { 1016 ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) {
1010 printk(KERN_DEBUG "%s: beacon loss from AP %pM " 1017#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1011 "- sending probe request\n", 1018 if (net_ratelimit()) {
1012 sdata->dev->name, ifmgd->bssid); 1019 printk(KERN_DEBUG "%s: beacon loss from AP %pM "
1020 "- sending probe request\n",
1021 sdata->dev->name, ifmgd->bssid);
1022 }
1023#endif
1013 ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; 1024 ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL;
1014 ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, 1025 ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid,
1015 ifmgd->ssid_len, NULL, 0); 1026 ifmgd->ssid_len, NULL, 0);
@@ -1355,7 +1366,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1355 1366
1356 for (i = 0; i < elems.ext_supp_rates_len; i++) { 1367 for (i = 0; i < elems.ext_supp_rates_len; i++) {
1357 int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; 1368 int rate = (elems.ext_supp_rates[i] & 0x7f) * 5;
1358 bool is_basic = !!(elems.supp_rates[i] & 0x80); 1369 bool is_basic = !!(elems.ext_supp_rates[i] & 0x80);
1359 1370
1360 if (rate > 110) 1371 if (rate > 110)
1361 have_higher_than_11mbit = true; 1372 have_higher_than_11mbit = true;
@@ -1902,9 +1913,17 @@ static void ieee80211_sta_work(struct work_struct *work)
1902 1913
1903static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) 1914static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
1904{ 1915{
1905 if (sdata->vif.type == NL80211_IFTYPE_STATION) 1916 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
1917 /*
1918 * Need to update last_beacon to avoid beacon loss
1919 * test to trigger.
1920 */
1921 sdata->u.mgd.last_beacon = jiffies;
 1922
1906 queue_work(sdata->local->hw.workqueue, 1924 queue_work(sdata->local->hw.workqueue,
1907 &sdata->u.mgd.work); 1925 &sdata->u.mgd.work);
1926 }
1908} 1927}
1909 1928
1910/* interface setup */ 1929/* interface setup */
@@ -2105,12 +2124,13 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
2105 struct ieee80211_local *local = 2124 struct ieee80211_local *local =
2106 container_of(work, struct ieee80211_local, 2125 container_of(work, struct ieee80211_local,
2107 dynamic_ps_enable_work); 2126 dynamic_ps_enable_work);
2127 /* XXX: using scan_sdata is completely broken! */
2108 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 2128 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
2109 2129
2110 if (local->hw.conf.flags & IEEE80211_CONF_PS) 2130 if (local->hw.conf.flags & IEEE80211_CONF_PS)
2111 return; 2131 return;
2112 2132
2113 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) 2133 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && sdata)
2114 ieee80211_send_nullfunc(local, sdata, 1); 2134 ieee80211_send_nullfunc(local, sdata, 1);
2115 2135
2116 local->hw.conf.flags |= IEEE80211_CONF_PS; 2136 local->hw.conf.flags |= IEEE80211_CONF_PS;
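
Two defensive changes stand out in the mlme.c hunks: the beacon-loss messages are compiled out unless CONFIG_MAC80211_VERBOSE_DEBUG is set, and rate-limited even then; and ieee80211_check_tim() validates the TIM element before indexing into it. The logging guard, as used in the hunk:

    #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
            if (net_ratelimit())
                    printk(KERN_DEBUG "%s: beacon loss from AP %pM"
                           " - sending probe request\n",
                           sdata->dev->name, ifmgd->bssid);
    #endif

The TIM check is the same idea applied to parsing: "if (unlikely(!tim || elems->tim_len < 4)) return false;" refuses to touch a missing or truncated element, the smallest valid TIM body (DTIM count, DTIM period, bitmap control, one bitmap octet) being four bytes.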
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 027302326498..81985d27cbda 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -156,8 +156,19 @@ int __ieee80211_resume(struct ieee80211_hw *hw)
156 case NL80211_IFTYPE_ADHOC: 156 case NL80211_IFTYPE_ADHOC:
157 case NL80211_IFTYPE_AP: 157 case NL80211_IFTYPE_AP:
158 case NL80211_IFTYPE_MESH_POINT: 158 case NL80211_IFTYPE_MESH_POINT:
159 WARN_ON(ieee80211_if_config(sdata, changed)); 159 /*
160 ieee80211_bss_info_change_notify(sdata, ~0); 160 * Driver's config_interface can fail if rfkill is
161 * enabled. Accommodate this return code.
162 * FIXME: When mac80211 has knowledge of rfkill
163 * state the code below can change back to:
164 * WARN(ieee80211_if_config(sdata, changed));
165 * ieee80211_bss_info_change_notify(sdata, ~0);
166 */
167 if (ieee80211_if_config(sdata, changed))
168 printk(KERN_DEBUG "%s: failed to configure interface during resume\n",
169 sdata->dev->name);
170 else
171 ieee80211_bss_info_change_notify(sdata, ~0);
161 break; 172 break;
162 case NL80211_IFTYPE_WDS: 173 case NL80211_IFTYPE_WDS:
163 break; 174 break;
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3824990d340b..d9233ec50610 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -476,8 +476,8 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
476 return NULL; 476 return NULL;
477 477
478 for (i = 0; i < IEEE80211_NUM_BANDS; i++) { 478 for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
479 sband = hw->wiphy->bands[hw->conf.channel->band]; 479 sband = hw->wiphy->bands[i];
480 if (sband->n_bitrates > max_rates) 480 if (sband && sband->n_bitrates > max_rates)
481 max_rates = sband->n_bitrates; 481 max_rates = sband->n_bitrates;
482 } 482 }
483 483
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index b16801cde06f..8bef9a1262ff 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -317,13 +317,44 @@ rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband,
317 struct ieee80211_sta *sta, void *priv_sta) 317 struct ieee80211_sta *sta, void *priv_sta)
318{ 318{
319 struct rc_pid_sta_info *spinfo = priv_sta; 319 struct rc_pid_sta_info *spinfo = priv_sta;
320 struct rc_pid_info *pinfo = priv;
321 struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
320 struct sta_info *si; 322 struct sta_info *si;
323 int i, j, tmp;
324 bool s;
321 325
322 /* TODO: This routine should consider using RSSI from previous packets 326 /* TODO: This routine should consider using RSSI from previous packets
323 * as we need to have IEEE 802.1X auth succeed immediately after assoc.. 327 * as we need to have IEEE 802.1X auth succeed immediately after assoc..
324 * Until that method is implemented, we will use the lowest supported 328 * Until that method is implemented, we will use the lowest supported
325 * rate as a workaround. */ 329 * rate as a workaround. */
326 330
331 /* Sort the rates. This is optimized for the most common case (i.e.
332 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
333 * mapping too. */
334 for (i = 0; i < sband->n_bitrates; i++) {
335 rinfo[i].index = i;
336 rinfo[i].rev_index = i;
337 if (RC_PID_FAST_START)
338 rinfo[i].diff = 0;
339 else
340 rinfo[i].diff = i * pinfo->norm_offset;
341 }
342 for (i = 1; i < sband->n_bitrates; i++) {
343 s = 0;
344 for (j = 0; j < sband->n_bitrates - i; j++)
345 if (unlikely(sband->bitrates[rinfo[j].index].bitrate >
346 sband->bitrates[rinfo[j + 1].index].bitrate)) {
347 tmp = rinfo[j].index;
348 rinfo[j].index = rinfo[j + 1].index;
349 rinfo[j + 1].index = tmp;
350 rinfo[rinfo[j].index].rev_index = j;
351 rinfo[rinfo[j + 1].index].rev_index = j + 1;
352 s = 1;
353 }
354 if (!s)
355 break;
356 }
357
327 spinfo->txrate_idx = rate_lowest_index(sband, sta); 358 spinfo->txrate_idx = rate_lowest_index(sband, sta);
328 /* HACK */ 359 /* HACK */
329 si = container_of(sta, struct sta_info, sta); 360 si = container_of(sta, struct sta_info, sta);
@@ -336,21 +367,22 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw,
336 struct rc_pid_info *pinfo; 367 struct rc_pid_info *pinfo;
337 struct rc_pid_rateinfo *rinfo; 368 struct rc_pid_rateinfo *rinfo;
338 struct ieee80211_supported_band *sband; 369 struct ieee80211_supported_band *sband;
339 int i, j, tmp; 370 int i, max_rates = 0;
340 bool s;
341#ifdef CONFIG_MAC80211_DEBUGFS 371#ifdef CONFIG_MAC80211_DEBUGFS
342 struct rc_pid_debugfs_entries *de; 372 struct rc_pid_debugfs_entries *de;
343#endif 373#endif
344 374
345 sband = hw->wiphy->bands[hw->conf.channel->band];
346
347 pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); 375 pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
348 if (!pinfo) 376 if (!pinfo)
349 return NULL; 377 return NULL;
350 378
351 /* We can safely assume that sband won't change unless we get 379 for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
352 * reinitialized. */ 380 sband = hw->wiphy->bands[i];
353 rinfo = kmalloc(sizeof(*rinfo) * sband->n_bitrates, GFP_ATOMIC); 381 if (sband && sband->n_bitrates > max_rates)
382 max_rates = sband->n_bitrates;
383 }
384
385 rinfo = kmalloc(sizeof(*rinfo) * max_rates, GFP_ATOMIC);
354 if (!rinfo) { 386 if (!rinfo) {
355 kfree(pinfo); 387 kfree(pinfo);
356 return NULL; 388 return NULL;
@@ -368,33 +400,6 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw,
368 pinfo->rinfo = rinfo; 400 pinfo->rinfo = rinfo;
369 pinfo->oldrate = 0; 401 pinfo->oldrate = 0;
370 402
371 /* Sort the rates. This is optimized for the most common case (i.e.
372 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
373 * mapping too. */
374 for (i = 0; i < sband->n_bitrates; i++) {
375 rinfo[i].index = i;
376 rinfo[i].rev_index = i;
377 if (RC_PID_FAST_START)
378 rinfo[i].diff = 0;
379 else
380 rinfo[i].diff = i * pinfo->norm_offset;
381 }
382 for (i = 1; i < sband->n_bitrates; i++) {
383 s = 0;
384 for (j = 0; j < sband->n_bitrates - i; j++)
385 if (unlikely(sband->bitrates[rinfo[j].index].bitrate >
386 sband->bitrates[rinfo[j + 1].index].bitrate)) {
387 tmp = rinfo[j].index;
388 rinfo[j].index = rinfo[j + 1].index;
389 rinfo[j + 1].index = tmp;
390 rinfo[rinfo[j].index].rev_index = j;
391 rinfo[rinfo[j + 1].index].rev_index = j + 1;
392 s = 1;
393 }
394 if (!s)
395 break;
396 }
397
398#ifdef CONFIG_MAC80211_DEBUGFS 403#ifdef CONFIG_MAC80211_DEBUGFS
399 de = &pinfo->dentries; 404 de = &pinfo->dentries;
400 de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, 405 de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR,
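
rate_control_pid_alloc() used to sort the bitrates of whichever band happened to be current at allocation time; on multi-band hardware the station may associate on a different band, so the sort now runs in rate_control_pid_rate_init() against the station's own sband, and the alloc path merely sizes rinfo for the largest band. The sort itself, extracted into a standalone compilable form (struct rinfo and sort_rates here are stand-ins; the kernel operates on struct rc_pid_rateinfo):

    #include <stdbool.h>
    #include <stdio.h>

    struct rinfo { int index; int rev_index; };

    /* bubble sort indices by bitrate, maintaining the reverse mapping
     * ri[r].rev_index == position of rate r in the sorted order */
    static void sort_rates(const int *bitrate, struct rinfo *ri, int n)
    {
            int i, j, tmp;
            bool swapped;

            for (i = 0; i < n; i++)
                    ri[i].index = ri[i].rev_index = i;

            for (i = 1; i < n; i++) {
                    swapped = false;
                    for (j = 0; j < n - i; j++) {
                            if (bitrate[ri[j].index] > bitrate[ri[j + 1].index]) {
                                    tmp = ri[j].index;
                                    ri[j].index = ri[j + 1].index;
                                    ri[j + 1].index = tmp;
                                    ri[ri[j].index].rev_index = j;
                                    ri[ri[j + 1].index].rev_index = j + 1;
                                    swapped = true;
                            }
                    }
                    if (!swapped)   /* already sorted: the common CCK+OFDM case */
                            break;
            }
    }

    int main(void)
    {
            int rates[] = { 10, 20, 55, 110, 60, 90, 120 }; /* 100 kbps units */
            struct rinfo ri[7];

            sort_rates(rates, ri, 7);
            for (int i = 0; i < 7; i++)
                    printf("%d ", rates[ri[i].index]);      /* ascending */
            printf("\n");
            return 0;
    }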
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 64ebe664effc..9776f73c51ad 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -29,6 +29,7 @@
29static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, 29static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
30 struct tid_ampdu_rx *tid_agg_rx, 30 struct tid_ampdu_rx *tid_agg_rx,
31 struct sk_buff *skb, 31 struct sk_buff *skb,
32 struct ieee80211_rx_status *status,
32 u16 mpdu_seq_num, 33 u16 mpdu_seq_num,
33 int bar_req); 34 int bar_req);
34/* 35/*
@@ -1396,7 +1397,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1396 * mac80211. That also explains the __skb_push() 1397 * mac80211. That also explains the __skb_push()
1397 * below. 1398 * below.
1398 */ 1399 */
1399 align = (unsigned long)skb->data & 4; 1400 align = (unsigned long)skb->data & 3;
1400 if (align) { 1401 if (align) {
1401 if (WARN_ON(skb_headroom(skb) < 3)) { 1402 if (WARN_ON(skb_headroom(skb) < 3)) {
1402 dev_kfree_skb(skb); 1403 dev_kfree_skb(skb);
@@ -1688,7 +1689,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
1688 /* manage reordering buffer according to requested */ 1689 /* manage reordering buffer according to requested */
1689 /* sequence number */ 1690 /* sequence number */
1690 rcu_read_lock(); 1691 rcu_read_lock();
1691 ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, 1692 ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, NULL,
1692 start_seq_num, 1); 1693 start_seq_num, 1);
1693 rcu_read_unlock(); 1694 rcu_read_unlock();
1694 return RX_DROP_UNUSABLE; 1695 return RX_DROP_UNUSABLE;
@@ -2293,6 +2294,7 @@ static inline u16 seq_sub(u16 sq1, u16 sq2)
2293static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, 2294static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
2294 struct tid_ampdu_rx *tid_agg_rx, 2295 struct tid_ampdu_rx *tid_agg_rx,
2295 struct sk_buff *skb, 2296 struct sk_buff *skb,
2297 struct ieee80211_rx_status *rxstatus,
2296 u16 mpdu_seq_num, 2298 u16 mpdu_seq_num,
2297 int bar_req) 2299 int bar_req)
2298{ 2300{
@@ -2374,6 +2376,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
2374 2376
2375 /* put the frame in the reordering buffer */ 2377 /* put the frame in the reordering buffer */
2376 tid_agg_rx->reorder_buf[index] = skb; 2378 tid_agg_rx->reorder_buf[index] = skb;
2379 memcpy(tid_agg_rx->reorder_buf[index]->cb, rxstatus,
2380 sizeof(*rxstatus));
2377 tid_agg_rx->stored_mpdu_num++; 2381 tid_agg_rx->stored_mpdu_num++;
2378 /* release the buffer until next missing frame */ 2382 /* release the buffer until next missing frame */
2379 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) 2383 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn)
@@ -2399,7 +2403,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
2399} 2403}
2400 2404
2401static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, 2405static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
2402 struct sk_buff *skb) 2406 struct sk_buff *skb,
2407 struct ieee80211_rx_status *status)
2403{ 2408{
2404 struct ieee80211_hw *hw = &local->hw; 2409 struct ieee80211_hw *hw = &local->hw;
2405 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 2410 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -2448,7 +2453,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
2448 2453
2449 /* according to mpdu sequence number deal with reordering buffer */ 2454 /* according to mpdu sequence number deal with reordering buffer */
2450 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; 2455 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
2451 ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, 2456 ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, status,
2452 mpdu_seq_num, 0); 2457 mpdu_seq_num, 0);
2453 end_reorder: 2458 end_reorder:
2454 return ret; 2459 return ret;
@@ -2512,7 +2517,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
2512 return; 2517 return;
2513 } 2518 }
2514 2519
2515 if (!ieee80211_rx_reorder_ampdu(local, skb)) 2520 if (!ieee80211_rx_reorder_ampdu(local, skb, status))
2516 __ieee80211_rx_handle_packet(hw, skb, status, rate); 2521 __ieee80211_rx_handle_packet(hw, skb, status, rate);
2517 2522
2518 rcu_read_unlock(); 2523 rcu_read_unlock();
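
The reorder-buffer change closes a gap where A-MPDU frames held back for reordering lost their per-frame receive status: by the time a buffered frame is released, the status that arrived with it is gone unless it was saved. The fix threads the status pointer down through ieee80211_rx_reorder_ampdu() and stashes a copy in the skb's control block when the frame is parked:

    /* put the frame in the reordering buffer */
    tid_agg_rx->reorder_buf[index] = skb;
    memcpy(tid_agg_rx->reorder_buf[index]->cb, rxstatus,
           sizeof(*rxstatus));      /* status must survive until release */

skb->cb is per-skb scratch space owned by the current layer, so it is a natural place to park the status, provided struct ieee80211_rx_status fits within the 48-byte cb area.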
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3fb04a86444d..63656266d567 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -772,7 +772,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
772 hdrlen = ieee80211_hdrlen(hdr->frame_control); 772 hdrlen = ieee80211_hdrlen(hdr->frame_control);
773 773
774 /* internal error, why is TX_FRAGMENTED set? */ 774 /* internal error, why is TX_FRAGMENTED set? */
775 if (WARN_ON(skb->len <= frag_threshold)) 775 if (WARN_ON(skb->len + FCS_LEN <= frag_threshold))
776 return TX_DROP; 776 return TX_DROP;
777 777
778 /* 778 /*
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index deb4ecec122a..959aa8379ccf 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -417,6 +417,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
417{ 417{
418 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 418 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
419 struct ieee80211_channel* chan = local->hw.conf.channel; 419 struct ieee80211_channel* chan = local->hw.conf.channel;
420 bool reconf = false;
420 u32 reconf_flags = 0; 421 u32 reconf_flags = 0;
421 int new_power_level; 422 int new_power_level;
422 423
@@ -427,14 +428,38 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
427 if (!chan) 428 if (!chan)
428 return -EINVAL; 429 return -EINVAL;
429 430
430 if (data->txpower.fixed) 431 /* only change when not disabling */
431 new_power_level = min(data->txpower.value, chan->max_power); 432 if (!data->txpower.disabled) {
432 else /* Automatic power level setting */ 433 if (data->txpower.fixed) {
433 new_power_level = chan->max_power; 434 if (data->txpower.value < 0)
435 return -EINVAL;
436 new_power_level = data->txpower.value;
437 /*
438 * Debatable, but we cannot do a fixed power
439 * level above the regulatory constraint.
440 * Use "iwconfig wlan0 txpower 15dBm" instead.
441 */
442 if (new_power_level > chan->max_power)
443 return -EINVAL;
444 } else {
445 /*
446 * Automatic power level setting, max being the value
447 * passed in from userland.
448 */
449 if (data->txpower.value < 0)
450 new_power_level = -1;
451 else
452 new_power_level = data->txpower.value;
453 }
454
455 reconf = true;
434 456
435 local->user_power_level = new_power_level; 457 /*
436 if (local->hw.conf.power_level != new_power_level) 458 * ieee80211_hw_config() will limit to the channel's
437 reconf_flags |= IEEE80211_CONF_CHANGE_POWER; 459 * max power and possibly power constraint from AP.
460 */
461 local->user_power_level = new_power_level;
462 }
438 463
439 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { 464 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
440 local->hw.conf.radio_enabled = !(data->txpower.disabled); 465 local->hw.conf.radio_enabled = !(data->txpower.disabled);
@@ -442,7 +467,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
442 ieee80211_led_radio(local, local->hw.conf.radio_enabled); 467 ieee80211_led_radio(local, local->hw.conf.radio_enabled);
443 } 468 }
444 469
445 if (reconf_flags) 470 if (reconf || reconf_flags)
446 ieee80211_hw_config(local, reconf_flags); 471 ieee80211_hw_config(local, reconf_flags);
447 472
448 return 0; 473 return 0;
@@ -530,7 +555,7 @@ static int ieee80211_ioctl_giwfrag(struct net_device *dev,
530 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 555 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
531 556
532 frag->value = local->fragmentation_threshold; 557 frag->value = local->fragmentation_threshold;
533 frag->disabled = (frag->value >= IEEE80211_MAX_RTS_THRESHOLD); 558 frag->disabled = (frag->value >= IEEE80211_MAX_FRAG_THRESHOLD);
534 frag->fixed = 1; 559 frag->fixed = 1;
535 560
536 return 0; 561 return 0;
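
The siwtxpower rework separates three cases the old code conflated: disabling the radio, a fixed power level, and an automatic level with an optional user cap. A user level of 0 dBm is now representable because "no user limit" is encoded as -1 rather than 0, which is why the config path in main.c changes to:

    if (local->user_power_level >= 0)       /* -1 means "no user cap" */
            power = min(power, local->user_power_level);

On the ioctl side, a fixed request above chan->max_power is rejected with -EINVAL, while an automatic request with a negative value simply clears the cap; ieee80211_hw_config() then applies the channel and regulatory limits.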
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 2c967e4f706c..cb3ad741ebf8 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -52,7 +52,7 @@ config NF_CT_ACCT
52 52
53 Please note that currently this option only sets a default state. 53 Please note that currently this option only sets a default state.
54 You may change it at boot time with nf_conntrack.acct=0/1 kernel 54 You may change it at boot time with nf_conntrack.acct=0/1 kernel
55 paramater or by loading the nf_conntrack module with acct=0/1. 55 parameter or by loading the nf_conntrack module with acct=0/1.
56 56
57 You may also disable/enable it on a running system with: 57 You may also disable/enable it on a running system with:
58 sysctl net.netfilter.nf_conntrack_acct=0/1 58 sysctl net.netfilter.nf_conntrack_acct=0/1
@@ -275,6 +275,8 @@ config NF_CT_NETLINK
275 help 275 help
276 This option enables support for a netlink-based userspace interface 276 This option enables support for a netlink-based userspace interface
277 277
278endif # NF_CONNTRACK
279
278# transparent proxy support 280# transparent proxy support
279config NETFILTER_TPROXY 281config NETFILTER_TPROXY
280 tristate "Transparent proxying support (EXPERIMENTAL)" 282 tristate "Transparent proxying support (EXPERIMENTAL)"
@@ -290,8 +292,6 @@ config NETFILTER_TPROXY
290 292
291 To compile it as a module, choose M here. If unsure, say N. 293 To compile it as a module, choose M here. If unsure, say N.
292 294
293endif # NF_CONNTRACK
294
295config NETFILTER_XTABLES 295config NETFILTER_XTABLES
296 tristate "Netfilter Xtables support (required for ip_tables)" 296 tristate "Netfilter Xtables support (required for ip_tables)"
297 default m if NETFILTER_ADVANCED=n 297 default m if NETFILTER_ADVANCED=n
@@ -374,7 +374,7 @@ config NETFILTER_XT_TARGET_HL
374 374
375config NETFILTER_XT_TARGET_LED 375config NETFILTER_XT_TARGET_LED
376 tristate '"LED" target support' 376 tristate '"LED" target support'
377 depends on LEDS_CLASS && LED_TRIGGERS 377 depends on LEDS_CLASS && LEDS_TRIGGERS
378 depends on NETFILTER_ADVANCED 378 depends on NETFILTER_ADVANCED
379 help 379 help
380 This option adds a `LED' target, which allows you to blink LEDs in 380 This option adds a `LED' target, which allows you to blink LEDs in
@@ -837,6 +837,7 @@ config NETFILTER_XT_MATCH_SOCKET
837 depends on NETFILTER_TPROXY 837 depends on NETFILTER_TPROXY
838 depends on NETFILTER_XTABLES 838 depends on NETFILTER_XTABLES
839 depends on NETFILTER_ADVANCED 839 depends on NETFILTER_ADVANCED
840 depends on !NF_CONNTRACK || NF_CONNTRACK
840 select NF_DEFRAG_IPV4 841 select NF_DEFRAG_IPV4
841 help 842 help
842 This option adds a `socket' match, which can be used to match 843 This option adds a `socket' match, which can be used to match
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 60aba45023ff..77bfdfeb966e 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -260,7 +260,10 @@ struct ip_vs_conn *ip_vs_ct_in_get
260 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 260 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
261 if (cp->af == af && 261 if (cp->af == af &&
262 ip_vs_addr_equal(af, s_addr, &cp->caddr) && 262 ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
263 ip_vs_addr_equal(af, d_addr, &cp->vaddr) && 263 /* protocol should only be IPPROTO_IP if
264 * d_addr is a fwmark */
265 ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af,
266 d_addr, &cp->vaddr) &&
264 s_port == cp->cport && d_port == cp->vport && 267 s_port == cp->cport && d_port == cp->vport &&
265 cp->flags & IP_VS_CONN_F_TEMPLATE && 268 cp->flags & IP_VS_CONN_F_TEMPLATE &&
266 protocol == cp->protocol) { 269 protocol == cp->protocol) {
@@ -698,7 +701,9 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
698 cp->cport = cport; 701 cp->cport = cport;
699 ip_vs_addr_copy(af, &cp->vaddr, vaddr); 702 ip_vs_addr_copy(af, &cp->vaddr, vaddr);
700 cp->vport = vport; 703 cp->vport = vport;
701 ip_vs_addr_copy(af, &cp->daddr, daddr); 704 /* proto should only be IPPROTO_IP if d_addr is a fwmark */
705 ip_vs_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af,
706 &cp->daddr, daddr);
702 cp->dport = dport; 707 cp->dport = dport;
703 cp->flags = flags; 708 cp->flags = flags;
704 spin_lock_init(&cp->lock); 709 spin_lock_init(&cp->lock);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index cb3e031335eb..8dddb17a947a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -278,7 +278,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
278 */ 278 */
279 if (svc->fwmark) { 279 if (svc->fwmark) {
280 union nf_inet_addr fwmark = { 280 union nf_inet_addr fwmark = {
281 .all = { 0, 0, 0, htonl(svc->fwmark) } 281 .ip = htonl(svc->fwmark)
282 }; 282 };
283 283
284 ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0, 284 ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
@@ -306,7 +306,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
306 */ 306 */
307 if (svc->fwmark) { 307 if (svc->fwmark) {
308 union nf_inet_addr fwmark = { 308 union nf_inet_addr fwmark = {
309 .all = { 0, 0, 0, htonl(svc->fwmark) } 309 .ip = htonl(svc->fwmark)
310 }; 310 };
311 311
312 ct = ip_vs_conn_new(svc->af, IPPROTO_IP, 312 ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
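
Both ipvs hunks fix how a firewall mark is smuggled through union nf_inet_addr: the old initializer put the mark in all[3], whereas the new one puts it in the first 32-bit word via .ip, which is what the lookup side ends up comparing for these templates (the added comments note that protocol is IPPROTO_IP only for fwmarks, hence the AF_UNSPEC comparison). Designated initializers make the difference easy to see in a standalone program (union addr is a simplified stand-in for the kernel union, where .ip overlays all[0]):

    #include <stdint.h>
    #include <stdio.h>

    union addr {
            uint32_t all[4];
            uint32_t ip;    /* overlays all[0] */
    };

    int main(void)
    {
            union addr old = { .all = { 0, 0, 0, 42 } };    /* mark in all[3] */
            union addr new = { .ip  = 42 };                 /* mark in word 0 */

            /* a comparison that reads word 0 only sees the new layout */
            printf("old word0=%u, new word0=%u\n", old.all[0], new.all[0]);
            return 0;
    }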
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 3940f996a2e4..afde8f991646 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -372,7 +372,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
372 struct net *net = nf_ct_exp_net(expect); 372 struct net *net = nf_ct_exp_net(expect);
373 struct hlist_node *n; 373 struct hlist_node *n;
374 unsigned int h; 374 unsigned int h;
375 int ret = 0; 375 int ret = 1;
376 376
377 if (!master_help->helper) { 377 if (!master_help->helper) {
378 ret = -ESHUTDOWN; 378 ret = -ESHUTDOWN;
@@ -412,41 +412,23 @@ out:
412 return ret; 412 return ret;
413} 413}
414 414
415int nf_ct_expect_related(struct nf_conntrack_expect *expect) 415int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
416 u32 pid, int report)
416{ 417{
417 int ret; 418 int ret;
418 419
419 spin_lock_bh(&nf_conntrack_lock); 420 spin_lock_bh(&nf_conntrack_lock);
420 ret = __nf_ct_expect_check(expect); 421 ret = __nf_ct_expect_check(expect);
421 if (ret < 0) 422 if (ret <= 0)
422 goto out; 423 goto out;
423 424
425 ret = 0;
424 nf_ct_expect_insert(expect); 426 nf_ct_expect_insert(expect);
425 atomic_inc(&expect->use);
426 spin_unlock_bh(&nf_conntrack_lock);
427 nf_ct_expect_event(IPEXP_NEW, expect);
428 nf_ct_expect_put(expect);
429 return ret;
430out:
431 spin_unlock_bh(&nf_conntrack_lock); 427 spin_unlock_bh(&nf_conntrack_lock);
428 nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
432 return ret; 429 return ret;
433}
434EXPORT_SYMBOL_GPL(nf_ct_expect_related);
435
436int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
437 u32 pid, int report)
438{
439 int ret;
440
441 spin_lock_bh(&nf_conntrack_lock);
442 ret = __nf_ct_expect_check(expect);
443 if (ret < 0)
444 goto out;
445 nf_ct_expect_insert(expect);
446out: 430out:
447 spin_unlock_bh(&nf_conntrack_lock); 431 spin_unlock_bh(&nf_conntrack_lock);
448 if (ret == 0)
449 nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
450 return ret; 432 return ret;
451} 433}
452EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); 434EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
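
nf_ct_expect_related() and its _report variant previously duplicated the check-and-insert sequence with slightly different event reporting; the merged version keys everything off a three-way result from __nf_ct_expect_check(), which, reading the hunk, now returns 1 for "proceed", 0 for "already handled, nothing to insert", and a negative errno on failure. The new body, condensed from the diff:

    spin_lock_bh(&nf_conntrack_lock);
    ret = __nf_ct_expect_check(expect);
    if (ret <= 0)                   /* 0: nothing to do, <0: error */
            goto out;

    ret = 0;
    nf_ct_expect_insert(expect);
    spin_unlock_bh(&nf_conntrack_lock);
    nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
    return ret;
    out:
    spin_unlock_bh(&nf_conntrack_lock);
    return ret;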
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 30b8e9009f99..0fa5a422959f 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -176,7 +176,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
176 } 176 }
177 177
178 /* Get rid of expecteds, set helpers to NULL. */ 178 /* Get rid of expecteds, set helpers to NULL. */
179 hlist_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) 179 hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
180 unhelp(h, me); 180 unhelp(h, me);
181 for (i = 0; i < nf_conntrack_htable_size; i++) { 181 for (i = 0; i < nf_conntrack_htable_size; i++) {
182 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) 182 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c6439c77953c..c523f0b8cee5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -512,7 +512,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
512 512
513 skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); 513 skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC);
514 if (!skb) 514 if (!skb)
515 return NOTIFY_DONE; 515 goto errout;
516 516
517 b = skb->tail; 517 b = skb->tail;
518 518
@@ -591,8 +591,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
591nla_put_failure: 591nla_put_failure:
592 rcu_read_unlock(); 592 rcu_read_unlock();
593nlmsg_failure: 593nlmsg_failure:
594 nfnetlink_set_err(0, group, -ENOBUFS);
595 kfree_skb(skb); 594 kfree_skb(skb);
595errout:
596 nfnetlink_set_err(0, group, -ENOBUFS);
596 return NOTIFY_DONE; 597 return NOTIFY_DONE;
597} 598}
598#endif /* CONFIG_NF_CONNTRACK_EVENTS */ 599#endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -987,7 +988,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
987{ 988{
988 struct nf_conntrack_helper *helper; 989 struct nf_conntrack_helper *helper;
989 struct nf_conn_help *help = nfct_help(ct); 990 struct nf_conn_help *help = nfct_help(ct);
990 char *helpname; 991 char *helpname = NULL;
991 int err; 992 int err;
992 993
993 /* don't change helper of sibling connections */ 994 /* don't change helper of sibling connections */
@@ -1185,28 +1186,6 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
1185 return 0; 1186 return 0;
1186} 1187}
1187 1188
1188static inline void
1189ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report)
1190{
1191 unsigned int events = 0;
1192
1193 if (test_bit(IPS_EXPECTED_BIT, &ct->status))
1194 events |= IPCT_RELATED;
1195 else
1196 events |= IPCT_NEW;
1197
1198 nf_conntrack_event_report(IPCT_STATUS |
1199 IPCT_HELPER |
1200 IPCT_REFRESH |
1201 IPCT_PROTOINFO |
1202 IPCT_NATSEQADJ |
1203 IPCT_MARK |
1204 events,
1205 ct,
1206 pid,
1207 report);
1208}
1209
1210static struct nf_conn * 1189static struct nf_conn *
1211ctnetlink_create_conntrack(struct nlattr *cda[], 1190ctnetlink_create_conntrack(struct nlattr *cda[],
1212 struct nf_conntrack_tuple *otuple, 1191 struct nf_conntrack_tuple *otuple,
@@ -1230,7 +1209,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1230 1209
1231 rcu_read_lock(); 1210 rcu_read_lock();
1232 if (cda[CTA_HELP]) { 1211 if (cda[CTA_HELP]) {
1233 char *helpname; 1212 char *helpname = NULL;
1234 1213
1235 err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); 1214 err = ctnetlink_parse_help(cda[CTA_HELP], &helpname);
1236 if (err < 0) 1215 if (err < 0)
@@ -1372,6 +1351,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1372 err = -ENOENT; 1351 err = -ENOENT;
1373 if (nlh->nlmsg_flags & NLM_F_CREATE) { 1352 if (nlh->nlmsg_flags & NLM_F_CREATE) {
1374 struct nf_conn *ct; 1353 struct nf_conn *ct;
1354 enum ip_conntrack_events events;
1375 1355
1376 ct = ctnetlink_create_conntrack(cda, &otuple, 1356 ct = ctnetlink_create_conntrack(cda, &otuple,
1377 &rtuple, u3); 1357 &rtuple, u3);
@@ -1382,9 +1362,18 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1382 err = 0; 1362 err = 0;
1383 nf_conntrack_get(&ct->ct_general); 1363 nf_conntrack_get(&ct->ct_general);
1384 spin_unlock_bh(&nf_conntrack_lock); 1364 spin_unlock_bh(&nf_conntrack_lock);
1385 ctnetlink_event_report(ct, 1365 if (test_bit(IPS_EXPECTED_BIT, &ct->status))
1386 NETLINK_CB(skb).pid, 1366 events = IPCT_RELATED;
1387 nlmsg_report(nlh)); 1367 else
1368 events = IPCT_NEW;
1369
1370 nf_conntrack_event_report(IPCT_STATUS |
1371 IPCT_HELPER |
1372 IPCT_PROTOINFO |
1373 IPCT_NATSEQADJ |
1374 IPCT_MARK | events,
1375 ct, NETLINK_CB(skb).pid,
1376 nlmsg_report(nlh));
1388 nf_ct_put(ct); 1377 nf_ct_put(ct);
1389 } else 1378 } else
1390 spin_unlock_bh(&nf_conntrack_lock); 1379 spin_unlock_bh(&nf_conntrack_lock);
@@ -1403,9 +1392,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1403 if (err == 0) { 1392 if (err == 0) {
1404 nf_conntrack_get(&ct->ct_general); 1393 nf_conntrack_get(&ct->ct_general);
1405 spin_unlock_bh(&nf_conntrack_lock); 1394 spin_unlock_bh(&nf_conntrack_lock);
1406 ctnetlink_event_report(ct, 1395 nf_conntrack_event_report(IPCT_STATUS |
1407 NETLINK_CB(skb).pid, 1396 IPCT_HELPER |
1408 nlmsg_report(nlh)); 1397 IPCT_PROTOINFO |
1398 IPCT_NATSEQADJ |
1399 IPCT_MARK,
1400 ct, NETLINK_CB(skb).pid,
1401 nlmsg_report(nlh));
1409 nf_ct_put(ct); 1402 nf_ct_put(ct);
1410 } else 1403 } else
1411 spin_unlock_bh(&nf_conntrack_lock); 1404 spin_unlock_bh(&nf_conntrack_lock);
@@ -1564,7 +1557,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1564 1557
1565 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 1558 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
1566 if (!skb) 1559 if (!skb)
1567 return NOTIFY_DONE; 1560 goto errout;
1568 1561
1569 b = skb->tail; 1562 b = skb->tail;
1570 1563
@@ -1589,8 +1582,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1589nla_put_failure: 1582nla_put_failure:
1590 rcu_read_unlock(); 1583 rcu_read_unlock();
1591nlmsg_failure: 1584nlmsg_failure:
1592 nfnetlink_set_err(0, 0, -ENOBUFS);
1593 kfree_skb(skb); 1585 kfree_skb(skb);
1586errout:
1587 nfnetlink_set_err(0, 0, -ENOBUFS);
1594 return NOTIFY_DONE; 1588 return NOTIFY_DONE;
1595} 1589}
1596#endif 1590#endif
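Both event-handler hunks above move nfnetlink_set_err() behind a new errout label, so an skb allocation failure is now reported to listeners as -ENOBUFS instead of returning silently, while kfree_skb() stays on the path that is only reached once a buffer exists. A minimal userspace rendering of that goto-based error layout (helper names hypothetical, not kernel API):

	#include <stdio.h>
	#include <stdlib.h>

	static int build_message(char *buf, size_t len) { return len > 1 ? 0 : -1; }
	static void deliver(char *buf) { (void)buf; }
	static void report_enobufs(void) { fprintf(stderr, "ENOBUFS reported\n"); }

	static int event_handler(size_t len)
	{
		char *skb = malloc(len);
		if (!skb)
			goto errout;		/* failure reported even with no buffer */

		if (build_message(skb, len) < 0)
			goto msg_failure;	/* buffer exists: free it first */

		deliver(skb);
		free(skb);
		return 0;

	msg_failure:
		free(skb);
	errout:
		report_enobufs();
		return 0;
	}

	int main(void)
	{
		return event_handler(0) | event_handler(64);
	}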
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 50dac8dbe7d8..aee0d6bea309 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -22,6 +22,7 @@
22#include <linux/netfilter/nfnetlink_conntrack.h> 22#include <linux/netfilter/nfnetlink_conntrack.h>
23#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_conntrack_l4proto.h> 24#include <net/netfilter/nf_conntrack_l4proto.h>
25#include <net/netfilter/nf_conntrack_ecache.h>
25#include <net/netfilter/nf_log.h> 26#include <net/netfilter/nf_log.h>
26 27
27static DEFINE_RWLOCK(dccp_lock); 28static DEFINE_RWLOCK(dccp_lock);
@@ -553,6 +554,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
553 ct->proto.dccp.state = new_state; 554 ct->proto.dccp.state = new_state;
554 write_unlock_bh(&dccp_lock); 555 write_unlock_bh(&dccp_lock);
555 556
557 if (new_state != old_state)
558 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
559
556 dn = dccp_pernet(net); 560 dn = dccp_pernet(net);
557 nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); 561 nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]);
558 562
@@ -633,6 +637,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
633 if (!nest_parms) 637 if (!nest_parms)
634 goto nla_put_failure; 638 goto nla_put_failure;
635 NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); 639 NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state);
640 NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE,
641 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]);
636 nla_nest_end(skb, nest_parms); 642 nla_nest_end(skb, nest_parms);
637 read_unlock_bh(&dccp_lock); 643 read_unlock_bh(&dccp_lock);
638 return 0; 644 return 0;
@@ -644,6 +650,7 @@ nla_put_failure:
644 650
645static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { 651static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
646 [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, 652 [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 },
653 [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 },
647}; 654};
648 655
649static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) 656static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
@@ -661,11 +668,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
661 return err; 668 return err;
662 669
663 if (!tb[CTA_PROTOINFO_DCCP_STATE] || 670 if (!tb[CTA_PROTOINFO_DCCP_STATE] ||
664 nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) 671 !tb[CTA_PROTOINFO_DCCP_ROLE] ||
672 nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX ||
673 nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) {
665 return -EINVAL; 674 return -EINVAL;
675 }
666 676
667 write_lock_bh(&dccp_lock); 677 write_lock_bh(&dccp_lock);
668 ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); 678 ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
679 if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
680 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
681 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
682 } else {
683 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
684 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
685 }
669 write_unlock_bh(&dccp_lock); 686 write_unlock_bh(&dccp_lock);
670 return 0; 687 return 0;
671} 688}
@@ -777,6 +794,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
777 .print_conntrack = dccp_print_conntrack, 794 .print_conntrack = dccp_print_conntrack,
778#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 795#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
779 .to_nlattr = dccp_to_nlattr, 796 .to_nlattr = dccp_to_nlattr,
797 .nlattr_size = dccp_nlattr_size,
780 .from_nlattr = nlattr_to_dccp, 798 .from_nlattr = nlattr_to_dccp,
781 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 799 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
782 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 800 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
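The registration above wires up a .nlattr_size callback whose body falls outside this hunk; judging from the policy table it accompanies, it presumably reports the worst-case size of the CTA_PROTOINFO_DCCP nest, along these lines (an inference, not the verbatim function):

	static int dccp_nlattr_size(void)
	{
		return nla_total_size(0)	/* CTA_PROTOINFO_DCCP nest header */
			+ nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1);
	}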
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b5ccf2b4b2e7..97a6e93d742e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -634,6 +634,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
634 sender->td_end = end; 634 sender->td_end = end;
635 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; 635 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
636 } 636 }
637 if (tcph->ack) {
638 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
639 sender->td_maxack = ack;
640 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
641 } else if (after(ack, sender->td_maxack))
642 sender->td_maxack = ack;
643 }
644
637 /* 645 /*
638 * Update receiver data. 646 * Update receiver data.
639 */ 647 */
@@ -919,6 +927,16 @@ static int tcp_packet(struct nf_conn *ct,
919 return -NF_ACCEPT; 927 return -NF_ACCEPT;
920 case TCP_CONNTRACK_CLOSE: 928 case TCP_CONNTRACK_CLOSE:
921 if (index == TCP_RST_SET 929 if (index == TCP_RST_SET
930 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
931 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
932 /* Invalid RST */
933 write_unlock_bh(&tcp_lock);
934 if (LOG_INVALID(net, IPPROTO_TCP))
935 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
936 "nf_ct_tcp: invalid RST ");
937 return -NF_ACCEPT;
938 }
939 if (index == TCP_RST_SET
922 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) 940 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
923 && ct->proto.tcp.last_index == TCP_SYN_SET) 941 && ct->proto.tcp.last_index == TCP_SYN_SET)
924 || (!test_bit(IPS_ASSURED_BIT, &ct->status) 942 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
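The first hunk records the highest ACK value each direction has sent (td_maxack); the second uses it to reject an RST whose sequence number lies before that mark, blunting blind-reset attacks that guess out-of-window sequence numbers. before() is the kernel's wraparound-safe sequence comparison; the same arithmetic in standalone form:

	#include <stdint.h>
	#include <stdio.h>

	/* Compare 32-bit sequence numbers modulo 2^32, as the kernel's
	 * before()/after() macros do: the sign of the difference decides. */
	static int seq_before(uint32_t a, uint32_t b)
	{
		return (int32_t)(a - b) < 0;
	}

	int main(void)
	{
		uint32_t td_maxack = 1000;

		printf("%d\n", seq_before(500, td_maxack));	/* 1: such an RST is invalid */
		printf("%d\n", seq_before(0xfffffff0u, 16));	/* 1: wraparound handled */
		return 0;
	}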
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 4614696c1b88..0badedc542d3 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -204,6 +204,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
204 .error = udplite_error, 204 .error = udplite_error,
205#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 205#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
206 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 206 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
207 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
207 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 208 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
208 .nla_policy = nf_ct_port_nla_policy, 209 .nla_policy = nf_ct_port_nla_policy,
209#endif 210#endif
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8bb998fe098b..beb37311e1a5 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -36,10 +36,14 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
36int nf_log_register(u_int8_t pf, struct nf_logger *logger) 36int nf_log_register(u_int8_t pf, struct nf_logger *logger)
37{ 37{
38 const struct nf_logger *llog; 38 const struct nf_logger *llog;
39 int i;
39 40
40 if (pf >= ARRAY_SIZE(nf_loggers)) 41 if (pf >= ARRAY_SIZE(nf_loggers))
41 return -EINVAL; 42 return -EINVAL;
42 43
44 for (i = 0; i < ARRAY_SIZE(logger->list); i++)
45 INIT_LIST_HEAD(&logger->list[i]);
46
43 mutex_lock(&nf_log_mutex); 47 mutex_lock(&nf_log_mutex);
44 48
45 if (pf == NFPROTO_UNSPEC) { 49 if (pf == NFPROTO_UNSPEC) {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 2785d66a7e38..b8ab37ad7ed5 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -203,7 +203,7 @@ static int __init nfnetlink_init(void)
203 nfnetlink_rcv, NULL, THIS_MODULE); 203 nfnetlink_rcv, NULL, THIS_MODULE);
204 if (!nfnl) { 204 if (!nfnl) {
205 printk(KERN_ERR "cannot initialize nfnetlink!\n"); 205 printk(KERN_ERR "cannot initialize nfnetlink!\n");
206 return -1; 206 return -ENOMEM;
207 } 207 }
208 208
209 return 0; 209 return 0;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fd326ac27ec8..66a6dd5c519a 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -581,6 +581,12 @@ nfulnl_log_packet(u_int8_t pf,
581 + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) 581 + nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
582 + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); 582 + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
583 583
584 if (in && skb_mac_header_was_set(skb)) {
585 size += nla_total_size(skb->dev->hard_header_len)
586 + nla_total_size(sizeof(u_int16_t)) /* hwtype */
587 + nla_total_size(sizeof(u_int16_t)); /* hwlen */
588 }
589
584 spin_lock_bh(&inst->lock); 590 spin_lock_bh(&inst->lock);
585 591
586 if (inst->flags & NFULNL_CFG_F_SEQ) 592 if (inst->flags & NFULNL_CFG_F_SEQ)
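The new block reserves room for the MAC header attribute plus the two u16 attributes (hwtype, hwlen) before the instance lock is taken, so the skb cannot run out of tailroom once those attributes are emitted. nla_total_size() is attribute header plus payload, rounded up to 4 bytes; the arithmetic in standalone form:

	#include <stdio.h>

	#define NLA_ALIGNTO	4
	#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
	#define NLA_HDRLEN	NLA_ALIGN(4)	/* struct nlattr is 4 bytes */

	static int nla_total_size(int payload)
	{
		return NLA_ALIGN(NLA_HDRLEN + payload);
	}

	int main(void)
	{
		int hard_header_len = 14;	/* e.g. Ethernet */
		int extra = nla_total_size(hard_header_len)	/* 20 */
			  + nla_total_size(2)			/* hwtype: 8 */
			  + nla_total_size(2);			/* hwlen:  8 */

		printf("extra bytes reserved: %d\n", extra);	/* 36 */
		return 0;
	}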
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 509a95621f9f..150e5cf62f85 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info)
625} 625}
626EXPORT_SYMBOL(xt_free_table_info); 626EXPORT_SYMBOL(xt_free_table_info);
627 627
628void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
629 struct xt_table_info *newinfo)
630{
631 unsigned int cpu;
632
633 for_each_possible_cpu(cpu) {
634 void *p = oldinfo->entries[cpu];
635 rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
636 newinfo->entries[cpu] = p;
637 }
638
639}
640EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
641
642/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ 628/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
643struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, 629struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
644 const char *name) 630 const char *name)
@@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af)
676EXPORT_SYMBOL_GPL(xt_compat_unlock); 662EXPORT_SYMBOL_GPL(xt_compat_unlock);
677#endif 663#endif
678 664
665DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
666EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
667
668
679struct xt_table_info * 669struct xt_table_info *
680xt_replace_table(struct xt_table *table, 670xt_replace_table(struct xt_table *table,
681 unsigned int num_counters, 671 unsigned int num_counters,
682 struct xt_table_info *newinfo, 672 struct xt_table_info *newinfo,
683 int *error) 673 int *error)
684{ 674{
685 struct xt_table_info *oldinfo, *private; 675 struct xt_table_info *private;
686 676
687 /* Do the substitution. */ 677 /* Do the substitution. */
688 mutex_lock(&table->lock); 678 local_bh_disable();
689 private = table->private; 679 private = table->private;
680
690 /* Check inside lock: is the old number correct? */ 681 /* Check inside lock: is the old number correct? */
691 if (num_counters != private->number) { 682 if (num_counters != private->number) {
692 duprintf("num_counters != table->private->number (%u/%u)\n", 683 duprintf("num_counters != table->private->number (%u/%u)\n",
693 num_counters, private->number); 684 num_counters, private->number);
694 mutex_unlock(&table->lock); 685 local_bh_enable();
695 *error = -EAGAIN; 686 *error = -EAGAIN;
696 return NULL; 687 return NULL;
697 } 688 }
698 oldinfo = private;
699 rcu_assign_pointer(table->private, newinfo);
700 newinfo->initial_entries = oldinfo->initial_entries;
701 mutex_unlock(&table->lock);
702 689
703 synchronize_net(); 690 table->private = newinfo;
704 return oldinfo; 691 newinfo->initial_entries = private->initial_entries;
692
693 /*
694 * Even though table entries have now been swapped, other CPU's
695 * may still be using the old entries. This is okay, because
696 * resynchronization happens because of the locking done
697 * during the get_counters() routine.
698 */
699 local_bh_enable();
700
701 return private;
705} 702}
706EXPORT_SYMBOL_GPL(xt_replace_table); 703EXPORT_SYMBOL_GPL(xt_replace_table);
707 704
@@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
734 731
735 /* Simplifies replace_table code. */ 732 /* Simplifies replace_table code. */
736 table->private = bootstrap; 733 table->private = bootstrap;
737 mutex_init(&table->lock);
738 734
739 if (!xt_replace_table(table, 0, newinfo, &ret)) 735 if (!xt_replace_table(table, 0, newinfo, &ret))
740 goto unlock; 736 goto unlock;
@@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = {
1147 1143
1148static int __init xt_init(void) 1144static int __init xt_init(void)
1149{ 1145{
1150 int i, rv; 1146 unsigned int i;
1147 int rv;
1148
1149 for_each_possible_cpu(i) {
1150 struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
1151 spin_lock_init(&lock->lock);
1152 lock->readers = 0;
1153 }
1151 1154
1152 xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); 1155 xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
1153 if (!xt) 1156 if (!xt)
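The per-cpu xt_info_locks initialized here replace the RCU-and-mutex scheme removed above: the packet path takes only its own CPU's lock, the readers count lets that path nest (rule evaluation can re-enter the table), and a writer such as get_counters() locks every CPU in turn to resynchronize. The read-side helpers live in the x_tables header rather than in this hunk; a sketch of the shape implied by the lock/readers fields set up in xt_init() (hedged, not quoted from the header):

	static inline void xt_info_rdlock_bh(void)
	{
		struct xt_info_lock *lock;

		local_bh_disable();
		lock = &__get_cpu_var(xt_info_locks);
		if (likely(!lock->readers++))	/* outermost level takes the lock */
			spin_lock(&lock->lock);
	}

	static inline void xt_info_rdunlock_bh(void)
	{
		struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);

		if (likely(!--lock->readers))	/* innermost level releases it */
			spin_unlock(&lock->lock);
		local_bh_enable();
	}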
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 6c4847662b85..69a639f35403 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -135,7 +135,13 @@ static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
135{ 135{
136 struct xt_cluster_match_info *info = par->matchinfo; 136 struct xt_cluster_match_info *info = par->matchinfo;
137 137
138 if (info->node_mask >= (1 << info->total_nodes)) { 138 if (info->total_nodes > XT_CLUSTER_NODES_MAX) {
139 printk(KERN_ERR "xt_cluster: you have exceeded the maximum "
140 "number of cluster nodes (%u > %u)\n",
141 info->total_nodes, XT_CLUSTER_NODES_MAX);
142 return false;
143 }
144 if (info->node_mask >= (1ULL << info->total_nodes)) {
139 printk(KERN_ERR "xt_cluster: this node mask cannot be " 145 printk(KERN_ERR "xt_cluster: this node mask cannot be "
140 "higher than the total number of nodes\n"); 146 "higher than the total number of nodes\n");
141 return false; 147 return false;
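Two fixes in one check: total_nodes is now range-checked first, and the mask comparison shifts 1ULL rather than a plain int, because shifting a 32-bit 1 by 32 (a plausible value for XT_CLUSTER_NODES_MAX) is undefined behaviour in C and typically evaluates to 1 on x86, which would have rejected every mask. A standalone illustration, assuming the maximum is 32:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		unsigned int total_nodes = 32;	/* assumed XT_CLUSTER_NODES_MAX */

		/* 1u << 32 is undefined; widening first keeps the shift
		 * well defined for any node count up to 63. */
		uint64_t limit = 1ULL << total_nodes;

		printf("node masks must be below 0x%llx\n",
		       (unsigned long long)limit);
		return 0;
	}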
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index a5b5369c30f9..219dcdbe388c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -926,7 +926,7 @@ static int dl_seq_show(struct seq_file *s, void *v)
926 if (!hlist_empty(&htable->hash[*bucket])) { 926 if (!hlist_empty(&htable->hash[*bucket])) {
927 hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) 927 hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node)
928 if (dl_seq_real_show(ent, htable->family, s)) 928 if (dl_seq_real_show(ent, htable->family, s))
929 return 1; 929 return -1;
930 } 930 }
931 return 0; 931 return 0;
932} 932}
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 791e030ea903..eb0ceb846527 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -474,7 +474,7 @@ static ssize_t recent_old_proc_write(struct file *file,
474 struct recent_table *t = pde->data; 474 struct recent_table *t = pde->data;
475 struct recent_entry *e; 475 struct recent_entry *e;
476 char buf[sizeof("+255.255.255.255")], *c = buf; 476 char buf[sizeof("+255.255.255.255")], *c = buf;
477 __be32 addr; 477 union nf_inet_addr addr = {};
478 int add; 478 int add;
479 479
480 if (size > sizeof(buf)) 480 if (size > sizeof(buf))
@@ -506,14 +506,13 @@ static ssize_t recent_old_proc_write(struct file *file,
506 add = 1; 506 add = 1;
507 break; 507 break;
508 } 508 }
509 addr = in_aton(c); 509 addr.ip = in_aton(c);
510 510
511 spin_lock_bh(&recent_lock); 511 spin_lock_bh(&recent_lock);
512 e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0); 512 e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0);
513 if (e == NULL) { 513 if (e == NULL) {
514 if (add) 514 if (add)
515 recent_entry_init(t, (const void *)&addr, 515 recent_entry_init(t, &addr, NFPROTO_IPV4, 0);
516 NFPROTO_IPV4, 0);
517 } else { 516 } else {
518 if (add) 517 if (add)
519 recent_entry_update(t, e); 518 recent_entry_update(t, e);
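The type change matters because recent_entry_lookup() compares a full union nf_inet_addr (16 bytes), while the old code passed the address of a lone __be32, leaving the other 12 bytes as stack garbage. Zero-initializing the union and filling only the IPv4 member makes the comparison deterministic; a userspace model of the pattern (an abridged stand-in for the kernel union):

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>

	union inet_addr {
		uint32_t all[4];
		uint32_t ip;		/* IPv4 lives in the first word */
		uint32_t ip6[4];
	};

	int main(void)
	{
		union inet_addr a = {{0}}, b = {{0}};	/* the hunk's "= {}" */

		a.ip = 0x0100007f;	/* 127.0.0.1 on a little-endian host */
		b.ip = 0x0100007f;

		/* Whole-union compares now behave predictably. */
		printf("%s\n", !memcmp(&a, &b, sizeof(a)) ? "match" : "differ");
		return 0;
	}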
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index 834c6eb7f484..c0519139679e 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -256,13 +256,11 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
256{ 256{
257 struct netlbl_af4list *entry; 257 struct netlbl_af4list *entry;
258 258
259 entry = netlbl_af4list_search(addr, head); 259 entry = netlbl_af4list_search_exact(addr, mask, head);
260 if (entry != NULL && entry->addr == addr && entry->mask == mask) { 260 if (entry == NULL)
261 netlbl_af4list_remove_entry(entry); 261 return NULL;
262 return entry; 262 netlbl_af4list_remove_entry(entry);
263 } 263 return entry;
264
265 return NULL;
266} 264}
267 265
268#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 266#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -299,15 +297,11 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
299{ 297{
300 struct netlbl_af6list *entry; 298 struct netlbl_af6list *entry;
301 299
302 entry = netlbl_af6list_search(addr, head); 300 entry = netlbl_af6list_search_exact(addr, mask, head);
303 if (entry != NULL && 301 if (entry == NULL)
304 ipv6_addr_equal(&entry->addr, addr) && 302 return NULL;
305 ipv6_addr_equal(&entry->mask, mask)) { 303 netlbl_af6list_remove_entry(entry);
306 netlbl_af6list_remove_entry(entry); 304 return entry;
307 return entry;
308 }
309
310 return NULL;
311} 305}
312#endif /* IPv6 */ 306#endif /* IPv6 */
313 307
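Both helpers previously ran a longest-match search and then insisted the result's address and mask equal the requested pair, so an entry shadowed by a broader match could never be removed; the new *_search_exact() helpers match on address and mask directly. Their bodies are outside this diff; the IPv4 one plausibly walks the list like this (inferred from the call site, not the verbatim kernel code):

	static struct netlbl_af4list *
	netlbl_af4list_search_exact(__be32 addr, __be32 mask,
				    struct list_head *head)
	{
		struct netlbl_af4list *iter;

		list_for_each_entry_rcu(iter, head, list)
			if (iter->valid && iter->addr == addr && iter->mask == mask)
				return iter;
		return NULL;
	}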
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 4e705f87969f..3be0e016ab7d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1084,8 +1084,10 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1084 1084
1085 /* Build a packet - the conventional user limit is 236 bytes. We can 1085 /* Build a packet - the conventional user limit is 236 bytes. We can
1086 do ludicrously large NetROM frames but must not overflow */ 1086 do ludicrously large NetROM frames but must not overflow */
1087 if (len > 65536) 1087 if (len > 65536) {
1088 return -EMSGSIZE; 1088 err = -EMSGSIZE;
1089 goto out;
1090 }
1089 1091
1090 SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n"); 1092 SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n");
1091 size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; 1093 size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 74776de523ec..f546e81acc45 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1758,8 +1758,9 @@ static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1758 1758
1759static inline char *alloc_one_pg_vec_page(unsigned long order) 1759static inline char *alloc_one_pg_vec_page(unsigned long order)
1760{ 1760{
1761 return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, 1761 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
1762 order); 1762
1763 return (char *) __get_free_pages(gfp_flags, order);
1763} 1764}
1764 1765
1765static char **alloc_pg_vec(struct tpacket_req *req, int order) 1766static char **alloc_pg_vec(struct tpacket_req *req, int order)
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 51a5669573f2..6ec7d55b1769 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -6,7 +6,7 @@ config PHONET
6 tristate "Phonet protocols family" 6 tristate "Phonet protocols family"
7 help 7 help
8 The Phone Network protocol (PhoNet) is a packet-oriented 8 The Phone Network protocol (PhoNet) is a packet-oriented
9 communication protocol developped by Nokia for use with its modems. 9 communication protocol developed by Nokia for use with its modems.
10 10
11 This is required for Maemo to use cellular data connectivity (if 11 This is required for Maemo to use cellular data connectivity (if
12 supported). It can also be used to control Nokia phones 12 supported). It can also be used to control Nokia phones
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 06a7b798d9a7..4933b380985e 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
51 51
52struct list_head rds_ib_devices; 52struct list_head rds_ib_devices;
53 53
54/* NOTE: if also grabbing ibdev lock, grab this first */
54DEFINE_SPINLOCK(ib_nodev_conns_lock); 55DEFINE_SPINLOCK(ib_nodev_conns_lock);
55LIST_HEAD(ib_nodev_conns); 56LIST_HEAD(ib_nodev_conns);
56 57
@@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device)
137 kfree(i_ipaddr); 138 kfree(i_ipaddr);
138 } 139 }
139 140
140 rds_ib_remove_conns(rds_ibdev); 141 rds_ib_destroy_conns(rds_ibdev);
141 142
142 if (rds_ibdev->mr_pool) 143 if (rds_ibdev->mr_pool)
143 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); 144 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
@@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr)
249void rds_ib_exit(void) 250void rds_ib_exit(void)
250{ 251{
251 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 252 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
252 rds_ib_remove_nodev_conns(); 253 rds_ib_destroy_nodev_conns();
253 ib_unregister_client(&rds_ib_client); 254 ib_unregister_client(&rds_ib_client);
254 rds_ib_sysctl_exit(); 255 rds_ib_sysctl_exit();
255 rds_ib_recv_exit(); 256 rds_ib_recv_exit();
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 8be563a1363a..069206cae733 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -108,7 +108,12 @@ struct rds_ib_connection {
108 108
109 /* sending acks */ 109 /* sending acks */
110 unsigned long i_ack_flags; 110 unsigned long i_ack_flags;
111#ifdef KERNEL_HAS_ATOMIC64
112 atomic64_t i_ack_next; /* next ACK to send */
113#else
114 spinlock_t i_ack_lock; /* protect i_ack_next */
111 u64 i_ack_next; /* next ACK to send */ 115 u64 i_ack_next; /* next ACK to send */
116#endif
112 struct rds_header *i_ack; 117 struct rds_header *i_ack;
113 struct ib_send_wr i_ack_wr; 118 struct ib_send_wr i_ack_wr;
114 struct ib_sge i_ack_sge; 119 struct ib_sge i_ack_sge;
@@ -267,9 +272,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
267 272
268/* ib_rdma.c */ 273/* ib_rdma.c */
269int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 274int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
270int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 275void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
271void rds_ib_remove_nodev_conns(void); 276void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
272void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); 277void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock);
278static inline void rds_ib_destroy_nodev_conns(void)
279{
280 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
281}
282static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
283{
284 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
285}
273struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 286struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
274void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 287void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
275void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 288void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -355,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
355 return &sge[1]; 368 return &sge[1];
356} 369}
357 370
358static inline void rds_ib_set_64bit(u64 *ptr, u64 val)
359{
360#if BITS_PER_LONG == 64
361 *ptr = val;
362#else
363 set_64bit(ptr, val);
364#endif
365}
366
367#endif 371#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0532237bd128..f8e40e1a6038 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); 126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
127 if (err) 127 if (err)
128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
129 err = rds_ib_add_conn(rds_ibdev, conn); 129 rds_ib_add_conn(rds_ibdev, conn);
130 if (err)
131 printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err);
132 130
133 /* If the peer gave us the last packet it saw, process this as if 131 /* If the peer gave us the last packet it saw, process this as if
134 * we had received a regular ACK. */ 132 * we had received a regular ACK. */
@@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
616 /* 614 /*
617 * Move connection back to the nodev list. 615 * Move connection back to the nodev list.
618 */ 616 */
619 if (ic->rds_ibdev) { 617 if (ic->rds_ibdev)
620 618 rds_ib_remove_conn(ic->rds_ibdev, conn);
621 spin_lock_irq(&ic->rds_ibdev->spinlock);
622 BUG_ON(list_empty(&ic->ib_node));
623 list_del(&ic->ib_node);
624 spin_unlock_irq(&ic->rds_ibdev->spinlock);
625
626 spin_lock_irq(&ib_nodev_conns_lock);
627 list_add_tail(&ic->ib_node, &ib_nodev_conns);
628 spin_unlock_irq(&ib_nodev_conns_lock);
629 ic->rds_ibdev = NULL;
630 }
631 619
632 ic->i_cm_id = NULL; 620 ic->i_cm_id = NULL;
633 ic->i_pd = NULL; 621 ic->i_pd = NULL;
@@ -648,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
648 636
649 /* Clear the ACK state */ 637 /* Clear the ACK state */
650 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 638 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
651 rds_ib_set_64bit(&ic->i_ack_next, 0); 639#ifdef KERNEL_HAS_ATOMIC64
640 atomic64_set(&ic->i_ack_next, 0);
641#else
642 ic->i_ack_next = 0;
643#endif
652 ic->i_ack_recv = 0; 644 ic->i_ack_recv = 0;
653 645
654 /* Clear flow control state */ 646 /* Clear flow control state */
@@ -681,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
681 673
682 INIT_LIST_HEAD(&ic->ib_node); 674 INIT_LIST_HEAD(&ic->ib_node);
683 mutex_init(&ic->i_recv_mutex); 675 mutex_init(&ic->i_recv_mutex);
676#ifndef KERNEL_HAS_ATOMIC64
677 spin_lock_init(&ic->i_ack_lock);
678#endif
684 679
685 /* 680 /*
686 * rds_ib_conn_shutdown() waits for these to be emptied so they 681 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -701,11 +696,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
701 return 0; 696 return 0;
702} 697}
703 698
699/*
700 * Free a connection. Connection must be shut down and not set for reconnect.
701 */
704void rds_ib_conn_free(void *arg) 702void rds_ib_conn_free(void *arg)
705{ 703{
706 struct rds_ib_connection *ic = arg; 704 struct rds_ib_connection *ic = arg;
705 spinlock_t *lock_ptr;
706
707 rdsdebug("ic %p\n", ic); 707 rdsdebug("ic %p\n", ic);
708
709 /*
710 * Conn is either on a dev's list or on the nodev list.
711 * A race with shutdown() or connect() would cause problems
712 * (since rds_ibdev would change) but that should never happen.
713 */
714 lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
715
716 spin_lock_irq(lock_ptr);
708 list_del(&ic->ib_node); 717 list_del(&ic->ib_node);
718 spin_unlock_irq(lock_ptr);
719
709 kfree(ic); 720 kfree(ic);
710} 721}
711 722
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 69a6289ed672..81033af93020 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
140} 140}
141 141
142int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 142void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
143{ 143{
144 struct rds_ib_connection *ic = conn->c_transport_data; 144 struct rds_ib_connection *ic = conn->c_transport_data;
145 145
@@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn
148 BUG_ON(list_empty(&ib_nodev_conns)); 148 BUG_ON(list_empty(&ib_nodev_conns));
149 BUG_ON(list_empty(&ic->ib_node)); 149 BUG_ON(list_empty(&ic->ib_node));
150 list_del(&ic->ib_node); 150 list_del(&ic->ib_node);
151 spin_unlock_irq(&ib_nodev_conns_lock);
152 151
153 spin_lock_irq(&rds_ibdev->spinlock); 152 spin_lock_irq(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 153 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock); 154 spin_unlock_irq(&rds_ibdev->spinlock);
155 spin_unlock_irq(&ib_nodev_conns_lock);
156 156
157 ic->rds_ibdev = rds_ibdev; 157 ic->rds_ibdev = rds_ibdev;
158
159 return 0;
160} 158}
161 159
162void rds_ib_remove_nodev_conns(void) 160void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
163{ 161{
164 struct rds_ib_connection *ic, *_ic; 162 struct rds_ib_connection *ic = conn->c_transport_data;
165 LIST_HEAD(tmp_list);
166 163
167 /* avoid calling conn_destroy with irqs off */ 164 /* place conn on nodev_conns_list */
168 spin_lock_irq(&ib_nodev_conns_lock); 165 spin_lock(&ib_nodev_conns_lock);
169 list_splice(&ib_nodev_conns, &tmp_list);
170 INIT_LIST_HEAD(&ib_nodev_conns);
171 spin_unlock_irq(&ib_nodev_conns_lock);
172 166
173 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { 167 spin_lock_irq(&rds_ibdev->spinlock);
174 if (ic->conn->c_passive) 168 BUG_ON(list_empty(&ic->ib_node));
175 rds_conn_destroy(ic->conn->c_passive); 169 list_del(&ic->ib_node);
176 rds_conn_destroy(ic->conn); 170 spin_unlock_irq(&rds_ibdev->spinlock);
177 } 171
172 list_add_tail(&ic->ib_node, &ib_nodev_conns);
173
174 spin_unlock(&ib_nodev_conns_lock);
175
176 ic->rds_ibdev = NULL;
178} 177}
179 178
180void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) 179void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock)
181{ 180{
182 struct rds_ib_connection *ic, *_ic; 181 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 182 LIST_HEAD(tmp_list);
184 183
185 /* avoid calling conn_destroy with irqs off */ 184 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(&rds_ibdev->spinlock); 185 spin_lock_irq(list_lock);
187 list_splice(&rds_ibdev->conn_list, &tmp_list); 186 list_splice(list, &tmp_list);
188 INIT_LIST_HEAD(&rds_ibdev->conn_list); 187 INIT_LIST_HEAD(list);
189 spin_unlock_irq(&rds_ibdev->spinlock); 188 spin_unlock_irq(list_lock);
190 189
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { 190 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
192 if (ic->conn->c_passive) 191 if (ic->conn->c_passive)
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 5061b5502162..36d931573ff4 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
395 * room for it beyond the ring size. Send completion notices its special 395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case. 396 * wr_id and avoids working with the ring in that case.
397 */ 397 */
398#ifndef KERNEL_HAS_ATOMIC64
398static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, 399static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
399 int ack_required) 400 int ack_required)
400{ 401{
401 rds_ib_set_64bit(&ic->i_ack_next, seq); 402 unsigned long flags;
403
404 spin_lock_irqsave(&ic->i_ack_lock, flags);
405 ic->i_ack_next = seq;
406 if (ack_required)
407 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
408 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
409}
410
411static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
412{
413 unsigned long flags;
414 u64 seq;
415
416 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
417
418 spin_lock_irqsave(&ic->i_ack_lock, flags);
419 seq = ic->i_ack_next;
420 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
421
422 return seq;
423}
424#else
425static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
426 int ack_required)
427{
428 atomic64_set(&ic->i_ack_next, seq);
402 if (ack_required) { 429 if (ack_required) {
403 smp_mb__before_clear_bit(); 430 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 431 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 437 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit(); 438 smp_mb__after_clear_bit();
412 439
413 return ic->i_ack_next; 440 return atomic64_read(&ic->i_ack_next);
414} 441}
442#endif
443
415 444
416static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) 445static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
417{ 446{
@@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
464 * - i_ack_next, which is the last sequence number we received 493 * - i_ack_next, which is the last sequence number we received
465 * 494 *
466 * Potentially, send queue and receive queue handlers can run concurrently. 495 * Potentially, send queue and receive queue handlers can run concurrently.
 496 * It would be nice not to have to use a spinlock to synchronize things,
 497 * but the one problem that rules this out is that 64-bit updates are
498 * not atomic on all platforms. Things would be a lot simpler if
499 * we had atomic64 or maybe cmpxchg64 everywhere.
467 * 500 *
468 * Reconnecting complicates this picture just slightly. When we 501 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer 502 * reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 1b56905c4c08..b732efb5b634 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR
51 51
52struct list_head rds_iw_devices; 52struct list_head rds_iw_devices;
53 53
54/* NOTE: if also grabbing iwdev lock, grab this first */
54DEFINE_SPINLOCK(iw_nodev_conns_lock); 55DEFINE_SPINLOCK(iw_nodev_conns_lock);
55LIST_HEAD(iw_nodev_conns); 56LIST_HEAD(iw_nodev_conns);
56 57
@@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device)
145 } 146 }
146 spin_unlock_irq(&rds_iwdev->spinlock); 147 spin_unlock_irq(&rds_iwdev->spinlock);
147 148
148 rds_iw_remove_conns(rds_iwdev); 149 rds_iw_destroy_conns(rds_iwdev);
149 150
150 if (rds_iwdev->mr_pool) 151 if (rds_iwdev->mr_pool)
151 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); 152 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
@@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr)
258void rds_iw_exit(void) 259void rds_iw_exit(void)
259{ 260{
260 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); 261 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
261 rds_iw_remove_nodev_conns(); 262 rds_iw_destroy_nodev_conns();
262 ib_unregister_client(&rds_iw_client); 263 ib_unregister_client(&rds_iw_client);
263 rds_iw_sysctl_exit(); 264 rds_iw_sysctl_exit();
264 rds_iw_recv_exit(); 265 rds_iw_recv_exit();
diff --git a/net/rds/iw.h b/net/rds/iw.h
index 0ddda34f2a1c..b4fb27252895 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -131,7 +131,12 @@ struct rds_iw_connection {
131 131
132 /* sending acks */ 132 /* sending acks */
133 unsigned long i_ack_flags; 133 unsigned long i_ack_flags;
134#ifdef KERNEL_HAS_ATOMIC64
135 atomic64_t i_ack_next; /* next ACK to send */
136#else
137 spinlock_t i_ack_lock; /* protect i_ack_next */
134 u64 i_ack_next; /* next ACK to send */ 138 u64 i_ack_next; /* next ACK to send */
139#endif
135 struct rds_header *i_ack; 140 struct rds_header *i_ack;
136 struct ib_send_wr i_ack_wr; 141 struct ib_send_wr i_ack_wr;
137 struct ib_sge i_ack_sge; 142 struct ib_sge i_ack_sge;
@@ -294,9 +299,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn,
294 299
295/* ib_rdma.c */ 300/* ib_rdma.c */
296int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); 301int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
297int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); 302void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
298void rds_iw_remove_nodev_conns(void); 303void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
299void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); 304void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock);
305static inline void rds_iw_destroy_nodev_conns(void)
306{
307 __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock);
308}
309static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev)
310{
311 __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock);
312}
300struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); 313struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
301void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); 314void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
302void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); 315void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
@@ -383,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
383 return &sge[1]; 396 return &sge[1];
384} 397}
385 398
386static inline void rds_iw_set_64bit(u64 *ptr, u64 val)
387{
388#if BITS_PER_LONG == 64
389 *ptr = val;
390#else
391 set_64bit(ptr, val);
392#endif
393}
394
395#endif 399#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 57ecb3d4b8a5..a416b0d492b1 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); 86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
87 if (err) 87 if (err)
88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); 88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
89 err = rds_iw_add_conn(rds_iwdev, conn); 89 rds_iw_add_conn(rds_iwdev, conn);
90 if (err)
91 printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
92 90
93 /* If the peer gave us the last packet it saw, process this as if 91 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */ 92 * we had received a regular ACK. */
@@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
637 * Move connection back to the nodev list. 635 * Move connection back to the nodev list.
638 * Remove cm_id from the device cm_id list. 636 * Remove cm_id from the device cm_id list.
639 */ 637 */
640 if (ic->rds_iwdev) { 638 if (ic->rds_iwdev)
641 639 rds_iw_remove_conn(ic->rds_iwdev, conn);
642 spin_lock_irq(&ic->rds_iwdev->spinlock);
643 BUG_ON(list_empty(&ic->iw_node));
644 list_del(&ic->iw_node);
645 spin_unlock_irq(&ic->rds_iwdev->spinlock);
646
647 spin_lock_irq(&iw_nodev_conns_lock);
648 list_add_tail(&ic->iw_node, &iw_nodev_conns);
649 spin_unlock_irq(&iw_nodev_conns_lock);
650 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
651 ic->rds_iwdev = NULL;
652 }
653 640
654 rdma_destroy_id(ic->i_cm_id); 641 rdma_destroy_id(ic->i_cm_id);
655 642
@@ -672,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
672 659
673 /* Clear the ACK state */ 660 /* Clear the ACK state */
674 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 661 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
675 rds_iw_set_64bit(&ic->i_ack_next, 0); 662#ifdef KERNEL_HAS_ATOMIC64
663 atomic64_set(&ic->i_ack_next, 0);
664#else
665 ic->i_ack_next = 0;
666#endif
676 ic->i_ack_recv = 0; 667 ic->i_ack_recv = 0;
677 668
678 /* Clear flow control state */ 669 /* Clear flow control state */
@@ -706,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
706 697
707 INIT_LIST_HEAD(&ic->iw_node); 698 INIT_LIST_HEAD(&ic->iw_node);
708 mutex_init(&ic->i_recv_mutex); 699 mutex_init(&ic->i_recv_mutex);
700#ifndef KERNEL_HAS_ATOMIC64
701 spin_lock_init(&ic->i_ack_lock);
702#endif
709 703
710 /* 704 /*
711 * rds_iw_conn_shutdown() waits for these to be emptied so they 705 * rds_iw_conn_shutdown() waits for these to be emptied so they
@@ -726,11 +720,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
726 return 0; 720 return 0;
727} 721}
728 722
723/*
724 * Free a connection. Connection must be shut down and not set for reconnect.
725 */
729void rds_iw_conn_free(void *arg) 726void rds_iw_conn_free(void *arg)
730{ 727{
731 struct rds_iw_connection *ic = arg; 728 struct rds_iw_connection *ic = arg;
729 spinlock_t *lock_ptr;
730
732 rdsdebug("ic %p\n", ic); 731 rdsdebug("ic %p\n", ic);
732
733 /*
734 * Conn is either on a dev's list or on the nodev list.
735 * A race with shutdown() or connect() would cause problems
736 * (since rds_iwdev would change) but that should never happen.
737 */
738 lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock;
739
740 spin_lock_irq(lock_ptr);
733 list_del(&ic->iw_node); 741 list_del(&ic->iw_node);
742 spin_unlock_irq(lock_ptr);
743
734 kfree(ic); 744 kfree(ic);
735} 745}
736 746
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 1c02a8f952d0..dcdb37da80f2 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
196 return rds_iw_add_cm_id(rds_iwdev, cm_id); 196 return rds_iw_add_cm_id(rds_iwdev, cm_id);
197} 197}
198 198
199int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) 199void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
200{ 200{
201 struct rds_iw_connection *ic = conn->c_transport_data; 201 struct rds_iw_connection *ic = conn->c_transport_data;
202 202
@@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn
205 BUG_ON(list_empty(&iw_nodev_conns)); 205 BUG_ON(list_empty(&iw_nodev_conns));
206 BUG_ON(list_empty(&ic->iw_node)); 206 BUG_ON(list_empty(&ic->iw_node));
207 list_del(&ic->iw_node); 207 list_del(&ic->iw_node);
208 spin_unlock_irq(&iw_nodev_conns_lock);
209 208
210 spin_lock_irq(&rds_iwdev->spinlock); 209 spin_lock_irq(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); 210 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock); 211 spin_unlock_irq(&rds_iwdev->spinlock);
212 spin_unlock_irq(&iw_nodev_conns_lock);
213 213
214 ic->rds_iwdev = rds_iwdev; 214 ic->rds_iwdev = rds_iwdev;
215
216 return 0;
217} 215}
218 216
219void rds_iw_remove_nodev_conns(void) 217void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
220{ 218{
221 struct rds_iw_connection *ic, *_ic; 219 struct rds_iw_connection *ic = conn->c_transport_data;
222 LIST_HEAD(tmp_list);
223 220
224 /* avoid calling conn_destroy with irqs off */ 221 /* place conn on nodev_conns_list */
225 spin_lock_irq(&iw_nodev_conns_lock); 222 spin_lock(&iw_nodev_conns_lock);
226 list_splice(&iw_nodev_conns, &tmp_list);
227 INIT_LIST_HEAD(&iw_nodev_conns);
228 spin_unlock_irq(&iw_nodev_conns_lock);
229 223
230 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { 224 spin_lock_irq(&rds_iwdev->spinlock);
231 if (ic->conn->c_passive) 225 BUG_ON(list_empty(&ic->iw_node));
232 rds_conn_destroy(ic->conn->c_passive); 226 list_del(&ic->iw_node);
233 rds_conn_destroy(ic->conn); 227 spin_unlock_irq(&rds_iwdev->spinlock);
234 } 228
229 list_add_tail(&ic->iw_node, &iw_nodev_conns);
230
231 spin_unlock(&iw_nodev_conns_lock);
232
233 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
234 ic->rds_iwdev = NULL;
235} 235}
236 236
237void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) 237void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock)
238{ 238{
239 struct rds_iw_connection *ic, *_ic; 239 struct rds_iw_connection *ic, *_ic;
240 LIST_HEAD(tmp_list); 240 LIST_HEAD(tmp_list);
241 241
242 /* avoid calling conn_destroy with irqs off */ 242 /* avoid calling conn_destroy with irqs off */
243 spin_lock_irq(&rds_iwdev->spinlock); 243 spin_lock_irq(list_lock);
244 list_splice(&rds_iwdev->conn_list, &tmp_list); 244 list_splice(list, &tmp_list);
245 INIT_LIST_HEAD(&rds_iwdev->conn_list); 245 INIT_LIST_HEAD(list);
246 spin_unlock_irq(&rds_iwdev->spinlock); 246 spin_unlock_irq(list_lock);
247 247
248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { 248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
249 if (ic->conn->c_passive) 249 if (ic->conn->c_passive)
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index a1931f0027a2..fde470fa50d5 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
395 * room for it beyond the ring size. Send completion notices its special 395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case. 396 * wr_id and avoids working with the ring in that case.
397 */ 397 */
398#ifndef KERNEL_HAS_ATOMIC64
398static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, 399static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
399 int ack_required) 400 int ack_required)
400{ 401{
401 rds_iw_set_64bit(&ic->i_ack_next, seq); 402 unsigned long flags;
403
404 spin_lock_irqsave(&ic->i_ack_lock, flags);
405 ic->i_ack_next = seq;
406 if (ack_required)
407 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
408 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
409}
410
411static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
412{
413 unsigned long flags;
414 u64 seq;
415
416 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
417
418 spin_lock_irqsave(&ic->i_ack_lock, flags);
419 seq = ic->i_ack_next;
420 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
421
422 return seq;
423}
424#else
425static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
426 int ack_required)
427{
428 atomic64_set(&ic->i_ack_next, seq);
402 if (ack_required) { 429 if (ack_required) {
403 smp_mb__before_clear_bit(); 430 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 431 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 437 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit(); 438 smp_mb__after_clear_bit();
412 439
413 return ic->i_ack_next; 440 return atomic64_read(&ic->i_ack_next);
414} 441}
442#endif
443
415 444
416static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) 445static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
417{ 446{
@@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
464 * - i_ack_next, which is the last sequence number we received 493 * - i_ack_next, which is the last sequence number we received
465 * 494 *
466 * Potentially, send queue and receive queue handlers can run concurrently. 495 * Potentially, send queue and receive queue handlers can run concurrently.
 496 * It would be nice not to have to use a spinlock to synchronize things,
 497 * but the one problem that rules this out is that 64-bit updates are
498 * not atomic on all platforms. Things would be a lot simpler if
499 * we had atomic64 or maybe cmpxchg64 everywhere.
467 * 500 *
468 * Reconnecting complicates this picture just slightly. When we 501 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer 502 * reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 060400704979..71794449ca4e 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -28,6 +28,10 @@
28 */ 28 */
29#define RDS_PORT 18634 29#define RDS_PORT 18634
30 30
31#ifdef ATOMIC64_INIT
32#define KERNEL_HAS_ATOMIC64
33#endif
34
31#ifdef DEBUG 35#ifdef DEBUG
32#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) 36#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
33#else 37#else
@@ -634,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *,
634void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); 638void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
635 639
636/* stats.c */ 640/* stats.c */
637DECLARE_PER_CPU(struct rds_statistics, rds_stats); 641DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
638#define rds_stats_inc_which(which, member) do { \ 642#define rds_stats_inc_which(which, member) do { \
639 per_cpu(which, get_cpu()).member++; \ 643 per_cpu(which, get_cpu()).member++; \
640 put_cpu(); \ 644 put_cpu(); \
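KERNEL_HAS_ATOMIC64 is probed by testing for ATOMIC64_INIT, which only exists where the architecture provides atomic64_t; every i_ack_next site in the IB and iWARP files above then compiles either the lock-free variant or the spinlock-protected u64 fallback. Condensed from those hunks, the two shapes the setter takes:

	#ifdef KERNEL_HAS_ATOMIC64
		atomic64_set(&ic->i_ack_next, seq);	/* one atomic 64-bit store */
	#else
		/* a plain 64-bit store is not atomic on 32-bit CPUs */
		spin_lock_irqsave(&ic->i_ack_lock, flags);
		ic->i_ack_next = seq;
		spin_unlock_irqrestore(&ic->i_ack_lock, flags);
	#endif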
diff --git a/net/rds/send.c b/net/rds/send.c
index 1b37364656f0..104fe033203d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -615,7 +615,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
615{ 615{
616 struct rds_message *rm, *tmp; 616 struct rds_message *rm, *tmp;
617 struct rds_connection *conn; 617 struct rds_connection *conn;
618 unsigned long flags; 618 unsigned long flags, flags2;
619 LIST_HEAD(list); 619 LIST_HEAD(list);
620 int wake = 0; 620 int wake = 0;
621 621
@@ -651,9 +651,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
651 list_for_each_entry(rm, &list, m_sock_item) { 651 list_for_each_entry(rm, &list, m_sock_item) {
652 /* We do this here rather than in the loop above, so that 652 /* We do this here rather than in the loop above, so that
653 * we don't have to nest m_rs_lock under rs->rs_lock */ 653 * we don't have to nest m_rs_lock under rs->rs_lock */
654 spin_lock(&rm->m_rs_lock); 654 spin_lock_irqsave(&rm->m_rs_lock, flags2);
655 rm->m_rs = NULL; 655 rm->m_rs = NULL;
656 spin_unlock(&rm->m_rs_lock); 656 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
657 657
658 /* 658 /*
659 * If we see this flag cleared then we're *sure* that someone 659 * If we see this flag cleared then we're *sure* that someone
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 0f36e8d59b29..877a7f65f707 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1072,10 +1072,6 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1072 unsigned char *asmptr; 1072 unsigned char *asmptr;
1073 int n, size, qbit = 0; 1073 int n, size, qbit = 0;
1074 1074
1075 /* ROSE empty frame has no meaning : don't send */
1076 if (len == 0)
1077 return 0;
1078
1079 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) 1075 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
1080 return -EINVAL; 1076 return -EINVAL;
1081 1077
@@ -1273,12 +1269,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
1273 skb_reset_transport_header(skb); 1269 skb_reset_transport_header(skb);
1274 copied = skb->len; 1270 copied = skb->len;
1275 1271
1276 /* ROSE empty frame has no meaning : ignore it */
1277 if (copied == 0) {
1278 skb_free_datagram(sk, skb);
1279 return copied;
1280 }
1281
1282 if (copied > size) { 1272 if (copied > size) {
1283 copied = size; 1273 copied = size;
1284 msg->msg_flags |= MSG_TRUNC; 1274 msg->msg_flags |= MSG_TRUNC;
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 0f1218b8d289..67e38a056240 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -343,9 +343,9 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
343 /* not yet present - create a candidate for a new connection 343 /* not yet present - create a candidate for a new connection
344 * and then redo the check */ 344 * and then redo the check */
345 conn = rxrpc_alloc_connection(gfp); 345 conn = rxrpc_alloc_connection(gfp);
346 if (IS_ERR(conn)) { 346 if (!conn) {
347 _leave(" = %ld", PTR_ERR(conn)); 347 _leave(" = -ENOMEM");
348 return PTR_ERR(conn); 348 return -ENOMEM;
349 } 349 }
350 350
351 conn->trans = trans; 351 conn->trans = trans;
@@ -508,9 +508,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
508 /* not yet present - create a candidate for a new connection and then 508 /* not yet present - create a candidate for a new connection and then
509 * redo the check */ 509 * redo the check */
510 candidate = rxrpc_alloc_connection(gfp); 510 candidate = rxrpc_alloc_connection(gfp);
511 if (IS_ERR(candidate)) { 511 if (!candidate) {
512 _leave(" = %ld", PTR_ERR(candidate)); 512 _leave(" = -ENOMEM");
513 return PTR_ERR(candidate); 513 return -ENOMEM;
514 } 514 }
515 515
516 candidate->trans = trans; 516 candidate->trans = trans;
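rxrpc_alloc_connection() is kmalloc-backed and returns NULL on failure, never an ERR_PTR, so the old IS_ERR() tests could never fire and a failed allocation sailed on toward a NULL dereference. The ERR_PTR convention only flags pointers in the top 4095 bytes of the address space; a standalone model of why NULL slips through:

	#include <stdio.h>

	#define MAX_ERRNO	4095
	#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

	static long IS_ERR(const void *ptr)
	{
		return IS_ERR_VALUE(ptr);
	}

	int main(void)
	{
		printf("IS_ERR(NULL) = %ld\n", IS_ERR(NULL));	/* 0: missed */
		printf("IS_ERR(ERR_PTR(-12)) = %ld\n",
		       IS_ERR((void *)(unsigned long)-12));	/* 1: caught */
		return 0;
	}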
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 173fcc4b050d..09cdcdfe7e91 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -135,6 +135,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
135 unsigned long cl; 135 unsigned long cl;
136 unsigned long fh; 136 unsigned long fh;
137 int err; 137 int err;
138 int tp_created = 0;
138 139
139 if (net != &init_net) 140 if (net != &init_net)
140 return -EINVAL; 141 return -EINVAL;
@@ -254,7 +255,7 @@ replay:
254 } 255 }
255 tp->ops = tp_ops; 256 tp->ops = tp_ops;
256 tp->protocol = protocol; 257 tp->protocol = protocol;
257 tp->prio = nprio ? : tcf_auto_prio(*back); 258 tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back));
258 tp->q = q; 259 tp->q = q;
259 tp->classify = tp_ops->classify; 260 tp->classify = tp_ops->classify;
260 tp->classid = parent; 261 tp->classid = parent;
@@ -266,10 +267,7 @@ replay:
266 goto errout; 267 goto errout;
267 } 268 }
268 269
269 spin_lock_bh(root_lock); 270 tp_created = 1;
270 tp->next = *back;
271 *back = tp;
272 spin_unlock_bh(root_lock);
273 271
274 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) 272 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
275 goto errout; 273 goto errout;
@@ -296,8 +294,11 @@ replay:
296 switch (n->nlmsg_type) { 294 switch (n->nlmsg_type) {
297 case RTM_NEWTFILTER: 295 case RTM_NEWTFILTER:
298 err = -EEXIST; 296 err = -EEXIST;
299 if (n->nlmsg_flags & NLM_F_EXCL) 297 if (n->nlmsg_flags & NLM_F_EXCL) {
298 if (tp_created)
299 tcf_destroy(tp);
300 goto errout; 300 goto errout;
301 }
301 break; 302 break;
302 case RTM_DELTFILTER: 303 case RTM_DELTFILTER:
303 err = tp->ops->delete(tp, fh); 304 err = tp->ops->delete(tp, fh);
@@ -314,8 +315,18 @@ replay:
314 } 315 }
315 316
316 err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); 317 err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
317 if (err == 0) 318 if (err == 0) {
319 if (tp_created) {
320 spin_lock_bh(root_lock);
321 tp->next = *back;
322 *back = tp;
323 spin_unlock_bh(root_lock);
324 }
318 tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); 325 tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
326 } else {
327 if (tp_created)
328 tcf_destroy(tp);
329 }
319 330
320errout: 331errout:
321 if (cl) 332 if (cl)
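Taken together, these hunks change the life cycle of a newly created tcf_proto: instead of being linked into the chain immediately and left there half-configured if ->change() failed, it is published under root_lock only after configuration succeeds and is destroyed on every failure path, so concurrent classifier walks never see a broken filter. Condensed from the hunks above:

	tp = create_proto();			/* allocated, not yet visible */
	tp_created = 1;

	err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
	if (err == 0) {
		if (tp_created) {
			spin_lock_bh(root_lock);	/* publish atomically */
			tp->next = *back;
			*back = tp;
			spin_unlock_bh(root_lock);
		}
		tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
	} else if (tp_created) {
		tcf_destroy(tp);		/* never visible, safe to free */
	}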
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 91a3db4a76f8..e5becb92b3e7 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -104,8 +104,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
104 struct tcf_result *res) 104 struct tcf_result *res)
105{ 105{
106 struct cls_cgroup_head *head = tp->root; 106 struct cls_cgroup_head *head = tp->root;
107 struct cgroup_cls_state *cs; 107 u32 classid;
108 int ret = 0;
109 108
110 /* 109 /*
111 * Due to the nature of the classifier it is required to ignore all 110 * Due to the nature of the classifier it is required to ignore all
@@ -121,17 +120,18 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
121 return -1; 120 return -1;
122 121
123 rcu_read_lock(); 122 rcu_read_lock();
124 cs = task_cls_state(current); 123 classid = task_cls_state(current)->classid;
125 if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) {
126 res->classid = cs->classid;
127 res->class = 0;
128 ret = tcf_exts_exec(skb, &head->exts, res);
129 } else
130 ret = -1;
131
132 rcu_read_unlock(); 124 rcu_read_unlock();
133 125
134 return ret; 126 if (!classid)
127 return -1;
128
129 if (!tcf_em_tree_match(skb, &head->ematches, NULL))
130 return -1;
131
132 res->classid = classid;
133 res->class = 0;
134 return tcf_exts_exec(skb, &head->exts, res);
135} 135}
136 136
137static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) 137static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
@@ -167,6 +167,9 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
167 struct tcf_exts e; 167 struct tcf_exts e;
168 int err; 168 int err;
169 169
170 if (!tca[TCA_OPTIONS])
171 return -EINVAL;
172
170 if (head == NULL) { 173 if (head == NULL) {
171 if (!handle) 174 if (!handle)
172 return -EINVAL; 175 return -EINVAL;
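
cls_cgroup_classify() now snapshots the classid inside the rcu_read_lock() section and drops the lock before running the ematch tree and the extensions, shrinking the read-side critical section to a single load. A loose userspace analogue of copy-out-then-unlock, with a reader/writer lock standing in for RCU:

#include <pthread.h>

static pthread_rwlock_t state_lock = PTHREAD_RWLOCK_INITIALIZER;
static unsigned int cur_classid;	/* stand-in for task_cls_state() */

static int slow_match(void)		/* stand-in for the ematch walk */
{
	return 1;
}

static int classify(unsigned int *res)
{
	unsigned int classid;

	/* Hold the read lock only long enough to snapshot the value... */
	pthread_rwlock_rdlock(&state_lock);
	classid = cur_classid;
	pthread_rwlock_unlock(&state_lock);

	/* ...then do the expensive checks outside the critical section. */
	if (!classid)
		return -1;
	if (!slow_match())
		return -1;
	*res = classid;
	return 0;
}
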
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 72cf86e3c090..fad596bf32d7 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -176,8 +176,10 @@ META_COLLECTOR(var_dev)
176 176
177META_COLLECTOR(int_vlan_tag) 177META_COLLECTOR(int_vlan_tag)
178{ 178{
179 unsigned short uninitialized_var(tag); 179 unsigned short tag;
180 if (vlan_get_tag(skb, &tag) < 0) 180
181 tag = vlan_tx_tag_get(skb);
182 if (!tag && __vlan_get_tag(skb, &tag))
181 *err = -1; 183 *err = -1;
182 else 184 else
183 dst->value = tag; 185 dst->value = tag;
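
The int_vlan_tag collector now prefers the out-of-band tag that hardware acceleration stored with the skb and parses the in-packet 802.1Q header only as a fallback; that is also why tag loses uninitialized_var(), since every path now writes it. A self-contained sketch of the lookup order (struct pkt and the parser are hypothetical stand-ins):

#include <stddef.h>

struct pkt {
	unsigned short hw_vlan_tag;	/* tag delivered out of band, 0 if unset */
	const unsigned char *l2;	/* start of the Ethernet header */
	size_t len;
};

static int parse_8021q_header(const struct pkt *p, unsigned short *tag)
{
	/* 802.1Q: TPID 0x8100 at bytes 12-13, TCI in bytes 14-15. */
	if (p->len < 16 || p->l2[12] != 0x81 || p->l2[13] != 0x00)
		return -1;
	*tag = (unsigned short)((p->l2[14] << 8) | p->l2[15]);
	return 0;
}

static int get_vlan_tag(const struct pkt *p, unsigned short *tag)
{
	*tag = p->hw_vlan_tag;		/* prefer the accelerated tag */
	if (*tag)
		return 0;
	return parse_8021q_header(p, tag);
}
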
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 92cfc9d7e3b9..69188e8358b4 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -51,7 +51,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
51 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; 51 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
52 52
53 if (sch->ops == &bfifo_qdisc_ops) 53 if (sch->ops == &bfifo_qdisc_ops)
54 limit *= qdisc_dev(sch)->mtu; 54 limit *= psched_mtu(qdisc_dev(sch));
55 55
56 q->limit = limit; 56 q->limit = limit;
57 } else { 57 } else {
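
psched_mtu() matters here because the bfifo limit is expressed in bytes and packets queued at the qdisc still carry their link-layer header, so a bare dev->mtu undercounts every frame. A sketch, assuming psched_mtu() of this era resolves to MTU plus hard header length:

struct netdev { unsigned int mtu, hard_header_len, tx_queue_len; };

static unsigned int psched_mtu(const struct netdev *dev)
{
	return dev->mtu + dev->hard_header_len;	/* assumed definition */
}

/* Byte limit for a bfifo: a queue's worth of full frames, not payloads. */
static unsigned int bfifo_limit(const struct netdev *dev)
{
	unsigned int limit = dev->tx_queue_len ? dev->tx_queue_len : 1;

	return limit * psched_mtu(dev);
}
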
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index d876b8734848..2b88295cb7b7 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -280,6 +280,14 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
280 if (unlikely(!skb)) 280 if (unlikely(!skb))
281 return NULL; 281 return NULL;
282 282
283#ifdef CONFIG_NET_CLS_ACT
284 /*
285 * If it's at ingress let's pretend the delay is
286 * from the network (tstamp will be updated).
287 */
288 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
289 skb->tstamp.tv64 = 0;
290#endif
283 pr_debug("netem_dequeue: return skb=%p\n", skb); 291 pr_debug("netem_dequeue: return skb=%p\n", skb);
284 sch->q.qlen--; 292 sch->q.qlen--;
285 return skb; 293 return skb;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index ec697cebb63b..3b6418297231 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -303,6 +303,8 @@ restart:
303 switch (teql_resolve(skb, skb_res, slave)) { 303 switch (teql_resolve(skb, skb_res, slave)) {
304 case 0: 304 case 0:
305 if (__netif_tx_trylock(slave_txq)) { 305 if (__netif_tx_trylock(slave_txq)) {
306 unsigned int length = qdisc_pkt_len(skb);
307
306 if (!netif_tx_queue_stopped(slave_txq) && 308 if (!netif_tx_queue_stopped(slave_txq) &&
307 !netif_tx_queue_frozen(slave_txq) && 309 !netif_tx_queue_frozen(slave_txq) &&
308 slave_ops->ndo_start_xmit(skb, slave) == 0) { 310 slave_ops->ndo_start_xmit(skb, slave) == 0) {
@@ -310,8 +312,7 @@ restart:
310 master->slaves = NEXT_SLAVE(q); 312 master->slaves = NEXT_SLAVE(q);
311 netif_wake_queue(dev); 313 netif_wake_queue(dev);
312 master->stats.tx_packets++; 314 master->stats.tx_packets++;
313 master->stats.tx_bytes += 315 master->stats.tx_bytes += length;
314 qdisc_pkt_len(skb);
315 return 0; 316 return 0;
316 } 317 }
317 __netif_tx_unlock(slave_txq); 318 __netif_tx_unlock(slave_txq);
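
In the sch_teql hunk, qdisc_pkt_len(skb) is copied into a local before ndo_start_xmit() because a driver that accepts the skb owns it and may free it immediately; reading the length afterwards for the tx_bytes update would be a use-after-free. The rule, restated in a userspace sketch:

#include <stdlib.h>

struct skb { unsigned int len; };

static unsigned long tx_bytes;

static int start_xmit(struct skb *skb)	/* consumes the buffer on success */
{
	free(skb);
	return 0;
}

static int send_one(struct skb *skb)
{
	unsigned int length = skb->len;	/* snapshot before handing it off */

	if (start_xmit(skb) == 0) {
		tx_bytes += length;	/* skb may already be freed here */
		return 0;
	}
	return -1;
}
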
diff --git a/net/socket.c b/net/socket.c
index 91d0c0254ffe..791d71a36a93 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -493,8 +493,7 @@ static struct socket *sock_alloc(void)
493 inode->i_uid = current_fsuid(); 493 inode->i_uid = current_fsuid();
494 inode->i_gid = current_fsgid(); 494 inode->i_gid = current_fsgid();
495 495
496 get_cpu_var(sockets_in_use)++; 496 percpu_add(sockets_in_use, 1);
497 put_cpu_var(sockets_in_use);
498 return sock; 497 return sock;
499} 498}
500 499
@@ -536,8 +535,7 @@ void sock_release(struct socket *sock)
536 if (sock->fasync_list) 535 if (sock->fasync_list)
537 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 536 printk(KERN_ERR "sock_release: fasync list not empty!\n");
538 537
539 get_cpu_var(sockets_in_use)--; 538 percpu_sub(sockets_in_use, 1);
540 put_cpu_var(sockets_in_use);
541 if (!sock->file) { 539 if (!sock->file) {
542 iput(SOCK_INODE(sock)); 540 iput(SOCK_INODE(sock));
543 return; 541 return;
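
The socket.c hunks replace the explicit get_cpu_var()/put_cpu_var() bracket with percpu_add()/percpu_sub(), which perform the per-CPU update as one operation (a single instruction on x86) with preemption handled internally. A loose userspace analogue, using a per-thread counter in place of a per-CPU one:

/* Per-CPU data becomes per-thread data in this analogue (C11). */
static _Thread_local long sockets_in_use;

static void inc_bracketed(void)
{
	long *p = &sockets_in_use;	/* "get": pin a stable reference */
	(*p)++;
	/* "put": re-enables preemption in the kernel; a no-op here */
}

static void inc_oneshot(void)
{
	sockets_in_use += 1;		/* percpu_add(sockets_in_use, 1) */
}
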
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 5592883e1e4a..443c161eb8bd 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA
17 17
18 If unsure, say N. 18 If unsure, say N.
19 19
20config SUNRPC_REGISTER_V4
21 bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
22 depends on SUNRPC && EXPERIMENTAL
23 default n
24 help
25 Sun added support for registering RPC services at an IPv6
26 address by creating two new versions of the rpcbind protocol
27 (RFC 1833).
28
29 This option enables support in the kernel RPC server for
30 registering kernel RPC services via version 4 of the rpcbind
31 protocol. If you enable this option, you must run a portmapper
32 daemon that supports rpcbind protocol version 4.
33
34 Serving NFS over IPv6 from knfsd (the kernel's NFS server)
35 requires that you enable this option and use a portmapper that
36 supports rpcbind version 4.
37
38 If unsure, say N to get traditional behavior (register kernel
39 RPC services using only rpcbind version 2). Distributions
40 using the legacy Linux portmapper daemon must say N here.
41
42config RPCSEC_GSS_KRB5 20config RPCSEC_GSS_KRB5
43 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" 21 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
44 depends on SUNRPC && EXPERIMENTAL 22 depends on SUNRPC && EXPERIMENTAL
@@ -69,7 +47,7 @@ config RPCSEC_GSS_SPKM3
69 select CRYPTO_CBC 47 select CRYPTO_CBC
70 help 48 help
71 Choose Y here to enable Secure RPC using the SPKM3 public key 49 Choose Y here to enable Secure RPC using the SPKM3 public key
72 GSS-API mechansim (RFC 2025). 50 GSS-API mechanism (RFC 2025).
73 51
74 Secure RPC calls with SPKM3 require an auxiliary userspace 52 Secure RPC calls with SPKM3 require an auxiliary userspace
75 daemon which may be found in the Linux nfs-utils package 53 daemon which may be found in the Linux nfs-utils package
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 836f15c0c4a3..5abab094441f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task)
1032 dprint_status(task); 1032 dprint_status(task);
1033 1033
1034 task->tk_status = 0; 1034 task->tk_status = 0;
1035 if (status >= 0) { 1035 if (status >= 0 || status == -EAGAIN) {
1036 clnt->cl_stats->netreconn++; 1036 clnt->cl_stats->netreconn++;
1037 task->tk_action = call_transmit; 1037 task->tk_action = call_transmit;
1038 return; 1038 return;
1039 } 1039 }
1040 1040
1041 /* Something failed: remote service port may have changed */
1042 rpc_force_rebind(clnt);
1043
1044 switch (status) { 1041 switch (status) {
1045 case -ENOTCONN:
1046 case -EAGAIN:
1047 task->tk_action = call_bind;
1048 if (!RPC_IS_SOFT(task))
1049 return;
1050 /* if soft mounted, test if we've timed out */ 1042 /* if soft mounted, test if we've timed out */
1051 case -ETIMEDOUT: 1043 case -ETIMEDOUT:
1052 task->tk_action = call_timeout; 1044 task->tk_action = call_timeout;
1053 return; 1045 break;
1046 default:
1047 rpc_exit(task, -EIO);
1054 } 1048 }
1055 rpc_exit(task, -EIO);
1056} 1049}
1057 1050
1058/* 1051/*
@@ -1105,14 +1098,26 @@ static void
1105call_transmit_status(struct rpc_task *task) 1098call_transmit_status(struct rpc_task *task)
1106{ 1099{
1107 task->tk_action = call_status; 1100 task->tk_action = call_status;
1108 /* 1101 switch (task->tk_status) {
1109 * Special case: if we've been waiting on the socket's write_space() 1102 case -EAGAIN:
1110 * callback, then don't call xprt_end_transmit(). 1103 break;
1111 */ 1104 default:
1112 if (task->tk_status == -EAGAIN) 1105 xprt_end_transmit(task);
1113 return; 1106 /*
1114 xprt_end_transmit(task); 1107 * Special cases: if we've been waiting on the
1115 rpc_task_force_reencode(task); 1108 * socket's write_space() callback, or if the
1109 * socket just returned a connection error,
1110 * then hold onto the transport lock.
1111 */
1112 case -ECONNREFUSED:
1113 case -ECONNRESET:
1114 case -ENOTCONN:
1115 case -EHOSTDOWN:
1116 case -EHOSTUNREACH:
1117 case -ENETUNREACH:
1118 case -EPIPE:
1119 rpc_task_force_reencode(task);
1120 }
1116} 1121}
1117 1122
1118/* 1123/*
@@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task)
1152 xprt_conditional_disconnect(task->tk_xprt, 1157 xprt_conditional_disconnect(task->tk_xprt,
1153 req->rq_connect_cookie); 1158 req->rq_connect_cookie);
1154 break; 1159 break;
1160 case -ECONNRESET:
1155 case -ECONNREFUSED: 1161 case -ECONNREFUSED:
1156 case -ENOTCONN:
1157 rpc_force_rebind(clnt); 1162 rpc_force_rebind(clnt);
1163 rpc_delay(task, 3*HZ);
1164 case -EPIPE:
1165 case -ENOTCONN:
1158 task->tk_action = call_bind; 1166 task->tk_action = call_bind;
1159 break; 1167 break;
1160 case -EAGAIN: 1168 case -EAGAIN:
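
In the reworked call_status(), -ECONNRESET and -ECONNREFUSED force a rebind plus a three-second delay and then deliberately fall through to the -EPIPE/-ENOTCONN arm, so all four errors end up at call_bind. Because that fallthrough is easy to misread, here is the control flow restated as a compact sketch:

#include <errno.h>

enum action { BIND, TIMEOUT, GIVE_UP };

static enum action next_action(int status, int *delay_secs)
{
	*delay_secs = 0;
	switch (status) {
	case -ECONNRESET:
	case -ECONNREFUSED:
		*delay_secs = 3;	/* rpc_delay(task, 3*HZ) */
		/* fall through: connection errors also re-bind */
	case -EPIPE:
	case -ENOTCONN:
		return BIND;		/* task->tk_action = call_bind */
	case -ETIMEDOUT:
		return TIMEOUT;		/* task->tk_action = call_timeout */
	default:
		return GIVE_UP;		/* the rpc_exit(task, -EIO) cases */
	}
}
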
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 03ae007641e4..beee6da33035 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -63,9 +63,16 @@ enum {
63 * r_owner 63 * r_owner
64 * 64 *
65 * The "owner" is allowed to unset a service in the rpcbind database. 65 * The "owner" is allowed to unset a service in the rpcbind database.
66 * We always use the following (arbitrary) fixed string. 66 *
67 * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a
68 * UID which it maps to a local user name via a password lookup.
69 * In all other cases it is ignored.
70 *
71 * For SET/UNSET requests, user space provides a value, even for
72 * network requests, and GETADDR uses an empty string. We follow
73 * those precedents here.
67 */ 74 */
68#define RPCB_OWNER_STRING "rpcb" 75#define RPCB_OWNER_STRING "0"
69#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) 76#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
70 77
71static void rpcb_getport_done(struct rpc_task *, void *); 78static void rpcb_getport_done(struct rpc_task *, void *);
@@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
124 .sin_port = htons(RPCBIND_PORT), 131 .sin_port = htons(RPCBIND_PORT),
125}; 132};
126 133
127static const struct sockaddr_in6 rpcb_in6addr_loopback = {
128 .sin6_family = AF_INET6,
129 .sin6_addr = IN6ADDR_LOOPBACK_INIT,
130 .sin6_port = htons(RPCBIND_PORT),
131};
132
133static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, 134static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
134 size_t addrlen, u32 version) 135 size_t addrlen, u32 version)
135{ 136{
@@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
176 return rpc_create(&args); 177 return rpc_create(&args);
177} 178}
178 179
179static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, 180static int rpcb_register_call(const u32 version, struct rpc_message *msg)
180 u32 version, struct rpc_message *msg)
181{ 181{
182 struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback;
183 size_t addrlen = sizeof(rpcb_inaddr_loopback);
182 struct rpc_clnt *rpcb_clnt; 184 struct rpc_clnt *rpcb_clnt;
183 int result, error = 0; 185 int result, error = 0;
184 186
@@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
192 error = PTR_ERR(rpcb_clnt); 194 error = PTR_ERR(rpcb_clnt);
193 195
194 if (error < 0) { 196 if (error < 0) {
195 printk(KERN_WARNING "RPC: failed to contact local rpcbind " 197 dprintk("RPC: failed to contact local rpcbind "
196 "server (errno %d).\n", -error); 198 "server (errno %d).\n", -error);
197 return error; 199 return error;
198 } 200 }
@@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
254 if (port) 256 if (port)
255 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; 257 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
256 258
257 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 259 return rpcb_register_call(RPCBVERS_2, &msg);
258 sizeof(rpcb_inaddr_loopback),
259 RPCBVERS_2, &msg);
260} 260}
261 261
262/* 262/*
263 * Fill in AF_INET family-specific arguments to register 263 * Fill in AF_INET family-specific arguments to register
264 */ 264 */
265static int rpcb_register_netid4(struct sockaddr_in *address_to_register, 265static int rpcb_register_inet4(const struct sockaddr *sap,
266 struct rpc_message *msg) 266 struct rpc_message *msg)
267{ 267{
268 const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
268 struct rpcbind_args *map = msg->rpc_argp; 269 struct rpcbind_args *map = msg->rpc_argp;
269 unsigned short port = ntohs(address_to_register->sin_port); 270 unsigned short port = ntohs(sin->sin_port);
270 char buf[32]; 271 char buf[32];
271 272
272 /* Construct AF_INET universal address */ 273 /* Construct AF_INET universal address */
273 snprintf(buf, sizeof(buf), "%pI4.%u.%u", 274 snprintf(buf, sizeof(buf), "%pI4.%u.%u",
274 &address_to_register->sin_addr.s_addr, 275 &sin->sin_addr.s_addr, port >> 8, port & 0xff);
275 port >> 8, port & 0xff);
276 map->r_addr = buf; 276 map->r_addr = buf;
277 277
278 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " 278 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
284 if (port) 284 if (port)
285 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; 285 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
286 286
287 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 287 return rpcb_register_call(RPCBVERS_4, msg);
288 sizeof(rpcb_inaddr_loopback),
289 RPCBVERS_4, msg);
290} 288}
291 289
292/* 290/*
293 * Fill in AF_INET6 family-specific arguments to register 291 * Fill in AF_INET6 family-specific arguments to register
294 */ 292 */
295static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, 293static int rpcb_register_inet6(const struct sockaddr *sap,
296 struct rpc_message *msg) 294 struct rpc_message *msg)
297{ 295{
296 const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
298 struct rpcbind_args *map = msg->rpc_argp; 297 struct rpcbind_args *map = msg->rpc_argp;
299 unsigned short port = ntohs(address_to_register->sin6_port); 298 unsigned short port = ntohs(sin6->sin6_port);
300 char buf[64]; 299 char buf[64];
301 300
302 /* Construct AF_INET6 universal address */ 301 /* Construct AF_INET6 universal address */
303 if (ipv6_addr_any(&address_to_register->sin6_addr)) 302 if (ipv6_addr_any(&sin6->sin6_addr))
304 snprintf(buf, sizeof(buf), "::.%u.%u", 303 snprintf(buf, sizeof(buf), "::.%u.%u",
305 port >> 8, port & 0xff); 304 port >> 8, port & 0xff);
306 else 305 else
307 snprintf(buf, sizeof(buf), "%pI6.%u.%u", 306 snprintf(buf, sizeof(buf), "%pI6.%u.%u",
308 &address_to_register->sin6_addr, 307 &sin6->sin6_addr, port >> 8, port & 0xff);
309 port >> 8, port & 0xff);
310 map->r_addr = buf; 308 map->r_addr = buf;
311 309
312 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " 310 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
318 if (port) 316 if (port)
319 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; 317 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
320 318
321 return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, 319 return rpcb_register_call(RPCBVERS_4, msg);
322 sizeof(rpcb_in6addr_loopback), 320}
323 RPCBVERS_4, msg); 321
322static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
323{
324 struct rpcbind_args *map = msg->rpc_argp;
325
326 dprintk("RPC: unregistering [%u, %u, '%s'] with "
327 "local rpcbind\n",
328 map->r_prog, map->r_vers, map->r_netid);
329
330 map->r_addr = "";
331 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
332
333 return rpcb_register_call(RPCBVERS_4, msg);
324} 334}
325 335
326/** 336/**
@@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
340 * invoke this function once for each [program, version, address, 350 * invoke this function once for each [program, version, address,
341 * netid] tuple they wish to advertise. 351 * netid] tuple they wish to advertise.
342 * 352 *
343 * Callers may also unregister RPC services that are no longer 353 * Callers may also unregister RPC services that are registered at a
344 * available by setting the port number in the passed-in address 354 * specific address by setting the port number in @address to zero.
345 * to zero. Callers pass a netid of "" to unregister all 355 * They may unregister all registered protocol families at once for
346 * transport netids associated with [program, version, address]. 356 * a service by passing a NULL @address argument. If @netid is ""
357 * then all netids for [program, version, address] are unregistered.
347 * 358 *
348 * This function uses rpcbind protocol version 4 to contact the 359 * This function uses rpcbind protocol version 4 to contact the
349 * local rpcbind daemon. The local rpcbind daemon must support 360 * local rpcbind daemon. The local rpcbind daemon must support
@@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version,
378 .rpc_argp = &map, 389 .rpc_argp = &map,
379 }; 390 };
380 391
392 if (address == NULL)
393 return rpcb_unregister_all_protofamilies(&msg);
394
381 switch (address->sa_family) { 395 switch (address->sa_family) {
382 case AF_INET: 396 case AF_INET:
383 return rpcb_register_netid4((struct sockaddr_in *)address, 397 return rpcb_register_inet4(address, &msg);
384 &msg);
385 case AF_INET6: 398 case AF_INET6:
386 return rpcb_register_netid6((struct sockaddr_in6 *)address, 399 return rpcb_register_inet6(address, &msg);
387 &msg);
388 } 400 }
389 401
390 return -EAFNOSUPPORT; 402 return -EAFNOSUPPORT;
@@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task)
579 map->r_xprt = xprt_get(xprt); 591 map->r_xprt = xprt_get(xprt);
580 map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); 592 map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
581 map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); 593 map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
582 map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ 594 map->r_owner = "";
583 map->r_status = -EIO; 595 map->r_status = -EIO;
584 596
585 child = rpcb_call_async(rpcb_clnt, map, proc); 597 child = rpcb_call_async(rpcb_clnt, map, proc);
@@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
703 *portp = 0; 715 *portp = 0;
704 addr_len = ntohl(*p++); 716 addr_len = ntohl(*p++);
705 717
718 if (addr_len == 0) {
719 dprintk("RPC: rpcb_decode_getaddr: "
720 "service is not registered\n");
721 return 0;
722 }
723
706 /* 724 /*
707 * Simple sanity check. The smallest possible universal 725 * Simple sanity check.
708 * address is an IPv4 address string containing 11 bytes.
709 */ 726 */
710 if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN) 727 if (addr_len > RPCBIND_MAXUADDRLEN)
711 goto out_err; 728 goto out_err;
712 729
713 /* 730 /*
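
The r_addr strings built above are RFC 1833 "universal addresses": the presentation-form IP address followed by the port split into two decimal octets, high byte first, exactly what the "%pI4.%u.%u" and "%pI6.%u.%u" formats produce. A standalone AF_INET version:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>

static int uaddr4(const struct sockaddr_in *sin, char *buf, size_t len)
{
	char ip[INET_ADDRSTRLEN];
	unsigned short port = ntohs(sin->sin_port);

	if (!inet_ntop(AF_INET, &sin->sin_addr, ip, sizeof(ip)))
		return -1;
	return snprintf(buf, len, "%s.%u.%u", ip, port >> 8, port & 0xff);
}

int main(void)
{
	struct sockaddr_in sin = { .sin_family = AF_INET,
				   .sin_port = htons(2049) };
	char buf[32];

	inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);
	uaddr4(&sin, buf, sizeof(buf));
	printf("%s\n", buf);	/* "192.0.2.1.8.1"; 2049 = 8*256 + 1 */
	return 0;
}
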
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bb507e2bb94d..8847add6ca16 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -317,8 +317,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
317 } 317 }
318 case SVC_POOL_PERNODE: 318 case SVC_POOL_PERNODE:
319 { 319 {
320 node_to_cpumask_ptr(nodecpumask, node); 320 set_cpus_allowed_ptr(task, cpumask_of_node(node));
321 set_cpus_allowed_ptr(task, nodecpumask);
322 break; 321 break;
323 } 322 }
324 } 323 }
@@ -359,7 +358,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
359 */ 358 */
360static struct svc_serv * 359static struct svc_serv *
361__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 360__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
362 sa_family_t family, void (*shutdown)(struct svc_serv *serv)) 361 void (*shutdown)(struct svc_serv *serv))
363{ 362{
364 struct svc_serv *serv; 363 struct svc_serv *serv;
365 unsigned int vers; 364 unsigned int vers;
@@ -368,7 +367,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
368 367
369 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) 368 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
370 return NULL; 369 return NULL;
371 serv->sv_family = family;
372 serv->sv_name = prog->pg_name; 370 serv->sv_name = prog->pg_name;
373 serv->sv_program = prog; 371 serv->sv_program = prog;
374 serv->sv_nrthreads = 1; 372 serv->sv_nrthreads = 1;
@@ -427,21 +425,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
427 425
428struct svc_serv * 426struct svc_serv *
429svc_create(struct svc_program *prog, unsigned int bufsize, 427svc_create(struct svc_program *prog, unsigned int bufsize,
430 sa_family_t family, void (*shutdown)(struct svc_serv *serv)) 428 void (*shutdown)(struct svc_serv *serv))
431{ 429{
432 return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); 430 return __svc_create(prog, bufsize, /*npools*/1, shutdown);
433} 431}
434EXPORT_SYMBOL_GPL(svc_create); 432EXPORT_SYMBOL_GPL(svc_create);
435 433
436struct svc_serv * 434struct svc_serv *
437svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 435svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
438 sa_family_t family, void (*shutdown)(struct svc_serv *serv), 436 void (*shutdown)(struct svc_serv *serv),
439 svc_thread_fn func, struct module *mod) 437 svc_thread_fn func, struct module *mod)
440{ 438{
441 struct svc_serv *serv; 439 struct svc_serv *serv;
442 unsigned int npools = svc_pool_map_get(); 440 unsigned int npools = svc_pool_map_get();
443 441
444 serv = __svc_create(prog, bufsize, npools, family, shutdown); 442 serv = __svc_create(prog, bufsize, npools, shutdown);
445 443
446 if (serv != NULL) { 444 if (serv != NULL) {
447 serv->sv_function = func; 445 serv->sv_function = func;
@@ -719,8 +717,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
719} 717}
720EXPORT_SYMBOL_GPL(svc_exit_thread); 718EXPORT_SYMBOL_GPL(svc_exit_thread);
721 719
722#ifdef CONFIG_SUNRPC_REGISTER_V4
723
724/* 720/*
725 * Register an "inet" protocol family netid with the local 721 * Register an "inet" protocol family netid with the local
726 * rpcbind daemon via an rpcbind v4 SET request. 722 * rpcbind daemon via an rpcbind v4 SET request.
@@ -735,12 +731,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
735 const unsigned short protocol, 731 const unsigned short protocol,
736 const unsigned short port) 732 const unsigned short port)
737{ 733{
738 struct sockaddr_in sin = { 734 const struct sockaddr_in sin = {
739 .sin_family = AF_INET, 735 .sin_family = AF_INET,
740 .sin_addr.s_addr = htonl(INADDR_ANY), 736 .sin_addr.s_addr = htonl(INADDR_ANY),
741 .sin_port = htons(port), 737 .sin_port = htons(port),
742 }; 738 };
743 char *netid; 739 const char *netid;
740 int error;
744 741
745 switch (protocol) { 742 switch (protocol) {
746 case IPPROTO_UDP: 743 case IPPROTO_UDP:
@@ -750,13 +747,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
750 netid = RPCBIND_NETID_TCP; 747 netid = RPCBIND_NETID_TCP;
751 break; 748 break;
752 default: 749 default:
753 return -EPROTONOSUPPORT; 750 return -ENOPROTOOPT;
754 } 751 }
755 752
756 return rpcb_v4_register(program, version, 753 error = rpcb_v4_register(program, version,
757 (struct sockaddr *)&sin, netid); 754 (const struct sockaddr *)&sin, netid);
755
756 /*
757 * User space didn't support rpcbind v4, so retry this
758 * registration request with the legacy rpcbind v2 protocol.
759 */
760 if (error == -EPROTONOSUPPORT)
761 error = rpcb_register(program, version, protocol, port);
762
763 return error;
758} 764}
759 765
766#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
760/* 767/*
761 * Register an "inet6" protocol family netid with the local 768 * Register an "inet6" protocol family netid with the local
762 * rpcbind daemon via an rpcbind v4 SET request. 769 * rpcbind daemon via an rpcbind v4 SET request.
@@ -771,12 +778,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
771 const unsigned short protocol, 778 const unsigned short protocol,
772 const unsigned short port) 779 const unsigned short port)
773{ 780{
774 struct sockaddr_in6 sin6 = { 781 const struct sockaddr_in6 sin6 = {
775 .sin6_family = AF_INET6, 782 .sin6_family = AF_INET6,
776 .sin6_addr = IN6ADDR_ANY_INIT, 783 .sin6_addr = IN6ADDR_ANY_INIT,
777 .sin6_port = htons(port), 784 .sin6_port = htons(port),
778 }; 785 };
779 char *netid; 786 const char *netid;
787 int error;
780 788
781 switch (protocol) { 789 switch (protocol) {
782 case IPPROTO_UDP: 790 case IPPROTO_UDP:
@@ -786,12 +794,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
786 netid = RPCBIND_NETID_TCP6; 794 netid = RPCBIND_NETID_TCP6;
787 break; 795 break;
788 default: 796 default:
789 return -EPROTONOSUPPORT; 797 return -ENOPROTOOPT;
790 } 798 }
791 799
792 return rpcb_v4_register(program, version, 800 error = rpcb_v4_register(program, version,
793 (struct sockaddr *)&sin6, netid); 801 (const struct sockaddr *)&sin6, netid);
802
803 /*
804 * User space didn't support rpcbind version 4, so we won't
805 * use a PF_INET6 listener.
806 */
807 if (error == -EPROTONOSUPPORT)
808 error = -EAFNOSUPPORT;
809
810 return error;
794} 811}
812#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
795 813
796/* 814/*
797 * Register a kernel RPC service via rpcbind version 4. 815 * Register a kernel RPC service via rpcbind version 4.
@@ -799,69 +817,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
799 * Returns zero on success; a negative errno value is returned 817 * Returns zero on success; a negative errno value is returned
800 * if any error occurs. 818 * if any error occurs.
801 */ 819 */
802static int __svc_register(const u32 program, const u32 version, 820static int __svc_register(const char *progname,
803 const sa_family_t family, 821 const u32 program, const u32 version,
822 const int family,
804 const unsigned short protocol, 823 const unsigned short protocol,
805 const unsigned short port) 824 const unsigned short port)
806{ 825{
807 int error; 826 int error = -EAFNOSUPPORT;
808 827
809 switch (family) { 828 switch (family) {
810 case AF_INET: 829 case PF_INET:
811 return __svc_rpcb_register4(program, version, 830 error = __svc_rpcb_register4(program, version,
812 protocol, port); 831 protocol, port);
813 case AF_INET6: 832 break;
833#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
834 case PF_INET6:
814 error = __svc_rpcb_register6(program, version, 835 error = __svc_rpcb_register6(program, version,
815 protocol, port); 836 protocol, port);
816 if (error < 0) 837#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
817 return error;
818
819 /*
820 * Work around bug in some versions of Linux rpcbind
821 * which don't allow registration of both inet and
822 * inet6 netids.
823 *
824 * Error return ignored for now.
825 */
826 __svc_rpcb_register4(program, version,
827 protocol, port);
828 return 0;
829 } 838 }
830 839
831 return -EAFNOSUPPORT; 840 if (error < 0)
832} 841 printk(KERN_WARNING "svc: failed to register %sv%u RPC "
833 842 "service (errno %d).\n", progname, version, -error);
834#else /* CONFIG_SUNRPC_REGISTER_V4 */ 843 return error;
835
836/*
837 * Register a kernel RPC service via rpcbind version 2.
838 *
839 * Returns zero on success; a negative errno value is returned
840 * if any error occurs.
841 */
842static int __svc_register(const u32 program, const u32 version,
843 sa_family_t family,
844 const unsigned short protocol,
845 const unsigned short port)
846{
847 if (family != AF_INET)
848 return -EAFNOSUPPORT;
849
850 return rpcb_register(program, version, protocol, port);
851} 844}
852 845
853#endif /* CONFIG_SUNRPC_REGISTER_V4 */
854
855/** 846/**
856 * svc_register - register an RPC service with the local portmapper 847 * svc_register - register an RPC service with the local portmapper
857 * @serv: svc_serv struct for the service to register 848 * @serv: svc_serv struct for the service to register
849 * @family: protocol family of service's listener socket
858 * @proto: transport protocol number to advertise 850 * @proto: transport protocol number to advertise
859 * @port: port to advertise 851 * @port: port to advertise
860 * 852 *
861 * Service is registered for any address in serv's address family 853 * Service is registered for any address in the passed-in protocol family
862 */ 854 */
863int svc_register(const struct svc_serv *serv, const unsigned short proto, 855int svc_register(const struct svc_serv *serv, const int family,
864 const unsigned short port) 856 const unsigned short proto, const unsigned short port)
865{ 857{
866 struct svc_program *progp; 858 struct svc_program *progp;
867 unsigned int i; 859 unsigned int i;
@@ -879,15 +871,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
879 i, 871 i,
880 proto == IPPROTO_UDP? "udp" : "tcp", 872 proto == IPPROTO_UDP? "udp" : "tcp",
881 port, 873 port,
882 serv->sv_family, 874 family,
883 progp->pg_vers[i]->vs_hidden? 875 progp->pg_vers[i]->vs_hidden?
884 " (but not telling portmap)" : ""); 876 " (but not telling portmap)" : "");
885 877
886 if (progp->pg_vers[i]->vs_hidden) 878 if (progp->pg_vers[i]->vs_hidden)
887 continue; 879 continue;
888 880
889 error = __svc_register(progp->pg_prog, i, 881 error = __svc_register(progp->pg_name, progp->pg_prog,
890 serv->sv_family, proto, port); 882 i, family, proto, port);
891 if (error < 0) 883 if (error < 0)
892 break; 884 break;
893 } 885 }
@@ -896,38 +888,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
896 return error; 888 return error;
897} 889}
898 890
899#ifdef CONFIG_SUNRPC_REGISTER_V4 891/*
900 892 * If user space is running rpcbind, it should take the v4 UNSET
893 * and clear everything for this [program, version]. If user space
894 * is running portmap, it will reject the v4 UNSET, but won't have
895 * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
896 * in this case to clear all existing entries for [program, version].
897 */
901static void __svc_unregister(const u32 program, const u32 version, 898static void __svc_unregister(const u32 program, const u32 version,
902 const char *progname) 899 const char *progname)
903{ 900{
904 struct sockaddr_in6 sin6 = {
905 .sin6_family = AF_INET6,
906 .sin6_addr = IN6ADDR_ANY_INIT,
907 .sin6_port = 0,
908 };
909 int error; 901 int error;
910 902
911 error = rpcb_v4_register(program, version, 903 error = rpcb_v4_register(program, version, NULL, "");
912 (struct sockaddr *)&sin6, "");
913 dprintk("svc: %s(%sv%u), error %d\n",
914 __func__, progname, version, error);
915}
916
917#else /* CONFIG_SUNRPC_REGISTER_V4 */
918 904
919static void __svc_unregister(const u32 program, const u32 version, 905 /*
920 const char *progname) 906 * User space didn't support rpcbind v4, so retry this
921{ 907 * request with the legacy rpcbind v2 protocol.
922 int error; 908 */
909 if (error == -EPROTONOSUPPORT)
910 error = rpcb_register(program, version, 0, 0);
923 911
924 error = rpcb_register(program, version, 0, 0);
925 dprintk("svc: %s(%sv%u), error %d\n", 912 dprintk("svc: %s(%sv%u), error %d\n",
926 __func__, progname, version, error); 913 __func__, progname, version, error);
927} 914}
928 915
929#endif /* CONFIG_SUNRPC_REGISTER_V4 */
930
931/* 916/*
932 * All netids, bind addresses and ports registered for [program, version] 917 * All netids, bind addresses and ports registered for [program, version]
933 * are removed from the local rpcbind database (if the service is not 918 * are removed from the local rpcbind database (if the service is not
@@ -1023,6 +1008,8 @@ svc_process(struct svc_rqst *rqstp)
1023 rqstp->rq_res.tail[0].iov_len = 0; 1008 rqstp->rq_res.tail[0].iov_len = 0;
1024 /* Will be turned off only in gss privacy case: */ 1009 /* Will be turned off only in gss privacy case: */
1025 rqstp->rq_splice_ok = 1; 1010 rqstp->rq_splice_ok = 1;
1011 /* Will be turned off only when NFSv4 Sessions are used */
1012 rqstp->rq_usedeferral = 1;
1026 1013
1027 /* Setup reply header */ 1014 /* Setup reply header */
1028 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); 1015 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
@@ -1093,7 +1080,6 @@ svc_process(struct svc_rqst *rqstp)
1093 procp = versp->vs_proc + proc; 1080 procp = versp->vs_proc + proc;
1094 if (proc >= versp->vs_nproc || !procp->pc_func) 1081 if (proc >= versp->vs_nproc || !procp->pc_func)
1095 goto err_bad_proc; 1082 goto err_bad_proc;
1096 rqstp->rq_server = serv;
1097 rqstp->rq_procinfo = procp; 1083 rqstp->rq_procinfo = procp;
1098 1084
1099 /* Syntactic check complete */ 1085 /* Syntactic check complete */
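
With CONFIG_SUNRPC_REGISTER_V4 gone, svc.c always issues an rpcbind v4 request first and treats -EPROTONOSUPPORT as "user space only runs portmap": PF_INET registrations are retried via the legacy v2 protocol, while PF_INET6 gives up with -EAFNOSUPPORT because portmap cannot carry inet6 entries. The fallback shape, sketched with stand-in calls:

#include <errno.h>

static int rpcb_v4_set(void) { return -EPROTONOSUPPORT; }	/* stand-in */
static int rpcb_v2_set(void) { return 0; }			/* stand-in */

static int register_service(int ipv6)
{
	int error = rpcb_v4_set();

	/* Degrade gracefully when only the legacy portmapper answers. */
	if (error == -EPROTONOSUPPORT)
		error = ipv6 ? -EAFNOSUPPORT : rpcb_v2_set();
	return error;
}
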
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e588df5d6b34..c200d92e57e4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -14,6 +14,8 @@
14 14
15#define RPCDBG_FACILITY RPCDBG_SVCXPRT 15#define RPCDBG_FACILITY RPCDBG_SVCXPRT
16 16
17#define SVC_MAX_WAKING 5
18
17static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); 19static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
18static int svc_deferred_recv(struct svc_rqst *rqstp); 20static int svc_deferred_recv(struct svc_rqst *rqstp);
19static struct cache_deferred_req *svc_defer(struct cache_req *req); 21static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -161,7 +163,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init);
161 163
162static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, 164static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
163 struct svc_serv *serv, 165 struct svc_serv *serv,
164 unsigned short port, int flags) 166 const int family,
167 const unsigned short port,
168 int flags)
165{ 169{
166 struct sockaddr_in sin = { 170 struct sockaddr_in sin = {
167 .sin_family = AF_INET, 171 .sin_family = AF_INET,
@@ -176,12 +180,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
176 struct sockaddr *sap; 180 struct sockaddr *sap;
177 size_t len; 181 size_t len;
178 182
179 switch (serv->sv_family) { 183 switch (family) {
180 case AF_INET: 184 case PF_INET:
181 sap = (struct sockaddr *)&sin; 185 sap = (struct sockaddr *)&sin;
182 len = sizeof(sin); 186 len = sizeof(sin);
183 break; 187 break;
184 case AF_INET6: 188 case PF_INET6:
185 sap = (struct sockaddr *)&sin6; 189 sap = (struct sockaddr *)&sin6;
186 len = sizeof(sin6); 190 len = sizeof(sin6);
187 break; 191 break;
@@ -192,7 +196,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
192 return xcl->xcl_ops->xpo_create(serv, sap, len, flags); 196 return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
193} 197}
194 198
195int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, 199int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
200 const int family, const unsigned short port,
196 int flags) 201 int flags)
197{ 202{
198 struct svc_xprt_class *xcl; 203 struct svc_xprt_class *xcl;
@@ -209,7 +214,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
209 goto err; 214 goto err;
210 215
211 spin_unlock(&svc_xprt_class_lock); 216 spin_unlock(&svc_xprt_class_lock);
212 newxprt = __svc_xpo_create(xcl, serv, port, flags); 217 newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
213 if (IS_ERR(newxprt)) { 218 if (IS_ERR(newxprt)) {
214 module_put(xcl->xcl_owner); 219 module_put(xcl->xcl_owner);
215 return PTR_ERR(newxprt); 220 return PTR_ERR(newxprt);
@@ -298,6 +303,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
298 struct svc_pool *pool; 303 struct svc_pool *pool;
299 struct svc_rqst *rqstp; 304 struct svc_rqst *rqstp;
300 int cpu; 305 int cpu;
306 int thread_avail;
301 307
302 if (!(xprt->xpt_flags & 308 if (!(xprt->xpt_flags &
303 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) 309 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
@@ -309,18 +315,14 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
309 315
310 spin_lock_bh(&pool->sp_lock); 316 spin_lock_bh(&pool->sp_lock);
311 317
312 if (!list_empty(&pool->sp_threads) &&
313 !list_empty(&pool->sp_sockets))
314 printk(KERN_ERR
315 "svc_xprt_enqueue: "
316 "threads and transports both waiting??\n");
317
318 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { 318 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
319 /* Don't enqueue dead transports */ 319 /* Don't enqueue dead transports */
320 dprintk("svc: transport %p is dead, not enqueued\n", xprt); 320 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
321 goto out_unlock; 321 goto out_unlock;
322 } 322 }
323 323
324 pool->sp_stats.packets++;
325
324 /* Mark transport as busy. It will remain in this state until 326 /* Mark transport as busy. It will remain in this state until
325 * the provider calls svc_xprt_received. We update XPT_BUSY 327 * the provider calls svc_xprt_received. We update XPT_BUSY
326 * atomically because it also guards against trying to enqueue 328 * atomically because it also guards against trying to enqueue
@@ -353,7 +355,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
353 } 355 }
354 356
355 process: 357 process:
356 if (!list_empty(&pool->sp_threads)) { 358 /* Work out whether threads are available */
359 thread_avail = !list_empty(&pool->sp_threads); /* threads are asleep */
360 if (pool->sp_nwaking >= SVC_MAX_WAKING) {
361 /* too many threads are runnable and trying to wake up */
362 thread_avail = 0;
363 pool->sp_stats.overloads_avoided++;
364 }
365
366 if (thread_avail) {
357 rqstp = list_entry(pool->sp_threads.next, 367 rqstp = list_entry(pool->sp_threads.next,
358 struct svc_rqst, 368 struct svc_rqst,
359 rq_list); 369 rq_list);
@@ -368,11 +378,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
368 svc_xprt_get(xprt); 378 svc_xprt_get(xprt);
369 rqstp->rq_reserved = serv->sv_max_mesg; 379 rqstp->rq_reserved = serv->sv_max_mesg;
370 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 380 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
381 rqstp->rq_waking = 1;
382 pool->sp_nwaking++;
383 pool->sp_stats.threads_woken++;
371 BUG_ON(xprt->xpt_pool != pool); 384 BUG_ON(xprt->xpt_pool != pool);
372 wake_up(&rqstp->rq_wait); 385 wake_up(&rqstp->rq_wait);
373 } else { 386 } else {
374 dprintk("svc: transport %p put into queue\n", xprt); 387 dprintk("svc: transport %p put into queue\n", xprt);
375 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 388 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
389 pool->sp_stats.sockets_queued++;
376 BUG_ON(xprt->xpt_pool != pool); 390 BUG_ON(xprt->xpt_pool != pool);
377 } 391 }
378 392
@@ -585,6 +599,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
585 int pages; 599 int pages;
586 struct xdr_buf *arg; 600 struct xdr_buf *arg;
587 DECLARE_WAITQUEUE(wait, current); 601 DECLARE_WAITQUEUE(wait, current);
602 long time_left;
588 603
589 dprintk("svc: server %p waiting for data (to = %ld)\n", 604 dprintk("svc: server %p waiting for data (to = %ld)\n",
590 rqstp, timeout); 605 rqstp, timeout);
@@ -633,6 +648,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
633 return -EINTR; 648 return -EINTR;
634 649
635 spin_lock_bh(&pool->sp_lock); 650 spin_lock_bh(&pool->sp_lock);
651 if (rqstp->rq_waking) {
652 rqstp->rq_waking = 0;
653 pool->sp_nwaking--;
654 BUG_ON(pool->sp_nwaking < 0);
655 }
636 xprt = svc_xprt_dequeue(pool); 656 xprt = svc_xprt_dequeue(pool);
637 if (xprt) { 657 if (xprt) {
638 rqstp->rq_xprt = xprt; 658 rqstp->rq_xprt = xprt;
@@ -665,12 +685,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
665 add_wait_queue(&rqstp->rq_wait, &wait); 685 add_wait_queue(&rqstp->rq_wait, &wait);
666 spin_unlock_bh(&pool->sp_lock); 686 spin_unlock_bh(&pool->sp_lock);
667 687
668 schedule_timeout(timeout); 688 time_left = schedule_timeout(timeout);
669 689
670 try_to_freeze(); 690 try_to_freeze();
671 691
672 spin_lock_bh(&pool->sp_lock); 692 spin_lock_bh(&pool->sp_lock);
673 remove_wait_queue(&rqstp->rq_wait, &wait); 693 remove_wait_queue(&rqstp->rq_wait, &wait);
694 if (!time_left)
695 pool->sp_stats.threads_timedout++;
674 696
675 xprt = rqstp->rq_xprt; 697 xprt = rqstp->rq_xprt;
676 if (!xprt) { 698 if (!xprt) {
@@ -955,7 +977,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
955 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); 977 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
956 struct svc_deferred_req *dr; 978 struct svc_deferred_req *dr;
957 979
958 if (rqstp->rq_arg.page_len) 980 if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral)
959 return NULL; /* if more than a page, give up FIXME */ 981 return NULL; /* if more than a page, give up FIXME */
960 if (rqstp->rq_deferred) { 982 if (rqstp->rq_deferred) {
961 dr = rqstp->rq_deferred; 983 dr = rqstp->rq_deferred;
@@ -1033,7 +1055,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
1033 return dr; 1055 return dr;
1034} 1056}
1035 1057
1036/* 1058/**
1059 * svc_find_xprt - find an RPC transport instance
1060 * @serv: pointer to svc_serv to search
1061 * @xcl_name: C string containing transport's class name
1062 * @af: Address family of transport's local address
1063 * @port: transport's IP port number
1064 *
1037 * Return the transport instance pointer for the endpoint accepting 1065 * Return the transport instance pointer for the endpoint accepting
1038 * connections/peer traffic from the specified transport class, 1066 * connections/peer traffic from the specified transport class,
1039 * address family and port. 1067 * address family and port.
@@ -1042,14 +1070,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
1042 * wild-card, and will result in matching the first transport in the 1070 * wild-card, and will result in matching the first transport in the
1043 * service's list that has a matching class name. 1071 * service's list that has a matching class name.
1044 */ 1072 */
1045struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, 1073struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
1046 int af, int port) 1074 const sa_family_t af, const unsigned short port)
1047{ 1075{
1048 struct svc_xprt *xprt; 1076 struct svc_xprt *xprt;
1049 struct svc_xprt *found = NULL; 1077 struct svc_xprt *found = NULL;
1050 1078
1051 /* Sanity check the args */ 1079 /* Sanity check the args */
1052 if (!serv || !xcl_name) 1080 if (serv == NULL || xcl_name == NULL)
1053 return found; 1081 return found;
1054 1082
1055 spin_lock_bh(&serv->sv_lock); 1083 spin_lock_bh(&serv->sv_lock);
@@ -1058,7 +1086,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
1058 continue; 1086 continue;
1059 if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) 1087 if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
1060 continue; 1088 continue;
1061 if (port && port != svc_xprt_local_port(xprt)) 1089 if (port != 0 && port != svc_xprt_local_port(xprt))
1062 continue; 1090 continue;
1063 found = xprt; 1091 found = xprt;
1064 svc_xprt_get(xprt); 1092 svc_xprt_get(xprt);
@@ -1103,3 +1131,93 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
1103 return totlen; 1131 return totlen;
1104} 1132}
1105EXPORT_SYMBOL_GPL(svc_xprt_names); 1133EXPORT_SYMBOL_GPL(svc_xprt_names);
1134
1135
1136/*----------------------------------------------------------------------------*/
1137
1138static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
1139{
1140 unsigned int pidx = (unsigned int)*pos;
1141 struct svc_serv *serv = m->private;
1142
1143 dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
1144
1145 lock_kernel();
1146 /* bump up the pseudo refcount while traversing */
1147 svc_get(serv);
1148 unlock_kernel();
1149
1150 if (!pidx)
1151 return SEQ_START_TOKEN;
1152 return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
1153}
1154
1155static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
1156{
1157 struct svc_pool *pool = p;
1158 struct svc_serv *serv = m->private;
1159
1160 dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
1161
1162 if (p == SEQ_START_TOKEN) {
1163 pool = &serv->sv_pools[0];
1164 } else {
1165 unsigned int pidx = (pool - &serv->sv_pools[0]);
1166 if (pidx < serv->sv_nrpools-1)
1167 pool = &serv->sv_pools[pidx+1];
1168 else
1169 pool = NULL;
1170 }
1171 ++*pos;
1172 return pool;
1173}
1174
1175static void svc_pool_stats_stop(struct seq_file *m, void *p)
1176{
1177 struct svc_serv *serv = m->private;
1178
1179 lock_kernel();
1180 /* this function really, really should have been called svc_put() */
1181 svc_destroy(serv);
1182 unlock_kernel();
1183}
1184
1185static int svc_pool_stats_show(struct seq_file *m, void *p)
1186{
1187 struct svc_pool *pool = p;
1188
1189 if (p == SEQ_START_TOKEN) {
1190 seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
1191 return 0;
1192 }
1193
1194 seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
1195 pool->sp_id,
1196 pool->sp_stats.packets,
1197 pool->sp_stats.sockets_queued,
1198 pool->sp_stats.threads_woken,
1199 pool->sp_stats.overloads_avoided,
1200 pool->sp_stats.threads_timedout);
1201
1202 return 0;
1203}
1204
1205static const struct seq_operations svc_pool_stats_seq_ops = {
1206 .start = svc_pool_stats_start,
1207 .next = svc_pool_stats_next,
1208 .stop = svc_pool_stats_stop,
1209 .show = svc_pool_stats_show,
1210};
1211
1212int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
1213{
1214 int err;
1215
1216 err = seq_open(file, &svc_pool_stats_seq_ops);
1217 if (!err)
1218 ((struct seq_file *) file->private_data)->private = serv;
1219 return err;
1220}
1221EXPORT_SYMBOL(svc_pool_stats_open);
1222
1223/*----------------------------------------------------------------------------*/
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5763e6460fea..9d504234af4a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1110 struct svc_sock *svsk; 1110 struct svc_sock *svsk;
1111 struct sock *inet; 1111 struct sock *inet;
1112 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1112 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1113 int val;
1114 1113
1115 dprintk("svc: svc_setup_socket %p\n", sock); 1114 dprintk("svc: svc_setup_socket %p\n", sock);
1116 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1115 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1122,7 +1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1122 1121
1123 /* Register socket with portmapper */ 1122 /* Register socket with portmapper */
1124 if (*errp >= 0 && pmap_register) 1123 if (*errp >= 0 && pmap_register)
1125 *errp = svc_register(serv, inet->sk_protocol, 1124 *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
1126 ntohs(inet_sk(inet)->sport)); 1125 ntohs(inet_sk(inet)->sport));
1127 1126
1128 if (*errp < 0) { 1127 if (*errp < 0) {
@@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1143 else 1142 else
1144 svc_tcp_init(svsk, serv); 1143 svc_tcp_init(svsk, serv);
1145 1144
1146 /*
1147 * We start one listener per sv_serv. We want AF_INET
1148 * requests to be automatically shunted to our AF_INET6
1149 * listener using a mapped IPv4 address. Make sure
1150 * no-one starts an equivalent IPv4 listener, which
1151 * would steal our incoming connections.
1152 */
1153 val = 0;
1154 if (serv->sv_family == AF_INET6)
1155 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1156 (char *)&val, sizeof(val));
1157
1158 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1145 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1159 svsk, svsk->sk_sk); 1146 svsk, svsk->sk_sk);
1160 1147
@@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1222 struct sockaddr_storage addr; 1209 struct sockaddr_storage addr;
1223 struct sockaddr *newsin = (struct sockaddr *)&addr; 1210 struct sockaddr *newsin = (struct sockaddr *)&addr;
1224 int newlen; 1211 int newlen;
1212 int family;
1213 int val;
1225 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 1214 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
1226 1215
1227 dprintk("svc: svc_create_socket(%s, %d, %s)\n", 1216 dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1233 "sockets supported\n"); 1222 "sockets supported\n");
1234 return ERR_PTR(-EINVAL); 1223 return ERR_PTR(-EINVAL);
1235 } 1224 }
1225
1236 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 1226 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1227 switch (sin->sa_family) {
1228 case AF_INET6:
1229 family = PF_INET6;
1230 break;
1231 case AF_INET:
1232 family = PF_INET;
1233 break;
1234 default:
1235 return ERR_PTR(-EINVAL);
1236 }
1237 1237
1238 error = sock_create_kern(sin->sa_family, type, protocol, &sock); 1238 error = sock_create_kern(family, type, protocol, &sock);
1239 if (error < 0) 1239 if (error < 0)
1240 return ERR_PTR(error); 1240 return ERR_PTR(error);
1241 1241
1242 svc_reclassify_socket(sock); 1242 svc_reclassify_socket(sock);
1243 1243
1244 /*
 1245	 * If this is a PF_INET6 listener, we want to avoid
1246 * getting requests from IPv4 remotes. Those should
1247 * be shunted to a PF_INET listener via rpcbind.
1248 */
1249 val = 1;
1250 if (family == PF_INET6)
1251 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1252 (char *)&val, sizeof(val));
1253
1244 if (type == SOCK_STREAM) 1254 if (type == SOCK_STREAM)
1245 sock->sk->sk_reuse = 1; /* allow address reuse */ 1255 sock->sk->sk_reuse = 1; /* allow address reuse */
1246 error = kernel_bind(sock, sin, len); 1256 error = kernel_bind(sock, sin, len);
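
The listener policy inverts here: instead of clearing IPV6_V6ONLY on a lone AF_INET6 socket so it absorbs v4-mapped traffic, the new code sets it, keeping the IPv4 and IPv6 listeners independent so rpcbind can steer each family to its own socket. A userspace sketch of creating such a v6-only listener:

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int v6only_listener(unsigned short port)
{
	struct sockaddr_in6 sin6;
	int val = 1;
	int fd = socket(PF_INET6, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	/* Refuse v4-mapped peers; a separate PF_INET socket serves IPv4. */
	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof(val));

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_any;
	sin6.sin6_port = htons(port);
	if (bind(fd, (struct sockaddr *)&sin6, sizeof(sin6)) < 0 ||
	    listen(fd, 16) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
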
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 62098d101a1f..06ca058572f2 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -152,6 +152,37 @@ out:
152EXPORT_SYMBOL_GPL(xprt_unregister_transport); 152EXPORT_SYMBOL_GPL(xprt_unregister_transport);
153 153
154/** 154/**
155 * xprt_load_transport - load a transport implementation
156 * @transport_name: transport to load
157 *
158 * Returns:
159 * 0: transport successfully loaded
160 * -ENOENT: transport module not available
161 */
162int xprt_load_transport(const char *transport_name)
163{
164 struct xprt_class *t;
165 char module_name[sizeof t->name + 5];
166 int result;
167
168 result = 0;
169 spin_lock(&xprt_list_lock);
170 list_for_each_entry(t, &xprt_list, list) {
171 if (strcmp(t->name, transport_name) == 0) {
172 spin_unlock(&xprt_list_lock);
173 goto out;
174 }
175 }
176 spin_unlock(&xprt_list_lock);
177 strcpy(module_name, "xprt");
178 strncat(module_name, transport_name, sizeof t->name);
179 result = request_module(module_name);
180out:
181 return result;
182}
183EXPORT_SYMBOL_GPL(xprt_load_transport);
184
185/**
155 * xprt_reserve_xprt - serialize write access to transports 186 * xprt_reserve_xprt - serialize write access to transports
156 * @task: task that is requesting access to the transport 187 * @task: task that is requesting access to the transport
157 * 188 *
@@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
580 dprintk("RPC: disconnected transport %p\n", xprt); 611 dprintk("RPC: disconnected transport %p\n", xprt);
581 spin_lock_bh(&xprt->transport_lock); 612 spin_lock_bh(&xprt->transport_lock);
582 xprt_clear_connected(xprt); 613 xprt_clear_connected(xprt);
583 xprt_wake_pending_tasks(xprt, -ENOTCONN); 614 xprt_wake_pending_tasks(xprt, -EAGAIN);
584 spin_unlock_bh(&xprt->transport_lock); 615 spin_unlock_bh(&xprt->transport_lock);
585} 616}
586EXPORT_SYMBOL_GPL(xprt_disconnect_done); 617EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
598 /* Try to schedule an autoclose RPC call */ 629 /* Try to schedule an autoclose RPC call */
599 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 630 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
600 queue_work(rpciod_workqueue, &xprt->task_cleanup); 631 queue_work(rpciod_workqueue, &xprt->task_cleanup);
601 xprt_wake_pending_tasks(xprt, -ENOTCONN); 632 xprt_wake_pending_tasks(xprt, -EAGAIN);
602 spin_unlock_bh(&xprt->transport_lock); 633 spin_unlock_bh(&xprt->transport_lock);
603} 634}
604 635
@@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
625 /* Try to schedule an autoclose RPC call */ 656 /* Try to schedule an autoclose RPC call */
626 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 657 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
627 queue_work(rpciod_workqueue, &xprt->task_cleanup); 658 queue_work(rpciod_workqueue, &xprt->task_cleanup);
628 xprt_wake_pending_tasks(xprt, -ENOTCONN); 659 xprt_wake_pending_tasks(xprt, -EAGAIN);
629out: 660out:
630 spin_unlock_bh(&xprt->transport_lock); 661 spin_unlock_bh(&xprt->transport_lock);
631} 662}
@@ -641,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data)
641 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 672 if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
642 goto out_abort; 673 goto out_abort;
643 spin_unlock(&xprt->transport_lock); 674 spin_unlock(&xprt->transport_lock);
644 if (xprt_connecting(xprt)) 675 set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
645 xprt_release_write(xprt, NULL); 676 queue_work(rpciod_workqueue, &xprt->task_cleanup);
646 else
647 queue_work(rpciod_workqueue, &xprt->task_cleanup);
648 return; 677 return;
649out_abort: 678out_abort:
650 spin_unlock(&xprt->transport_lock); 679 spin_unlock(&xprt->transport_lock);
@@ -695,9 +724,8 @@ static void xprt_connect_status(struct rpc_task *task)
695 } 724 }
696 725
697 switch (task->tk_status) { 726 switch (task->tk_status) {
698 case -ENOTCONN: 727 case -EAGAIN:
699 dprintk("RPC: %5u xprt_connect_status: connection broken\n", 728 dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
700 task->tk_pid);
701 break; 729 break;
702 case -ETIMEDOUT: 730 case -ETIMEDOUT:
703 dprintk("RPC: %5u xprt_connect_status: connect attempt timed " 731 dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
@@ -818,15 +846,8 @@ int xprt_prepare_transmit(struct rpc_task *task)
818 err = req->rq_received; 846 err = req->rq_received;
819 goto out_unlock; 847 goto out_unlock;
820 } 848 }
821 if (!xprt->ops->reserve_xprt(task)) { 849 if (!xprt->ops->reserve_xprt(task))
822 err = -EAGAIN; 850 err = -EAGAIN;
823 goto out_unlock;
824 }
825
826 if (!xprt_connected(xprt)) {
827 err = -ENOTCONN;
828 goto out_unlock;
829 }
830out_unlock: 851out_unlock:
831 spin_unlock_bh(&xprt->transport_lock); 852 spin_unlock_bh(&xprt->transport_lock);
832 return err; 853 return err;
@@ -870,32 +891,26 @@ void xprt_transmit(struct rpc_task *task)
870 req->rq_connect_cookie = xprt->connect_cookie; 891 req->rq_connect_cookie = xprt->connect_cookie;
871 req->rq_xtime = jiffies; 892 req->rq_xtime = jiffies;
872 status = xprt->ops->send_request(task); 893 status = xprt->ops->send_request(task);
873 if (status == 0) { 894 if (status != 0) {
874 dprintk("RPC: %5u xmit complete\n", task->tk_pid); 895 task->tk_status = status;
875 spin_lock_bh(&xprt->transport_lock); 896 return;
897 }
876 898
877 xprt->ops->set_retrans_timeout(task); 899 dprintk("RPC: %5u xmit complete\n", task->tk_pid);
900 spin_lock_bh(&xprt->transport_lock);
878 901
879 xprt->stat.sends++; 902 xprt->ops->set_retrans_timeout(task);
880 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
881 xprt->stat.bklog_u += xprt->backlog.qlen;
882 903
883 /* Don't race with disconnect */ 904 xprt->stat.sends++;
884 if (!xprt_connected(xprt)) 905 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
885 task->tk_status = -ENOTCONN; 906 xprt->stat.bklog_u += xprt->backlog.qlen;
886 else if (!req->rq_received)
887 rpc_sleep_on(&xprt->pending, task, xprt_timer);
888 spin_unlock_bh(&xprt->transport_lock);
889 return;
890 }
891 907
892 /* Note: at this point, task->tk_sleeping has not yet been set, 908 /* Don't race with disconnect */
893 * hence there is no danger of the waking up task being put on 909 if (!xprt_connected(xprt))
894 * schedq, and being picked up by a parallel run of rpciod(). 910 task->tk_status = -ENOTCONN;
895 */ 911 else if (!req->rq_received)
896 task->tk_status = status; 912 rpc_sleep_on(&xprt->pending, task, xprt_timer);
897 if (status == -ECONNREFUSED) 913 spin_unlock_bh(&xprt->transport_lock);
898 rpc_sleep_on(&xprt->sending, task, NULL);
899} 914}
900 915
901static inline void do_xprt_reserve(struct rpc_task *task) 916static inline void do_xprt_reserve(struct rpc_task *task)
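The xprt_transmit() hunk above inverts the old "if (status == 0)" success branch into an early return on error, so the bookkeeping that follows no longer sits one indentation level deep. A minimal userspace sketch of the same refactor; the struct and the send_request() helper here are hypothetical stand-ins, not the kernel API:

#include <stdio.h>

struct task { int status; };

/* Hypothetical stand-in for xprt->ops->send_request(). */
static int send_request(struct task *t)
{
	(void)t;
	return 0;
}

static void transmit(struct task *t)
{
	int status = send_request(t);

	/* Fail fast: the error branch returns immediately... */
	if (status != 0) {
		t->status = status;
		return;
	}

	/* ...so the success-path bookkeeping runs unindented. */
	printf("xmit complete\n");
}

int main(void)
{
	struct task t = { 0 };

	transmit(&t);
	return t.status;
}
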
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 14106d26bb95..e5e28d1946a4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
310 __func__, pad, destp, rqst->rq_slen, curlen); 310 __func__, pad, destp, rqst->rq_slen, curlen);
311 311
312 copy_len = rqst->rq_snd_buf.page_len; 312 copy_len = rqst->rq_snd_buf.page_len;
313
314 if (rqst->rq_snd_buf.tail[0].iov_len) {
315 curlen = rqst->rq_snd_buf.tail[0].iov_len;
316 if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
317 memmove(destp + copy_len,
318 rqst->rq_snd_buf.tail[0].iov_base, curlen);
319 r_xprt->rx_stats.pullup_copy_count += curlen;
320 }
321 dprintk("RPC: %s: tail destp 0x%p len %d\n",
322 __func__, destp + copy_len, curlen);
323 rqst->rq_svec[0].iov_len += curlen;
324 }
325
313 r_xprt->rx_stats.pullup_copy_count += copy_len; 326 r_xprt->rx_stats.pullup_copy_count += copy_len;
314 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; 327 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
315 for (i = 0; copy_len && i < npages; i++) { 328 for (i = 0; copy_len && i < npages; i++) {
@@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
332 destp += curlen; 345 destp += curlen;
333 copy_len -= curlen; 346 copy_len -= curlen;
334 } 347 }
335 if (rqst->rq_snd_buf.tail[0].iov_len) {
336 curlen = rqst->rq_snd_buf.tail[0].iov_len;
337 if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
338 memcpy(destp,
339 rqst->rq_snd_buf.tail[0].iov_base, curlen);
340 r_xprt->rx_stats.pullup_copy_count += curlen;
341 }
342 dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
343 __func__, destp, copy_len, curlen);
344 rqst->rq_svec[0].iov_len += curlen;
345 }
346 /* header now contains entire send message */ 348 /* header now contains entire send message */
347 return pad; 349 return pad;
348} 350}
@@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
656 if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) 658 if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
657 curlen = rqst->rq_rcv_buf.tail[0].iov_len; 659 curlen = rqst->rq_rcv_buf.tail[0].iov_len;
658 if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) 660 if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
659 memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); 661 memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
660 dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", 662 dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
661 __func__, srcp, copy_len, curlen); 663 __func__, srcp, copy_len, curlen);
662 rqst->rq_rcv_buf.tail[0].iov_len = curlen; 664 rqst->rq_rcv_buf.tail[0].iov_len = curlen;
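Both rpc_rdma.c hunks above replace memcpy() with memmove() when sliding a tail iovec around inside the same marshalling buffer: once the tail copy is hoisted ahead of the page pull-up, source and destination can overlap, and memcpy() on overlapping regions is undefined behaviour. A self-contained demonstration of why memmove() is the safe choice:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[16] = "abcdefgh";

	/* Overlapping regions: shift "abcdefgh" right by two bytes.
	 * memmove() handles the overlap; memcpy() here would be
	 * undefined behaviour and may corrupt the data. */
	memmove(buf + 2, buf, 8);
	buf[10] = '\0';
	printf("%s\n", buf);	/* prints "ababcdefgh" */
	return 0;
}
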
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 629a28764da9..42a6f9f20285 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -265,7 +265,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
265 frmr->page_list->page_list[page_no] = 265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device, 266 ib_dma_map_single(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]), 267 page_address(rqstp->rq_arg.pages[page_no]),
268 PAGE_SIZE, DMA_TO_DEVICE); 268 PAGE_SIZE, DMA_FROM_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no])) 270 frmr->page_list->page_list[page_no]))
271 goto fatal_err; 271 goto fatal_err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index a3334e3b73cc..f11be72a1a80 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -128,7 +128,8 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
128 page_bytes -= sge_bytes; 128 page_bytes -= sge_bytes;
129 129
130 frmr->page_list->page_list[page_no] = 130 frmr->page_list->page_list[page_no] =
131 ib_dma_map_page(xprt->sc_cm_id->device, page, 0, 131 ib_dma_map_single(xprt->sc_cm_id->device,
132 page_address(page),
132 PAGE_SIZE, DMA_TO_DEVICE); 133 PAGE_SIZE, DMA_TO_DEVICE);
133 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 134 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
134 frmr->page_list->page_list[page_no])) 135 frmr->page_list->page_list[page_no]))
@@ -183,6 +184,7 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
183 184
184 fatal_err: 185 fatal_err:
185 printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); 186 printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
187 vec->frmr = NULL;
186 svc_rdma_put_frmr(xprt, frmr); 188 svc_rdma_put_frmr(xprt, frmr);
187 return -EIO; 189 return -EIO;
188} 190}
@@ -191,7 +193,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
191 struct xdr_buf *xdr, 193 struct xdr_buf *xdr,
192 struct svc_rdma_req_map *vec) 194 struct svc_rdma_req_map *vec)
193{ 195{
194 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
195 int sge_no; 196 int sge_no;
196 u32 sge_bytes; 197 u32 sge_bytes;
197 u32 page_bytes; 198 u32 page_bytes;
@@ -235,7 +236,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
235 sge_no++; 236 sge_no++;
236 } 237 }
237 238
238 BUG_ON(sge_no > sge_max); 239 dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
240 "page_base %u page_len %u head_len %zu tail_len %zu\n",
241 sge_no, page_no, xdr->page_base, xdr->page_len,
242 xdr->head[0].iov_len, xdr->tail[0].iov_len);
243
239 vec->count = sge_no; 244 vec->count = sge_no;
240 return 0; 245 return 0;
241} 246}
@@ -513,6 +518,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
513 "svcrdma: could not post a receive buffer, err=%d." 518 "svcrdma: could not post a receive buffer, err=%d."
514 "Closing transport %p.\n", ret, rdma); 519 "Closing transport %p.\n", ret, rdma);
515 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 520 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
521 svc_rdma_put_frmr(rdma, vec->frmr);
516 svc_rdma_put_context(ctxt, 0); 522 svc_rdma_put_context(ctxt, 0);
517 return -ENOTCONN; 523 return -ENOTCONN;
518 } 524 }
@@ -527,18 +533,17 @@ static int send_reply(struct svcxprt_rdma *rdma,
527 clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); 533 clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
528 534
529 /* Prepare the SGE for the RPCRDMA Header */ 535 /* Prepare the SGE for the RPCRDMA Header */
536 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
537 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
530 ctxt->sge[0].addr = 538 ctxt->sge[0].addr =
531 ib_dma_map_page(rdma->sc_cm_id->device, 539 ib_dma_map_single(rdma->sc_cm_id->device, page_address(page),
532 page, 0, PAGE_SIZE, DMA_TO_DEVICE); 540 ctxt->sge[0].length, DMA_TO_DEVICE);
533 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 541 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
534 goto err; 542 goto err;
535 atomic_inc(&rdma->sc_dma_used); 543 atomic_inc(&rdma->sc_dma_used);
536 544
537 ctxt->direction = DMA_TO_DEVICE; 545 ctxt->direction = DMA_TO_DEVICE;
538 546
539 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
540 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
541
542 /* Determine how many of our SGE are to be transmitted */ 547 /* Determine how many of our SGE are to be transmitted */
543 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
544 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
@@ -579,7 +584,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
579 ctxt->sge[page_no+1].length = 0; 584 ctxt->sge[page_no+1].length = 0;
580 } 585 }
581 BUG_ON(sge_no > rdma->sc_max_sge); 586 BUG_ON(sge_no > rdma->sc_max_sge);
582 BUG_ON(sge_no > ctxt->count);
583 memset(&send_wr, 0, sizeof send_wr); 587 memset(&send_wr, 0, sizeof send_wr);
584 ctxt->wr_op = IB_WR_SEND; 588 ctxt->wr_op = IB_WR_SEND;
585 send_wr.wr_id = (unsigned long)ctxt; 589 send_wr.wr_id = (unsigned long)ctxt;
@@ -604,6 +608,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
604 return 0; 608 return 0;
605 609
606 err: 610 err:
611 svc_rdma_unmap_dma(ctxt);
607 svc_rdma_put_frmr(rdma, vec->frmr); 612 svc_rdma_put_frmr(rdma, vec->frmr);
608 svc_rdma_put_context(ctxt, 1); 613 svc_rdma_put_context(ctxt, 1);
609 return -EIO; 614 return -EIO;
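The theme of the svc_rdma_sendto.c fixes is symmetric cleanup: every error path now releases exactly what was acquired before it failed (svc_rdma_unmap_dma(), svc_rdma_put_frmr(), clearing vec->frmr). A generic sketch of that goto-based acquire/unwind idiom in portable C, with malloc()/free() standing in for the DMA mapping and the send context:

#include <stdio.h>
#include <stdlib.h>

static int do_work(void)
{
	char *map = NULL, *ctxt = NULL;
	int err = -1;

	map = malloc(64);		/* stands in for the DMA mapping */
	if (!map)
		goto out;
	ctxt = malloc(64);		/* stands in for the send context */
	if (!ctxt)
		goto err_unmap;

	/* ... the real work would happen here ... */
	err = 0;

	free(ctxt);
err_unmap:
	free(map);			/* released on success and failure alike */
out:
	return err;
}

int main(void)
{
	return do_work() ? EXIT_FAILURE : EXIT_SUCCESS;
}
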
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3d810e7df3fb..5151f9f6c573 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -500,8 +500,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
500 BUG_ON(sge_no >= xprt->sc_max_sge); 500 BUG_ON(sge_no >= xprt->sc_max_sge);
501 page = svc_rdma_get_page(); 501 page = svc_rdma_get_page();
502 ctxt->pages[sge_no] = page; 502 ctxt->pages[sge_no] = page;
503 pa = ib_dma_map_page(xprt->sc_cm_id->device, 503 pa = ib_dma_map_single(xprt->sc_cm_id->device,
504 page, 0, PAGE_SIZE, 504 page_address(page), PAGE_SIZE,
505 DMA_FROM_DEVICE); 505 DMA_FROM_DEVICE);
506 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 506 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
507 goto err_put_ctxt; 507 goto err_put_ctxt;
@@ -520,8 +520,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
520 svc_xprt_get(&xprt->sc_xprt); 520 svc_xprt_get(&xprt->sc_xprt);
521 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); 521 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
522 if (ret) { 522 if (ret) {
523 svc_xprt_put(&xprt->sc_xprt); 523 svc_rdma_unmap_dma(ctxt);
524 svc_rdma_put_context(ctxt, 1); 524 svc_rdma_put_context(ctxt, 1);
525 svc_xprt_put(&xprt->sc_xprt);
525 } 526 }
526 return ret; 527 return ret;
527 528
@@ -1314,8 +1315,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1314 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1315 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1315 1316
1316 /* Prepare SGE for local address */ 1317 /* Prepare SGE for local address */
1317 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, 1318 sge.addr = ib_dma_map_single(xprt->sc_cm_id->device,
1318 p, 0, PAGE_SIZE, DMA_FROM_DEVICE); 1319 page_address(p), PAGE_SIZE, DMA_FROM_DEVICE);
1319 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { 1320 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
1320 put_page(p); 1321 put_page(p);
1321 return; 1322 return;
@@ -1342,7 +1343,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1342 if (ret) { 1343 if (ret) {
1343 dprintk("svcrdma: Error %d posting send for protocol error\n", 1344 dprintk("svcrdma: Error %d posting send for protocol error\n",
1344 ret); 1345 ret);
1345 ib_dma_unmap_page(xprt->sc_cm_id->device, 1346 ib_dma_unmap_single(xprt->sc_cm_id->device,
1346 sge.addr, PAGE_SIZE, 1347 sge.addr, PAGE_SIZE,
1347 DMA_FROM_DEVICE); 1348 DMA_FROM_DEVICE);
1348 svc_rdma_put_context(ctxt, 1); 1349 svc_rdma_put_context(ctxt, 1);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3b21e0cc5e69..465aafc2007f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1495,7 +1495,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1495 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 1495 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1496 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; 1496 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1497 frmr_wr.wr.fast_reg.access_flags = (writing ? 1497 frmr_wr.wr.fast_reg.access_flags = (writing ?
1498 IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); 1498 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1499 IB_ACCESS_REMOTE_READ);
1499 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1500 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1500 DECR_CQCOUNT(&r_xprt->rx_ep); 1501 DECR_CQCOUNT(&r_xprt->rx_ep);
1501 1502
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 568330eebbfe..e18596146013 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
49unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 49unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
50unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 50unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
51 51
52#define XS_TCP_LINGER_TO (15U * HZ)
53static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
54
52/* 55/*
53 * We can register our own files under /proc/sys/sunrpc by 56 * We can register our own files under /proc/sys/sunrpc by
54 * calling register_sysctl_table() again. The files in that 57 * calling register_sysctl_table() again. The files in that
@@ -117,6 +120,14 @@ static ctl_table xs_tunables_table[] = {
117 .extra2 = &xprt_max_resvport_limit 120 .extra2 = &xprt_max_resvport_limit
118 }, 121 },
119 { 122 {
123 .procname = "tcp_fin_timeout",
124 .data = &xs_tcp_fin_timeout,
125 .maxlen = sizeof(xs_tcp_fin_timeout),
126 .mode = 0644,
127 .proc_handler = &proc_dointvec_jiffies,
128 .strategy = sysctl_jiffies
129 },
130 {
120 .ctl_name = 0, 131 .ctl_name = 0,
121 }, 132 },
122}; 133};
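XS_TCP_LINGER_TO is stored in jiffies (15 seconds scaled by HZ), and the new tcp_fin_timeout sysctl pairs it with proc_dointvec_jiffies/sysctl_jiffies so userspace reads and writes seconds while the kernel keeps ticks. A toy illustration of that convention; the HZ value below is a made-up example, not a statement about any particular kernel configuration:

#include <stdio.h>

#define HZ 250				/* made-up tick rate for the example */
#define XS_TCP_LINGER_TO (15U * HZ)	/* timeout kept in ticks, as in the patch */

int main(void)
{
	unsigned int ticks = XS_TCP_LINGER_TO;

	/* proc_dointvec_jiffies performs this conversion at the /proc
	 * boundary: userspace sees seconds, the kernel sees ticks. */
	printf("%u ticks = %u seconds\n", ticks, ticks / HZ);
	return 0;
}
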
@@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task)
521 * @task: task to put to sleep 532 * @task: task to put to sleep
522 * 533 *
523 */ 534 */
524static void xs_nospace(struct rpc_task *task) 535static int xs_nospace(struct rpc_task *task)
525{ 536{
526 struct rpc_rqst *req = task->tk_rqstp; 537 struct rpc_rqst *req = task->tk_rqstp;
527 struct rpc_xprt *xprt = req->rq_xprt; 538 struct rpc_xprt *xprt = req->rq_xprt;
528 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 539 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
540 int ret = 0;
529 541
530 dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", 542 dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
531 task->tk_pid, req->rq_slen - req->rq_bytes_sent, 543 task->tk_pid, req->rq_slen - req->rq_bytes_sent,
@@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task)
537 /* Don't race with disconnect */ 549 /* Don't race with disconnect */
538 if (xprt_connected(xprt)) { 550 if (xprt_connected(xprt)) {
539 if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { 551 if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
552 ret = -EAGAIN;
540 /* 553 /*
541 * Notify TCP that we're limited by the application 554 * Notify TCP that we're limited by the application
542 * window size 555 * window size
@@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task)
548 } 561 }
549 } else { 562 } else {
550 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 563 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
551 task->tk_status = -ENOTCONN; 564 ret = -ENOTCONN;
552 } 565 }
553 566
554 spin_unlock_bh(&xprt->transport_lock); 567 spin_unlock_bh(&xprt->transport_lock);
568 return ret;
555} 569}
556 570
557/** 571/**
@@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task)
594 /* Still some bytes left; set up for a retry later. */ 608 /* Still some bytes left; set up for a retry later. */
595 status = -EAGAIN; 609 status = -EAGAIN;
596 } 610 }
611 if (!transport->sock)
612 goto out;
597 613
598 switch (status) { 614 switch (status) {
599 case -ENOTSOCK: 615 case -ENOTSOCK:
@@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task)
601 /* Should we call xs_close() here? */ 617 /* Should we call xs_close() here? */
602 break; 618 break;
603 case -EAGAIN: 619 case -EAGAIN:
604 xs_nospace(task); 620 status = xs_nospace(task);
605 break; 621 break;
622 default:
623 dprintk("RPC: sendmsg returned unrecognized error %d\n",
624 -status);
606 case -ENETUNREACH: 625 case -ENETUNREACH:
607 case -EPIPE: 626 case -EPIPE:
608 case -ECONNREFUSED: 627 case -ECONNREFUSED:
609 /* When the server has died, an ICMP port unreachable message 628 /* When the server has died, an ICMP port unreachable message
610 * prompts ECONNREFUSED. */ 629 * prompts ECONNREFUSED. */
611 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 630 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
612 break;
613 default:
614 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
615 dprintk("RPC: sendmsg returned unrecognized error %d\n",
616 -status);
617 } 631 }
618 632out:
619 return status; 633 return status;
620} 634}
621 635
@@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
697 status = -EAGAIN; 711 status = -EAGAIN;
698 break; 712 break;
699 } 713 }
714 if (!transport->sock)
715 goto out;
700 716
701 switch (status) { 717 switch (status) {
702 case -ENOTSOCK: 718 case -ENOTSOCK:
@@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task)
704 /* Should we call xs_close() here? */ 720 /* Should we call xs_close() here? */
705 break; 721 break;
706 case -EAGAIN: 722 case -EAGAIN:
707 xs_nospace(task); 723 status = xs_nospace(task);
708 break; 724 break;
725 default:
726 dprintk("RPC: sendmsg returned unrecognized error %d\n",
727 -status);
709 case -ECONNRESET: 728 case -ECONNRESET:
729 case -EPIPE:
710 xs_tcp_shutdown(xprt); 730 xs_tcp_shutdown(xprt);
711 case -ECONNREFUSED: 731 case -ECONNREFUSED:
712 case -ENOTCONN: 732 case -ENOTCONN:
713 case -EPIPE:
714 status = -ENOTCONN;
715 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
716 break;
717 default:
718 dprintk("RPC: sendmsg returned unrecognized error %d\n",
719 -status);
720 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 733 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
721 xs_tcp_shutdown(xprt);
722 } 734 }
723 735out:
724 return status; 736 return status;
725} 737}
726 738
@@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
767 sk->sk_error_report = transport->old_error_report; 779 sk->sk_error_report = transport->old_error_report;
768} 780}
769 781
770/** 782static void xs_reset_transport(struct sock_xprt *transport)
771 * xs_close - close a socket
772 * @xprt: transport
773 *
774 * This is used when all requests are complete; ie, no DRC state remains
775 * on the server we want to save.
776 */
777static void xs_close(struct rpc_xprt *xprt)
778{ 783{
779 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
780 struct socket *sock = transport->sock; 784 struct socket *sock = transport->sock;
781 struct sock *sk = transport->inet; 785 struct sock *sk = transport->inet;
782 786
783 if (!sk) 787 if (sk == NULL)
784 goto clear_close_wait; 788 return;
785
786 dprintk("RPC: xs_close xprt %p\n", xprt);
787 789
788 write_lock_bh(&sk->sk_callback_lock); 790 write_lock_bh(&sk->sk_callback_lock);
789 transport->inet = NULL; 791 transport->inet = NULL;
@@ -797,14 +799,42 @@ static void xs_close(struct rpc_xprt *xprt)
797 sk->sk_no_check = 0; 799 sk->sk_no_check = 0;
798 800
799 sock_release(sock); 801 sock_release(sock);
800clear_close_wait: 802}
803
804/**
805 * xs_close - close a socket
806 * @xprt: transport
807 *
 808 * This is used when all requests are complete; i.e., no DRC state remains
809 * on the server we want to save.
810 *
811 * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
812 * xs_reset_transport() zeroing the socket from underneath a writer.
813 */
814static void xs_close(struct rpc_xprt *xprt)
815{
816 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
817
818 dprintk("RPC: xs_close xprt %p\n", xprt);
819
820 xs_reset_transport(transport);
821
801 smp_mb__before_clear_bit(); 822 smp_mb__before_clear_bit();
823 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
802 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 824 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
803 clear_bit(XPRT_CLOSING, &xprt->state); 825 clear_bit(XPRT_CLOSING, &xprt->state);
804 smp_mb__after_clear_bit(); 826 smp_mb__after_clear_bit();
805 xprt_disconnect_done(xprt); 827 xprt_disconnect_done(xprt);
806} 828}
807 829
830static void xs_tcp_close(struct rpc_xprt *xprt)
831{
832 if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
833 xs_close(xprt);
834 else
835 xs_tcp_shutdown(xprt);
836}
837
808/** 838/**
809 * xs_destroy - prepare to shutdown a transport 839 * xs_destroy - prepare to shutdown a transport
810 * @xprt: doomed transport 840 * @xprt: doomed transport
@@ -1126,6 +1156,47 @@ out:
1126 read_unlock(&sk->sk_callback_lock); 1156 read_unlock(&sk->sk_callback_lock);
1127} 1157}
1128 1158
1159/*
1160 * Do the equivalent of linger/linger2 handling for dealing with
1161 * broken servers that don't close the socket in a timely
1162 * fashion
1163 */
1164static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
1165 unsigned long timeout)
1166{
1167 struct sock_xprt *transport;
1168
1169 if (xprt_test_and_set_connecting(xprt))
1170 return;
1171 set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1172 transport = container_of(xprt, struct sock_xprt, xprt);
1173 queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
1174 timeout);
1175}
1176
1177static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
1178{
1179 struct sock_xprt *transport;
1180
1181 transport = container_of(xprt, struct sock_xprt, xprt);
1182
1183 if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
1184 !cancel_delayed_work(&transport->connect_worker))
1185 return;
1186 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1187 xprt_clear_connecting(xprt);
1188}
1189
1190static void xs_sock_mark_closed(struct rpc_xprt *xprt)
1191{
1192 smp_mb__before_clear_bit();
1193 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1194 clear_bit(XPRT_CLOSING, &xprt->state);
1195 smp_mb__after_clear_bit();
1196 /* Mark transport as closed and wake up all pending tasks */
1197 xprt_disconnect_done(xprt);
1198}
1199
1129/** 1200/**
1130 * xs_tcp_state_change - callback to handle TCP socket state changes 1201 * xs_tcp_state_change - callback to handle TCP socket state changes
1131 * @sk: socket whose state has changed 1202 * @sk: socket whose state has changed
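xs_tcp_schedule_linger_timeout() above only queues the connect worker if it wins xprt_test_and_set_connecting(), and xs_tcp_cancel_linger_timeout() undoes the bit when it manages to cancel the work, so at most one linger worker is in flight per transport. The same single-winner idiom in portable C11, with atomic_flag standing in for the XPRT state bit:

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag connecting = ATOMIC_FLAG_INIT;

/* Returns 1 if this caller won the right to queue the worker. */
static int schedule_linger(void)
{
	if (atomic_flag_test_and_set(&connecting))
		return 0;	/* someone else already queued it */
	printf("queueing linger worker\n");
	return 1;
}

static void cancel_linger(void)
{
	atomic_flag_clear(&connecting);	/* mirrors xprt_clear_connecting() */
}

int main(void)
{
	schedule_linger();	/* wins                 */
	schedule_linger();	/* loses: already set   */
	cancel_linger();
	schedule_linger();	/* wins again           */
	return 0;
}
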
@@ -1158,7 +1229,7 @@ static void xs_tcp_state_change(struct sock *sk)
1158 transport->tcp_flags = 1229 transport->tcp_flags =
1159 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; 1230 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
1160 1231
1161 xprt_wake_pending_tasks(xprt, 0); 1232 xprt_wake_pending_tasks(xprt, -EAGAIN);
1162 } 1233 }
1163 spin_unlock_bh(&xprt->transport_lock); 1234 spin_unlock_bh(&xprt->transport_lock);
1164 break; 1235 break;
@@ -1171,10 +1242,10 @@ static void xs_tcp_state_change(struct sock *sk)
1171 clear_bit(XPRT_CONNECTED, &xprt->state); 1242 clear_bit(XPRT_CONNECTED, &xprt->state);
1172 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1243 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1173 smp_mb__after_clear_bit(); 1244 smp_mb__after_clear_bit();
1245 xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
1174 break; 1246 break;
1175 case TCP_CLOSE_WAIT: 1247 case TCP_CLOSE_WAIT:
1176 /* The server initiated a shutdown of the socket */ 1248 /* The server initiated a shutdown of the socket */
1177 set_bit(XPRT_CLOSING, &xprt->state);
1178 xprt_force_disconnect(xprt); 1249 xprt_force_disconnect(xprt);
1179 case TCP_SYN_SENT: 1250 case TCP_SYN_SENT:
1180 xprt->connect_cookie++; 1251 xprt->connect_cookie++;
@@ -1187,40 +1258,35 @@ static void xs_tcp_state_change(struct sock *sk)
1187 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 1258 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
1188 break; 1259 break;
1189 case TCP_LAST_ACK: 1260 case TCP_LAST_ACK:
1261 set_bit(XPRT_CLOSING, &xprt->state);
1262 xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
1190 smp_mb__before_clear_bit(); 1263 smp_mb__before_clear_bit();
1191 clear_bit(XPRT_CONNECTED, &xprt->state); 1264 clear_bit(XPRT_CONNECTED, &xprt->state);
1192 smp_mb__after_clear_bit(); 1265 smp_mb__after_clear_bit();
1193 break; 1266 break;
1194 case TCP_CLOSE: 1267 case TCP_CLOSE:
1195 smp_mb__before_clear_bit(); 1268 xs_tcp_cancel_linger_timeout(xprt);
1196 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1269 xs_sock_mark_closed(xprt);
1197 clear_bit(XPRT_CLOSING, &xprt->state);
1198 smp_mb__after_clear_bit();
1199 /* Mark transport as closed and wake up all pending tasks */
1200 xprt_disconnect_done(xprt);
1201 } 1270 }
1202 out: 1271 out:
1203 read_unlock(&sk->sk_callback_lock); 1272 read_unlock(&sk->sk_callback_lock);
1204} 1273}
1205 1274
1206/** 1275/**
1207 * xs_tcp_error_report - callback mainly for catching RST events 1276 * xs_error_report - callback mainly for catching socket errors
1208 * @sk: socket 1277 * @sk: socket
1209 */ 1278 */
1210static void xs_tcp_error_report(struct sock *sk) 1279static void xs_error_report(struct sock *sk)
1211{ 1280{
1212 struct rpc_xprt *xprt; 1281 struct rpc_xprt *xprt;
1213 1282
1214 read_lock(&sk->sk_callback_lock); 1283 read_lock(&sk->sk_callback_lock);
1215 if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
1216 goto out;
1217 if (!(xprt = xprt_from_sock(sk))) 1284 if (!(xprt = xprt_from_sock(sk)))
1218 goto out; 1285 goto out;
1219 dprintk("RPC: %s client %p...\n" 1286 dprintk("RPC: %s client %p...\n"
1220 "RPC: error %d\n", 1287 "RPC: error %d\n",
1221 __func__, xprt, sk->sk_err); 1288 __func__, xprt, sk->sk_err);
1222 1289 xprt_wake_pending_tasks(xprt, -EAGAIN);
1223 xprt_force_disconnect(xprt);
1224out: 1290out:
1225 read_unlock(&sk->sk_callback_lock); 1291 read_unlock(&sk->sk_callback_lock);
1226} 1292}
@@ -1494,6 +1560,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1494 sk->sk_user_data = xprt; 1560 sk->sk_user_data = xprt;
1495 sk->sk_data_ready = xs_udp_data_ready; 1561 sk->sk_data_ready = xs_udp_data_ready;
1496 sk->sk_write_space = xs_udp_write_space; 1562 sk->sk_write_space = xs_udp_write_space;
1563 sk->sk_error_report = xs_error_report;
1497 sk->sk_no_check = UDP_CSUM_NORCV; 1564 sk->sk_no_check = UDP_CSUM_NORCV;
1498 sk->sk_allocation = GFP_ATOMIC; 1565 sk->sk_allocation = GFP_ATOMIC;
1499 1566
@@ -1526,9 +1593,10 @@ static void xs_udp_connect_worker4(struct work_struct *work)
1526 goto out; 1593 goto out;
1527 1594
1528 /* Start by resetting any existing state */ 1595 /* Start by resetting any existing state */
1529 xs_close(xprt); 1596 xs_reset_transport(transport);
1530 1597
1531 if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { 1598 err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1599 if (err < 0) {
1532 dprintk("RPC: can't create UDP transport socket (%d).\n", -err); 1600 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1533 goto out; 1601 goto out;
1534 } 1602 }
@@ -1545,8 +1613,8 @@ static void xs_udp_connect_worker4(struct work_struct *work)
1545 xs_udp_finish_connecting(xprt, sock); 1613 xs_udp_finish_connecting(xprt, sock);
1546 status = 0; 1614 status = 0;
1547out: 1615out:
1548 xprt_wake_pending_tasks(xprt, status);
1549 xprt_clear_connecting(xprt); 1616 xprt_clear_connecting(xprt);
1617 xprt_wake_pending_tasks(xprt, status);
1550} 1618}
1551 1619
1552/** 1620/**
@@ -1567,9 +1635,10 @@ static void xs_udp_connect_worker6(struct work_struct *work)
1567 goto out; 1635 goto out;
1568 1636
1569 /* Start by resetting any existing state */ 1637 /* Start by resetting any existing state */
1570 xs_close(xprt); 1638 xs_reset_transport(transport);
1571 1639
1572 if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { 1640 err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
1641 if (err < 0) {
1573 dprintk("RPC: can't create UDP transport socket (%d).\n", -err); 1642 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1574 goto out; 1643 goto out;
1575 } 1644 }
@@ -1586,18 +1655,17 @@ static void xs_udp_connect_worker6(struct work_struct *work)
1586 xs_udp_finish_connecting(xprt, sock); 1655 xs_udp_finish_connecting(xprt, sock);
1587 status = 0; 1656 status = 0;
1588out: 1657out:
1589 xprt_wake_pending_tasks(xprt, status);
1590 xprt_clear_connecting(xprt); 1658 xprt_clear_connecting(xprt);
1659 xprt_wake_pending_tasks(xprt, status);
1591} 1660}
1592 1661
1593/* 1662/*
1594 * We need to preserve the port number so the reply cache on the server can 1663 * We need to preserve the port number so the reply cache on the server can
1595 * find our cached RPC replies when we get around to reconnecting. 1664 * find our cached RPC replies when we get around to reconnecting.
1596 */ 1665 */
1597static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) 1666static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
1598{ 1667{
1599 int result; 1668 int result;
1600 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1601 struct sockaddr any; 1669 struct sockaddr any;
1602 1670
1603 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); 1671 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
@@ -1609,11 +1677,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
1609 memset(&any, 0, sizeof(any)); 1677 memset(&any, 0, sizeof(any));
1610 any.sa_family = AF_UNSPEC; 1678 any.sa_family = AF_UNSPEC;
1611 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 1679 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1612 if (result) 1680 if (!result)
1681 xs_sock_mark_closed(xprt);
1682 else
1613 dprintk("RPC: AF_UNSPEC connect return code %d\n", 1683 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1614 result); 1684 result);
1615} 1685}
1616 1686
1687static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
1688{
1689 unsigned int state = transport->inet->sk_state;
1690
1691 if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
1692 return;
1693 if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
1694 return;
1695 xs_abort_connection(xprt, transport);
1696}
1697
1617static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1698static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1618{ 1699{
1619 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1700 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
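xs_abort_connection() "closes" the connection by calling kernel_connect() with an AF_UNSPEC address, which tears down the association while the socket, and with it the reserved local port the server's reply cache depends on, stays alive. The same trick is visible from userspace on a connected UDP socket (Linux-specific behaviour; error handling trimmed for brevity):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in peer = { 0 };
	struct sockaddr any;

	peer.sin_family = AF_INET;
	peer.sin_port = htons(2049);
	peer.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	connect(fd, (struct sockaddr *)&peer, sizeof(peer));

	/* Dissolve the association; the bound local port survives. */
	memset(&any, 0, sizeof(any));
	any.sa_family = AF_UNSPEC;
	if (connect(fd, &any, sizeof(any)) == 0)
		puts("association dissolved, port preserved");

	close(fd);
	return 0;
}
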
@@ -1629,7 +1710,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1629 sk->sk_data_ready = xs_tcp_data_ready; 1710 sk->sk_data_ready = xs_tcp_data_ready;
1630 sk->sk_state_change = xs_tcp_state_change; 1711 sk->sk_state_change = xs_tcp_state_change;
1631 sk->sk_write_space = xs_tcp_write_space; 1712 sk->sk_write_space = xs_tcp_write_space;
1632 sk->sk_error_report = xs_tcp_error_report; 1713 sk->sk_error_report = xs_error_report;
1633 sk->sk_allocation = GFP_ATOMIC; 1714 sk->sk_allocation = GFP_ATOMIC;
1634 1715
1635 /* socket options */ 1716 /* socket options */
@@ -1657,37 +1738,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1657} 1738}
1658 1739
1659/** 1740/**
1660 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint 1741 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
1661 * @work: RPC transport to connect 1742 * @xprt: RPC transport to connect
1743 * @transport: socket transport to connect
1744 * @create_sock: function to create a socket of the correct type
1662 * 1745 *
1663 * Invoked by a work queue tasklet. 1746 * Invoked by a work queue tasklet.
1664 */ 1747 */
1665static void xs_tcp_connect_worker4(struct work_struct *work) 1748static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1749 struct sock_xprt *transport,
1750 struct socket *(*create_sock)(struct rpc_xprt *,
1751 struct sock_xprt *))
1666{ 1752{
1667 struct sock_xprt *transport =
1668 container_of(work, struct sock_xprt, connect_worker.work);
1669 struct rpc_xprt *xprt = &transport->xprt;
1670 struct socket *sock = transport->sock; 1753 struct socket *sock = transport->sock;
1671 int err, status = -EIO; 1754 int status = -EIO;
1672 1755
1673 if (xprt->shutdown) 1756 if (xprt->shutdown)
1674 goto out; 1757 goto out;
1675 1758
1676 if (!sock) { 1759 if (!sock) {
1677 /* start from scratch */ 1760 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1678 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1761 sock = create_sock(xprt, transport);
1679 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1762 if (IS_ERR(sock)) {
1763 status = PTR_ERR(sock);
1680 goto out; 1764 goto out;
1681 } 1765 }
1682 xs_reclassify_socket4(sock); 1766 } else {
1767 int abort_and_exit;
1683 1768
1684 if (xs_bind4(transport, sock) < 0) { 1769 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
1685 sock_release(sock); 1770 &xprt->state);
1686 goto out;
1687 }
1688 } else
1689 /* "close" the socket, preserving the local port */ 1771 /* "close" the socket, preserving the local port */
1690 xs_tcp_reuse_connection(xprt); 1772 xs_tcp_reuse_connection(xprt, transport);
1773
1774 if (abort_and_exit)
1775 goto out_eagain;
1776 }
1691 1777
1692 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1778 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1693 xprt, xprt->address_strings[RPC_DISPLAY_ALL]); 1779 xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
@@ -1696,83 +1782,109 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
1696 dprintk("RPC: %p connect status %d connected %d sock state %d\n", 1782 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
1697 xprt, -status, xprt_connected(xprt), 1783 xprt, -status, xprt_connected(xprt),
1698 sock->sk->sk_state); 1784 sock->sk->sk_state);
1699 if (status < 0) { 1785 switch (status) {
1700 switch (status) { 1786 default:
1701 case -EINPROGRESS: 1787 printk("%s: connect returned unhandled error %d\n",
1702 case -EALREADY: 1788 __func__, status);
1703 goto out_clear; 1789 case -EADDRNOTAVAIL:
1704 case -ECONNREFUSED: 1790 /* We're probably in TIME_WAIT. Get rid of existing socket,
1705 case -ECONNRESET: 1791 * and retry
1706 /* retry with existing socket, after a delay */ 1792 */
1707 break; 1793 set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
1708 default: 1794 xprt_force_disconnect(xprt);
1709 /* get rid of existing socket, and retry */ 1795 case -ECONNREFUSED:
1710 xs_tcp_shutdown(xprt); 1796 case -ECONNRESET:
1711 } 1797 case -ENETUNREACH:
1798 /* retry with existing socket, after a delay */
1799 case 0:
1800 case -EINPROGRESS:
1801 case -EALREADY:
1802 xprt_clear_connecting(xprt);
1803 return;
1712 } 1804 }
1805out_eagain:
1806 status = -EAGAIN;
1713out: 1807out:
1714 xprt_wake_pending_tasks(xprt, status);
1715out_clear:
1716 xprt_clear_connecting(xprt); 1808 xprt_clear_connecting(xprt);
1809 xprt_wake_pending_tasks(xprt, status);
1810}
1811
1812static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
1813 struct sock_xprt *transport)
1814{
1815 struct socket *sock;
1816 int err;
1817
1818 /* start from scratch */
1819 err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
1820 if (err < 0) {
1821 dprintk("RPC: can't create TCP transport socket (%d).\n",
1822 -err);
1823 goto out_err;
1824 }
1825 xs_reclassify_socket4(sock);
1826
1827 if (xs_bind4(transport, sock) < 0) {
1828 sock_release(sock);
1829 goto out_err;
1830 }
1831 return sock;
1832out_err:
1833 return ERR_PTR(-EIO);
1717} 1834}
1718 1835
1719/** 1836/**
1720 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint 1837 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
1721 * @work: RPC transport to connect 1838 * @work: RPC transport to connect
1722 * 1839 *
1723 * Invoked by a work queue tasklet. 1840 * Invoked by a work queue tasklet.
1724 */ 1841 */
1725static void xs_tcp_connect_worker6(struct work_struct *work) 1842static void xs_tcp_connect_worker4(struct work_struct *work)
1726{ 1843{
1727 struct sock_xprt *transport = 1844 struct sock_xprt *transport =
1728 container_of(work, struct sock_xprt, connect_worker.work); 1845 container_of(work, struct sock_xprt, connect_worker.work);
1729 struct rpc_xprt *xprt = &transport->xprt; 1846 struct rpc_xprt *xprt = &transport->xprt;
1730 struct socket *sock = transport->sock;
1731 int err, status = -EIO;
1732 1847
1733 if (xprt->shutdown) 1848 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
1734 goto out; 1849}
1735 1850
1736 if (!sock) { 1851static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
1737 /* start from scratch */ 1852 struct sock_xprt *transport)
1738 if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1853{
1739 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1854 struct socket *sock;
1740 goto out; 1855 int err;
1741 } 1856
1742 xs_reclassify_socket6(sock); 1857 /* start from scratch */
1858 err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
1859 if (err < 0) {
1860 dprintk("RPC: can't create TCP transport socket (%d).\n",
1861 -err);
1862 goto out_err;
1863 }
1864 xs_reclassify_socket6(sock);
1743 1865
1744 if (xs_bind6(transport, sock) < 0) { 1866 if (xs_bind6(transport, sock) < 0) {
1745 sock_release(sock); 1867 sock_release(sock);
1746 goto out; 1868 goto out_err;
1747 } 1869 }
1748 } else 1870 return sock;
1749 /* "close" the socket, preserving the local port */ 1871out_err:
1750 xs_tcp_reuse_connection(xprt); 1872 return ERR_PTR(-EIO);
1873}
1751 1874
1752 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1875/**
1753 xprt, xprt->address_strings[RPC_DISPLAY_ALL]); 1876 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
1877 * @work: RPC transport to connect
1878 *
1879 * Invoked by a work queue tasklet.
1880 */
1881static void xs_tcp_connect_worker6(struct work_struct *work)
1882{
1883 struct sock_xprt *transport =
1884 container_of(work, struct sock_xprt, connect_worker.work);
1885 struct rpc_xprt *xprt = &transport->xprt;
1754 1886
1755 status = xs_tcp_finish_connecting(xprt, sock); 1887 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
1756 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
1757 xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
1758 if (status < 0) {
1759 switch (status) {
1760 case -EINPROGRESS:
1761 case -EALREADY:
1762 goto out_clear;
1763 case -ECONNREFUSED:
1764 case -ECONNRESET:
1765 /* retry with existing socket, after a delay */
1766 break;
1767 default:
1768 /* get rid of existing socket, and retry */
1769 xs_tcp_shutdown(xprt);
1770 }
1771 }
1772out:
1773 xprt_wake_pending_tasks(xprt, status);
1774out_clear:
1775 xprt_clear_connecting(xprt);
1776} 1888}
1777 1889
1778/** 1890/**
@@ -1817,9 +1929,6 @@ static void xs_tcp_connect(struct rpc_task *task)
1817{ 1929{
1818 struct rpc_xprt *xprt = task->tk_xprt; 1930 struct rpc_xprt *xprt = task->tk_xprt;
1819 1931
1820 /* Initiate graceful shutdown of the socket if not already done */
1821 if (test_bit(XPRT_CONNECTED, &xprt->state))
1822 xs_tcp_shutdown(xprt);
1823 /* Exit if we need to wait for socket shutdown to complete */ 1932 /* Exit if we need to wait for socket shutdown to complete */
1824 if (test_bit(XPRT_CLOSING, &xprt->state)) 1933 if (test_bit(XPRT_CLOSING, &xprt->state))
1825 return; 1934 return;
@@ -1901,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
1901 .buf_free = rpc_free, 2010 .buf_free = rpc_free,
1902 .send_request = xs_tcp_send_request, 2011 .send_request = xs_tcp_send_request,
1903 .set_retrans_timeout = xprt_set_retrans_timeout_def, 2012 .set_retrans_timeout = xprt_set_retrans_timeout_def,
1904 .close = xs_tcp_shutdown, 2013 .close = xs_tcp_close,
1905 .destroy = xs_destroy, 2014 .destroy = xs_destroy,
1906 .print_stats = xs_tcp_print_stats, 2015 .print_stats = xs_tcp_print_stats,
1907}; 2016};
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index baac91049b0e..9dcc6e7f96ec 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -832,7 +832,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
832 * All right, let's create it. 832 * All right, let's create it.
833 */ 833 */
834 mode = S_IFSOCK | 834 mode = S_IFSOCK |
835 (SOCK_INODE(sock)->i_mode & ~current->fs->umask); 835 (SOCK_INODE(sock)->i_mode & ~current_umask());
836 err = mnt_want_write(nd.path.mnt); 836 err = mnt_want_write(nd.path.mnt);
837 if (err) 837 if (err)
838 goto out_mknod_dput; 838 goto out_mknod_dput;
diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig
index 18495cdcd10d..1b46747a5f5a 100644
--- a/net/wimax/Kconfig
+++ b/net/wimax/Kconfig
@@ -8,7 +8,7 @@
8# 8#
9# As well, enablement of the RFKILL code means we need the INPUT layer 9# As well, enablement of the RFKILL code means we need the INPUT layer
10# support to inject events coming from hw rfkill switches. That 10# support to inject events coming from hw rfkill switches. That
11# dependency could be killed if input.h provided appropiate means to 11# dependency could be killed if input.h provided appropriate means to
12# work when input is disabled. 12# work when input is disabled.
13 13
14comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled" 14comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled"
diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c
index 5d149c1b5f0d..9ad4d893a566 100644
--- a/net/wimax/op-msg.c
+++ b/net/wimax/op-msg.c
@@ -149,7 +149,8 @@ struct sk_buff *wimax_msg_alloc(struct wimax_dev *wimax_dev,
149 } 149 }
150 result = nla_put(skb, WIMAX_GNL_MSG_DATA, size, msg); 150 result = nla_put(skb, WIMAX_GNL_MSG_DATA, size, msg);
151 if (result < 0) { 151 if (result < 0) {
152 dev_err(dev, "no memory to add payload in attribute\n"); 152 dev_err(dev, "no memory to add payload (msg %p size %zu) in "
153 "attribute: %d\n", msg, size, result);
153 goto error_nla_put; 154 goto error_nla_put;
154 } 155 }
155 genlmsg_end(skb, genl_msg); 156 genlmsg_end(skb, genl_msg);
@@ -299,10 +300,10 @@ int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name,
299 struct sk_buff *skb; 300 struct sk_buff *skb;
300 301
301 skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, size, gfp_flags); 302 skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, size, gfp_flags);
302 if (skb == NULL) 303 if (IS_ERR(skb))
303 goto error_msg_new; 304 result = PTR_ERR(skb);
304 result = wimax_msg_send(wimax_dev, skb); 305 else
305error_msg_new: 306 result = wimax_msg_send(wimax_dev, skb);
306 return result; 307 return result;
307} 308}
308EXPORT_SYMBOL_GPL(wimax_msg); 309EXPORT_SYMBOL_GPL(wimax_msg);
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index a0ee76b52510..933e1422b09f 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -338,8 +338,21 @@ out:
338 */ 338 */
339void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) 339void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
340{ 340{
341 /*
342 * A driver cannot take the wimax_dev out of the
343 * __WIMAX_ST_NULL state unless by calling wimax_dev_add(). If
344 * the wimax_dev's state is still NULL, we ignore any request
 345 * to change its state because it means it hasn't yet been
 346 * registered.
347 *
348 * There is no need to complain about it, as routines that
349 * call this might be shared from different code paths that
350 * are called before or after wimax_dev_add() has done its
351 * job.
352 */
341 mutex_lock(&wimax_dev->mutex); 353 mutex_lock(&wimax_dev->mutex);
342 __wimax_state_change(wimax_dev, new_state); 354 if (wimax_dev->state > __WIMAX_ST_NULL)
355 __wimax_state_change(wimax_dev, new_state);
343 mutex_unlock(&wimax_dev->mutex); 356 mutex_unlock(&wimax_dev->mutex);
344 return; 357 return;
345} 358}
@@ -376,7 +389,7 @@ EXPORT_SYMBOL_GPL(wimax_state_get);
376void wimax_dev_init(struct wimax_dev *wimax_dev) 389void wimax_dev_init(struct wimax_dev *wimax_dev)
377{ 390{
378 INIT_LIST_HEAD(&wimax_dev->id_table_node); 391 INIT_LIST_HEAD(&wimax_dev->id_table_node);
379 __wimax_state_set(wimax_dev, WIMAX_ST_UNINITIALIZED); 392 __wimax_state_set(wimax_dev, __WIMAX_ST_NULL);
380 mutex_init(&wimax_dev->mutex); 393 mutex_init(&wimax_dev->mutex);
381 mutex_init(&wimax_dev->mutex_reset); 394 mutex_init(&wimax_dev->mutex_reset);
382} 395}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d43daa236ef9..0a592e4295f0 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -90,7 +90,7 @@ struct cfg80211_internal_bss {
90 struct rb_node rbn; 90 struct rb_node rbn;
91 unsigned long ts; 91 unsigned long ts;
92 struct kref ref; 92 struct kref ref;
93 bool hold; 93 bool hold, ies_allocated;
94 94
95 /* must be last because of priv member */ 95 /* must be last because of priv member */
96 struct cfg80211_bss pub; 96 struct cfg80211_bss pub;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 353e1a4ece83..2456e4ee445e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3334,7 +3334,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
3334 struct sk_buff *msg; 3334 struct sk_buff *msg;
3335 void *hdr; 3335 void *hdr;
3336 3336
3337 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 3337 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
3338 if (!msg) 3338 if (!msg)
3339 return; 3339 return;
3340 3340
@@ -3353,7 +3353,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
3353 return; 3353 return;
3354 } 3354 }
3355 3355
3356 genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); 3356 genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC);
3357 return; 3357 return;
3358 3358
3359 nla_put_failure: 3359 nla_put_failure:
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 6327e1617acb..487cb627ddba 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -907,6 +907,7 @@ EXPORT_SYMBOL(freq_reg_info);
907int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, 907int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth,
908 const struct ieee80211_reg_rule **reg_rule) 908 const struct ieee80211_reg_rule **reg_rule)
909{ 909{
910 assert_cfg80211_lock();
910 return freq_reg_info_regd(wiphy, center_freq, 911 return freq_reg_info_regd(wiphy, center_freq,
911 bandwidth, reg_rule, NULL); 912 bandwidth, reg_rule, NULL);
912} 913}
@@ -1133,7 +1134,8 @@ static bool reg_is_world_roaming(struct wiphy *wiphy)
1133 if (is_world_regdom(cfg80211_regdomain->alpha2) || 1134 if (is_world_regdom(cfg80211_regdomain->alpha2) ||
1134 (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) 1135 (wiphy->regd && is_world_regdom(wiphy->regd->alpha2)))
1135 return true; 1136 return true;
1136 if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && 1137 if (last_request &&
1138 last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
1137 wiphy->custom_regulatory) 1139 wiphy->custom_regulatory)
1138 return true; 1140 return true;
1139 return false; 1141 return false;
@@ -1142,6 +1144,12 @@ static bool reg_is_world_roaming(struct wiphy *wiphy)
1142/* Reap the advantages of previously found beacons */ 1144/* Reap the advantages of previously found beacons */
1143static void reg_process_beacons(struct wiphy *wiphy) 1145static void reg_process_beacons(struct wiphy *wiphy)
1144{ 1146{
1147 /*
1148 * Means we are just firing up cfg80211, so no beacons would
1149 * have been processed yet.
1150 */
1151 if (!last_request)
1152 return;
1145 if (!reg_is_world_roaming(wiphy)) 1153 if (!reg_is_world_roaming(wiphy))
1146 return; 1154 return;
1147 wiphy_update_beacon_reg(wiphy); 1155 wiphy_update_beacon_reg(wiphy);
@@ -1176,6 +1184,8 @@ static void handle_channel_custom(struct wiphy *wiphy,
1176 struct ieee80211_supported_band *sband; 1184 struct ieee80211_supported_band *sband;
1177 struct ieee80211_channel *chan; 1185 struct ieee80211_channel *chan;
1178 1186
1187 assert_cfg80211_lock();
1188
1179 sband = wiphy->bands[band]; 1189 sband = wiphy->bands[band];
1180 BUG_ON(chan_idx >= sband->n_channels); 1190 BUG_ON(chan_idx >= sband->n_channels);
1181 chan = &sband->channels[chan_idx]; 1191 chan = &sband->channels[chan_idx];
@@ -1214,10 +1224,13 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy,
1214 const struct ieee80211_regdomain *regd) 1224 const struct ieee80211_regdomain *regd)
1215{ 1225{
1216 enum ieee80211_band band; 1226 enum ieee80211_band band;
1227
1228 mutex_lock(&cfg80211_mutex);
1217 for (band = 0; band < IEEE80211_NUM_BANDS; band++) { 1229 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
1218 if (wiphy->bands[band]) 1230 if (wiphy->bands[band])
1219 handle_band_custom(wiphy, band, regd); 1231 handle_band_custom(wiphy, band, regd);
1220 } 1232 }
1233 mutex_unlock(&cfg80211_mutex);
1221} 1234}
1222EXPORT_SYMBOL(wiphy_apply_custom_regulatory); 1235EXPORT_SYMBOL(wiphy_apply_custom_regulatory);
1223 1236
@@ -1423,7 +1436,7 @@ new_request:
1423 return call_crda(last_request->alpha2); 1436 return call_crda(last_request->alpha2);
1424} 1437}
1425 1438
1426/* This currently only processes user and driver regulatory hints */ 1439/* This processes *all* regulatory hints */
1427static void reg_process_hint(struct regulatory_request *reg_request) 1440static void reg_process_hint(struct regulatory_request *reg_request)
1428{ 1441{
1429 int r = 0; 1442 int r = 0;
@@ -1538,6 +1551,13 @@ static int regulatory_hint_core(const char *alpha2)
1538 1551
1539 queue_regulatory_request(request); 1552 queue_regulatory_request(request);
1540 1553
1554 /*
1555 * This ensures last_request is populated once modules
1556 * come swinging in and calling regulatory hints and
1557 * wiphy_apply_custom_regulatory().
1558 */
1559 flush_scheduled_work();
1560
1541 return 0; 1561 return 0;
1542} 1562}
1543 1563
@@ -2095,11 +2115,12 @@ int set_regdom(const struct ieee80211_regdomain *rd)
2095/* Caller must hold cfg80211_mutex */ 2115/* Caller must hold cfg80211_mutex */
2096void reg_device_remove(struct wiphy *wiphy) 2116void reg_device_remove(struct wiphy *wiphy)
2097{ 2117{
2098 struct wiphy *request_wiphy; 2118 struct wiphy *request_wiphy = NULL;
2099 2119
2100 assert_cfg80211_lock(); 2120 assert_cfg80211_lock();
2101 2121
2102 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); 2122 if (last_request)
2123 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
2103 2124
2104 kfree(wiphy->regd); 2125 kfree(wiphy->regd);
2105 if (!last_request || !request_wiphy) 2126 if (!last_request || !request_wiphy)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 2a00e362f5fe..1f260c40b6ca 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -58,6 +58,10 @@ static void bss_release(struct kref *ref)
58 bss = container_of(ref, struct cfg80211_internal_bss, ref); 58 bss = container_of(ref, struct cfg80211_internal_bss, ref);
59 if (bss->pub.free_priv) 59 if (bss->pub.free_priv)
60 bss->pub.free_priv(&bss->pub); 60 bss->pub.free_priv(&bss->pub);
61
62 if (bss->ies_allocated)
63 kfree(bss->pub.information_elements);
64
61 kfree(bss); 65 kfree(bss);
62} 66}
63 67
@@ -360,19 +364,42 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
360 364
361 found = rb_find_bss(dev, res); 365 found = rb_find_bss(dev, res);
362 366
363 if (found && overwrite) { 367 if (found) {
364 list_replace(&found->list, &res->list);
365 rb_replace_node(&found->rbn, &res->rbn,
366 &dev->bss_tree);
367 kref_put(&found->ref, bss_release);
368 found = res;
369 } else if (found) {
370 kref_get(&found->ref); 368 kref_get(&found->ref);
371 found->pub.beacon_interval = res->pub.beacon_interval; 369 found->pub.beacon_interval = res->pub.beacon_interval;
372 found->pub.tsf = res->pub.tsf; 370 found->pub.tsf = res->pub.tsf;
373 found->pub.signal = res->pub.signal; 371 found->pub.signal = res->pub.signal;
374 found->pub.capability = res->pub.capability; 372 found->pub.capability = res->pub.capability;
375 found->ts = res->ts; 373 found->ts = res->ts;
374
375 /* overwrite IEs */
376 if (overwrite) {
377 size_t used = dev->wiphy.bss_priv_size + sizeof(*res);
378 size_t ielen = res->pub.len_information_elements;
379
380 if (ksize(found) >= used + ielen) {
381 memcpy(found->pub.information_elements,
382 res->pub.information_elements, ielen);
383 found->pub.len_information_elements = ielen;
384 } else {
385 u8 *ies = found->pub.information_elements;
386
387 if (found->ies_allocated) {
388 if (ksize(ies) < ielen)
389 ies = krealloc(ies, ielen,
390 GFP_ATOMIC);
391 } else
392 ies = kmalloc(ielen, GFP_ATOMIC);
393
394 if (ies) {
395 memcpy(ies, res->pub.information_elements, ielen);
396 found->ies_allocated = true;
397 found->pub.information_elements = ies;
398 found->pub.len_information_elements = ielen;
399 }
400 }
401 }
402
376 kref_put(&res->ref, bss_release); 403 kref_put(&res->ref, bss_release);
377 } else { 404 } else {
378 /* this "consumes" the reference */ 405 /* this "consumes" the reference */
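Instead of splicing a whole replacement entry into the rbtree, cfg80211_bss_update() now refreshes the cached IEs in place: reuse the allocation slack that ksize() reports, otherwise take a separate krealloc()/kmalloc() buffer and set ies_allocated so bss_release() knows to kfree() it. A userspace analogue of the reuse-or-grow decision, tracking capacity explicitly since ksize() has no portable equivalent (the field names are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {
	char *ies;
	size_t len, cap;	/* cap plays the role of ksize(found) */
};

/* Overwrite the cached IEs, reusing the buffer when it is big enough. */
static int update_ies(struct entry *e, const char *ies, size_t len)
{
	if (len > e->cap) {
		char *p = realloc(e->ies, len);	/* grow; old kept on failure */

		if (!p)
			return -1;		/* keep the stale IEs instead */
		e->ies = p;
		e->cap = len;
	}
	memcpy(e->ies, ies, len);
	e->len = len;
	return 0;
}

int main(void)
{
	struct entry e = { 0 };

	update_ies(&e, "ssid=a", 6);
	update_ies(&e, "ssid=longer", 11);	/* forces the grow path */
	printf("%.*s\n", (int)e.len, e.ies);
	free(e.ies);
	return 0;
}
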
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index cb6a5bb85d80..0e59f9ae9b81 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -786,6 +786,13 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
786 err = -EFAULT; 786 err = -EFAULT;
787 goto out; 787 goto out;
788 } 788 }
789
790 if (cmd == SIOCSIWENCODEEXT) {
791 struct iw_encode_ext *ee = (void *) extra;
792
793 if (iwp->length < sizeof(*ee) + ee->key_len)
794 return -EFAULT;
795 }
789 } 796 }
790 797
791 err = handler(dev, info, (union iwreq_data *) iwp, extra); 798 err = handler(dev, info, (union iwreq_data *) iwp, extra);
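The wext.c check rejects a SIOCSIWENCODEEXT request whose declared iw_point length cannot hold the iw_encode_ext header plus the key_len bytes it claims, so a handler can no longer be steered into reading past the copied-in buffer. The underlying pattern, validating a user-supplied length against the structure it describes before trusting it, in self-contained form (the structure below is a hypothetical stand-in, not the real iw_encode_ext):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-in for struct iw_encode_ext. */
struct encode_ext {
	uint16_t key_len;	/* key_len bytes of key follow the header */
};

/* Reject buffers whose claimed key does not fit the supplied length. */
static int check(const struct encode_ext *ee, size_t length)
{
	if (length < sizeof(*ee) || length - sizeof(*ee) < ee->key_len)
		return -1;	/* length lies about the payload: -EFAULT */
	return 0;
}

int main(void)
{
	uint16_t raw[16] = { 0 };
	struct encode_ext *ee = (struct encode_ext *)raw;

	ee->key_len = 8;
	printf("%d\n", check(ee, sizeof(*ee) + 8));	/* 0: key fits   */
	ee->key_len = 200;
	printf("%d\n", check(ee, sizeof(raw)));		/* -1: rejected  */
	return 0;
}
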
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 82271720d970..5f1f86565f16 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -794,7 +794,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
794{ 794{
795 static xfrm_address_t saddr_wildcard = { }; 795 static xfrm_address_t saddr_wildcard = { };
796 struct net *net = xp_net(pol); 796 struct net *net = xp_net(pol);
797 unsigned int h; 797 unsigned int h, h_wildcard;
798 struct hlist_node *entry; 798 struct hlist_node *entry;
799 struct xfrm_state *x, *x0, *to_put; 799 struct xfrm_state *x, *x0, *to_put;
800 int acquire_in_progress = 0; 800 int acquire_in_progress = 0;
@@ -819,8 +819,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
819 if (best) 819 if (best)
820 goto found; 820 goto found;
821 821
822 h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); 822 h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family);
823 hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { 823 hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) {
824 if (x->props.family == family && 824 if (x->props.family == family &&
825 x->props.reqid == tmpl->reqid && 825 x->props.reqid == tmpl->reqid &&
826 !(x->props.flags & XFRM_STATE_WILDRECV) && 826 !(x->props.flags & XFRM_STATE_WILDRECV) &&