author		David S. Miller <davem@davemloft.net>	2008-06-10 05:22:26 -0400
committer	David S. Miller <davem@davemloft.net>	2008-06-10 05:22:26 -0400
commit		65b53e4cc90e59936733b3b95b9451d2ca47528d
tree		29932718192962671c48c3fd1ea017a6112459e8 /net
parent		788c0a53164c05c5ccdb1472474372b72ba74644
parent		2e761e0532a784816e7e822dbaaece8c5d4be14d
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Conflicts:
	drivers/net/tg3.c
	drivers/net/wireless/rt2x00/rt2x00dev.c
	net/mac80211/ieee80211_i.h
Diffstat (limited to 'net')
-rw-r--r--	net/ax25/ax25_subr.c	11
-rw-r--r--	net/bluetooth/rfcomm/core.c	2
-rw-r--r--	net/bluetooth/rfcomm/tty.c	13
-rw-r--r--	net/core/neighbour.c	9
-rw-r--r--	net/core/rtnetlink.c	3
-rw-r--r--	net/core/skbuff.c	5
-rw-r--r--	net/core/user_dma.c	2
-rw-r--r--	net/dccp/ccids/ccid3.c	13
-rw-r--r--	net/dccp/ipv4.c	4
-rw-r--r--	net/ipv4/devinet.c	9
-rw-r--r--	net/ipv4/fib_frontend.c	1
-rw-r--r--	net/ipv4/raw.c	9
-rw-r--r--	net/ipv4/route.c	2
-rw-r--r--	net/ipv4/tcp.c	9
-rw-r--r--	net/ipv4/tcp_input.c	35
-rw-r--r--	net/ipv4/tcp_output.c	2
-rw-r--r--	net/ipv4/tunnel4.c	2
-rw-r--r--	net/ipv4/udp.c	3
-rw-r--r--	net/ipv6/addrconf.c	107
-rw-r--r--	net/ipv6/af_inet6.c	2
-rw-r--r--	net/ipv6/datagram.c	45
-rw-r--r--	net/ipv6/ip6_flowlabel.c	2
-rw-r--r--	net/ipv6/ipv6_sockglue.c	21
-rw-r--r--	net/ipv6/netfilter/nf_conntrack_reasm.c	8
-rw-r--r--	net/ipv6/raw.c	11
-rw-r--r--	net/ipv6/route.c	12
-rw-r--r--	net/ipv6/tunnel6.c	2
-rw-r--r--	net/ipv6/udp.c	8
-rw-r--r--	net/irda/af_irda.c	12
-rw-r--r--	net/llc/llc_sap.c	10
-rw-r--r--	net/mac80211/cfg.c	4
-rw-r--r--	net/mac80211/ieee80211_i.h	24
-rw-r--r--	net/mac80211/main.c	3
-rw-r--r--	net/mac80211/mlme.c	45
-rw-r--r--	net/mac80211/rx.c	4
-rw-r--r--	net/mac80211/util.c	4
-rw-r--r--	net/mac80211/wext.c	26
-rw-r--r--	net/netfilter/nf_conntrack_expect.c	4
-rw-r--r--	net/netfilter/xt_connlimit.c	3
-rw-r--r--	net/netlink/attr.c	12
-rw-r--r--	net/netlink/genetlink.c	6
-rw-r--r--	net/sched/sch_dsmark.c	6
-rw-r--r--	net/sched/sch_gred.c	3
-rw-r--r--	net/sched/sch_hfsc.c	2
-rw-r--r--	net/sched/sch_red.c	3
-rw-r--r--	net/sctp/associola.c	21
-rw-r--r--	net/sctp/ipv6.c	11
-rw-r--r--	net/sctp/output.c	2
-rw-r--r--	net/sctp/outqueue.c	120
-rw-r--r--	net/sctp/protocol.c	11
-rw-r--r--	net/sctp/transport.c	50
-rw-r--r--	net/sunrpc/auth_generic.c	8
-rw-r--r--	net/sunrpc/svc_xprt.c	23
-rw-r--r--	net/sunrpc/svcauth_unix.c	4
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	102
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_sendto.c	11
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_transport.c	290
-rw-r--r--	net/wireless/nl80211.c	12
-rw-r--r--	net/xfrm/xfrm_algo.c	4
59 files changed, 688 insertions, 504 deletions
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index d8f215733175..034aa10a5198 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -64,20 +64,15 @@ void ax25_frames_acked(ax25_cb *ax25, unsigned short nr)
 
 void ax25_requeue_frames(ax25_cb *ax25)
 {
-	struct sk_buff *skb, *skb_prev = NULL;
+	struct sk_buff *skb;
 
 	/*
 	 * Requeue all the un-ack-ed frames on the output queue to be picked
 	 * up by ax25_kick called from the timer. This arrangement handles the
 	 * possibility of an empty output queue.
 	 */
-	while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) {
-		if (skb_prev == NULL)
-			skb_queue_head(&ax25->write_queue, skb);
-		else
-			skb_append(skb_prev, skb, &ax25->write_queue);
-		skb_prev = skb;
-	}
+	while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL)
+		skb_queue_head(&ax25->write_queue, skb);
 }
 
 /*
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index eb62558e9b09..0c2c93735e93 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -423,8 +423,8 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
 
 	rfcomm_dlc_lock(d);
 	d->state = BT_CLOSED;
-	rfcomm_dlc_unlock(d);
 	d->state_change(d, err);
+	rfcomm_dlc_unlock(d);
 
 	skb_queue_purge(&d->tx_queue);
 	rfcomm_dlc_unlink(d);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index c3f749abb2d0..c9191871c1e0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -566,11 +566,22 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
 	if (dlc->state == BT_CLOSED) {
 		if (!dev->tty) {
 			if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
-				if (rfcomm_dev_get(dev->id) == NULL)
+				/* Drop DLC lock here to avoid deadlock
+				 * 1. rfcomm_dev_get will take rfcomm_dev_lock
+				 * but in rfcomm_dev_add there's lock order:
+				 * rfcomm_dev_lock -> dlc lock
+				 * 2. rfcomm_dev_put will deadlock if it's
+				 * the last reference
+				 */
+				rfcomm_dlc_unlock(dlc);
+				if (rfcomm_dev_get(dev->id) == NULL) {
+					rfcomm_dlc_lock(dlc);
 					return;
+				}
 
 				rfcomm_dev_del(dev);
 				rfcomm_dev_put(dev);
+				rfcomm_dlc_lock(dlc);
 			}
 		} else
 			tty_hangup(dev->tty);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5d9d7130bd6e..65f01f71b3f3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1714,7 +1714,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 	return nla_nest_end(skb, nest);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, nest);
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
 }
 
 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
@@ -2057,9 +2058,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 		goto nla_put_failure;
 	}
 
-	ci.ndm_used	 = now - neigh->used;
-	ci.ndm_confirmed = now - neigh->confirmed;
-	ci.ndm_updated	 = now - neigh->updated;
+	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
+	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
+	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
 	read_unlock_bh(&neigh->lock);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ca32ddb8ad17..6c8d7f0ea01a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 	return nla_nest_end(skb, mx);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, mx);
+	nla_nest_cancel(skb, mx);
+	return -EMSGSIZE;
 }
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5c459f2b7985..1e556d312117 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1445,6 +1445,7 @@ done:
 
 	if (spd.nr_pages) {
 		int ret;
+		struct sock *sk = __skb->sk;
 
 		/*
 		 * Drop the socket lock, otherwise we have reverse
@@ -1455,9 +1456,9 @@ done:
 		 * we call into ->sendpage() with the i_mutex lock held
 		 * and networking will grab the socket lock.
 		 */
-		release_sock(__skb->sk);
+		release_sock(sk);
 		ret = splice_to_pipe(pipe, &spd);
-		lock_sock(__skb->sk);
+		lock_sock(sk);
 		return ret;
 	}
 
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 0ad1cd57bc39..c77aff9c6eb3 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -75,7 +75,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		copy = end - offset;
-		if ((copy = end - offset) > 0) {
+		if (copy > 0) {
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 			struct page *page = frag->page;
 
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index cd61dea2eea1..f813077234b7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -193,22 +193,17 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 
 /*
  * Update Window Counter using the algorithm from [RFC 4342, 8.1].
- * The algorithm is not applicable if RTT < 4 microseconds.
+ * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt().
  */
 static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
 						ktime_t now)
 {
-	u32 quarter_rtts;
-
-	if (unlikely(hctx->ccid3hctx_rtt < 4))	/* avoid divide-by-zero */
-		return;
-
-	quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
-	quarter_rtts /= hctx->ccid3hctx_rtt / 4;
+	u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count),
+	    quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt;
 
 	if (quarter_rtts > 0) {
 		hctx->ccid3hctx_t_last_win_count = now;
-		hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5);
+		hctx->ccid3hctx_last_win_count += min(quarter_rtts, 5U);
 		hctx->ccid3hctx_last_win_count &= 0xF;		/* mod 16 */
 	}
 }
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b348dd70c685..c22a3780c14e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -739,8 +739,8 @@ int dccp_invalid_packet(struct sk_buff *skb)
 	 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
 	 * has short sequence numbers), drop packet and return
 	 */
-	if (dh->dccph_type >= DCCP_PKT_DATA &&
-	    dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0)  {
+	if ((dh->dccph_type < DCCP_PKT_DATA ||
+	    dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0)  {
 		DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n",
 			  dccp_packet_name(dh->dccph_type));
 		return 1;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6848e4760f34..79a7ef6209ff 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LOCAL]     	= { .type = NLA_U32 },
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
-	[IFA_ANYCAST]   	= { .type = NLA_U32 },
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
 
@@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	if (tb[IFA_BROADCAST])
 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 
-	if (tb[IFA_ANYCAST])
-		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
-
 	if (tb[IFA_LABEL])
 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 	else
@@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			break;
 		inet_del_ifa(in_dev, ifap, 0);
 		ifa->ifa_broadcast = 0;
-		ifa->ifa_anycast = 0;
 		ifa->ifa_scope = 0;
 	}
 
@@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_ADDRESS */
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
-	       + nla_total_size(4) /* IFA_ANYCAST */
 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
 }
 
@@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	if (ifa->ifa_broadcast)
 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
 
-	if (ifa->ifa_anycast)
-		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
-
 	if (ifa->ifa_label[0])
 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0f1557a4ac7a..0b2ac6a3d903 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 	[RTA_PREFSRC]		= { .type = NLA_U32 },
 	[RTA_METRICS]		= { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
-	[RTA_PROTOINFO]		= { .type = NLA_U32 },
 	[RTA_FLOW]		= { .type = NLA_U32 },
 };
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index fead049daf43..e7e091d365ff 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -608,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 }
 
+static int raw_destroy(struct sock *sk)
+{
+	lock_sock(sk);
+	ip_flush_pending_frames(sk);
+	release_sock(sk);
+	return 0;
+}
+
 /* This gets rid of all the nasties in af_inet. -DaveM */
 static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -820,6 +828,7 @@ struct proto raw_prot = {
 	.name		   = "RAW",
 	.owner		   = THIS_MODULE,
 	.close		   = raw_close,
+	.destroy	   = raw_destroy,
 	.connect	   = ip4_datagram_connect,
 	.disconnect	   = udp_disconnect,
 	.ioctl		   = raw_ioctl,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df41026b60db..96be336064fb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	if (err)
 		flags |= RTCF_DIRECTSRC;
 
-	if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
+	if (out_dev == in_dev && err &&
 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
 	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
 		flags |= RTCF_DOREDIRECT;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f88653138621..ab66683b8043 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1227,7 +1227,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			copied += used;
 			offset += used;
 		}
-		if (offset != skb->len)
+		/*
+		 * If recv_actor drops the lock (e.g. TCP splice
+		 * receive) the skb pointer might be invalid when
+		 * getting here: tcp_collapse might have deleted it
+		 * while aggregating skbs from the socket queue.
+		 */
+		skb = tcp_recv_skb(sk, seq-1, &offset);
+		if (!skb || (offset+1 != skb->len))
 			break;
 	}
 	if (tcp_hdr(skb)->fin) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b54d9d37b636..eba873e9b560 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 
 	if (before(next_dup->start_seq, skip_to_seq)) {
 		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
-		tcp_sacktag_walk(skb, sk, NULL,
-				 next_dup->start_seq, next_dup->end_seq,
-				 1, fack_count, reord, flag);
+		skb = tcp_sacktag_walk(skb, sk, NULL,
+				       next_dup->start_seq, next_dup->end_seq,
+				       1, fack_count, reord, flag);
 	}
 
 	return skb;
@@ -2483,6 +2483,20 @@ static inline void tcp_complete_cwr(struct sock *sk)
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
+static void tcp_try_keep_open(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int state = TCP_CA_Open;
+
+	if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
+		state = TCP_CA_Disorder;
+
+	if (inet_csk(sk)->icsk_ca_state != state) {
+		tcp_set_ca_state(sk, state);
+		tp->high_seq = tp->snd_nxt;
+	}
+}
+
 static void tcp_try_to_open(struct sock *sk, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2496,15 +2510,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		tcp_enter_cwr(sk, 1);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
-		int state = TCP_CA_Open;
-
-		if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
-			state = TCP_CA_Disorder;
-
-		if (inet_csk(sk)->icsk_ca_state != state) {
-			tcp_set_ca_state(sk, state);
-			tp->high_seq = tp->snd_nxt;
-		}
+		tcp_try_keep_open(sk);
 		tcp_moderate_cwnd(tp);
 	} else {
 		tcp_cwnd_down(sk, flag);
@@ -3310,8 +3316,11 @@ no_queue:
 	return 1;
 
 old_ack:
-	if (TCP_SKB_CB(skb)->sacked)
+	if (TCP_SKB_CB(skb)->sacked) {
 		tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+		if (icsk->icsk_ca_state == TCP_CA_Open)
+			tcp_try_keep_open(sk);
+	}
 
 uninteresting_ack:
 	SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e399bde7813a..ad993ecb4810 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2131,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+
+	TCP_INC_STATS(TCP_MIB_OUTRSTS);
 }
 
 /* WARNING: This routine must only be called when we have already sent
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index d3b709a6f264..cb1f0e83830b 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
 {
 	struct xfrm_tunnel *handler;
 
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto drop;
 
 	for (handler = tunnel64_handlers; handler; handler = handler->next)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d63..56fcda3694ba 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
 /*
  * Throw away all pending data and cancel the corking. Socket is locked.
  */
-static void udp_flush_pending_frames(struct sock *sk)
+void udp_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
 		ip_flush_pending_frames(sk);
 	}
 }
+EXPORT_SYMBOL(udp_flush_pending_frames);
 
 /**
  * 	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3a835578fd1c..147588f4c7c0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 				onlink = -1;
 
 				spin_lock(&ifa->lock);
-				lifetime = min_t(unsigned long,
-						 ifa->valid_lft, 0x7fffffffUL/HZ);
+
+				lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
+				/*
+				 * Note: Because this address is
+				 * not permanent, lifetime <
+				 * LONG_MAX / HZ here.
+				 */
 				if (time_before(expires,
 						ifa->tstamp + lifetime * HZ))
 					expires = ifa->tstamp + lifetime * HZ;
@@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	__u32 valid_lft;
 	__u32 prefered_lft;
 	int addr_type;
-	unsigned long rt_expires;
 	struct inet6_dev *in6_dev;
 
 	pinfo = (struct prefix_info *) opt;
@@ -1764,28 +1768,23 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	 *	2) Configure prefixes with the auto flag set
 	 */
 
-	if (valid_lft == INFINITY_LIFE_TIME)
-		rt_expires = ~0UL;
-	else if (valid_lft >= 0x7FFFFFFF/HZ) {
+	if (pinfo->onlink) {
+		struct rt6_info *rt;
+		unsigned long rt_expires;
+
 		/* Avoid arithmetic overflow. Really, we could
 		 * save rt_expires in seconds, likely valid_lft,
 		 * but it would require division in fib gc, that it
 		 * not good.
 		 */
-		rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
-	} else
-		rt_expires = valid_lft * HZ;
+		if (HZ > USER_HZ)
+			rt_expires = addrconf_timeout_fixup(valid_lft, HZ);
+		else
+			rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ);
 
-	/*
-	 * We convert this (in jiffies) to clock_t later.
-	 * Avoid arithmetic overflow there as well.
-	 * Overflow can happen only if HZ < USER_HZ.
-	 */
-	if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ)
-		rt_expires = 0x7FFFFFFF / USER_HZ;
+		if (addrconf_finite_timeout(rt_expires))
+			rt_expires *= HZ;
 
-	if (pinfo->onlink) {
-		struct rt6_info *rt;
 		rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
 				dev->ifindex, 1);
 
@@ -1794,7 +1793,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 			if (valid_lft == 0) {
 				ip6_del_rt(rt);
 				rt = NULL;
-			} else if (~rt_expires) {
+			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
 				rt->rt6i_expires = jiffies + rt_expires;
 				rt->rt6i_flags |= RTF_EXPIRES;
@@ -1803,9 +1802,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 			rt->rt6i_expires = 0;
 		}
 	} else if (valid_lft) {
-		int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
 		clock_t expires = 0;
-		if (~rt_expires) {
+		int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+		if (addrconf_finite_timeout(rt_expires)) {
 			/* not infinity */
 			flags |= RTF_EXPIRES;
 			expires = jiffies_to_clock_t(rt_expires);
@@ -2027,7 +2026,7 @@ err_exit:
  *	Manual configuration of address on an interface
  */
 static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
-			  int plen, __u8 ifa_flags, __u32 prefered_lft,
+			  unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
 			  __u32 valid_lft)
 {
 	struct inet6_ifaddr *ifp;
@@ -2036,9 +2035,13 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 	int scope;
 	u32 flags;
 	clock_t expires;
+	unsigned long timeout;
 
 	ASSERT_RTNL();
 
+	if (plen > 128)
+		return -EINVAL;
+
 	/* check the lifetime */
 	if (!valid_lft || prefered_lft > valid_lft)
 		return -EINVAL;
@@ -2052,22 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 
 	scope = ipv6_addr_scope(pfx);
 
-	if (valid_lft == INFINITY_LIFE_TIME) {
-		ifa_flags |= IFA_F_PERMANENT;
-		flags = 0;
-		expires = 0;
-	} else {
-		if (valid_lft >= 0x7FFFFFFF/HZ)
-			valid_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
 		flags = RTF_EXPIRES;
-		expires = jiffies_to_clock_t(valid_lft * HZ);
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	if (prefered_lft == 0)
-		ifa_flags |= IFA_F_DEPRECATED;
-	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-		 (prefered_lft != INFINITY_LIFE_TIME))
-		prefered_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
 
 	ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
 
@@ -2095,12 +2099,15 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 }
 
 static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
-			  int plen)
+			  unsigned int plen)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
 
+	if (plen > 128)
+		return -EINVAL;
+
 	dev = __dev_get_by_index(net, ifindex);
 	if (!dev)
 		return -ENODEV;
@@ -3169,26 +3176,28 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 {
 	u32 flags;
 	clock_t expires;
+	unsigned long timeout;
 
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
-	if (valid_lft == INFINITY_LIFE_TIME) {
-		ifa_flags |= IFA_F_PERMANENT;
-		flags = 0;
-		expires = 0;
-	} else {
-		if (valid_lft >= 0x7FFFFFFF/HZ)
-			valid_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
 		flags = RTF_EXPIRES;
-		expires = jiffies_to_clock_t(valid_lft * HZ);
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	if (prefered_lft == 0)
-		ifa_flags |= IFA_F_DEPRECATED;
-	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-		 (prefered_lft != INFINITY_LIFE_TIME))
-		prefered_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
 
 	spin_lock_bh(&ifp->lock);
 	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3c6aafb02183..e84b3fd17fb4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -191,7 +191,7 @@ lookup_protocol:
 	np->mcast_hops	= -1;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->ipv6only	= init_net.ipv6.sysctl.bindv6only;
+	np->ipv6only	= net->ipv6.sysctl.bindv6only;
 
 	/* Init the ipv4 part of the socket since we can have sockets
 	 * using v6 API for ipv4.
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 94fa6ae77cfe..b9c2de84a8a2 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	return 0;
 }
 
-int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
+int datagram_send_ctl(struct net *net,
+		      struct msghdr *msg, struct flowi *fl,
 		      struct ipv6_txoptions *opt,
 		      int *hlimit, int *tclass)
 {
@@ -509,7 +510,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
 		int addr_type;
-		struct net_device *dev = NULL;
 
 		if (!CMSG_OK(msg, cmsg)) {
 			err = -EINVAL;
@@ -522,6 +522,9 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 		switch (cmsg->cmsg_type) {
 		case IPV6_PKTINFO:
 		case IPV6_2292PKTINFO:
+		{
+			struct net_device *dev = NULL;
+
 			if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
 				err = -EINVAL;
 				goto exit_f;
@@ -535,32 +538,32 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 				fl->oif = src_info->ipi6_ifindex;
 			}
 
-			addr_type = ipv6_addr_type(&src_info->ipi6_addr);
+			addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
 
-			if (addr_type == IPV6_ADDR_ANY)
-				break;
+			if (fl->oif) {
+				dev = dev_get_by_index(net, fl->oif);
+				if (!dev)
+					return -ENODEV;
+			} else if (addr_type & IPV6_ADDR_LINKLOCAL)
+				return -EINVAL;
 
-			if (addr_type & IPV6_ADDR_LINKLOCAL) {
-				if (!src_info->ipi6_ifindex)
-					return -EINVAL;
-				else {
-					dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex);
-					if (!dev)
-						return -ENODEV;
-				}
-			}
-			if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr,
-					   dev, 0)) {
-				if (dev)
-					dev_put(dev);
-				err = -EINVAL;
-				goto exit_f;
+			if (addr_type != IPV6_ADDR_ANY) {
+				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
+				if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
+						   strict ? dev : NULL, 0))
+					err = -EINVAL;
+				else
+					ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
 			}
+
 			if (dev)
 				dev_put(dev);
 
-			ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
+			if (err)
+				goto exit_f;
+
 			break;
+		}
 
 		case IPV6_FLOWINFO:
 			if (cmsg->cmsg_len < CMSG_LEN(4)) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index eb7a940310f4..37a4e777e347 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 		msg.msg_control = (void*)(fl->opt+1);
 		flowi.oif = 0;
 
-		err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
+		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 56d55fecf8ec..26b83e512a09 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -161,9 +161,17 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		struct ipv6_txoptions *opt;
 		struct sk_buff *pktopt;
 
-		if (sk->sk_protocol != IPPROTO_UDP &&
-		    sk->sk_protocol != IPPROTO_UDPLITE &&
-		    sk->sk_protocol != IPPROTO_TCP)
+		if (sk->sk_type == SOCK_RAW)
+			break;
+
+		if (sk->sk_protocol == IPPROTO_UDP ||
+		    sk->sk_protocol == IPPROTO_UDPLITE) {
+			struct udp_sock *up = udp_sk(sk);
+			if (up->pending == AF_INET6) {
+				retv = -EBUSY;
+				break;
+			}
+		} else if (sk->sk_protocol != IPPROTO_TCP)
 			break;
 
 		if (sk->sk_state != TCP_ESTABLISHED) {
@@ -416,7 +424,7 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk);
+		retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk);
 		if (retv)
 			goto done;
 update:
@@ -832,7 +840,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
 	len = min_t(unsigned int, len, ipv6_optlen(hdr));
 	if (copy_to_user(optval, hdr, len))
 		return -EFAULT;
-	return ipv6_optlen(hdr);
+	return len;
 }
 
 static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
@@ -975,6 +983,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		len = ipv6_getsockopt_sticky(sk, np->opt,
 					     optname, optval, len);
 		release_sock(sk);
+		/* check if ipv6_getsockopt_sticky() returns err code */
+		if (len < 0)
+			return len;
 		return put_user(len, optlen);
 	}
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 2dccad48058c..e65e26e210ee 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -209,7 +209,9 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 	arg.dst = dst;
 	hash = ip6qhashfn(id, src, dst);
 
+	local_bh_disable();
 	q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
+	local_bh_enable();
 	if (q == NULL)
 		goto oom;
 
@@ -638,10 +640,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	spin_lock(&fq->q.lock);
+	spin_lock_bh(&fq->q.lock);
 
 	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
-		spin_unlock(&fq->q.lock);
+		spin_unlock_bh(&fq->q.lock);
 		pr_debug("Can't insert skb to queue\n");
 		fq_put(fq);
 		goto ret_orig;
@@ -653,7 +655,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		if (ret_skb == NULL)
 			pr_debug("Can't reassemble fragmented packets\n");
 	}
-	spin_unlock(&fq->q.lock);
+	spin_unlock_bh(&fq->q.lock);
 
 	fq_put(fq);
 	return ret_skb;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 232e0dc45bf5..8fee9a15b2d3 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -813,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -1164,6 +1164,14 @@ static void rawv6_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 }
 
+static int raw6_destroy(struct sock *sk)
+{
+	lock_sock(sk);
+	ip6_flush_pending_frames(sk);
+	release_sock(sk);
+	return 0;
+}
+
 static int rawv6_init_sk(struct sock *sk)
 {
 	struct raw6_sock *rp = raw6_sk(sk);
@@ -1187,6 +1195,7 @@ struct proto rawv6_prot = {
 	.name		   = "RAWv6",
 	.owner		   = THIS_MODULE,
 	.close		   = rawv6_close,
+	.destroy	   = raw6_destroy,
 	.connect	   = ip6_datagram_connect,
 	.disconnect	   = udp_disconnect,
 	.ioctl		   = rawv6_ioctl,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 48534c6c0735..220cffe9e63b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	struct route_info *rinfo = (struct route_info *) opt;
 	struct in6_addr prefix_buf, *prefix;
 	unsigned int pref;
-	u32 lifetime;
+	unsigned long lifetime;
 	struct rt6_info *rt;
 
 	if (len < sizeof(struct route_info)) {
@@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 
-	lifetime = ntohl(rinfo->lifetime);
-	if (lifetime == 0xffffffff) {
-		/* infinity */
-	} else if (lifetime > 0x7fffffff/HZ - 1) {
-		/* Avoid arithmetic overflow */
-		lifetime = 0x7fffffff/HZ - 1;
-	}
+	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 
 	if (rinfo->length == 3)
 		prefix = (struct in6_addr *)rinfo->prefix;
@@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 			(rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 
 	if (rt) {
-		if (lifetime == 0xffffffff) {
+		if (!addrconf_finite_timeout(lifetime)) {
 			rt->rt6i_flags &= ~RTF_EXPIRES;
 		} else {
 			rt->rt6i_expires = jiffies + HZ * lifetime;
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 6323921b40be..669f280989c3 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -109,7 +109,7 @@ static int tunnel46_rcv(struct sk_buff *skb)
 {
 	struct xfrm6_tunnel *handler;
 
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto drop;
 
 	for (handler = tunnel46_handlers; handler; handler = handler->next)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1fd784f3e2ec..dd309626ae9a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
-	if (up->pending) {
+	if (up->pending == AF_INET)
+		udp_flush_pending_frames(sk);
+	else if (up->pending) {
 		up->len = 0;
 		up->pending = 0;
 		ip6_flush_pending_frames(sk);
@@ -731,7 +733,7 @@ do_udp_sendmsg:
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -848,12 +850,14 @@ do_append_data:
 		} else {
 			dst_release(dst);
 		}
+		dst = NULL;
 	}
 
 	if (err > 0)
 		err = np->recverr ? net_xmit_errno(err) : 0;
 	release_sock(sk);
 out:
+	dst_release(dst);
 	fl6_sock_release(flowlabel);
 	if (!err)
 		return len;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index ae54b20d0470..3eb5bcc75f99 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1093,11 +1093,6 @@ static int irda_create(struct net *net, struct socket *sock, int protocol)
 
 	init_waitqueue_head(&self->query_wait);
 
-	/* Initialise networking socket struct */
-	sock_init_data(sock, sk);	/* Note : set sk->sk_refcnt to 1 */
-	sk->sk_family = PF_IRDA;
-	sk->sk_protocol = protocol;
-
 	switch (sock->type) {
 	case SOCK_STREAM:
 		sock->ops = &irda_stream_ops;
@@ -1124,13 +1119,20 @@ static int irda_create(struct net *net, struct socket *sock, int protocol)
 			self->max_sdu_size_rx = TTP_SAR_UNBOUND;
 			break;
 		default:
+			sk_free(sk);
 			return -ESOCKTNOSUPPORT;
 		}
 		break;
 	default:
+		sk_free(sk);
 		return -ESOCKTNOSUPPORT;
 	}
 
+	/* Initialise networking socket struct */
+	sock_init_data(sock, sk);	/* Note : set sk->sk_refcnt to 1 */
+	sk->sk_family = PF_IRDA;
+	sk->sk_protocol = protocol;
+
 	/* Register as a client with IrLMP */
 	self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
 	self->mask.word = 0xffff;
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index e2ddde755019..008de1fc42ca 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -286,12 +286,14 @@ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb,
  *
  *	Sends received pdus to the sap state machine.
  */
-static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb)
+static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
+			struct sock *sk)
 {
 	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
 
 	ev->type   = LLC_SAP_EV_TYPE_PDU;
 	ev->reason = 0;
+	skb->sk = sk;
 	llc_sap_state_process(sap, skb);
 }
 
@@ -360,8 +362,7 @@ static void llc_sap_mcast(struct llc_sap *sap,
 			break;
 
 		sock_hold(sk);
-		skb_set_owner_r(skb1, sk);
-		llc_sap_rcv(sap, skb1);
+		llc_sap_rcv(sap, skb1, sk);
 		sock_put(sk);
 	}
 	read_unlock_bh(&sap->sk_list.lock);
@@ -381,8 +382,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
 	} else {
 		struct sock *sk = llc_lookup_dgram(sap, &laddr);
 		if (sk) {
-			skb_set_owner_r(skb, sk);
-			llc_sap_rcv(sap, skb);
+			llc_sap_rcv(sap, skb, sk);
 			sock_put(sk);
 		} else
 			kfree_skb(skb);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index dbf0563c397d..81087281b031 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -674,7 +674,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	if (params->vlan) {
 		sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
-		if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN ||
+		if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN &&
 		    sdata->vif.type != IEEE80211_IF_TYPE_AP)
 			return -EINVAL;
 	} else
@@ -762,7 +762,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 	if (params->vlan && params->vlan != sta->sdata->dev) {
 		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
-		if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN ||
+		if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN &&
 		    vlansdata->vif.type != IEEE80211_IF_TYPE_AP) {
 			rcu_read_unlock();
 			return -EINVAL;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 432011cd3647..884be4d100f1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -853,7 +853,29 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 
 /* ieee80211_ioctl.c */
 extern const struct iw_handler_def ieee80211_iw_handler_def;
-int ieee80211_set_freq(struct ieee80211_local *local, int freq);
+
+/* Least common multiple of the used rates (in 100 kbps). This is used to
+ * calculate rate_inv values for each rate so that only integers are needed. */
+#define CHAN_UTIL_RATE_LCM 95040
+/* 1 usec is 1/8 * (95040/10) = 1188 */
+#define CHAN_UTIL_PER_USEC 1188
+/* Amount of bits to shift the result right to scale the total utilization
+ * to values that will not wrap around 32-bit integers. */
+#define CHAN_UTIL_SHIFT 9
+/* Theoretical maximum of channel utilization counter in 10 ms (stat_time=1):
+ * (CHAN_UTIL_PER_USEC * 10000) >> CHAN_UTIL_SHIFT = 23203. So dividing the
+ * raw value with about 23 should give utilization in 10th of a percentage
+ * (1/1000). However, utilization is only estimated and not all intervals
+ * between frames etc. are calculated. 18 seems to give numbers that are closer
+ * to the real maximum. */
+#define CHAN_UTIL_PER_10MS 18
+#define CHAN_UTIL_HDR_LONG (202 * CHAN_UTIL_PER_USEC)
+#define CHAN_UTIL_HDR_SHORT (40 * CHAN_UTIL_PER_USEC)
+
+
+/* ieee80211_ioctl.c */
+int ieee80211_set_freq(struct net_device *dev, int freq);
+
 /* ieee80211_sta.c */
 void ieee80211_sta_timer(unsigned long data);
 void ieee80211_sta_work(struct work_struct *work);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f79f6b9938a6..b182f018a187 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -516,6 +516,7 @@ static int ieee80211_stop(struct net_device *dev)
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
 		sdata->u.sta.state = IEEE80211_DISABLED;
+		memset(sdata->u.sta.bssid, 0, ETH_ALEN);
 		del_timer_sync(&sdata->u.sta.timer);
 		/*
 		 * When we get here, the interface is marked down.
@@ -1314,7 +1315,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	/*
 	 * Clear the TX filter mask for this STA when sending the next
 	 * packet. If the STA went to power save mode, this will happen
-	 * happen when it wakes up for the next time.
+	 * when it wakes up for the next time.
 	 */
 	set_sta_flags(sta, WLAN_STA_CLEAR_PS_FILT);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index adbc1c804dd3..7f05820dc629 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -44,7 +44,7 @@
 #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ)
 #define IEEE80211_SCAN_INTERVAL (2 * HZ)
 #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ)
-#define IEEE80211_IBSS_JOIN_TIMEOUT (20 * HZ)
+#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
 
 #define IEEE80211_PROBE_DELAY (HZ / 33)
 #define IEEE80211_CHANNEL_TIME (HZ / 33)
@@ -1337,7 +1337,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
 
 	/* prepare reordering buffer */
 	tid_agg_rx->reorder_buf =
-		kmalloc(buf_size * sizeof(struct sk_buf *), GFP_ATOMIC);
+		kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC);
 	if (!tid_agg_rx->reorder_buf) {
 		if (net_ratelimit())
 			printk(KERN_ERR "can not allocate reordering buffer "
@@ -1346,7 +1346,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
 		goto end;
 	}
 	memset(tid_agg_rx->reorder_buf, 0,
-	       buf_size * sizeof(struct sk_buf *));
+	       buf_size * sizeof(struct sk_buff *));
 
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
@@ -1625,7 +1625,7 @@ void sta_addba_resp_timer_expired(unsigned long data)
 	 * only one argument, and both sta_info and TID are needed, so init
 	 * flow in sta_info_create gives the TID as data, while the timer_to_id
 	 * array gives the sta through container_of */
-	u16 tid = *(int *)data;
+	u16 tid = *(u8 *)data;
 	struct sta_info *temp_sta = container_of((void *)data,
 		struct sta_info, timer_to_tid[tid]);
 
@@ -1673,7 +1673,7 @@ timer_expired_exit:
 static void sta_rx_agg_session_timer_expired(unsigned long data)
 {
 	/* not an elegant detour, but there is no choice as the timer passes
-	 * only one argument, and verious sta_info are needed here, so init
+	 * only one argument, and various sta_info are needed here, so init
 	 * flow in sta_info_create gives the TID as data, while the timer_to_id
 	 * array gives the sta through container_of */
 	u8 *ptid = (u8 *)data;
@@ -2348,6 +2348,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 	u8 *pos;
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_supported_band *sband;
+	union iwreq_data wrqu;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
@@ -2370,13 +2371,10 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 	sdata->drop_unencrypted = bss->capability &
 		WLAN_CAPABILITY_PRIVACY ? 1 : 0;
 
-	res = ieee80211_set_freq(local, bss->freq);
+	res = ieee80211_set_freq(dev, bss->freq);
 
-	if (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) {
-		printk(KERN_DEBUG "%s: IBSS not allowed on frequency "
-		       "%d MHz\n", dev->name, local->oper_channel->center_freq);
-		return -1;
-	}
+	if (res)
+		return res;
 
 	/* Set beacon template */
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
@@ -2491,7 +2489,9 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
2491 ifsta->state = IEEE80211_IBSS_JOINED; 2489 ifsta->state = IEEE80211_IBSS_JOINED;
2492 mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); 2490 mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
2493 2491
2494 ieee80211_rx_bss_put(dev, bss); 2492 memset(&wrqu, 0, sizeof(wrqu));
2493 memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN);
2494 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
2495 2495
2496 return res; 2496 return res;
2497} 2497}
@@ -3518,7 +3518,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
3518 spin_unlock_bh(&local->sta_bss_lock); 3518 spin_unlock_bh(&local->sta_bss_lock);
3519 3519
3520 if (selected) { 3520 if (selected) {
3521 ieee80211_set_freq(local, selected->freq); 3521 ieee80211_set_freq(dev, selected->freq);
3522 if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) 3522 if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
3523 ieee80211_sta_set_ssid(dev, selected->ssid, 3523 ieee80211_sta_set_ssid(dev, selected->ssid,
3524 selected->ssid_len); 3524 selected->ssid_len);
@@ -3553,6 +3553,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
3553 struct ieee80211_supported_band *sband; 3553 struct ieee80211_supported_band *sband;
3554 u8 bssid[ETH_ALEN], *pos; 3554 u8 bssid[ETH_ALEN], *pos;
3555 int i; 3555 int i;
3556 int ret;
3556 DECLARE_MAC_BUF(mac); 3557 DECLARE_MAC_BUF(mac);
3557 3558
3558#if 0 3559#if 0
@@ -3599,7 +3600,9 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
3599 *pos++ = (u8) (rate / 5); 3600 *pos++ = (u8) (rate / 5);
3600 } 3601 }
3601 3602
3602 return ieee80211_sta_join_ibss(dev, ifsta, bss); 3603 ret = ieee80211_sta_join_ibss(dev, ifsta, bss);
3604 ieee80211_rx_bss_put(dev, bss);
3605 return ret;
3603} 3606}
3604 3607
3605 3608
@@ -3647,10 +3650,13 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
3647 (bss = ieee80211_rx_bss_get(dev, bssid, 3650 (bss = ieee80211_rx_bss_get(dev, bssid,
3648 local->hw.conf.channel->center_freq, 3651 local->hw.conf.channel->center_freq,
3649 ifsta->ssid, ifsta->ssid_len))) { 3652 ifsta->ssid, ifsta->ssid_len))) {
3653 int ret;
3650 printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" 3654 printk(KERN_DEBUG "%s: Selected IBSS BSSID %s"
3651 " based on configured SSID\n", 3655 " based on configured SSID\n",
3652 dev->name, print_mac(mac, bssid)); 3656 dev->name, print_mac(mac, bssid));
3653 return ieee80211_sta_join_ibss(dev, ifsta, bss); 3657 ret = ieee80211_sta_join_ibss(dev, ifsta, bss);
3658 ieee80211_rx_bss_put(dev, bss);
3659 return ret;
3654 } 3660 }
3655#ifdef CONFIG_MAC80211_IBSS_DEBUG 3661#ifdef CONFIG_MAC80211_IBSS_DEBUG
3656 printk(KERN_DEBUG " did not try to join ibss\n"); 3662 printk(KERN_DEBUG " did not try to join ibss\n");
@@ -4127,18 +4133,17 @@ ieee80211_sta_scan_result(struct net_device *dev,
4127 4133
4128 memset(&iwe, 0, sizeof(iwe)); 4134 memset(&iwe, 0, sizeof(iwe));
4129 iwe.cmd = SIOCGIWFREQ; 4135 iwe.cmd = SIOCGIWFREQ;
4130 iwe.u.freq.m = bss->freq; 4136 iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq);
4131 iwe.u.freq.e = 6; 4137 iwe.u.freq.e = 0;
4132 current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, 4138 current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
4133 IW_EV_FREQ_LEN); 4139 IW_EV_FREQ_LEN);
4134 4140
4135 memset(&iwe, 0, sizeof(iwe)); 4141 memset(&iwe, 0, sizeof(iwe));
4136 iwe.cmd = SIOCGIWFREQ; 4142 iwe.cmd = SIOCGIWFREQ;
4137 iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); 4143 iwe.u.freq.m = bss->freq;
4138 iwe.u.freq.e = 0; 4144 iwe.u.freq.e = 6;
4139 current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, 4145 current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
4140 IW_EV_FREQ_LEN); 4146 IW_EV_FREQ_LEN);
4141
4142 memset(&iwe, 0, sizeof(iwe)); 4147 memset(&iwe, 0, sizeof(iwe));
4143 iwe.cmd = IWEVQUAL; 4148 iwe.cmd = IWEVQUAL;
4144 iwe.u.qual.qual = bss->qual; 4149 iwe.u.qual.qual = bss->qual;
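The last hunk above swaps the two SIOCGIWFREQ scan events so the channel number is emitted before the absolute frequency. Wireless-extensions encodes both in one field: the value is m * 10^e, with e == 0 for channel numbers and e == 6 when m is in MHz. A minimal user-space sketch of that encoding, using a simplified stand-in for struct iw_freq rather than the real wext headers:

#include <stdio.h>

/* Simplified stand-in for struct iw_freq: value = m * 10^e. */
struct fake_iw_freq {
	int m;
	short e;
};

static double freq_value(const struct fake_iw_freq *f)
{
	double v = f->m;
	int i;

	for (i = 0; i < f->e; i++)
		v *= 10.0;
	return v;
}

int main(void)
{
	struct fake_iw_freq chan = { 11, 0 };	/* channel 11, e == 0 */
	struct fake_iw_freq freq = { 2462, 6 };	/* 2462 MHz in Hz, e == 6 */

	printf("channel: %.0f\n", freq_value(&chan));
	printf("frequency: %.0f Hz\n", freq_value(&freq));
	return 0;
}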
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 9400a9766a77..a3643fd86af9 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1116,7 +1116,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1116 u16 fc, hdrlen, ethertype; 1116 u16 fc, hdrlen, ethertype;
1117 u8 *payload; 1117 u8 *payload;
1118 u8 dst[ETH_ALEN]; 1118 u8 dst[ETH_ALEN];
1119 u8 src[ETH_ALEN]; 1119 u8 src[ETH_ALEN] __aligned(2);
1120 struct sk_buff *skb = rx->skb; 1120 struct sk_buff *skb = rx->skb;
1121 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1121 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1122 DECLARE_MAC_BUF(mac); 1122 DECLARE_MAC_BUF(mac);
@@ -1259,7 +1259,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1259 */ 1259 */
1260static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) 1260static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx)
1261{ 1261{
1262 static const u8 pae_group_addr[ETH_ALEN] 1262 static const u8 pae_group_addr[ETH_ALEN] __aligned(2)
1263 = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 }; 1263 = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 };
1264 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; 1264 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1265 1265
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5a77e2c01f49..6513bc2d2707 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -34,11 +34,11 @@ void *mac80211_wiphy_privid = &mac80211_wiphy_privid;
34 34
35/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ 35/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
36/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ 36/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
37const unsigned char rfc1042_header[] = 37const unsigned char rfc1042_header[] __aligned(2) =
38 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; 38 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
39 39
40/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ 40/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */
41const unsigned char bridge_tunnel_header[] = 41const unsigned char bridge_tunnel_header[] __aligned(2) =
42 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 42 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
43 43
44 44
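Both mac80211 hunks above add __aligned(2) to Ethernet addresses and SNAP headers. The likely motivation is that MAC addresses are compared as three 16-bit words (the compare_ether_addr() approach), which requires 2-byte alignment on architectures with strict alignment rules. An illustrative stand-alone sketch, not the kernel code:

#include <stdint.h>

/* 2-byte alignment lets the address be read as three u16 words. */
static const uint8_t pae_group_addr[6] __attribute__((aligned(2))) =
	{ 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 };

static int mac_equal_words(const uint8_t *a, const uint8_t *b)
{
	/* Both operands must be 2-byte aligned for these casts. */
	const uint16_t *a16 = (const uint16_t *)a;
	const uint16_t *b16 = (const uint16_t *)b;

	return !((a16[0] ^ b16[0]) | (a16[1] ^ b16[1]) | (a16[2] ^ b16[2]));
}

int main(void)
{
	uint8_t other[6] __attribute__((aligned(2))) =
		{ 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 };

	return !mac_equal_words(pae_group_addr, other);	/* 0 on match */
}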
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index c2e2378af082..4806d96b9877 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -302,14 +302,22 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev,
302 return 0; 302 return 0;
303} 303}
304 304
305int ieee80211_set_freq(struct ieee80211_local *local, int freqMHz) 305int ieee80211_set_freq(struct net_device *dev, int freqMHz)
306{ 306{
307 int ret = -EINVAL; 307 int ret = -EINVAL;
308 struct ieee80211_channel *chan; 308 struct ieee80211_channel *chan;
309 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
310 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
309 311
310 chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); 312 chan = ieee80211_get_channel(local->hw.wiphy, freqMHz);
311 313
312 if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { 314 if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) {
315 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
316 chan->flags & IEEE80211_CHAN_NO_IBSS) {
317 printk(KERN_DEBUG "%s: IBSS not allowed on frequency "
318 "%d MHz\n", dev->name, chan->center_freq);
319 return ret;
320 }
313 local->oper_channel = chan; 321 local->oper_channel = chan;
314 322
315 if (local->sta_sw_scanning || local->sta_hw_scanning) 323 if (local->sta_sw_scanning || local->sta_hw_scanning)
@@ -327,7 +335,6 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
327 struct iw_request_info *info, 335 struct iw_request_info *info,
328 struct iw_freq *freq, char *extra) 336 struct iw_freq *freq, char *extra)
329{ 337{
330 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
331 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 338 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
332 339
333 if (sdata->vif.type == IEEE80211_IF_TYPE_STA) 340 if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
@@ -341,14 +348,14 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
341 IEEE80211_STA_AUTO_CHANNEL_SEL; 348 IEEE80211_STA_AUTO_CHANNEL_SEL;
342 return 0; 349 return 0;
343 } else 350 } else
344 return ieee80211_set_freq(local, 351 return ieee80211_set_freq(dev,
345 ieee80211_channel_to_frequency(freq->m)); 352 ieee80211_channel_to_frequency(freq->m));
346 } else { 353 } else {
347 int i, div = 1000000; 354 int i, div = 1000000;
348 for (i = 0; i < freq->e; i++) 355 for (i = 0; i < freq->e; i++)
349 div /= 10; 356 div /= 10;
350 if (div > 0) 357 if (div > 0)
351 return ieee80211_set_freq(local, freq->m / div); 358 return ieee80211_set_freq(dev, freq->m / div);
352 else 359 else
353 return -EINVAL; 360 return -EINVAL;
354 } 361 }
@@ -501,9 +508,14 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
501 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 508 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
502 if (sdata->vif.type == IEEE80211_IF_TYPE_STA || 509 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
503 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { 510 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
504 ap_addr->sa_family = ARPHRD_ETHER; 511 if (sdata->u.sta.state == IEEE80211_ASSOCIATED) {
505 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); 512 ap_addr->sa_family = ARPHRD_ETHER;
506 return 0; 513 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN);
514 return 0;
515 } else {
516 memset(&ap_addr->sa_data, 0, ETH_ALEN);
517 return 0;
518 }
507 } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { 519 } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
508 ap_addr->sa_family = ARPHRD_ETHER; 520 ap_addr->sa_family = ARPHRD_ETHER;
509 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); 521 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN);
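ieee80211_set_freq() now takes the net_device instead of the ieee80211_local, so it can check the interface type against the channel's IEEE80211_CHAN_NO_IBSS flag before tuning; the equivalent check previously lived in the IBSS join path. A compact sketch of that validity test, with simplified stand-in flag and struct names:

/* Stand-in flags; the real ones are IEEE80211_CHAN_DISABLED/NO_IBSS. */
#define CHAN_DISABLED	(1u << 0)
#define CHAN_NO_IBSS	(1u << 1)

struct chan {
	int center_freq;	/* MHz */
	unsigned int flags;
};

static int chan_usable(const struct chan *c, int iface_is_ibss)
{
	if (!c || (c->flags & CHAN_DISABLED))
		return 0;
	if (iface_is_ibss && (c->flags & CHAN_NO_IBSS))
		return 0;	/* IBSS not allowed on this frequency */
	return 1;
}

int main(void)
{
	struct chan c = { 2412, CHAN_NO_IBSS };

	return chan_usable(&c, 1);	/* 0: IBSS join refused */
}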
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index e31beeb33b2b..e8f0dead267f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -587,10 +587,10 @@ int __init nf_conntrack_expect_init(void)
587 return 0; 587 return 0;
588 588
589err3: 589err3:
590 kmem_cache_destroy(nf_ct_expect_cachep);
591err2:
590 nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, 592 nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
591 nf_ct_expect_hsize); 593 nf_ct_expect_hsize);
592err2:
593 kmem_cache_destroy(nf_ct_expect_cachep);
594err1: 594err1:
595 return err; 595 return err;
596} 596}
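The hunk above reorders the error labels so cleanup runs in reverse order of setup: a failure after the expectation cache is created must destroy the cache before freeing the hash table. A self-contained sketch of this standard goto-unwind pattern, with illustrative stub names:

#include <stdio.h>

static int fail_step;	/* which step should fail, for demonstration */

static int alloc_hashtable(void) { return fail_step == 1 ? -1 : 0; }
static int create_cache(void)    { return fail_step == 2 ? -1 : 0; }
static int register_hooks(void)  { return fail_step == 3 ? -1 : 0; }
static void destroy_cache(void)  { printf("  destroy cache\n"); }
static void free_hashtable(void) { printf("  free hashtable\n"); }

/* Unwind in reverse order of setup: the label for a failure at step N
 * undoes step N-1 first. */
static int init_subsystem(void)
{
	int err;

	err = alloc_hashtable();
	if (err)
		goto err1;
	err = create_cache();
	if (err)
		goto err2;
	err = register_hooks();
	if (err)
		goto err3;
	return 0;

err3:
	destroy_cache();	/* undo create_cache() */
err2:
	free_hashtable();	/* undo alloc_hashtable() */
err1:
	return err;
}

int main(void)
{
	fail_step = 3;
	printf("fail at step 3 -> %d\n", init_subsystem());
	return 0;
}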
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 2e89a00df92c..70907f6baac3 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -73,7 +73,8 @@ connlimit_iphash6(const union nf_inet_addr *addr,
73static inline bool already_closed(const struct nf_conn *conn) 73static inline bool already_closed(const struct nf_conn *conn)
74{ 74{
75 if (nf_ct_protonum(conn) == IPPROTO_TCP) 75 if (nf_ct_protonum(conn) == IPPROTO_TCP)
76 return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT; 76 return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
77 conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
77 else 78 else
78 return 0; 79 return 0;
79} 80}
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index feb326f4a752..47bbf45ae5d7 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -400,13 +400,13 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
400 * @attrlen: length of attribute payload 400 * @attrlen: length of attribute payload
401 * @data: head of attribute payload 401 * @data: head of attribute payload
402 * 402 *
403 * Returns -1 if the tailroom of the skb is insufficient to store 403 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
404 * the attribute header and payload. 404 * the attribute header and payload.
405 */ 405 */
406int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) 406int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
407{ 407{
408 if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) 408 if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
409 return -1; 409 return -EMSGSIZE;
410 410
411 __nla_put(skb, attrtype, attrlen, data); 411 __nla_put(skb, attrtype, attrlen, data);
412 return 0; 412 return 0;
@@ -418,13 +418,13 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
418 * @attrlen: length of attribute payload 418 * @attrlen: length of attribute payload
419 * @data: head of attribute payload 419 * @data: head of attribute payload
420 * 420 *
421 * Returns -1 if the tailroom of the skb is insufficient to store 421 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
422 * the attribute payload. 422 * the attribute payload.
423 */ 423 */
424int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) 424int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
425{ 425{
426 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) 426 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
427 return -1; 427 return -EMSGSIZE;
428 428
429 __nla_put_nohdr(skb, attrlen, data); 429 __nla_put_nohdr(skb, attrlen, data);
430 return 0; 430 return 0;
@@ -436,13 +436,13 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
436 * @attrlen: length of attribute payload 436 * @attrlen: length of attribute payload
437 * @data: head of attribute payload 437 * @data: head of attribute payload
438 * 438 *
439 * Returns -1 if the tailroom of the skb is insufficient to store 439 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
440 * the attribute payload. 440 * the attribute payload.
441 */ 441 */
442int nla_append(struct sk_buff *skb, int attrlen, const void *data) 442int nla_append(struct sk_buff *skb, int attrlen, const void *data)
443{ 443{
444 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) 444 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
445 return -1; 445 return -EMSGSIZE;
446 446
447 memcpy(skb_put(skb, attrlen), data, attrlen); 447 memcpy(skb_put(skb, attrlen), data, attrlen);
448 return 0; 448 return 0;
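Returning -EMSGSIZE instead of a bare -1 lets callers of nla_put() and friends propagate a meaningful errno up the stack. A toy model of the new contract (the fixed buffer stands in for skb tailroom; none of this is the real netlink API):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct sk_buf_stub {
	char data[12];
	int len;
};

/* Stub with the new contract: -EMSGSIZE, not -1, when the attribute
 * does not fit. */
static int nla_put_stub(struct sk_buf_stub *skb, const void *data, int attrlen)
{
	if (skb->len + attrlen > (int)sizeof(skb->data))
		return -EMSGSIZE;
	memcpy(skb->data + skb->len, data, attrlen);
	skb->len += attrlen;
	return 0;
}

static int fill_info(struct sk_buf_stub *skb, const int64_t *val)
{
	int err = nla_put_stub(skb, val, sizeof(*val));

	if (err)
		return err;	/* callers can now propagate a real errno */
	return 0;
}

int main(void)
{
	struct sk_buf_stub skb = { "", 0 };
	int64_t a = 1, b = 2;

	printf("first:  %d\n", fill_info(&skb, &a));	/* 0 */
	printf("second: %d\n", fill_info(&skb, &b));	/* -EMSGSIZE */
	return 0;
}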
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d16929c9b4bc..f5aa23c3e886 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -554,7 +554,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
554 return genlmsg_end(skb, hdr); 554 return genlmsg_end(skb, hdr);
555 555
556nla_put_failure: 556nla_put_failure:
557 return genlmsg_cancel(skb, hdr); 557 genlmsg_cancel(skb, hdr);
558 return -EMSGSIZE;
558} 559}
559 560
560static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, 561static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
@@ -590,7 +591,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
590 return genlmsg_end(skb, hdr); 591 return genlmsg_end(skb, hdr);
591 592
592nla_put_failure: 593nla_put_failure:
593 return genlmsg_cancel(skb, hdr); 594 genlmsg_cancel(skb, hdr);
595 return -EMSGSIZE;
594} 596}
595 597
596static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) 598static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0df911fd67b1..64465bacbe79 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -444,7 +444,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
444 return nla_nest_end(skb, opts); 444 return nla_nest_end(skb, opts);
445 445
446nla_put_failure: 446nla_put_failure:
447 return nla_nest_cancel(skb, opts); 447 nla_nest_cancel(skb, opts);
448 return -EMSGSIZE;
448} 449}
449 450
450static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) 451static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -466,7 +467,8 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
466 return nla_nest_end(skb, opts); 467 return nla_nest_end(skb, opts);
467 468
468nla_put_failure: 469nla_put_failure:
469 return nla_nest_cancel(skb, opts); 470 nla_nest_cancel(skb, opts);
471 return -EMSGSIZE;
470} 472}
471 473
472static const struct Qdisc_class_ops dsmark_class_ops = { 474static const struct Qdisc_class_ops dsmark_class_ops = {
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 3a9d226ff1e4..c89fba56db56 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -582,7 +582,8 @@ append_opt:
582 return nla_nest_end(skb, opts); 582 return nla_nest_end(skb, opts);
583 583
584nla_put_failure: 584nla_put_failure:
585 return nla_nest_cancel(skb, opts); 585 nla_nest_cancel(skb, opts);
586 return -EMSGSIZE;
586} 587}
587 588
588static void gred_destroy(struct Qdisc *sch) 589static void gred_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 87293d0db1d7..fdfaa3fcc16d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1360,7 +1360,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
1360 1360
1361 nla_put_failure: 1361 nla_put_failure:
1362 nla_nest_cancel(skb, nest); 1362 nla_nest_cancel(skb, nest);
1363 return -1; 1363 return -EMSGSIZE;
1364} 1364}
1365 1365
1366static int 1366static int
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 3dcd493f4f4a..5c569853b9c0 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -281,7 +281,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
281 return nla_nest_end(skb, opts); 281 return nla_nest_end(skb, opts);
282 282
283nla_put_failure: 283nla_put_failure:
284 return nla_nest_cancel(skb, opts); 284 nla_nest_cancel(skb, opts);
285 return -EMSGSIZE;
285} 286}
286 287
287static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 288static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
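The four qdisc hunks above (dsmark, gred, hfsc, red) all apply the same fix: nla_nest_cancel() is called for its truncating side effect, and the dump function returns -EMSGSIZE explicitly instead of forwarding the cancel's return value. A toy reconstruction of the pattern, with a plain buffer standing in for the skb:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

/* The "nest" is just the length at which it started, so cancelling
 * truncates the message back to that mark. */
struct msg {
	char data[8];
	int len;
};

static int nest_start(struct msg *m) { return m->len; }
static void nest_cancel(struct msg *m, int mark) { m->len = mark; }

static int put_bytes(struct msg *m, size_t n)
{
	if (m->len + (int)n > (int)sizeof(m->data))
		return -1;
	m->len += (int)n;
	return 0;
}

/* The fixed pattern: cancel for its side effect, return -EMSGSIZE. */
static int dump_opts(struct msg *m, size_t optlen)
{
	int opts = nest_start(m);

	if (put_bytes(m, optlen))
		goto nla_put_failure;
	return m->len;

nla_put_failure:
	nest_cancel(m, opts);
	return -EMSGSIZE;
}

int main(void)
{
	struct msg m = { "", 0 };

	printf("small: %d\n", dump_opts(&m, 4));	/* fits */
	printf("big:   %d\n", dump_opts(&m, 64));	/* -EMSGSIZE */
	return 0;
}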
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 7b79d1e781a8..d5cc731b6798 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1206,6 +1206,9 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1206 struct list_head *head = &asoc->peer.transport_addr_list; 1206 struct list_head *head = &asoc->peer.transport_addr_list;
1207 struct list_head *pos; 1207 struct list_head *pos;
1208 1208
1209 if (asoc->peer.transport_count == 1)
1210 return;
1211
1209 /* Find the next transport in a round-robin fashion. */ 1212 /* Find the next transport in a round-robin fashion. */
1210 t = asoc->peer.retran_path; 1213 t = asoc->peer.retran_path;
1211 pos = &t->transports; 1214 pos = &t->transports;
@@ -1220,6 +1223,15 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1220 1223
1221 t = list_entry(pos, struct sctp_transport, transports); 1224 t = list_entry(pos, struct sctp_transport, transports);
1222 1225
1226 /* We have exhausted the list, but didn't find any
1227 * other active transports. If so, use the next
1228 * transport.
1229 */
1230 if (t == asoc->peer.retran_path) {
1231 t = next;
1232 break;
1233 }
1234
1223 /* Try to find an active transport. */ 1235 /* Try to find an active transport. */
1224 1236
1225 if ((t->state == SCTP_ACTIVE) || 1237 if ((t->state == SCTP_ACTIVE) ||
@@ -1232,15 +1244,6 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1232 if (!next) 1244 if (!next)
1233 next = t; 1245 next = t;
1234 } 1246 }
1235
1236 /* We have exhausted the list, but didn't find any
1237 * other active transports. If so, use the next
1238 * transport.
1239 */
1240 if (t == asoc->peer.retran_path) {
1241 t = next;
1242 break;
1243 }
1244 } 1247 }
1245 1248
1246 asoc->peer.retran_path = t; 1249 asoc->peer.retran_path = t;
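The associola.c hunks short-circuit the retransmit-path rotation when there is only one peer address, and move the wrapped-all-the-way-around test ahead of the activity test so a full lap falls back to an inactive transport instead of looping. A simplified array-based sketch of that selection logic (the kernel walks a list_head instead):

#include <stddef.h>
#include <stdio.h>

struct transport { int active; };

static size_t next_retran_path(const struct transport *t, size_t n, size_t cur)
{
	size_t i = cur;
	size_t fallback = cur;
	int have_fallback = 0;

	if (n == 1)
		return cur;		/* nothing to rotate to */

	for (;;) {
		i = (i + 1) % n;
		if (i == cur)		/* wrapped the whole list */
			return have_fallback ? fallback : cur;
		if (t[i].active)
			return i;
		if (!have_fallback) {	/* remember first inactive seen */
			fallback = i;
			have_fallback = 1;
		}
	}
}

int main(void)
{
	struct transport t[3] = { { 0 }, { 0 }, { 1 } };

	/* No other active path, so fall back to the first inactive one. */
	printf("next: %zu\n", next_retran_path(t, 3, 2));
	return 0;
}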
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e45e44c60635..a2f4d4d51593 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
299/* Fills in the source address(saddr) based on the destination address(daddr) 299/* Fills in the source address(saddr) based on the destination address(daddr)
300 * and asoc's bind address list. 300 * and asoc's bind address list.
301 */ 301 */
302static void sctp_v6_get_saddr(struct sctp_association *asoc, 302static void sctp_v6_get_saddr(struct sctp_sock *sk,
303 struct sctp_association *asoc,
303 struct dst_entry *dst, 304 struct dst_entry *dst,
304 union sctp_addr *daddr, 305 union sctp_addr *daddr,
305 union sctp_addr *saddr) 306 union sctp_addr *saddr)
@@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
318 if (!asoc) { 319 if (!asoc) {
319 ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, 320 ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL,
320 &daddr->v6.sin6_addr, 321 &daddr->v6.sin6_addr,
321 inet6_sk(asoc->base.sk)->srcprefs, 322 inet6_sk(&sk->inet.sk)->srcprefs,
322 &saddr->v6.sin6_addr); 323 &saddr->v6.sin6_addr);
323 SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", 324 SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n",
324 NIP6(saddr->v6.sin6_addr)); 325 NIP6(saddr->v6.sin6_addr));
@@ -726,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
726 seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr)); 727 seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr));
727} 728}
728 729
730static void sctp_v6_ecn_capable(struct sock *sk)
731{
732 inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
733}
734
729/* Initialize a PF_INET6 socket msg_name. */ 735/* Initialize a PF_INET6 socket msg_name. */
730static void sctp_inet6_msgname(char *msgname, int *addr_len) 736static void sctp_inet6_msgname(char *msgname, int *addr_len)
731{ 737{
@@ -996,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = {
996 .skb_iif = sctp_v6_skb_iif, 1002 .skb_iif = sctp_v6_skb_iif,
997 .is_ce = sctp_v6_is_ce, 1003 .is_ce = sctp_v6_is_ce,
998 .seq_dump_addr = sctp_v6_seq_dump_addr, 1004 .seq_dump_addr = sctp_v6_seq_dump_addr,
1005 .ecn_capable = sctp_v6_ecn_capable,
999 .net_header_len = sizeof(struct ipv6hdr), 1006 .net_header_len = sizeof(struct ipv6hdr),
1000 .sockaddr_len = sizeof(struct sockaddr_in6), 1007 .sockaddr_len = sizeof(struct sockaddr_in6),
1001#ifdef CONFIG_COMPAT 1008#ifdef CONFIG_COMPAT
diff --git a/net/sctp/output.c b/net/sctp/output.c
index cf4f9fb6819d..6d45bae93b46 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
548 * Note: The works for IPv6 layer checks this bit too later 548 * Note: The works for IPv6 layer checks this bit too later
549 * in transmission. See IP6_ECN_flow_xmit(). 549 * in transmission. See IP6_ECN_flow_xmit().
550 */ 550 */
551 INET_ECN_xmit(nskb->sk); 551 (*tp->af_specific->ecn_capable)(nskb->sk);
552 552
553 /* Set up the IP options. */ 553 /* Set up the IP options. */
554 /* BUG: not implemented 554 /* BUG: not implemented
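Together with the protocol.c and ipv6.c hunks below, this replaces the unconditional INET_ECN_xmit() call with a per-address-family ecn_capable hook, so IPv6 sockets set ECT(0) in their own traffic-class field. A minimal stand-alone model of that ops-table dispatch:

#include <stdio.h>

struct sock { int family; };

/* Per-address-family ops table, mirroring sctp_af's new ecn_capable hook. */
struct af_ops {
	void (*ecn_capable)(struct sock *sk);
};

static void v4_ecn_capable(struct sock *sk)
{
	printf("mark ECT(0) in IPv4 TOS (family %d)\n", sk->family);
}

static void v6_ecn_capable(struct sock *sk)
{
	printf("mark ECT(0) in IPv6 tclass (family %d)\n", sk->family);
}

static const struct af_ops v4_ops = { v4_ecn_capable };
static const struct af_ops v6_ops = { v6_ecn_capable };

int main(void)
{
	struct sock s4 = { 4 }, s6 = { 6 };

	(*v4_ops.ecn_capable)(&s4);	/* what the transmit path now does */
	(*v6_ops.ecn_capable)(&s6);
	return 0;
}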
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 59edfd25a19c..ace6770e9048 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
208 INIT_LIST_HEAD(&q->sacked); 208 INIT_LIST_HEAD(&q->sacked);
209 INIT_LIST_HEAD(&q->abandoned); 209 INIT_LIST_HEAD(&q->abandoned);
210 210
211 q->fast_rtx = 0;
211 q->outstanding_bytes = 0; 212 q->outstanding_bytes = 0;
212 q->empty = 1; 213 q->empty = 1;
213 q->cork = 0; 214 q->cork = 0;
@@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
500 case SCTP_RTXR_FAST_RTX: 501 case SCTP_RTXR_FAST_RTX:
501 SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); 502 SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
502 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); 503 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
504 q->fast_rtx = 1;
503 break; 505 break;
504 case SCTP_RTXR_PMTUD: 506 case SCTP_RTXR_PMTUD:
505 SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); 507 SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
@@ -518,9 +520,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
518 * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by 520 * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by
519 * following the procedures outlined in C1 - C5. 521 * following the procedures outlined in C1 - C5.
520 */ 522 */
521 sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); 523 if (reason == SCTP_RTXR_T3_RTX)
524 sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point);
522 525
523 error = sctp_outq_flush(q, /* rtx_timeout */ 1); 526 /* Flush the queues only on timeout, since fast_rtx is only
527 * triggered during sack processing and the queue
528 * will be flushed at the end.
529 */
530 if (reason != SCTP_RTXR_FAST_RTX)
531 error = sctp_outq_flush(q, /* rtx_timeout */ 1);
524 532
525 if (error) 533 if (error)
526 q->asoc->base.sk->sk_err = -error; 534 q->asoc->base.sk->sk_err = -error;
@@ -538,17 +546,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
538 int rtx_timeout, int *start_timer) 546 int rtx_timeout, int *start_timer)
539{ 547{
540 struct list_head *lqueue; 548 struct list_head *lqueue;
541 struct list_head *lchunk;
542 struct sctp_transport *transport = pkt->transport; 549 struct sctp_transport *transport = pkt->transport;
543 sctp_xmit_t status; 550 sctp_xmit_t status;
544 struct sctp_chunk *chunk, *chunk1; 551 struct sctp_chunk *chunk, *chunk1;
545 struct sctp_association *asoc; 552 struct sctp_association *asoc;
553 int fast_rtx;
546 int error = 0; 554 int error = 0;
555 int timer = 0;
556 int done = 0;
547 557
548 asoc = q->asoc; 558 asoc = q->asoc;
549 lqueue = &q->retransmit; 559 lqueue = &q->retransmit;
560 fast_rtx = q->fast_rtx;
550 561
551 /* RFC 2960 6.3.3 Handle T3-rtx Expiration 562 /* This loop handles time-out retransmissions, fast retransmissions,
563 * and retransmissions due to opening of whindow.
564 *
565 * RFC 2960 6.3.3 Handle T3-rtx Expiration
552 * 566 *
553 * E3) Determine how many of the earliest (i.e., lowest TSN) 567 * E3) Determine how many of the earliest (i.e., lowest TSN)
554 * outstanding DATA chunks for the address for which the 568 * outstanding DATA chunks for the address for which the
@@ -563,12 +577,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
563 * [Just to be painfully clear, if we are retransmitting 577 * [Just to be painfully clear, if we are retransmitting
564 * because a timeout just happened, we should send only ONE 578 * because a timeout just happened, we should send only ONE
565 * packet of retransmitted data.] 579 * packet of retransmitted data.]
580 *
581 * For fast retransmissions we also send only ONE packet. However,
582 * if we are just flushing the queue due to open window, we'll
583 * try to send as much as possible.
566 */ 584 */
567 lchunk = sctp_list_dequeue(lqueue); 585 list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) {
568
569 while (lchunk) {
570 chunk = list_entry(lchunk, struct sctp_chunk,
571 transmitted_list);
572 586
573 /* Make sure that Gap Acked TSNs are not retransmitted. A 587 /* Make sure that Gap Acked TSNs are not retransmitted. A
574 * simple approach is just to move such TSNs out of the 588 * simple approach is just to move such TSNs out of the
@@ -576,58 +590,60 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
576 * next chunk. 590 * next chunk.
577 */ 591 */
578 if (chunk->tsn_gap_acked) { 592 if (chunk->tsn_gap_acked) {
579 list_add_tail(lchunk, &transport->transmitted); 593 list_del(&chunk->transmitted_list);
580 lchunk = sctp_list_dequeue(lqueue); 594 list_add_tail(&chunk->transmitted_list,
595 &transport->transmitted);
581 continue; 596 continue;
582 } 597 }
583 598
 599 /* If we are doing fast retransmit, ignore non-fast_retransmit
600 * chunks
601 */
602 if (fast_rtx && !chunk->fast_retransmit)
603 continue;
604
584 /* Attempt to append this chunk to the packet. */ 605 /* Attempt to append this chunk to the packet. */
585 status = sctp_packet_append_chunk(pkt, chunk); 606 status = sctp_packet_append_chunk(pkt, chunk);
586 607
587 switch (status) { 608 switch (status) {
588 case SCTP_XMIT_PMTU_FULL: 609 case SCTP_XMIT_PMTU_FULL:
589 /* Send this packet. */ 610 /* Send this packet. */
590 if ((error = sctp_packet_transmit(pkt)) == 0) 611 error = sctp_packet_transmit(pkt);
591 *start_timer = 1;
592 612
593 /* If we are retransmitting, we should only 613 /* If we are retransmitting, we should only
594 * send a single packet. 614 * send a single packet.
595 */ 615 */
596 if (rtx_timeout) { 616 if (rtx_timeout || fast_rtx)
597 list_add(lchunk, lqueue); 617 done = 1;
598 lchunk = NULL;
599 }
600 618
601 /* Bundle lchunk in the next round. */ 619 /* Bundle next chunk in the next round. */
602 break; 620 break;
603 621
604 case SCTP_XMIT_RWND_FULL: 622 case SCTP_XMIT_RWND_FULL:
605 /* Send this packet. */ 623 /* Send this packet. */
606 if ((error = sctp_packet_transmit(pkt)) == 0) 624 error = sctp_packet_transmit(pkt);
607 *start_timer = 1;
608 625
609 /* Stop sending DATA as there is no more room 626 /* Stop sending DATA as there is no more room
610 * at the receiver. 627 * at the receiver.
611 */ 628 */
612 list_add(lchunk, lqueue); 629 done = 1;
613 lchunk = NULL;
614 break; 630 break;
615 631
616 case SCTP_XMIT_NAGLE_DELAY: 632 case SCTP_XMIT_NAGLE_DELAY:
617 /* Send this packet. */ 633 /* Send this packet. */
618 if ((error = sctp_packet_transmit(pkt)) == 0) 634 error = sctp_packet_transmit(pkt);
619 *start_timer = 1;
620 635
621 /* Stop sending DATA because of nagle delay. */ 636 /* Stop sending DATA because of nagle delay. */
622 list_add(lchunk, lqueue); 637 done = 1;
623 lchunk = NULL;
624 break; 638 break;
625 639
626 default: 640 default:
627 /* The append was successful, so add this chunk to 641 /* The append was successful, so add this chunk to
628 * the transmitted list. 642 * the transmitted list.
629 */ 643 */
630 list_add_tail(lchunk, &transport->transmitted); 644 list_del(&chunk->transmitted_list);
645 list_add_tail(&chunk->transmitted_list,
646 &transport->transmitted);
631 647
632 /* Mark the chunk as ineligible for fast retransmit 648 /* Mark the chunk as ineligible for fast retransmit
633 * after it is retransmitted. 649 * after it is retransmitted.
@@ -635,27 +651,44 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
635 if (chunk->fast_retransmit > 0) 651 if (chunk->fast_retransmit > 0)
636 chunk->fast_retransmit = -1; 652 chunk->fast_retransmit = -1;
637 653
638 *start_timer = 1; 654 /* Force start T3-rtx timer when fast retransmitting
639 q->empty = 0; 655 * the earliest outstanding TSN
656 */
657 if (!timer && fast_rtx &&
658 ntohl(chunk->subh.data_hdr->tsn) ==
659 asoc->ctsn_ack_point + 1)
660 timer = 2;
640 661
641 /* Retrieve a new chunk to bundle. */ 662 q->empty = 0;
642 lchunk = sctp_list_dequeue(lqueue);
643 break; 663 break;
644 } 664 }
645 665
646 /* If we are here due to a retransmit timeout or a fast 666 /* Set the timer if there were no errors */
647 * retransmit and if there are any chunks left in the retransmit 667 if (!error && !timer)
648 * queue that could not fit in the PMTU sized packet, they need 668 timer = 1;
649 * to be marked as ineligible for a subsequent fast retransmit. 669
650 */ 670 if (done)
651 if (rtx_timeout && !lchunk) { 671 break;
652 list_for_each_entry(chunk1, lqueue, transmitted_list) { 672 }
653 if (chunk1->fast_retransmit > 0) 673
654 chunk1->fast_retransmit = -1; 674 /* If we are here due to a retransmit timeout or a fast
655 } 675 * retransmit and if there are any chunks left in the retransmit
676 * queue that could not fit in the PMTU sized packet, they need
677 * to be marked as ineligible for a subsequent fast retransmit.
678 */
679 if (rtx_timeout || fast_rtx) {
680 list_for_each_entry(chunk1, lqueue, transmitted_list) {
681 if (chunk1->fast_retransmit > 0)
682 chunk1->fast_retransmit = -1;
656 } 683 }
657 } 684 }
658 685
686 *start_timer = timer;
687
688 /* Clear fast retransmit hint */
689 if (fast_rtx)
690 q->fast_rtx = 0;
691
659 return error; 692 return error;
660} 693}
661 694
@@ -862,7 +895,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
862 rtx_timeout, &start_timer); 895 rtx_timeout, &start_timer);
863 896
864 if (start_timer) 897 if (start_timer)
865 sctp_transport_reset_timers(transport); 898 sctp_transport_reset_timers(transport,
899 start_timer-1);
866 900
867 /* This can happen on COOKIE-ECHO resend. Only 901 /* This can happen on COOKIE-ECHO resend. Only
868 * one chunk can get bundled with a COOKIE-ECHO. 902 * one chunk can get bundled with a COOKIE-ECHO.
@@ -977,7 +1011,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
977 list_add_tail(&chunk->transmitted_list, 1011 list_add_tail(&chunk->transmitted_list,
978 &transport->transmitted); 1012 &transport->transmitted);
979 1013
980 sctp_transport_reset_timers(transport); 1014 sctp_transport_reset_timers(transport, start_timer-1);
981 1015
982 q->empty = 0; 1016 q->empty = 0;
983 1017
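The rewrite above replaces the dequeue-and-requeue walk of the retransmit queue with list_for_each_entry_safe(), which caches the successor so the body may move the current chunk to another list. A user-space rebuild of the idiom with a type-specific macro (the kernel's version is generic over the entry type):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

static void list_del(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

struct chunk {
	int tsn;
	int gap_acked;
	struct list_head list;
};

/* Simplified, chunk-specific version of the kernel macro: 'tmp' caches
 * the successor so the body may move 'pos' to another list. */
#define for_each_chunk_safe(pos, tmp, head)                               \
	for (pos = container_of((head)->next, struct chunk, list),        \
	     tmp = container_of(pos->list.next, struct chunk, list);      \
	     &pos->list != (head);                                        \
	     pos = tmp, tmp = container_of(tmp->list.next, struct chunk, list))

int main(void)
{
	struct list_head rtx = LIST_HEAD_INIT(rtx);
	struct list_head acked = LIST_HEAD_INIT(acked);
	struct chunk c[3] = { { 1, 1 }, { 2, 0 }, { 3, 1 } };
	struct chunk *pos, *tmp;
	int i;

	for (i = 0; i < 3; i++)
		list_add_tail(&c[i].list, &rtx);

	for_each_chunk_safe(pos, tmp, &rtx)
		if (pos->gap_acked) {
			list_del(&pos->list);	/* legal mid-walk */
			list_add_tail(&pos->list, &acked);
		}

	for_each_chunk_safe(pos, tmp, &acked)
		printf("acked TSN %d\n", pos->tsn);	/* 1, then 3 */
	return 0;
}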
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index b8bd9e014498..d6af466091d2 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -473,11 +473,11 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
473 /* Walk through the bind address list and look for a bind 473 /* Walk through the bind address list and look for a bind
474 * address that matches the source address of the returned dst. 474 * address that matches the source address of the returned dst.
475 */ 475 */
476 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
476 rcu_read_lock(); 477 rcu_read_lock();
477 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 478 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
478 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) 479 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
479 continue; 480 continue;
480 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
481 if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) 481 if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
482 goto out_unlock; 482 goto out_unlock;
483 } 483 }
@@ -522,7 +522,8 @@ out:
522/* For v4, the source address is cached in the route entry(dst). So no need 522/* For v4, the source address is cached in the route entry(dst). So no need
523 * to cache it separately and hence this is an empty routine. 523 * to cache it separately and hence this is an empty routine.
524 */ 524 */
525static void sctp_v4_get_saddr(struct sctp_association *asoc, 525static void sctp_v4_get_saddr(struct sctp_sock *sk,
526 struct sctp_association *asoc,
526 struct dst_entry *dst, 527 struct dst_entry *dst,
527 union sctp_addr *daddr, 528 union sctp_addr *daddr,
528 union sctp_addr *saddr) 529 union sctp_addr *saddr)
@@ -619,6 +620,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
619 seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); 620 seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
620} 621}
621 622
623static void sctp_v4_ecn_capable(struct sock *sk)
624{
625 INET_ECN_xmit(sk);
626}
627
622/* Event handler for inet address addition/deletion events. 628/* Event handler for inet address addition/deletion events.
 623 * The sctp_local_addr_list needs to be protected by a spin lock since 629
624 * multiple notifiers (say IPv4 and IPv6) may be running at the same 630 * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -937,6 +943,7 @@ static struct sctp_af sctp_af_inet = {
937 .skb_iif = sctp_v4_skb_iif, 943 .skb_iif = sctp_v4_skb_iif,
938 .is_ce = sctp_v4_is_ce, 944 .is_ce = sctp_v4_is_ce,
939 .seq_dump_addr = sctp_v4_seq_dump_addr, 945 .seq_dump_addr = sctp_v4_seq_dump_addr,
946 .ecn_capable = sctp_v4_ecn_capable,
940 .net_header_len = sizeof(struct iphdr), 947 .net_header_len = sizeof(struct iphdr),
941 .sockaddr_len = sizeof(struct sockaddr_in), 948 .sockaddr_len = sizeof(struct sockaddr_in),
942#ifdef CONFIG_COMPAT 949#ifdef CONFIG_COMPAT
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f4938f6c5abe..3f34f61221ec 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
79 peer->rttvar = 0; 79 peer->rttvar = 0;
80 peer->srtt = 0; 80 peer->srtt = 0;
81 peer->rto_pending = 0; 81 peer->rto_pending = 0;
82 peer->fast_recovery = 0;
82 83
83 peer->last_time_heard = jiffies; 84 peer->last_time_heard = jiffies;
84 peer->last_time_used = jiffies; 85 peer->last_time_used = jiffies;
@@ -190,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
190/* Start T3_rtx timer if it is not already running and update the heartbeat 191/* Start T3_rtx timer if it is not already running and update the heartbeat
191 * timer. This routine is called every time a DATA chunk is sent. 192 * timer. This routine is called every time a DATA chunk is sent.
192 */ 193 */
193void sctp_transport_reset_timers(struct sctp_transport *transport) 194void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
194{ 195{
195 /* RFC 2960 6.3.2 Retransmission Timer Rules 196 /* RFC 2960 6.3.2 Retransmission Timer Rules
196 * 197 *
@@ -200,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport)
200 * address. 201 * address.
201 */ 202 */
202 203
203 if (!timer_pending(&transport->T3_rtx_timer)) 204 if (force || !timer_pending(&transport->T3_rtx_timer))
204 if (!mod_timer(&transport->T3_rtx_timer, 205 if (!mod_timer(&transport->T3_rtx_timer,
205 jiffies + transport->rto)) 206 jiffies + transport->rto))
206 sctp_transport_hold(transport); 207 sctp_transport_hold(transport);
@@ -291,7 +292,7 @@ void sctp_transport_route(struct sctp_transport *transport,
291 if (saddr) 292 if (saddr)
292 memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); 293 memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
293 else 294 else
294 af->get_saddr(asoc, dst, daddr, &transport->saddr); 295 af->get_saddr(opt, asoc, dst, daddr, &transport->saddr);
295 296
296 transport->dst = dst; 297 transport->dst = dst;
297 if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { 298 if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
@@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
403 cwnd = transport->cwnd; 404 cwnd = transport->cwnd;
404 flight_size = transport->flight_size; 405 flight_size = transport->flight_size;
405 406
407 /* See if we need to exit Fast Recovery first */
408 if (transport->fast_recovery &&
409 TSN_lte(transport->fast_recovery_exit, sack_ctsn))
410 transport->fast_recovery = 0;
411
406 /* The appropriate cwnd increase algorithm is performed if, and only 412 /* The appropriate cwnd increase algorithm is performed if, and only
 407 * if the cumulative TSN has advanced and the congestion window is 413 * if the cumulative TSN would advance and the congestion window is
408 * being fully utilized. 414 * being fully utilized.
409 */ 415 */
410 if ((transport->asoc->ctsn_ack_point >= sack_ctsn) || 416 if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) ||
411 (flight_size < cwnd)) 417 (flight_size < cwnd))
412 return; 418 return;
413 419
@@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
416 pmtu = transport->asoc->pathmtu; 422 pmtu = transport->asoc->pathmtu;
417 423
418 if (cwnd <= ssthresh) { 424 if (cwnd <= ssthresh) {
419 /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less 425 /* RFC 4960 7.2.1
420 * than or equal to ssthresh an SCTP endpoint MUST use the 426 * o When cwnd is less than or equal to ssthresh, an SCTP
421 * slow start algorithm to increase cwnd only if the current 427 * endpoint MUST use the slow-start algorithm to increase
422 * congestion window is being fully utilized and an incoming 428 * cwnd only if the current congestion window is being fully
423 * SACK advances the Cumulative TSN Ack Point. Only when these 429 * utilized, an incoming SACK advances the Cumulative TSN
424 * two conditions are met can the cwnd be increased otherwise 430 * Ack Point, and the data sender is not in Fast Recovery.
425 * the cwnd MUST not be increased. If these conditions are met 431 * Only when these three conditions are met can the cwnd be
426 * then cwnd MUST be increased by at most the lesser of 432 * increased; otherwise, the cwnd MUST not be increased.
427 * 1) the total size of the previously outstanding DATA 433 * If these conditions are met, then cwnd MUST be increased
428 * chunk(s) acknowledged, and 2) the destination's path MTU. 434 * by, at most, the lesser of 1) the total size of the
435 * previously outstanding DATA chunk(s) acknowledged, and
436 * 2) the destination's path MTU. This upper bound protects
437 * against the ACK-Splitting attack outlined in [SAVAGE99].
429 */ 438 */
439 if (transport->fast_recovery)
440 return;
441
430 if (bytes_acked > pmtu) 442 if (bytes_acked > pmtu)
431 cwnd += pmtu; 443 cwnd += pmtu;
432 else 444 else
@@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
502 * cwnd = ssthresh 514 * cwnd = ssthresh
503 * partial_bytes_acked = 0 515 * partial_bytes_acked = 0
504 */ 516 */
517 if (transport->fast_recovery)
518 return;
519
520 /* Mark Fast recovery */
521 transport->fast_recovery = 1;
522 transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
523
505 transport->ssthresh = max(transport->cwnd/2, 524 transport->ssthresh = max(transport->cwnd/2,
506 4*transport->asoc->pathmtu); 525 4*transport->asoc->pathmtu);
507 transport->cwnd = transport->ssthresh; 526 transport->cwnd = transport->ssthresh;
@@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t)
586 t->flight_size = 0; 605 t->flight_size = 0;
587 t->error_count = 0; 606 t->error_count = 0;
588 t->rto_pending = 0; 607 t->rto_pending = 0;
608 t->fast_recovery = 0;
589 609
590 /* Initialize the state information for SFR-CACC */ 610 /* Initialize the state information for SFR-CACC */
591 t->cacc.changeover_active = 0; 611 t->cacc.changeover_active = 0;
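The transport.c hunks replace a plain integer comparison of cumulative TSNs with TSN_lte(), and the new fast-recovery exit test uses it as well. TSNs are 32-bit serial numbers, so ordering must be computed modulo 2^32. A sketch of that compare:

#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe serial-number compares, as TSN_lt()/TSN_lte() do. */
static int tsn_lt(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;	/* a precedes b modulo 2^32 */
}

static int tsn_lte(uint32_t a, uint32_t b)
{
	return a == b || tsn_lt(a, b);
}

int main(void)
{
	/* Near the 2^32 wrap, a plain "<=" gets this backwards. */
	printf("%d\n", tsn_lte(0xfffffffeu, 1));	/* 1 */
	printf("%d\n", tsn_lte(1, 0xfffffffeu));	/* 0 */
	return 0;
}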
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d927d9f57412..744b79fdcb19 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -17,8 +17,8 @@
17# define RPCDBG_FACILITY RPCDBG_AUTH 17# define RPCDBG_FACILITY RPCDBG_AUTH
18#endif 18#endif
19 19
20#define RPC_ANONYMOUS_USERID ((uid_t)-2) 20#define RPC_MACHINE_CRED_USERID ((uid_t)0)
21#define RPC_ANONYMOUS_GROUPID ((gid_t)-2) 21#define RPC_MACHINE_CRED_GROUPID ((gid_t)0)
22 22
23struct generic_cred { 23struct generic_cred {
24 struct rpc_cred gc_base; 24 struct rpc_cred gc_base;
@@ -44,8 +44,8 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred);
44struct rpc_cred *rpc_lookup_machine_cred(void) 44struct rpc_cred *rpc_lookup_machine_cred(void)
45{ 45{
46 struct auth_cred acred = { 46 struct auth_cred acred = {
47 .uid = RPC_ANONYMOUS_USERID, 47 .uid = RPC_MACHINE_CRED_USERID,
48 .gid = RPC_ANONYMOUS_GROUPID, 48 .gid = RPC_MACHINE_CRED_GROUPID,
49 .machine_cred = 1, 49 .machine_cred = 1,
50 }; 50 };
51 51
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d8e8d79a8451..e46c825f4954 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,30 +6,9 @@
6 6
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/fcntl.h>
10#include <linux/net.h>
11#include <linux/in.h>
12#include <linux/inet.h>
13#include <linux/udp.h>
14#include <linux/tcp.h>
15#include <linux/unistd.h>
16#include <linux/slab.h>
17#include <linux/netdevice.h>
18#include <linux/skbuff.h>
19#include <linux/file.h>
20#include <linux/freezer.h> 9#include <linux/freezer.h>
21#include <linux/kthread.h> 10#include <linux/kthread.h>
22#include <net/sock.h> 11#include <net/sock.h>
23#include <net/checksum.h>
24#include <net/ip.h>
25#include <net/ipv6.h>
26#include <net/tcp_states.h>
27#include <linux/uaccess.h>
28#include <asm/ioctls.h>
29
30#include <linux/sunrpc/types.h>
31#include <linux/sunrpc/clnt.h>
32#include <linux/sunrpc/xdr.h>
33#include <linux/sunrpc/stats.h> 12#include <linux/sunrpc/stats.h>
34#include <linux/sunrpc/svc_xprt.h> 13#include <linux/sunrpc/svc_xprt.h>
35 14
@@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
296 if (!(xprt->xpt_flags & 275 if (!(xprt->xpt_flags &
297 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) 276 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
298 return; 277 return;
299 if (test_bit(XPT_DEAD, &xprt->xpt_flags))
300 return;
301 278
302 cpu = get_cpu(); 279 cpu = get_cpu();
303 pool = svc_pool_for_cpu(xprt->xpt_server, cpu); 280 pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 3f30ee6006ae..f24800f2c098 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -278,7 +278,7 @@ static int ip_map_show(struct seq_file *m,
278 dom = im->m_client->h.name; 278 dom = im->m_client->h.name;
279 279
280 if (ipv6_addr_v4mapped(&addr)) { 280 if (ipv6_addr_v4mapped(&addr)) {
281 seq_printf(m, "%s" NIPQUAD_FMT "%s\n", 281 seq_printf(m, "%s " NIPQUAD_FMT " %s\n",
282 im->m_class, 282 im->m_class,
283 ntohl(addr.s6_addr32[3]) >> 24 & 0xff, 283 ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
284 ntohl(addr.s6_addr32[3]) >> 16 & 0xff, 284 ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
@@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m,
286 ntohl(addr.s6_addr32[3]) >> 0 & 0xff, 286 ntohl(addr.s6_addr32[3]) >> 0 & 0xff,
287 dom); 287 dom);
288 } else { 288 } else {
289 seq_printf(m, "%s" NIP6_FMT "%s\n", 289 seq_printf(m, "%s " NIP6_FMT " %s\n",
290 im->m_class, NIP6(addr), dom); 290 im->m_class, NIP6(addr), dom);
291 } 291 }
292 return 0; 292 return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c22d6b6f2db4..06ab4841537b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -260,11 +260,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
260 * On our side, we need to read into a pagelist. The first page immediately 260 * On our side, we need to read into a pagelist. The first page immediately
261 * follows the RPC header. 261 * follows the RPC header.
262 * 262 *
263 * This function returns 1 to indicate success. The data is not yet in 263 * This function returns:
264 * 0 - No error and no read-list found.
265 *
266 * 1 - Successful read-list processing. The data is not yet in
264 * the pagelist and therefore the RPC request must be deferred. The 267 * the pagelist and therefore the RPC request must be deferred. The
265 * I/O completion will enqueue the transport again and 268 * I/O completion will enqueue the transport again and
266 * svc_rdma_recvfrom will complete the request. 269 * svc_rdma_recvfrom will complete the request.
267 * 270 *
271 * <0 - Error processing/posting read-list.
272 *
268 * NOTE: The ctxt must not be touched after the last WR has been posted 273 * NOTE: The ctxt must not be touched after the last WR has been posted
269 * because the I/O completion processing may occur on another 274 * because the I/O completion processing may occur on another
270 * processor and free / modify the context. Ne touche pas! 275 * processor and free / modify the context. Ne touche pas!
@@ -284,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
284 u64 sgl_offset; 289 u64 sgl_offset;
285 struct rpcrdma_read_chunk *ch; 290 struct rpcrdma_read_chunk *ch;
286 struct svc_rdma_op_ctxt *ctxt = NULL; 291 struct svc_rdma_op_ctxt *ctxt = NULL;
287 struct svc_rdma_op_ctxt *head;
288 struct svc_rdma_op_ctxt *tmp_sge_ctxt; 292 struct svc_rdma_op_ctxt *tmp_sge_ctxt;
289 struct svc_rdma_op_ctxt *tmp_ch_ctxt; 293 struct svc_rdma_op_ctxt *tmp_ch_ctxt;
290 struct chunk_sge *ch_sge_ary; 294 struct chunk_sge *ch_sge_ary;
@@ -302,25 +306,19 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
302 ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge; 306 ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
303 307
304 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); 308 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
309 if (ch_count > RPCSVC_MAXPAGES)
310 return -EINVAL;
305 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, 311 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
306 sge, ch_sge_ary, 312 sge, ch_sge_ary,
307 ch_count, byte_count); 313 ch_count, byte_count);
308 head = svc_rdma_get_context(xprt);
309 sgl_offset = 0; 314 sgl_offset = 0;
310 ch_no = 0; 315 ch_no = 0;
311 316
312 for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; 317 for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
313 ch->rc_discrim != 0; ch++, ch_no++) { 318 ch->rc_discrim != 0; ch++, ch_no++) {
314next_sge: 319next_sge:
315 if (!ctxt) 320 ctxt = svc_rdma_get_context(xprt);
316 ctxt = head;
317 else {
318 ctxt->next = svc_rdma_get_context(xprt);
319 ctxt = ctxt->next;
320 }
321 ctxt->next = NULL;
322 ctxt->direction = DMA_FROM_DEVICE; 321 ctxt->direction = DMA_FROM_DEVICE;
323 clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
324 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 322 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
325 323
326 /* Prepare READ WR */ 324 /* Prepare READ WR */
@@ -347,20 +345,15 @@ next_sge:
347 * the client and the RPC needs to be enqueued. 345 * the client and the RPC needs to be enqueued.
348 */ 346 */
349 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 347 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
350 ctxt->next = hdr_ctxt; 348 ctxt->read_hdr = hdr_ctxt;
351 hdr_ctxt->next = head;
352 } 349 }
353 /* Post the read */ 350 /* Post the read */
354 err = svc_rdma_send(xprt, &read_wr); 351 err = svc_rdma_send(xprt, &read_wr);
355 if (err) { 352 if (err) {
356 printk(KERN_ERR "svcrdma: Error posting send = %d\n", 353 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
357 err); 354 err);
358 /* 355 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
359 * Break the circular list so free knows when 356 svc_rdma_put_context(ctxt, 0);
360 * to stop if the error happened to occur on
361 * the last read
362 */
363 ctxt->next = NULL;
364 goto out; 357 goto out;
365 } 358 }
366 atomic_inc(&rdma_stat_read); 359 atomic_inc(&rdma_stat_read);
@@ -371,7 +364,7 @@ next_sge:
371 goto next_sge; 364 goto next_sge;
372 } 365 }
373 sgl_offset = 0; 366 sgl_offset = 0;
374 err = 0; 367 err = 1;
375 } 368 }
376 369
377 out: 370 out:
@@ -389,25 +382,12 @@ next_sge:
389 while (rqstp->rq_resused) 382 while (rqstp->rq_resused)
390 rqstp->rq_respages[--rqstp->rq_resused] = NULL; 383 rqstp->rq_respages[--rqstp->rq_resused] = NULL;
391 384
392 if (err) { 385 return err;
393 printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
394 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
395 /* Free the linked list of read contexts */
396 while (head != NULL) {
397 ctxt = head->next;
398 svc_rdma_put_context(head, 1);
399 head = ctxt;
400 }
401 return 0;
402 }
403
404 return 1;
405} 386}
406 387
407static int rdma_read_complete(struct svc_rqst *rqstp, 388static int rdma_read_complete(struct svc_rqst *rqstp,
408 struct svc_rdma_op_ctxt *data) 389 struct svc_rdma_op_ctxt *head)
409{ 390{
410 struct svc_rdma_op_ctxt *head = data->next;
411 int page_no; 391 int page_no;
412 int ret; 392 int ret;
413 393
@@ -433,21 +413,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
433 rqstp->rq_arg.len = head->arg.len; 413 rqstp->rq_arg.len = head->arg.len;
434 rqstp->rq_arg.buflen = head->arg.buflen; 414 rqstp->rq_arg.buflen = head->arg.buflen;
435 415
416 /* Free the context */
417 svc_rdma_put_context(head, 0);
418
436 /* XXX: What should this be? */ 419 /* XXX: What should this be? */
437 rqstp->rq_prot = IPPROTO_MAX; 420 rqstp->rq_prot = IPPROTO_MAX;
438 421 svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
439 /*
440 * Free the contexts we used to build the RDMA_READ. We have
441 * to be careful here because the context list uses the same
442 * next pointer used to chain the contexts associated with the
443 * RDMA_READ
444 */
445 data->next = NULL; /* terminate circular list */
446 do {
447 data = head->next;
448 svc_rdma_put_context(head, 0);
449 head = data;
450 } while (head != NULL);
451 422
452 ret = rqstp->rq_arg.head[0].iov_len 423 ret = rqstp->rq_arg.head[0].iov_len
453 + rqstp->rq_arg.page_len 424 + rqstp->rq_arg.page_len
@@ -457,8 +428,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
457 ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, 428 ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
458 rqstp->rq_arg.head[0].iov_len); 429 rqstp->rq_arg.head[0].iov_len);
459 430
460 /* Indicate that we've consumed an RQ credit */
461 rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
462 svc_xprt_received(rqstp->rq_xprt); 431 svc_xprt_received(rqstp->rq_xprt);
463 return ret; 432 return ret;
464} 433}
@@ -480,13 +449,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
480 449
481 dprintk("svcrdma: rqstp=%p\n", rqstp); 450 dprintk("svcrdma: rqstp=%p\n", rqstp);
482 451
483 /*
484 * The rq_xprt_ctxt indicates if we've consumed an RQ credit
485 * or not. It is used in the rdma xpo_release_rqst function to
486 * determine whether or not to return an RQ WQE to the RQ.
487 */
488 rqstp->rq_xprt_ctxt = NULL;
489
490 spin_lock_bh(&rdma_xprt->sc_read_complete_lock); 452 spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
491 if (!list_empty(&rdma_xprt->sc_read_complete_q)) { 453 if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
492 ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, 454 ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
@@ -537,21 +499,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
537 /* If the request is invalid, reply with an error */ 499 /* If the request is invalid, reply with an error */
538 if (len < 0) { 500 if (len < 0) {
539 if (len == -ENOSYS) 501 if (len == -ENOSYS)
540 (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); 502 svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
541 goto close_out; 503 goto close_out;
542 } 504 }
543 505
544 /* Read read-list data. If we would need to wait, defer 506 /* Read read-list data. */
545 * it. Not that in this case, we don't return the RQ credit 507 ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
546 * until after the read completes. 508 if (ret > 0) {
547 */ 509 /* read-list posted, defer until data received from client. */
548 if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
549 svc_xprt_received(xprt); 510 svc_xprt_received(xprt);
550 return 0; 511 return 0;
551 } 512 }
552 513 if (ret < 0) {
553 /* Indicate we've consumed an RQ credit */ 514 /* Post of read-list failed, free context. */
554 rqstp->rq_xprt_ctxt = rqstp->rq_xprt; 515 svc_rdma_put_context(ctxt, 1);
516 return 0;
517 }
555 518
556 ret = rqstp->rq_arg.head[0].iov_len 519 ret = rqstp->rq_arg.head[0].iov_len
557 + rqstp->rq_arg.page_len 520 + rqstp->rq_arg.page_len
@@ -569,11 +532,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
569 return ret; 532 return ret;
570 533
571 close_out: 534 close_out:
572 if (ctxt) { 535 if (ctxt)
573 svc_rdma_put_context(ctxt, 1); 536 svc_rdma_put_context(ctxt, 1);
574 /* Indicate we've consumed an RQ credit */
575 rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
576 }
577 dprintk("svcrdma: transport %p is closing\n", xprt); 537 dprintk("svcrdma: transport %p is closing\n", xprt);
578 /* 538 /*
579 * Set the close bit and enqueue it. svc_recv will see the 539 * Set the close bit and enqueue it. svc_recv will see the
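rdma_read_xdr() now has a three-way contract, matching the updated comment block above: 0 means no read-list and the request completes inline, 1 means a read-list was posted and the request is deferred until the RDMA_READ completes, and a negative errno means the post failed and the context must be torn down. A stub sketch of how the caller dispatches on it:

#include <stdio.h>

/* Stub returning the three documented cases: 0, 1, or -errno. */
static int rdma_read_xdr_stub(int scenario)
{
	return scenario;
}

static int handle_recv(int scenario)
{
	int ret = rdma_read_xdr_stub(scenario);

	if (ret > 0) {
		printf("read-list posted; defer until RDMA_READ completes\n");
		return 0;
	}
	if (ret < 0) {
		printf("post failed (%d); free context, close transport\n", ret);
		return 0;
	}
	printf("no read-list; complete the request inline\n");
	return 1;
}

int main(void)
{
	handle_recv(1);
	handle_recv(-22);	/* e.g. -EINVAL */
	handle_recv(0);
	return 0;
}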
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 981f190c1b39..fb82b1b683f8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -389,6 +389,17 @@ static int send_reply(struct svcxprt_rdma *rdma,
389 int page_no; 389 int page_no;
390 int ret; 390 int ret;
391 391
392 /* Post a recv buffer to handle another request. */
393 ret = svc_rdma_post_recv(rdma);
394 if (ret) {
395 printk(KERN_INFO
396 "svcrdma: could not post a receive buffer, err=%d."
397 "Closing transport %p.\n", ret, rdma);
398 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
399 svc_rdma_put_context(ctxt, 0);
400 return -ENOTCONN;
401 }
402
392 /* Prepare the context */ 403 /* Prepare the context */
393 ctxt->pages[0] = page; 404 ctxt->pages[0] = page;
394 ctxt->count = 1; 405 ctxt->count = 1;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index af408fc12634..e132509d1db0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
103 spin_lock_bh(&xprt->sc_ctxt_lock); 103 spin_lock_bh(&xprt->sc_ctxt_lock);
104 if (ctxt) { 104 if (ctxt) {
105 at_least_one = 1; 105 at_least_one = 1;
106 ctxt->next = xprt->sc_ctxt_head; 106 INIT_LIST_HEAD(&ctxt->free_list);
107 xprt->sc_ctxt_head = ctxt; 107 list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
108 } else { 108 } else {
109 /* kmalloc failed...give up for now */ 109 /* kmalloc failed...give up for now */
110 xprt->sc_ctxt_cnt--; 110 xprt->sc_ctxt_cnt--;
@@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
123 123
124 while (1) { 124 while (1) {
125 spin_lock_bh(&xprt->sc_ctxt_lock); 125 spin_lock_bh(&xprt->sc_ctxt_lock);
126 if (unlikely(xprt->sc_ctxt_head == NULL)) { 126 if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
127 /* Try to bump my cache. */ 127 /* Try to bump my cache. */
128 spin_unlock_bh(&xprt->sc_ctxt_lock); 128 spin_unlock_bh(&xprt->sc_ctxt_lock);
129 129
@@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
136 schedule_timeout_uninterruptible(msecs_to_jiffies(500)); 136 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
137 continue; 137 continue;
138 } 138 }
139 ctxt = xprt->sc_ctxt_head; 139 ctxt = list_entry(xprt->sc_ctxt_free.next,
140 xprt->sc_ctxt_head = ctxt->next; 140 struct svc_rdma_op_ctxt,
141 free_list);
142 list_del_init(&ctxt->free_list);
141 spin_unlock_bh(&xprt->sc_ctxt_lock); 143 spin_unlock_bh(&xprt->sc_ctxt_lock);
142 ctxt->xprt = xprt; 144 ctxt->xprt = xprt;
143 INIT_LIST_HEAD(&ctxt->dto_q); 145 INIT_LIST_HEAD(&ctxt->dto_q);
144 ctxt->count = 0; 146 ctxt->count = 0;
147 atomic_inc(&xprt->sc_ctxt_used);
145 break; 148 break;
146 } 149 }
147 return ctxt; 150 return ctxt;
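svc_rdma_get_context() keeps its never-fail contract: if the free list is empty it first tries to grow the cache, and only if that allocation fails does it back off for 500 ms and retry. A hedged user-space analogue of that loop, with the cache simulated by a counter:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static int cached;                      /* free contexts, simulated      */

static int try_grow_cache(void)         /* rdma_bump_context_cache()     */
{
        cached++;                       /* always succeeds in this toy   */
        return 1;
}

static void *pop_free(void)             /* take one off sc_ctxt_free     */
{
        if (cached == 0)
                return NULL;
        cached--;
        return malloc(16);
}

/* Mirrors svc_rdma_get_context(): it never fails, it waits instead. */
static void *get_context(void)
{
        void *ctxt;

        for (;;) {
                ctxt = pop_free();
                if (ctxt)
                        return ctxt;
                if (try_grow_cache())
                        continue;       /* cache grew, try again         */
                /* allocation failed: back off, like the 500 ms
                 * schedule_timeout_uninterruptible() in the hunk */
                usleep(500 * 1000);
        }
}

int main(void)
{
        free(get_context());
        puts("got a context");
        return 0;
}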
@@ -159,14 +162,15 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
159 put_page(ctxt->pages[i]); 162 put_page(ctxt->pages[i]);
160 163
161 for (i = 0; i < ctxt->count; i++) 164 for (i = 0; i < ctxt->count; i++)
162 dma_unmap_single(xprt->sc_cm_id->device->dma_device, 165 ib_dma_unmap_single(xprt->sc_cm_id->device,
163 ctxt->sge[i].addr, 166 ctxt->sge[i].addr,
164 ctxt->sge[i].length, 167 ctxt->sge[i].length,
165 ctxt->direction); 168 ctxt->direction);
169
166 spin_lock_bh(&xprt->sc_ctxt_lock); 170 spin_lock_bh(&xprt->sc_ctxt_lock);
167 ctxt->next = xprt->sc_ctxt_head; 171 list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
168 xprt->sc_ctxt_head = ctxt;
169 spin_unlock_bh(&xprt->sc_ctxt_lock); 172 spin_unlock_bh(&xprt->sc_ctxt_lock);
173 atomic_dec(&xprt->sc_ctxt_used);
170} 174}
171 175
172/* ib_cq event handler */ 176/* ib_cq event handler */
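Pairing ib_dma_unmap_single() with the device and counting live contexts in sc_ctxt_used gives the teardown path a cheap leak check. The accounting pattern on its own, as a small C11 sketch:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int ctxt_used;            /* sc_ctxt_used analogue */

static void get_ctxt(void) { atomic_fetch_add(&ctxt_used, 1); }
static void put_ctxt(void) { atomic_fetch_sub(&ctxt_used, 1); }

int main(void)
{
        get_ctxt();
        put_ctxt();
        /* Mirrors the WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0)
         * check in __svc_rdma_free(): any imbalance is a leaked ctxt. */
        assert(atomic_load(&ctxt_used) == 0);
        puts("no context leaked");
        return 0;
}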
@@ -228,23 +232,8 @@ static void dto_tasklet_func(unsigned long data)
228 list_del_init(&xprt->sc_dto_q); 232 list_del_init(&xprt->sc_dto_q);
229 spin_unlock_irqrestore(&dto_lock, flags); 233 spin_unlock_irqrestore(&dto_lock, flags);
230 234
231 if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { 235 rq_cq_reap(xprt);
232 ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); 236 sq_cq_reap(xprt);
233 rq_cq_reap(xprt);
234 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
235 /*
236 * If data arrived before established event,
237 * don't enqueue. This defers RPC I/O until the
238 * RDMA connection is complete.
239 */
240 if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
241 svc_xprt_enqueue(&xprt->sc_xprt);
242 }
243
244 if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) {
245 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
246 sq_cq_reap(xprt);
247 }
248 237
249 svc_xprt_put(&xprt->sc_xprt); 238 svc_xprt_put(&xprt->sc_xprt);
250 spin_lock_irqsave(&dto_lock, flags); 239 spin_lock_irqsave(&dto_lock, flags);
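The tasklet now just calls both reap functions; each one owns its pending bit and, as the later hunks show, re-arms the CQ before polling, so a completion that lands mid-drain raises a fresh notification rather than being lost. The essential ordering, sketched with stubs in place of the ib_* calls:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool rq_pending;

static void rearm_cq_stub(void) { }         /* ib_req_notify_cq() stand-in */
static int poll_cq_stub(void) { return 0; } /* no completions pending      */

static void rq_cq_reap(void)
{
        /* Only one caller wins; everyone else sees the flag clear. */
        if (!atomic_exchange(&rq_pending, false))
                return;
        /* Re-arm *before* draining: a completion arriving mid-poll
         * raises a new notification instead of being lost. */
        rearm_cq_stub();
        while (poll_cq_stub() > 0)
                ;                       /* drain the CQ                  */
}

int main(void)
{
        atomic_store(&rq_pending, true);
        rq_cq_reap();                   /* drains                        */
        rq_cq_reap();                   /* flag clear: returns at once   */
        puts("reaped");
        return 0;
}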
@@ -263,11 +252,15 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
263 struct svcxprt_rdma *xprt = cq_context; 252 struct svcxprt_rdma *xprt = cq_context;
264 unsigned long flags; 253 unsigned long flags;
265 254
255 /* Guard against unconditional flush call for destroyed QP */
 256 if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount) == 0)
257 return;
258
266 /* 259 /*
267 * Set the bit regardless of whether or not it's on the list 260 * Set the bit regardless of whether or not it's on the list
268 * because it may be on the list already due to an SQ 261 * because it may be on the list already due to an SQ
269 * completion. 262 * completion.
270 */ 263 */
271 set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); 264 set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags);
272 265
273 /* 266 /*
@@ -290,6 +283,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
290 * 283 *
291 * Take all completing WC off the CQE and enqueue the associated DTO 284 * Take all completing WC off the CQE and enqueue the associated DTO
292 * context on the dto_q for the transport. 285 * context on the dto_q for the transport.
286 *
287 * Note that caller must hold a transport reference.
293 */ 288 */
294static void rq_cq_reap(struct svcxprt_rdma *xprt) 289static void rq_cq_reap(struct svcxprt_rdma *xprt)
295{ 290{
@@ -297,29 +292,47 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
297 struct ib_wc wc; 292 struct ib_wc wc;
298 struct svc_rdma_op_ctxt *ctxt = NULL; 293 struct svc_rdma_op_ctxt *ctxt = NULL;
299 294
295 if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
296 return;
297
298 ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
300 atomic_inc(&rdma_stat_rq_poll); 299 atomic_inc(&rdma_stat_rq_poll);
301 300
302 spin_lock_bh(&xprt->sc_rq_dto_lock);
303 while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { 301 while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
304 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; 302 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
305 ctxt->wc_status = wc.status; 303 ctxt->wc_status = wc.status;
306 ctxt->byte_len = wc.byte_len; 304 ctxt->byte_len = wc.byte_len;
307 if (wc.status != IB_WC_SUCCESS) { 305 if (wc.status != IB_WC_SUCCESS) {
308 /* Close the transport */ 306 /* Close the transport */
307 dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
309 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 308 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
310 svc_rdma_put_context(ctxt, 1); 309 svc_rdma_put_context(ctxt, 1);
310 svc_xprt_put(&xprt->sc_xprt);
311 continue; 311 continue;
312 } 312 }
313 spin_lock_bh(&xprt->sc_rq_dto_lock);
313 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); 314 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
315 spin_unlock_bh(&xprt->sc_rq_dto_lock);
316 svc_xprt_put(&xprt->sc_xprt);
314 } 317 }
315 spin_unlock_bh(&xprt->sc_rq_dto_lock);
316 318
317 if (ctxt) 319 if (ctxt)
318 atomic_inc(&rdma_stat_rq_prod); 320 atomic_inc(&rdma_stat_rq_prod);
321
322 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
323 /*
324 * If data arrived before established event,
325 * don't enqueue. This defers RPC I/O until the
326 * RDMA connection is complete.
327 */
328 if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
329 svc_xprt_enqueue(&xprt->sc_xprt);
319} 330}
320 331
321/* 332/*
322 * Send Queue Completion Handler - potentially called on interrupt context. 333 * Send Queue Completion Handler - potentially called on interrupt context.
334 *
335 * Note that caller must hold a transport reference.
323 */ 336 */
324static void sq_cq_reap(struct svcxprt_rdma *xprt) 337static void sq_cq_reap(struct svcxprt_rdma *xprt)
325{ 338{
@@ -328,6 +341,11 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
328 struct ib_cq *cq = xprt->sc_sq_cq; 341 struct ib_cq *cq = xprt->sc_sq_cq;
329 int ret; 342 int ret;
330 343
344
345 if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
346 return;
347
348 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
331 atomic_inc(&rdma_stat_sq_poll); 349 atomic_inc(&rdma_stat_sq_poll);
332 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { 350 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
333 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; 351 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
@@ -349,14 +367,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
349 367
350 case IB_WR_RDMA_READ: 368 case IB_WR_RDMA_READ:
351 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { 369 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
370 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
371 BUG_ON(!read_hdr);
352 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 372 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
353 set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
354 spin_lock_bh(&xprt->sc_read_complete_lock); 373 spin_lock_bh(&xprt->sc_read_complete_lock);
355 list_add_tail(&ctxt->dto_q, 374 list_add_tail(&read_hdr->dto_q,
356 &xprt->sc_read_complete_q); 375 &xprt->sc_read_complete_q);
357 spin_unlock_bh(&xprt->sc_read_complete_lock); 376 spin_unlock_bh(&xprt->sc_read_complete_lock);
358 svc_xprt_enqueue(&xprt->sc_xprt); 377 svc_xprt_enqueue(&xprt->sc_xprt);
359 } 378 }
379 svc_rdma_put_context(ctxt, 0);
360 break; 380 break;
361 381
362 default: 382 default:
@@ -365,6 +385,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
365 wc.opcode, wc.status); 385 wc.opcode, wc.status);
366 break; 386 break;
367 } 387 }
388 svc_xprt_put(&xprt->sc_xprt);
368 } 389 }
369 390
370 if (ctxt) 391 if (ctxt)
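The RDMA_READ arm previously queued the per-chunk context itself on sc_read_complete_q, reusing the dto_q field that the receive path also uses; the fix frees each chunk context immediately and promotes only the saved read_hdr context once the last chunk completes. A toy version of that last-chunk promotion:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct chunk_ctxt {
        bool last;                      /* RDMACTXT_F_LAST_CTXT analogue */
        struct chunk_ctxt *read_hdr;    /* ctxt describing the request   */
};

/* Called once per completed read chunk, as in the hunk above. */
static void on_read_complete(struct chunk_ctxt *c,
                             void (*enqueue)(struct chunk_ctxt *hdr))
{
        if (c->last)
                enqueue(c->read_hdr);   /* whole read-list done: hand
                                         * the header ctxt to recvfrom   */
        free(c);                        /* the chunk ctxt itself is done */
}

static void enqueue_stub(struct chunk_ctxt *hdr)
{
        printf("request %p ready for svc_rdma_recvfrom\n", (void *)hdr);
}

int main(void)
{
        struct chunk_ctxt *hdr = calloc(1, sizeof(*hdr));
        struct chunk_ctxt *c = calloc(1, sizeof(*c));

        if (!hdr || !c)
                return 1;
        c->last = true;
        c->read_hdr = hdr;
        on_read_complete(c, enqueue_stub);
        free(hdr);
        return 0;
}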
@@ -376,11 +397,15 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
376 struct svcxprt_rdma *xprt = cq_context; 397 struct svcxprt_rdma *xprt = cq_context;
377 unsigned long flags; 398 unsigned long flags;
378 399
400 /* Guard against unconditional flush call for destroyed QP */
 401 if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount) == 0)
402 return;
403
379 /* 404 /*
380 * Set the bit regardless of whether or not it's on the list 405 * Set the bit regardless of whether or not it's on the list
381 * because it may be on the list already due to an RQ 406 * because it may be on the list already due to an RQ
382 * completion. 407 * completion.
383 */ 408 */
384 set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags); 409 set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
385 410
386 /* 411 /*
@@ -407,28 +432,29 @@ static void create_context_cache(struct svcxprt_rdma *xprt,
407 xprt->sc_ctxt_max = ctxt_max; 432 xprt->sc_ctxt_max = ctxt_max;
408 xprt->sc_ctxt_bump = ctxt_bump; 433 xprt->sc_ctxt_bump = ctxt_bump;
409 xprt->sc_ctxt_cnt = 0; 434 xprt->sc_ctxt_cnt = 0;
410 xprt->sc_ctxt_head = NULL; 435 atomic_set(&xprt->sc_ctxt_used, 0);
436
437 INIT_LIST_HEAD(&xprt->sc_ctxt_free);
411 for (i = 0; i < ctxt_count; i++) { 438 for (i = 0; i < ctxt_count; i++) {
412 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); 439 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
413 if (ctxt) { 440 if (ctxt) {
414 ctxt->next = xprt->sc_ctxt_head; 441 INIT_LIST_HEAD(&ctxt->free_list);
415 xprt->sc_ctxt_head = ctxt; 442 list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
416 xprt->sc_ctxt_cnt++; 443 xprt->sc_ctxt_cnt++;
417 } 444 }
418 } 445 }
419} 446}
420 447
421static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt) 448static void destroy_context_cache(struct svcxprt_rdma *xprt)
422{ 449{
423 struct svc_rdma_op_ctxt *next; 450 while (!list_empty(&xprt->sc_ctxt_free)) {
424 if (!ctxt) 451 struct svc_rdma_op_ctxt *ctxt;
425 return; 452 ctxt = list_entry(xprt->sc_ctxt_free.next,
426 453 struct svc_rdma_op_ctxt,
427 do { 454 free_list);
428 next = ctxt->next; 455 list_del_init(&ctxt->free_list);
429 kfree(ctxt); 456 kfree(ctxt);
430 ctxt = next; 457 }
431 } while (next);
432} 458}
433 459
434static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, 460static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
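All three context-cache sites now speak list_head instead of a hand-rolled singly linked chain, which is what lets destroy take the transport itself and drain whatever is still cached. A self-contained miniature of the pattern (toy list helpers standing in for <linux/list.h>, and no locking, unlike the spin-locked kernel paths):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy intrusive list, standing in for <linux/list.h>. */
struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *n, struct list_head *h)
{
        n->next = h->next;
        n->prev = h;
        h->next->prev = n;
        h->next = n;
}

static void list_del_init(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        INIT_LIST_HEAD(n);
}

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct op_ctxt {                        /* svc_rdma_op_ctxt stand-in */
        int id;
        struct list_head free_list;
};

static struct list_head free_q;         /* sc_ctxt_free stand-in     */

static void put_ctxt(struct op_ctxt *c)
{
        list_add(&c->free_list, &free_q);
}

static struct op_ctxt *get_ctxt(void)
{
        struct op_ctxt *c;

        if (list_empty(&free_q))
                return NULL;            /* real code grows the cache */
        c = container_of(free_q.next, struct op_ctxt, free_list);
        list_del_init(&c->free_list);
        return c;
}

static void destroy_cache(void)         /* destroy_context_cache()   */
{
        while (!list_empty(&free_q))
                free(get_ctxt());
}

int main(void)
{
        struct op_ctxt *a = malloc(sizeof(*a));

        if (!a)
                return 1;
        INIT_LIST_HEAD(&free_q);
        a->id = 7;
        INIT_LIST_HEAD(&a->free_list);
        put_ctxt(a);
        printf("got id=%d\n", get_ctxt()->id);
        put_ctxt(a);
        destroy_cache();
        return 0;
}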
@@ -465,7 +491,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
465 reqs + 491 reqs +
466 cma_xprt->sc_sq_depth + 492 cma_xprt->sc_sq_depth +
467 RPCRDMA_MAX_THREADS + 1); /* max */ 493 RPCRDMA_MAX_THREADS + 1); /* max */
468 if (!cma_xprt->sc_ctxt_head) { 494 if (list_empty(&cma_xprt->sc_ctxt_free)) {
469 kfree(cma_xprt); 495 kfree(cma_xprt);
470 return NULL; 496 return NULL;
471 } 497 }
@@ -520,7 +546,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
520 recv_wr.num_sge = ctxt->count; 546 recv_wr.num_sge = ctxt->count;
521 recv_wr.wr_id = (u64)(unsigned long)ctxt; 547 recv_wr.wr_id = (u64)(unsigned long)ctxt;
522 548
549 svc_xprt_get(&xprt->sc_xprt);
523 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); 550 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
551 if (ret) {
552 svc_xprt_put(&xprt->sc_xprt);
553 svc_rdma_put_context(ctxt, 1);
554 }
524 return ret; 555 return ret;
525} 556}
526 557
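Taking the transport reference before ib_post_recv() and dropping it only on failure closes a race: once the WR is posted, its completion can run at any instant and must find the transport pinned. The get-before-post, put-on-failure shape in a generic C11 refcount sketch:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int xprt_ref = 1;         /* transport starts referenced */

static void xprt_get(void) { atomic_fetch_add(&xprt_ref, 1); }
static void xprt_put(void) { atomic_fetch_sub(&xprt_ref, 1); }

static int post_stub(int fail) { return fail ? -1 : 0; }

static int post_recv(int fail)
{
        int ret;

        xprt_get();                     /* pin before the WR can complete */
        ret = post_stub(fail);
        if (ret)
                xprt_put();             /* nothing posted: unpin now      */
        return ret;                     /* on success the completion
                                         * path drops this reference      */
}

int main(void)
{
        post_recv(1);                   /* failing post                   */
        printf("refs after failed post: %d\n", atomic_load(&xprt_ref));
        return 0;
}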
@@ -539,6 +570,7 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
539{ 570{
540 struct svcxprt_rdma *listen_xprt = new_cma_id->context; 571 struct svcxprt_rdma *listen_xprt = new_cma_id->context;
541 struct svcxprt_rdma *newxprt; 572 struct svcxprt_rdma *newxprt;
573 struct sockaddr *sa;
542 574
543 /* Create a new transport */ 575 /* Create a new transport */
544 newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); 576 newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
@@ -551,6 +583,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
551 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", 583 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
552 newxprt, newxprt->sc_cm_id, listen_xprt); 584 newxprt, newxprt->sc_cm_id, listen_xprt);
553 585
586 /* Set the local and remote addresses in the transport */
587 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
588 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
589 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
590 svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
591
554 /* 592 /*
555 * Enqueue the new transport on the accept queue of the listening 593 * Enqueue the new transport on the accept queue of the listening
556 * transport 594 * transport
@@ -627,6 +665,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
627 if (xprt) { 665 if (xprt) {
628 set_bit(XPT_CLOSE, &xprt->xpt_flags); 666 set_bit(XPT_CLOSE, &xprt->xpt_flags);
629 svc_xprt_enqueue(xprt); 667 svc_xprt_enqueue(xprt);
668 svc_xprt_put(xprt);
630 } 669 }
631 break; 670 break;
632 case RDMA_CM_EVENT_DEVICE_REMOVAL: 671 case RDMA_CM_EVENT_DEVICE_REMOVAL:
@@ -661,31 +700,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
661 700
662 cma_xprt = rdma_create_xprt(serv, 1); 701 cma_xprt = rdma_create_xprt(serv, 1);
663 if (!cma_xprt) 702 if (!cma_xprt)
664 return ERR_PTR(ENOMEM); 703 return ERR_PTR(-ENOMEM);
665 xprt = &cma_xprt->sc_xprt; 704 xprt = &cma_xprt->sc_xprt;
666 705
667 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); 706 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
668 if (IS_ERR(listen_id)) { 707 if (IS_ERR(listen_id)) {
669 svc_xprt_put(&cma_xprt->sc_xprt); 708 ret = PTR_ERR(listen_id);
670 dprintk("svcrdma: rdma_create_id failed = %ld\n", 709 dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
671 PTR_ERR(listen_id)); 710 goto err0;
672 return (void *)listen_id;
673 } 711 }
712
674 ret = rdma_bind_addr(listen_id, sa); 713 ret = rdma_bind_addr(listen_id, sa);
675 if (ret) { 714 if (ret) {
676 rdma_destroy_id(listen_id);
677 svc_xprt_put(&cma_xprt->sc_xprt);
678 dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); 715 dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
679 return ERR_PTR(ret); 716 goto err1;
680 } 717 }
681 cma_xprt->sc_cm_id = listen_id; 718 cma_xprt->sc_cm_id = listen_id;
682 719
683 ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); 720 ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
684 if (ret) { 721 if (ret) {
685 rdma_destroy_id(listen_id);
686 svc_xprt_put(&cma_xprt->sc_xprt);
687 dprintk("svcrdma: rdma_listen failed = %d\n", ret); 722 dprintk("svcrdma: rdma_listen failed = %d\n", ret);
688 return ERR_PTR(ret); 723 goto err1;
689 } 724 }
690 725
691 /* 726 /*
@@ -696,6 +731,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
696 svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen); 731 svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
697 732
698 return &cma_xprt->sc_xprt; 733 return &cma_xprt->sc_xprt;
734
735 err1:
736 rdma_destroy_id(listen_id);
737 err0:
738 kfree(cma_xprt);
739 return ERR_PTR(ret);
699} 740}
700 741
701/* 742/*
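The listener setup now unwinds through the kernel's usual reverse-order goto labels instead of repeating rdma_destroy_id()/svc_xprt_put() at every failure site; note the old code also returned ERR_PTR(ENOMEM) with the wrong sign. The unwinding idiom, reduced to stub resources:

#include <stdio.h>
#include <stdlib.h>

static int make_id(void **id)           /* rdma_create_id() analogue   */
{
        *id = malloc(1);
        return *id ? 0 : -1;
}

static int bind_and_listen(int fail)    /* rdma_bind_addr/rdma_listen  */
{
        return fail ? -1 : 0;
}

static int create_listener(int fail, void **out)
{
        void *xprt, *id;
        int ret = -1;

        xprt = malloc(1);               /* rdma_create_xprt() analogue */
        if (!xprt)
                return -1;
        if (make_id(&id))
                goto err0;
        if (bind_and_listen(fail))
                goto err1;
        *out = xprt;                    /* success: the transport keeps
                                         * the id, nothing is undone    */
        return 0;

err1:
        free(id);                       /* rdma_destroy_id() analogue   */
err0:
        free(xprt);                     /* undo the first allocation    */
        return ret;
}

int main(void)
{
        void *x;

        printf("failing setup returns %d\n", create_listener(1, &x));
        return 0;
}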
@@ -716,7 +757,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
716 struct rdma_conn_param conn_param; 757 struct rdma_conn_param conn_param;
717 struct ib_qp_init_attr qp_attr; 758 struct ib_qp_init_attr qp_attr;
718 struct ib_device_attr devattr; 759 struct ib_device_attr devattr;
719 struct sockaddr *sa;
720 int ret; 760 int ret;
721 int i; 761 int i;
722 762
@@ -826,7 +866,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
826 newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; 866 newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
827 newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; 867 newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
828 } 868 }
829 svc_xprt_get(&newxprt->sc_xprt);
830 newxprt->sc_qp = newxprt->sc_cm_id->qp; 869 newxprt->sc_qp = newxprt->sc_cm_id->qp;
831 870
832 /* Register all of physical memory */ 871 /* Register all of physical memory */
@@ -850,6 +889,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
850 /* Swap out the handler */ 889 /* Swap out the handler */
851 newxprt->sc_cm_id->event_handler = rdma_cma_handler; 890 newxprt->sc_cm_id->event_handler = rdma_cma_handler;
852 891
892 /*
893 * Arm the CQs for the SQ and RQ before accepting so we can't
894 * miss the first message
895 */
896 ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
897 ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
898
853 /* Accept Connection */ 899 /* Accept Connection */
854 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); 900 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
855 memset(&conn_param, 0, sizeof conn_param); 901 memset(&conn_param, 0, sizeof conn_param);
@@ -886,58 +932,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
886 newxprt->sc_max_requests, 932 newxprt->sc_max_requests,
887 newxprt->sc_ord); 933 newxprt->sc_ord);
888 934
889 /* Set the local and remote addresses in the transport */
890 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
891 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
892 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
893 svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
894
895 ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
896 ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
897 return &newxprt->sc_xprt; 935 return &newxprt->sc_xprt;
898 936
899 errout: 937 errout:
900 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); 938 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
901 /* Take a reference in case the DTO handler runs */ 939 /* Take a reference in case the DTO handler runs */
902 svc_xprt_get(&newxprt->sc_xprt); 940 svc_xprt_get(&newxprt->sc_xprt);
903 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) { 941 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
904 ib_destroy_qp(newxprt->sc_qp); 942 ib_destroy_qp(newxprt->sc_qp);
905 svc_xprt_put(&newxprt->sc_xprt);
906 }
907 rdma_destroy_id(newxprt->sc_cm_id); 943 rdma_destroy_id(newxprt->sc_cm_id);
908 /* This call to put will destroy the transport */ 944 /* This call to put will destroy the transport */
909 svc_xprt_put(&newxprt->sc_xprt); 945 svc_xprt_put(&newxprt->sc_xprt);
910 return NULL; 946 return NULL;
911} 947}
912 948
913/*
914 * Post an RQ WQE to the RQ when the rqst is being released. This
915 * effectively returns an RQ credit to the client. The rq_xprt_ctxt
916 * will be null if the request is deferred due to an RDMA_READ or the
917 * transport had no data ready (EAGAIN). Note that an RPC deferred in
918 * svc_process will still return the credit, this is because the data
919 * is copied and no longer consume a WQE/WC.
920 */
921static void svc_rdma_release_rqst(struct svc_rqst *rqstp) 949static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
922{ 950{
923 int err;
924 struct svcxprt_rdma *rdma =
925 container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
926 if (rqstp->rq_xprt_ctxt) {
927 BUG_ON(rqstp->rq_xprt_ctxt != rdma);
928 err = svc_rdma_post_recv(rdma);
929 if (err)
930 dprintk("svcrdma: failed to post an RQ WQE error=%d\n",
931 err);
932 }
933 rqstp->rq_xprt_ctxt = NULL;
934} 951}
935 952
936/* 953/*
937 * When connected, an svc_xprt has at least three references: 954 * When connected, an svc_xprt has at least two references:
938 *
939 * - A reference held by the QP. We still hold that here because this
940 * code deletes the QP and puts the reference.
941 * 955 *
942 * - A reference held by the cm_id between the ESTABLISHED and 956 * - A reference held by the cm_id between the ESTABLISHED and
943 * DISCONNECTED events. If the remote peer disconnected first, this 957 * DISCONNECTED events. If the remote peer disconnected first, this
@@ -946,7 +960,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
946 * - A reference held by the svc_recv code that called this function 960 * - A reference held by the svc_recv code that called this function
947 * as part of close processing. 961 * as part of close processing.
948 * 962 *
949 * At a minimum two references should still be held. 963 * At a minimum one reference should still be held.
950 */ 964 */
951static void svc_rdma_detach(struct svc_xprt *xprt) 965static void svc_rdma_detach(struct svc_xprt *xprt)
952{ 966{
@@ -956,23 +970,53 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
956 970
957 /* Disconnect and flush posted WQE */ 971 /* Disconnect and flush posted WQE */
958 rdma_disconnect(rdma->sc_cm_id); 972 rdma_disconnect(rdma->sc_cm_id);
959
960 /* Destroy the QP if present (not a listener) */
961 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
962 ib_destroy_qp(rdma->sc_qp);
963 svc_xprt_put(xprt);
964 }
965
966 /* Destroy the CM ID */
967 rdma_destroy_id(rdma->sc_cm_id);
968} 973}
969 974
970static void svc_rdma_free(struct svc_xprt *xprt) 975static void __svc_rdma_free(struct work_struct *work)
971{ 976{
972 struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; 977 struct svcxprt_rdma *rdma =
978 container_of(work, struct svcxprt_rdma, sc_work);
973 dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); 979 dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
980
974 /* We should only be called from kref_put */ 981 /* We should only be called from kref_put */
975 BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0); 982 BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
983
984 /*
985 * Destroy queued, but not processed read completions. Note
986 * that this cleanup has to be done before destroying the
987 * cm_id because the device ptr is needed to unmap the dma in
988 * svc_rdma_put_context.
989 */
990 spin_lock_bh(&rdma->sc_read_complete_lock);
991 while (!list_empty(&rdma->sc_read_complete_q)) {
992 struct svc_rdma_op_ctxt *ctxt;
993 ctxt = list_entry(rdma->sc_read_complete_q.next,
994 struct svc_rdma_op_ctxt,
995 dto_q);
996 list_del_init(&ctxt->dto_q);
997 svc_rdma_put_context(ctxt, 1);
998 }
999 spin_unlock_bh(&rdma->sc_read_complete_lock);
1000
1001 /* Destroy queued, but not processed recv completions */
1002 spin_lock_bh(&rdma->sc_rq_dto_lock);
1003 while (!list_empty(&rdma->sc_rq_dto_q)) {
1004 struct svc_rdma_op_ctxt *ctxt;
1005 ctxt = list_entry(rdma->sc_rq_dto_q.next,
1006 struct svc_rdma_op_ctxt,
1007 dto_q);
1008 list_del_init(&ctxt->dto_q);
1009 svc_rdma_put_context(ctxt, 1);
1010 }
1011 spin_unlock_bh(&rdma->sc_rq_dto_lock);
1012
1013 /* Warn if we leaked a resource or under-referenced */
1014 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
1015
1016 /* Destroy the QP if present (not a listener) */
1017 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
1018 ib_destroy_qp(rdma->sc_qp);
1019
976 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) 1020 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
977 ib_destroy_cq(rdma->sc_sq_cq); 1021 ib_destroy_cq(rdma->sc_sq_cq);
978 1022
@@ -985,10 +1029,21 @@ static void svc_rdma_free(struct svc_xprt *xprt)
985 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) 1029 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
986 ib_dealloc_pd(rdma->sc_pd); 1030 ib_dealloc_pd(rdma->sc_pd);
987 1031
988 destroy_context_cache(rdma->sc_ctxt_head); 1032 /* Destroy the CM ID */
1033 rdma_destroy_id(rdma->sc_cm_id);
1034
1035 destroy_context_cache(rdma);
989 kfree(rdma); 1036 kfree(rdma);
990} 1037}
991 1038
1039static void svc_rdma_free(struct svc_xprt *xprt)
1040{
1041 struct svcxprt_rdma *rdma =
1042 container_of(xprt, struct svcxprt_rdma, sc_xprt);
1043 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
1044 schedule_work(&rdma->sc_work);
1045}
1046
992static int svc_rdma_has_wspace(struct svc_xprt *xprt) 1047static int svc_rdma_has_wspace(struct svc_xprt *xprt)
993{ 1048{
994 struct svcxprt_rdma *rdma = 1049 struct svcxprt_rdma *rdma =
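Deferring the real teardown to a work item lets the final kref put happen from contexts where destroying QPs and CQs would be unsafe, and it gives __svc_rdma_free() a sane place to drain both deferred-completion queues before the cm_id, and with it the DMA device pointer, goes away. A pthread analogue of handing destruction to another context:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct xprt {
        int resources;                  /* queues, ids, ... (elided)  */
};

static void *do_free(void *arg)         /* __svc_rdma_free() analogue */
{
        /* Drain deferred completions first, then release everything;
         * this runs in a context where blocking teardown is fine. */
        free(arg);
        return NULL;
}

static void xprt_free(struct xprt *x)   /* svc_rdma_free() analogue   */
{
        pthread_t t;

        /* The caller may be in an atomic context (the kernel version
         * punts to schedule_work()), so hand the work off. */
        if (pthread_create(&t, NULL, do_free, x) == 0)
                pthread_join(t, NULL);  /* joined only to keep the
                                         * demo deterministic          */
}

int main(void)
{
        xprt_free(calloc(1, sizeof(struct xprt)));
        puts("freed off the caller's context");
        return 0;
}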
@@ -1018,7 +1073,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1018 int ret; 1073 int ret;
1019 1074
1020 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1075 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1021 return 0; 1076 return -ENOTCONN;
1022 1077
1023 BUG_ON(wr->send_flags != IB_SEND_SIGNALED); 1078 BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
1024 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != 1079 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
@@ -1029,7 +1084,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1029 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { 1084 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
1030 spin_unlock_bh(&xprt->sc_lock); 1085 spin_unlock_bh(&xprt->sc_lock);
1031 atomic_inc(&rdma_stat_sq_starve); 1086 atomic_inc(&rdma_stat_sq_starve);
1032 /* See if we can reap some SQ WR */ 1087
1088 /* See if we can opportunistically reap SQ WR to make room */
1033 sq_cq_reap(xprt); 1089 sq_cq_reap(xprt);
1034 1090
1035 /* Wait until SQ WR available if SQ still full */ 1091 /* Wait until SQ WR available if SQ still full */
@@ -1041,22 +1097,25 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1041 continue; 1097 continue;
1042 } 1098 }
1043 /* Bumped used SQ WR count and post */ 1099 /* Bumped used SQ WR count and post */
1100 svc_xprt_get(&xprt->sc_xprt);
1044 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); 1101 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1045 if (!ret) 1102 if (!ret)
1046 atomic_inc(&xprt->sc_sq_count); 1103 atomic_inc(&xprt->sc_sq_count);
1047 else 1104 else {
1105 svc_xprt_put(&xprt->sc_xprt);
1048 dprintk("svcrdma: failed to post SQ WR rc=%d, " 1106 dprintk("svcrdma: failed to post SQ WR rc=%d, "
1049 "sc_sq_count=%d, sc_sq_depth=%d\n", 1107 "sc_sq_count=%d, sc_sq_depth=%d\n",
1050 ret, atomic_read(&xprt->sc_sq_count), 1108 ret, atomic_read(&xprt->sc_sq_count),
1051 xprt->sc_sq_depth); 1109 xprt->sc_sq_depth);
1110 }
1052 spin_unlock_bh(&xprt->sc_lock); 1111 spin_unlock_bh(&xprt->sc_lock);
1053 break; 1112 break;
1054 } 1113 }
1055 return ret; 1114 return ret;
1056} 1115}
1057 1116
1058int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, 1117void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1059 enum rpcrdma_errcode err) 1118 enum rpcrdma_errcode err)
1060{ 1119{
1061 struct ib_send_wr err_wr; 1120 struct ib_send_wr err_wr;
1062 struct ib_sge sge; 1121 struct ib_sge sge;
@@ -1094,9 +1153,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1094 /* Post It */ 1153 /* Post It */
1095 ret = svc_rdma_send(xprt, &err_wr); 1154 ret = svc_rdma_send(xprt, &err_wr);
1096 if (ret) { 1155 if (ret) {
1097 dprintk("svcrdma: Error posting send = %d\n", ret); 1156 dprintk("svcrdma: Error %d posting send for protocol error\n",
1157 ret);
1098 svc_rdma_put_context(ctxt, 1); 1158 svc_rdma_put_context(ctxt, 1);
1099 } 1159 }
1100
1101 return ret;
1102} 1160}
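Two caller-contract fixes ride along here: svc_rdma_send() reports -ENOTCONN on a closing transport instead of pretending success, and svc_rdma_send_error() becomes void because its callers can do nothing useful with a status while the connection is already failing. A small sketch of the resulting contract:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool closing;

static int post_send_stub(void) { return 0; }

static int xprt_send(void)              /* svc_rdma_send() analogue */
{
        if (closing)
                return -ENOTCONN;       /* fail loudly, never fake 0 */
        return post_send_stub();
}

static void send_error(void)            /* now void: best effort     */
{
        int ret = xprt_send();

        if (ret)
                fprintf(stderr, "error send failed: %d\n", ret);
        /* nothing more to do; the transport is going away anyway */
}

int main(void)
{
        closing = true;
        send_error();
        return 0;
}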
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2bdd4dddc0e1..fb75f265b39c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -187,7 +187,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
187 return genlmsg_end(msg, hdr); 187 return genlmsg_end(msg, hdr);
188 188
189 nla_put_failure: 189 nla_put_failure:
190 return genlmsg_cancel(msg, hdr); 190 genlmsg_cancel(msg, hdr);
191 return -EMSGSIZE;
191} 192}
192 193
193static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) 194static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
@@ -273,7 +274,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags,
273 return genlmsg_end(msg, hdr); 274 return genlmsg_end(msg, hdr);
274 275
275 nla_put_failure: 276 nla_put_failure:
276 return genlmsg_cancel(msg, hdr); 277 genlmsg_cancel(msg, hdr);
278 return -EMSGSIZE;
277} 279}
278 280
279static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) 281static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb)
@@ -928,7 +930,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
928 return genlmsg_end(msg, hdr); 930 return genlmsg_end(msg, hdr);
929 931
930 nla_put_failure: 932 nla_put_failure:
931 return genlmsg_cancel(msg, hdr); 933 genlmsg_cancel(msg, hdr);
934 return -EMSGSIZE;
932} 935}
933 936
934static int nl80211_dump_station(struct sk_buff *skb, 937static int nl80211_dump_station(struct sk_buff *skb,
@@ -1267,7 +1270,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
1267 return genlmsg_end(msg, hdr); 1270 return genlmsg_end(msg, hdr);
1268 1271
1269 nla_put_failure: 1272 nla_put_failure:
1270 return genlmsg_cancel(msg, hdr); 1273 genlmsg_cancel(msg, hdr);
1274 return -EMSGSIZE;
1271} 1275}
1272 1276
1273static int nl80211_dump_mpath(struct sk_buff *skb, 1277static int nl80211_dump_mpath(struct sk_buff *skb,
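Each nl80211 fill helper used to return the result of genlmsg_cancel() from its nla_put_failure label, which the dump loop could mistake for success; returning -EMSGSIZE explicitly tells the dumper the attributes did not fit and it should retry with a fresh message. A toy buffer-fill version of the cancel-and-report idiom:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Toy message standing in for an sk_buff with a genetlink header. */
struct msg {
        char buf[16];
        size_t len;
        size_t hdr;                     /* rollback point              */
};

static int put_attr(struct msg *m, const char *s)
{
        size_t n = strlen(s);

        if (m->len + n > sizeof(m->buf))
                return -1;              /* nla_put() ran out of room   */
        memcpy(m->buf + m->len, s, n);
        m->len += n;
        return 0;
}

static int fill(struct msg *m, const char *a, const char *b)
{
        m->hdr = m->len;                /* genlmsg_put() analogue      */
        if (put_attr(m, a) || put_attr(m, b))
                goto nla_put_failure;
        return 0;                       /* genlmsg_end() analogue      */

nla_put_failure:
        m->len = m->hdr;                /* genlmsg_cancel(): roll back */
        return -EMSGSIZE;               /* tell the dumper to retry    */
}

int main(void)
{
        struct msg m = { .len = 0 };

        printf("fill returned %d\n", fill(&m, "0123456789", "0123456789"));
        return 0;
}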
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index ac765dd9c7f5..23a2cc04b8cd 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -200,8 +200,8 @@ static struct xfrm_algo_desc aalg_list[] = {
200 } 200 }
201}, 201},
202{ 202{
203 .name = "hmac(ripemd160)", 203 .name = "hmac(rmd160)",
204 .compat = "ripemd160", 204 .compat = "rmd160",
205 205
206 .uinfo = { 206 .uinfo = {
207 .auth = { 207 .auth = {
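The rename lines up the template string with the name under which the kernel crypto layer registers RIPEMD-160 ("rmd160"); with the old spelling the lookup could never match, so state setup using this hash would fail. A toy registry illustrating why the exact string matters (the registered names here are assumptions for the demo, not the kernel's full list):

#include <stdio.h>
#include <string.h>

/* Toy registry; the real names live in the kernel crypto API. */
static const char *registered[] = {
        "hmac(md5)", "hmac(sha1)", "hmac(rmd160)",
};

static int crypto_has_alg(const char *name)
{
        for (size_t i = 0; i < sizeof(registered) / sizeof(*registered); i++)
                if (strcmp(registered[i], name) == 0)
                        return 1;
        return 0;
}

int main(void)
{
        /* the old xfrm spelling never matched a registered transform */
        printf("hmac(ripemd160) -> %d\n", crypto_has_alg("hmac(ripemd160)"));
        printf("hmac(rmd160)    -> %d\n", crypto_has_alg("hmac(rmd160)"));
        return 0;
}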