aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c14
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/lec.c1
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/ax25/ax25_route.c4
-rw-r--r--net/bluetooth/af_bluetooth.c5
-rw-r--r--net/bridge/br_if.c29
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/caif/caif_dev.c22
-rw-r--r--net/caif/caif_socket.c19
-rw-r--r--net/caif/cfcnfg.c49
-rw-r--r--net/caif/cfctrl.c59
-rw-r--r--net/caif/cfdbgl.c4
-rw-r--r--net/caif/cfdgml.c11
-rw-r--r--net/caif/cffrml.c14
-rw-r--r--net/caif/cfmuxl.c14
-rw-r--r--net/caif/cfpkt_skbuff.c48
-rw-r--r--net/caif/cfrfml.c12
-rw-r--r--net/caif/cfserl.c4
-rw-r--r--net/caif/cfsrvl.c17
-rw-r--r--net/caif/cfutill.c12
-rw-r--r--net/caif/cfveil.c11
-rw-r--r--net/caif/cfvidl.c6
-rw-r--r--net/caif/chnl_net.c46
-rw-r--r--net/can/raw.c4
-rw-r--r--net/core/datagram.c5
-rw-r--r--net/core/dev.c196
-rw-r--r--net/core/ethtool.c34
-rw-r--r--net/core/iovec.c6
-rw-r--r--net/core/net-sysfs.c5
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c31
-rw-r--r--net/core/skbuff.c92
-rw-r--r--net/core/sock.c4
-rw-r--r--net/dccp/ccids/Kconfig31
-rw-r--r--net/dccp/ccids/ccid2.c287
-rw-r--r--net/dccp/ccids/ccid2.h35
-rw-r--r--net/dccp/ccids/ccid3.c81
-rw-r--r--net/dccp/ccids/ccid3.h21
-rw-r--r--net/decnet/dn_nsp_out.c8
-rw-r--r--net/econet/af_econet.c2
-rw-r--r--net/ethernet/eth.c6
-rw-r--r--net/ipv4/Kconfig7
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/arp.c226
-rw-r--r--net/ipv4/gre.c151
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c14
-rw-r--r--net/ipv4/ip_output.c15
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c31
-rw-r--r--net/ipv4/protocol.c31
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c9
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_input.c17
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_output.c23
-rw-r--r--net/ipv4/tcp_timer.c40
-rw-r--r--net/ipv4/tunnel4.c19
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/xfrm4_tunnel.c4
-rw-r--r--net/ipv6/addrconf.c3
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ndisc.c18
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/protocol.c32
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/tunnel6.c17
-rw-r--r--net/ipv6/xfrm6_tunnel.c4
-rw-r--r--net/irda/irlan/irlan_eth.c32
-rw-r--r--net/l2tp/l2tp_eth.c1
-rw-r--r--net/mac80211/aes_ccm.c6
-rw-r--r--net/mac80211/aes_cmac.c6
-rw-r--r--net/mac80211/agg-rx.c22
-rw-r--r--net/mac80211/cfg.c115
-rw-r--r--net/mac80211/debugfs.c6
-rw-r--r--net/mac80211/debugfs_key.c55
-rw-r--r--net/mac80211/driver-ops.h14
-rw-r--r--net/mac80211/driver-trace.h29
-rw-r--r--net/mac80211/ht.c28
-rw-r--r--net/mac80211/ibss.c12
-rw-r--r--net/mac80211/ieee80211_i.h66
-rw-r--r--net/mac80211/iface.c375
-rw-r--r--net/mac80211/key.c111
-rw-r--r--net/mac80211/key.h10
-rw-r--r--net/mac80211/main.c159
-rw-r--r--net/mac80211/mlme.c64
-rw-r--r--net/mac80211/offchannel.c19
-rw-r--r--net/mac80211/pm.c3
-rw-r--r--net/mac80211/rate.c9
-rw-r--r--net/mac80211/rc80211_pid_debugfs.c2
-rw-r--r--net/mac80211/rx.c520
-rw-r--r--net/mac80211/scan.c66
-rw-r--r--net/mac80211/sta_info.c21
-rw-r--r--net/mac80211/sta_info.h16
-rw-r--r--net/mac80211/status.c11
-rw-r--r--net/mac80211/tx.c67
-rw-r--r--net/mac80211/util.c17
-rw-r--r--net/mac80211/wep.c2
-rw-r--r--net/mac80211/work.c39
-rw-r--r--net/mac80211/wpa.c32
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c22
-rw-r--r--net/netfilter/xt_hashlimit.c15
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/phonet/pep.c8
-rw-r--r--net/phonet/pn_dev.c3
-rw-r--r--net/phonet/socket.c4
-rw-r--r--net/rds/af_rds.c26
-rw-r--r--net/rds/bind.c82
-rw-r--r--net/rds/cong.c8
-rw-r--r--net/rds/connection.c157
-rw-r--r--net/rds/ib.c194
-rw-r--r--net/rds/ib.h100
-rw-r--r--net/rds/ib_cm.c183
-rw-r--r--net/rds/ib_rdma.c314
-rw-r--r--net/rds/ib_recv.c549
-rw-r--r--net/rds/ib_send.c682
-rw-r--r--net/rds/ib_stats.c2
-rw-r--r--net/rds/ib_sysctl.c17
-rw-r--r--net/rds/info.c12
-rw-r--r--net/rds/iw.c4
-rw-r--r--net/rds/iw.h11
-rw-r--r--net/rds/iw_cm.c14
-rw-r--r--net/rds/iw_rdma.c1
-rw-r--r--net/rds/iw_recv.c24
-rw-r--r--net/rds/iw_send.c93
-rw-r--r--net/rds/iw_sysctl.c4
-rw-r--r--net/rds/loop.c31
-rw-r--r--net/rds/message.c118
-rw-r--r--net/rds/page.c5
-rw-r--r--net/rds/rdma.c339
-rw-r--r--net/rds/rdma.h85
-rw-r--r--net/rds/rdma_transport.c42
-rw-r--r--net/rds/rds.h187
-rw-r--r--net/rds/recv.c9
-rw-r--r--net/rds/send.c544
-rw-r--r--net/rds/stats.c6
-rw-r--r--net/rds/sysctl.c4
-rw-r--r--net/rds/tcp.c8
-rw-r--r--net/rds/tcp.h9
-rw-r--r--net/rds/tcp_connect.c2
-rw-r--r--net/rds/tcp_listen.c6
-rw-r--r--net/rds/tcp_recv.c14
-rw-r--r--net/rds/tcp_send.c66
-rw-r--r--net/rds/threads.c69
-rw-r--r--net/rds/transport.c19
-rw-r--r--net/rds/xlist.h80
-rw-r--r--net/sched/Kconfig10
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_csum.c595
-rw-r--r--net/sched/cls_flow.c74
-rw-r--r--net/sched/em_meta.c6
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_sfq.c33
-rw-r--r--net/sctp/associola.c2
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/inqueue.c2
-rw-r--r--net/sctp/ipv6.c4
-rw-r--r--net/sctp/objcnt.c5
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c34
-rw-r--r--net/sctp/probe.c4
-rw-r--r--net/sctp/protocol.c17
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/sm_sideeffect.c21
-rw-r--r--net/sctp/sm_statefuns.c20
-rw-r--r--net/sctp/sm_statetable.c42
-rw-r--r--net/sctp/socket.c79
-rw-r--r--net/sctp/transport.c9
-rw-r--r--net/socket.c30
-rw-r--r--net/tipc/bcast.c39
-rw-r--r--net/tipc/core.c6
-rw-r--r--net/tipc/discover.c8
-rw-r--r--net/tipc/eth_media.c48
-rw-r--r--net/tipc/link.c25
-rw-r--r--net/tipc/name_table.c48
-rw-r--r--net/tipc/net.c1
-rw-r--r--net/tipc/node.c22
-rw-r--r--net/tipc/node.h2
-rw-r--r--net/tipc/port.c19
-rw-r--r--net/tipc/socket.c81
-rw-r--r--net/unix/af_unix.c5
-rw-r--r--net/wireless/core.c15
-rw-r--r--net/wireless/core.h21
-rw-r--r--net/wireless/mlme.c149
-rw-r--r--net/wireless/nl80211.c150
-rw-r--r--net/wireless/nl80211.h14
-rw-r--r--net/wireless/reg.c16
-rw-r--r--net/wireless/sysfs.c9
-rw-r--r--net/wireless/util.c13
-rw-r--r--net/wireless/wext-core.c2
-rw-r--r--net/wireless/wext-sme.c2
200 files changed, 6115 insertions, 3685 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 01ddb0472f86..889f4ac4459a 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -35,12 +35,12 @@ drop:
35} 35}
36EXPORT_SYMBOL(__vlan_hwaccel_rx); 36EXPORT_SYMBOL(__vlan_hwaccel_rx);
37 37
38int vlan_hwaccel_do_receive(struct sk_buff *skb) 38void vlan_hwaccel_do_receive(struct sk_buff *skb)
39{ 39{
40 struct net_device *dev = skb->dev; 40 struct net_device *dev = skb->dev;
41 struct vlan_rx_stats *rx_stats; 41 struct vlan_rx_stats *rx_stats;
42 42
43 skb->dev = vlan_dev_info(dev)->real_dev; 43 skb->dev = vlan_dev_real_dev(dev);
44 netif_nit_deliver(skb); 44 netif_nit_deliver(skb);
45 45
46 skb->dev = dev; 46 skb->dev = dev;
@@ -69,7 +69,6 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
69 break; 69 break;
70 } 70 }
71 u64_stats_update_end(&rx_stats->syncp); 71 u64_stats_update_end(&rx_stats->syncp);
72 return 0;
73} 72}
74 73
75struct net_device *vlan_dev_real_dev(const struct net_device *dev) 74struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -106,9 +105,12 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
106 goto drop; 105 goto drop;
107 106
108 for (p = napi->gro_list; p; p = p->next) { 107 for (p = napi->gro_list; p; p = p->next) {
109 NAPI_GRO_CB(p)->same_flow = 108 unsigned long diffs;
110 p->dev == skb->dev && !compare_ether_header( 109
111 skb_mac_header(p), skb_gro_mac_header(skb)); 110 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
111 diffs |= compare_ether_header(skb_mac_header(p),
112 skb_gro_mac_header(skb));
113 NAPI_GRO_CB(p)->same_flow = !diffs;
112 NAPI_GRO_CB(p)->flush = 0; 114 NAPI_GRO_CB(p)->flush = 0;
113 } 115 }
114 116
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c85109d809ca..078eb162d9bf 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -222,7 +222,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
222 } 222 }
223} 223}
224 224
225static unsigned int 225static int
226p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) 226p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
227{ 227{
228 int ret, n; 228 int ret, n;
diff --git a/net/atm/common.c b/net/atm/common.c
index 940404a73b3d..1b9c52a02cd3 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -792,7 +792,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
792 default: 792 default:
793 if (level == SOL_SOCKET) 793 if (level == SOL_SOCKET)
794 return -EINVAL; 794 return -EINVAL;
795 break; 795 break;
796 } 796 }
797 if (!vcc->dev || !vcc->dev->ops->getsockopt) 797 if (!vcc->dev || !vcc->dev->ops->getsockopt)
798 return -EINVAL; 798 return -EINVAL;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d98bde1a0ac8..181d70c73d70 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -220,7 +220,6 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
220static int lec_open(struct net_device *dev) 220static int lec_open(struct net_device *dev)
221{ 221{
222 netif_start_queue(dev); 222 netif_start_queue(dev);
223 memset(&dev->stats, 0, sizeof(struct net_device_stats));
224 223
225 return 0; 224 return 0;
226} 225}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index cfdfd7e2a172..26eaebf4aaa9 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1103,7 +1103,7 @@ done:
1103out: 1103out:
1104 release_sock(sk); 1104 release_sock(sk);
1105 1105
1106 return 0; 1106 return err;
1107} 1107}
1108 1108
1109/* 1109/*
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 7805945a5fd6..a1690845dc6e 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -412,7 +412,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
412{ 412{
413 ax25_uid_assoc *user; 413 ax25_uid_assoc *user;
414 ax25_route *ax25_rt; 414 ax25_route *ax25_rt;
415 int err; 415 int err = 0;
416 416
417 if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL) 417 if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
418 return -EHOSTUNREACH; 418 return -EHOSTUNREACH;
@@ -453,7 +453,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
453put: 453put:
454 ax25_put_route(ax25_rt); 454 ax25_put_route(ax25_rt);
455 455
456 return 0; 456 return err;
457} 457}
458 458
459struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, 459struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 421c45bd1b95..ed0f22f57668 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -297,13 +297,12 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
297 mask |= POLLERR; 297 mask |= POLLERR;
298 298
299 if (sk->sk_shutdown & RCV_SHUTDOWN) 299 if (sk->sk_shutdown & RCV_SHUTDOWN)
300 mask |= POLLRDHUP; 300 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
301 301
302 if (sk->sk_shutdown == SHUTDOWN_MASK) 302 if (sk->sk_shutdown == SHUTDOWN_MASK)
303 mask |= POLLHUP; 303 mask |= POLLHUP;
304 304
305 if (!skb_queue_empty(&sk->sk_receive_queue) || 305 if (!skb_queue_empty(&sk->sk_receive_queue))
306 (sk->sk_shutdown & RCV_SHUTDOWN))
307 mask |= POLLIN | POLLRDNORM; 306 mask |= POLLIN | POLLRDNORM;
308 307
309 if (sk->sk_state == BT_CLOSED) 308 if (sk->sk_state == BT_CLOSED)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c03d2c3ff03e..89ad25a76202 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -61,30 +61,27 @@ static int port_cost(struct net_device *dev)
61} 61}
62 62
63 63
64/* 64/* Check for port carrier transistions. */
65 * Check for port carrier transistions.
66 * Called from work queue to allow for calling functions that
67 * might sleep (such as speed check), and to debounce.
68 */
69void br_port_carrier_check(struct net_bridge_port *p) 65void br_port_carrier_check(struct net_bridge_port *p)
70{ 66{
71 struct net_device *dev = p->dev; 67 struct net_device *dev = p->dev;
72 struct net_bridge *br = p->br; 68 struct net_bridge *br = p->br;
73 69
74 if (netif_carrier_ok(dev)) 70 if (netif_running(dev) && netif_carrier_ok(dev))
75 p->path_cost = port_cost(dev); 71 p->path_cost = port_cost(dev);
76 72
77 if (netif_running(br->dev)) { 73 if (!netif_running(br->dev))
78 spin_lock_bh(&br->lock); 74 return;
79 if (netif_carrier_ok(dev)) { 75
80 if (p->state == BR_STATE_DISABLED) 76 spin_lock_bh(&br->lock);
81 br_stp_enable_port(p); 77 if (netif_running(dev) && netif_carrier_ok(dev)) {
82 } else { 78 if (p->state == BR_STATE_DISABLED)
83 if (p->state != BR_STATE_DISABLED) 79 br_stp_enable_port(p);
84 br_stp_disable_port(p); 80 } else {
85 } 81 if (p->state != BR_STATE_DISABLED)
86 spin_unlock_bh(&br->lock); 82 br_stp_disable_port(p);
87 } 83 }
84 spin_unlock_bh(&br->lock);
88} 85}
89 86
90static void release_nbp(struct kobject *kobj) 87static void release_nbp(struct kobject *kobj)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 826cd5221536..6d04cfdf4541 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -141,7 +141,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
141 const unsigned char *dest = eth_hdr(skb)->h_dest; 141 const unsigned char *dest = eth_hdr(skb)->h_dest;
142 int (*rhook)(struct sk_buff *skb); 142 int (*rhook)(struct sk_buff *skb);
143 143
144 if (skb->pkt_type == PACKET_LOOPBACK) 144 if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
145 return skb; 145 return skb;
146 146
147 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 147 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 0b586e9d1378..0fd01dd17c48 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -9,6 +9,8 @@
9 * and Sakari Ailus <sakari.ailus@nokia.com> 9 * and Sakari Ailus <sakari.ailus@nokia.com>
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
13
12#include <linux/version.h> 14#include <linux/version.h>
13#include <linux/module.h> 15#include <linux/module.h>
14#include <linux/kernel.h> 16#include <linux/kernel.h>
@@ -214,7 +216,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
214 216
215 switch (what) { 217 switch (what) {
216 case NETDEV_REGISTER: 218 case NETDEV_REGISTER:
217 pr_info("CAIF: %s():register %s\n", __func__, dev->name); 219 netdev_info(dev, "register\n");
218 caifd = caif_device_alloc(dev); 220 caifd = caif_device_alloc(dev);
219 if (caifd == NULL) 221 if (caifd == NULL)
220 break; 222 break;
@@ -225,14 +227,13 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
225 break; 227 break;
226 228
227 case NETDEV_UP: 229 case NETDEV_UP:
228 pr_info("CAIF: %s(): up %s\n", __func__, dev->name); 230 netdev_info(dev, "up\n");
229 caifd = caif_get(dev); 231 caifd = caif_get(dev);
230 if (caifd == NULL) 232 if (caifd == NULL)
231 break; 233 break;
232 caifdev = netdev_priv(dev); 234 caifdev = netdev_priv(dev);
233 if (atomic_read(&caifd->state) == NETDEV_UP) { 235 if (atomic_read(&caifd->state) == NETDEV_UP) {
234 pr_info("CAIF: %s():%s already up\n", 236 netdev_info(dev, "already up\n");
235 __func__, dev->name);
236 break; 237 break;
237 } 238 }
238 atomic_set(&caifd->state, what); 239 atomic_set(&caifd->state, what);
@@ -273,7 +274,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
273 caifd = caif_get(dev); 274 caifd = caif_get(dev);
274 if (caifd == NULL) 275 if (caifd == NULL)
275 break; 276 break;
276 pr_info("CAIF: %s():going down %s\n", __func__, dev->name); 277 netdev_info(dev, "going down\n");
277 278
278 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN || 279 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
279 atomic_read(&caifd->state) == NETDEV_DOWN) 280 atomic_read(&caifd->state) == NETDEV_DOWN)
@@ -295,11 +296,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
295 caifd = caif_get(dev); 296 caifd = caif_get(dev);
296 if (caifd == NULL) 297 if (caifd == NULL)
297 break; 298 break;
298 pr_info("CAIF: %s(): down %s\n", __func__, dev->name); 299 netdev_info(dev, "down\n");
299 if (atomic_read(&caifd->in_use)) 300 if (atomic_read(&caifd->in_use))
300 pr_warning("CAIF: %s(): " 301 netdev_warn(dev,
301 "Unregistering an active CAIF device: %s\n", 302 "Unregistering an active CAIF device\n");
302 __func__, dev->name);
303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); 303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
304 dev_put(dev); 304 dev_put(dev);
305 atomic_set(&caifd->state, what); 305 atomic_set(&caifd->state, what);
@@ -307,7 +307,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
307 307
308 case NETDEV_UNREGISTER: 308 case NETDEV_UNREGISTER:
309 caifd = caif_get(dev); 309 caifd = caif_get(dev);
310 pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name); 310 netdev_info(dev, "unregister\n");
311 atomic_set(&caifd->state, what); 311 atomic_set(&caifd->state, what);
312 caif_device_destroy(dev); 312 caif_device_destroy(dev);
313 break; 313 break;
@@ -391,7 +391,7 @@ static int __init caif_device_init(void)
391 int result; 391 int result;
392 cfg = cfcnfg_create(); 392 cfg = cfcnfg_create();
393 if (!cfg) { 393 if (!cfg) {
394 pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__); 394 pr_warn("can't create cfcnfg\n");
395 goto err_cfcnfg_create_failed; 395 goto err_cfcnfg_create_failed;
396 } 396 }
397 result = register_pernet_device(&caif_net_ops); 397 result = register_pernet_device(&caif_net_ops);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 8ce904786116..fd1f5df0827c 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/fs.h> 9#include <linux/fs.h>
8#include <linux/init.h> 10#include <linux/init.h>
9#include <linux/module.h> 11#include <linux/module.h>
@@ -157,8 +159,8 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
157 159
158 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 160 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
159 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { 161 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
160 trace_printk("CAIF: %s():" 162 trace_printk("CAIF: %s(): "
161 " sending flow OFF (queue len = %d %d)\n", 163 "sending flow OFF (queue len = %d %d)\n",
162 __func__, 164 __func__,
163 atomic_read(&cf_sk->sk.sk_rmem_alloc), 165 atomic_read(&cf_sk->sk.sk_rmem_alloc),
164 sk_rcvbuf_lowwater(cf_sk)); 166 sk_rcvbuf_lowwater(cf_sk));
@@ -172,8 +174,8 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
172 return err; 174 return err;
173 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { 175 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
174 set_rx_flow_off(cf_sk); 176 set_rx_flow_off(cf_sk);
175 trace_printk("CAIF: %s():" 177 trace_printk("CAIF: %s(): "
176 " sending flow OFF due to rmem_schedule\n", 178 "sending flow OFF due to rmem_schedule\n",
177 __func__); 179 __func__);
178 dbfs_atomic_inc(&cnt.num_rx_flow_off); 180 dbfs_atomic_inc(&cnt.num_rx_flow_off);
179 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); 181 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
@@ -275,8 +277,7 @@ static void caif_ctrl_cb(struct cflayer *layr,
275 break; 277 break;
276 278
277 default: 279 default:
278 pr_debug("CAIF: %s(): Unexpected flow command %d\n", 280 pr_debug("Unexpected flow command %d\n", flow);
279 __func__, flow);
280 } 281 }
281} 282}
282 283
@@ -536,8 +537,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
536 537
537 /* Slight paranoia, probably not needed. */ 538 /* Slight paranoia, probably not needed. */
538 if (unlikely(loopcnt++ > 1000)) { 539 if (unlikely(loopcnt++ > 1000)) {
539 pr_warning("CAIF: %s(): transmit retries failed," 540 pr_warn("transmit retries failed, error = %d\n", ret);
540 " error = %d\n", __func__, ret);
541 break; 541 break;
542 } 542 }
543 543
@@ -902,8 +902,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
902 cf_sk->maxframe = dev->mtu - (headroom + tailroom); 902 cf_sk->maxframe = dev->mtu - (headroom + tailroom);
903 dev_put(dev); 903 dev_put(dev);
904 if (cf_sk->maxframe < 1) { 904 if (cf_sk->maxframe < 1) {
905 pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n", 905 pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
906 __func__, dev->mtu);
907 err = -ENODEV; 906 err = -ENODEV;
908 goto out; 907 goto out;
909 } 908 }
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 1c29189b344d..ef93a131310b 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/kernel.h> 9#include <linux/kernel.h>
7#include <linux/stddef.h> 10#include <linux/stddef.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -78,7 +81,7 @@ struct cfcnfg *cfcnfg_create(void)
78 /* Initiate this layer */ 81 /* Initiate this layer */
79 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); 82 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
80 if (!this) { 83 if (!this) {
81 pr_warning("CAIF: %s(): Out of memory\n", __func__); 84 pr_warn("Out of memory\n");
82 return NULL; 85 return NULL;
83 } 86 }
84 this->mux = cfmuxl_create(); 87 this->mux = cfmuxl_create();
@@ -106,7 +109,7 @@ struct cfcnfg *cfcnfg_create(void)
106 layer_set_up(this->ctrl, this); 109 layer_set_up(this->ctrl, this);
107 return this; 110 return this;
108out_of_mem: 111out_of_mem:
109 pr_warning("CAIF: %s(): Out of memory\n", __func__); 112 pr_warn("Out of memory\n");
110 kfree(this->mux); 113 kfree(this->mux);
111 kfree(this->ctrl); 114 kfree(this->ctrl);
112 kfree(this); 115 kfree(this);
@@ -194,7 +197,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
194 caif_assert(adap_layer != NULL); 197 caif_assert(adap_layer != NULL);
195 channel_id = adap_layer->id; 198 channel_id = adap_layer->id;
196 if (adap_layer->dn == NULL || channel_id == 0) { 199 if (adap_layer->dn == NULL || channel_id == 0) {
197 pr_err("CAIF: %s():adap_layer->id is 0\n", __func__); 200 pr_err("adap_layer->id is 0\n");
198 ret = -ENOTCONN; 201 ret = -ENOTCONN;
199 goto end; 202 goto end;
200 } 203 }
@@ -204,9 +207,8 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
204 layer_set_up(servl, NULL); 207 layer_set_up(servl, NULL);
205 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); 208 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
206 if (servl == NULL) { 209 if (servl == NULL) {
207 pr_err("CAIF: %s(): PROTOCOL ERROR " 210 pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)",
208 "- Error removing service_layer Channel_Id(%d)", 211 channel_id);
209 __func__, channel_id);
210 ret = -EINVAL; 212 ret = -EINVAL;
211 goto end; 213 goto end;
212 } 214 }
@@ -216,18 +218,14 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
216 218
217 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); 219 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
218 if (phyinfo == NULL) { 220 if (phyinfo == NULL) {
219 pr_warning("CAIF: %s(): " 221 pr_warn("No interface to send disconnect to\n");
220 "No interface to send disconnect to\n",
221 __func__);
222 ret = -ENODEV; 222 ret = -ENODEV;
223 goto end; 223 goto end;
224 } 224 }
225 if (phyinfo->id != phyid || 225 if (phyinfo->id != phyid ||
226 phyinfo->phy_layer->id != phyid || 226 phyinfo->phy_layer->id != phyid ||
227 phyinfo->frm_layer->id != phyid) { 227 phyinfo->frm_layer->id != phyid) {
228 pr_err("CAIF: %s(): " 228 pr_err("Inconsistency in phy registration\n");
229 "Inconsistency in phy registration\n",
230 __func__);
231 ret = -EINVAL; 229 ret = -EINVAL;
232 goto end; 230 goto end;
233 } 231 }
@@ -276,21 +274,20 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
276{ 274{
277 struct cflayer *frml; 275 struct cflayer *frml;
278 if (adap_layer == NULL) { 276 if (adap_layer == NULL) {
279 pr_err("CAIF: %s(): adap_layer is zero", __func__); 277 pr_err("adap_layer is zero\n");
280 return -EINVAL; 278 return -EINVAL;
281 } 279 }
282 if (adap_layer->receive == NULL) { 280 if (adap_layer->receive == NULL) {
283 pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__); 281 pr_err("adap_layer->receive is NULL\n");
284 return -EINVAL; 282 return -EINVAL;
285 } 283 }
286 if (adap_layer->ctrlcmd == NULL) { 284 if (adap_layer->ctrlcmd == NULL) {
287 pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__); 285 pr_err("adap_layer->ctrlcmd == NULL\n");
288 return -EINVAL; 286 return -EINVAL;
289 } 287 }
290 frml = cnfg->phy_layers[param->phyid].frm_layer; 288 frml = cnfg->phy_layers[param->phyid].frm_layer;
291 if (frml == NULL) { 289 if (frml == NULL) {
292 pr_err("CAIF: %s(): Specified PHY type does not exist!", 290 pr_err("Specified PHY type does not exist!\n");
293 __func__);
294 return -ENODEV; 291 return -ENODEV;
295 } 292 }
296 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id); 293 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
@@ -330,9 +327,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
330 struct net_device *netdev; 327 struct net_device *netdev;
331 328
332 if (adapt_layer == NULL) { 329 if (adapt_layer == NULL) {
333 pr_debug("CAIF: %s(): link setup response " 330 pr_debug("link setup response but no client exist, send linkdown back\n");
334 "but no client exist, send linkdown back\n",
335 __func__);
336 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); 331 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
337 return; 332 return;
338 } 333 }
@@ -374,13 +369,11 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
374 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); 369 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
375 break; 370 break;
376 default: 371 default:
377 pr_err("CAIF: %s(): Protocol error. " 372 pr_err("Protocol error. Link setup response - unknown channel type\n");
378 "Link setup response - unknown channel type\n",
379 __func__);
380 return; 373 return;
381 } 374 }
382 if (!servicel) { 375 if (!servicel) {
383 pr_warning("CAIF: %s(): Out of memory\n", __func__); 376 pr_warn("Out of memory\n");
384 return; 377 return;
385 } 378 }
386 layer_set_dn(servicel, cnfg->mux); 379 layer_set_dn(servicel, cnfg->mux);
@@ -418,7 +411,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
418 } 411 }
419 } 412 }
420 if (*phyid == 0) { 413 if (*phyid == 0) {
421 pr_err("CAIF: %s(): No Available PHY ID\n", __func__); 414 pr_err("No Available PHY ID\n");
422 return; 415 return;
423 } 416 }
424 417
@@ -427,7 +420,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
427 phy_driver = 420 phy_driver =
428 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx); 421 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
429 if (!phy_driver) { 422 if (!phy_driver) {
430 pr_warning("CAIF: %s(): Out of memory\n", __func__); 423 pr_warn("Out of memory\n");
431 return; 424 return;
432 } 425 }
433 426
@@ -436,7 +429,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
436 phy_driver = NULL; 429 phy_driver = NULL;
437 break; 430 break;
438 default: 431 default:
439 pr_err("CAIF: %s(): %d", __func__, phy_type); 432 pr_err("%d\n", phy_type);
440 return; 433 return;
441 break; 434 break;
442 } 435 }
@@ -455,7 +448,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
455 phy_layer->type = phy_type; 448 phy_layer->type = phy_type;
456 frml = cffrml_create(*phyid, fcs); 449 frml = cffrml_create(*phyid, fcs);
457 if (!frml) { 450 if (!frml) {
458 pr_warning("CAIF: %s(): Out of memory\n", __func__); 451 pr_warn("Out of memory\n");
459 return; 452 return;
460 } 453 }
461 cnfg->phy_layers[*phyid].frm_layer = frml; 454 cnfg->phy_layers[*phyid].frm_layer = frml;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 563145fdc4c3..08f267a109aa 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -36,7 +38,7 @@ struct cflayer *cfctrl_create(void)
36 struct cfctrl *this = 38 struct cfctrl *this =
37 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); 39 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
38 if (!this) { 40 if (!this) {
39 pr_warning("CAIF: %s(): Out of memory\n", __func__); 41 pr_warn("Out of memory\n");
40 return NULL; 42 return NULL;
41 } 43 }
42 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 44 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -132,9 +134,7 @@ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
132 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 134 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
133 if (cfctrl_req_eq(req, p)) { 135 if (cfctrl_req_eq(req, p)) {
134 if (p != first) 136 if (p != first)
135 pr_warning("CAIF: %s(): Requests are not " 137 pr_warn("Requests are not received in order\n");
136 "received in order\n",
137 __func__);
138 138
139 atomic_set(&ctrl->rsp_seq_no, 139 atomic_set(&ctrl->rsp_seq_no,
140 p->sequence_no); 140 p->sequence_no);
@@ -177,7 +177,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
177 int ret; 177 int ret;
178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
179 if (!pkt) { 179 if (!pkt) {
180 pr_warning("CAIF: %s(): Out of memory\n", __func__); 180 pr_warn("Out of memory\n");
181 return; 181 return;
182 } 182 }
183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -189,8 +189,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
189 ret = 189 ret =
190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
191 if (ret < 0) { 191 if (ret < 0) {
192 pr_err("CAIF: %s(): Could not transmit enum message\n", 192 pr_err("Could not transmit enum message\n");
193 __func__);
194 cfpkt_destroy(pkt); 193 cfpkt_destroy(pkt);
195 } 194 }
196} 195}
@@ -208,7 +207,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
208 char utility_name[16]; 207 char utility_name[16];
209 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 208 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
210 if (!pkt) { 209 if (!pkt) {
211 pr_warning("CAIF: %s(): Out of memory\n", __func__); 210 pr_warn("Out of memory\n");
212 return -ENOMEM; 211 return -ENOMEM;
213 } 212 }
214 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); 213 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
@@ -253,13 +252,13 @@ int cfctrl_linkup_request(struct cflayer *layer,
253 param->u.utility.paramlen); 252 param->u.utility.paramlen);
254 break; 253 break;
255 default: 254 default:
256 pr_warning("CAIF: %s():Request setup of bad link type = %d\n", 255 pr_warn("Request setup of bad link type = %d\n",
257 __func__, param->linktype); 256 param->linktype);
258 return -EINVAL; 257 return -EINVAL;
259 } 258 }
260 req = kzalloc(sizeof(*req), GFP_KERNEL); 259 req = kzalloc(sizeof(*req), GFP_KERNEL);
261 if (!req) { 260 if (!req) {
262 pr_warning("CAIF: %s(): Out of memory\n", __func__); 261 pr_warn("Out of memory\n");
263 return -ENOMEM; 262 return -ENOMEM;
264 } 263 }
265 req->client_layer = user_layer; 264 req->client_layer = user_layer;
@@ -276,8 +275,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
276 ret = 275 ret =
277 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 276 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
278 if (ret < 0) { 277 if (ret < 0) {
279 pr_err("CAIF: %s(): Could not transmit linksetup request\n", 278 pr_err("Could not transmit linksetup request\n");
280 __func__);
281 cfpkt_destroy(pkt); 279 cfpkt_destroy(pkt);
282 return -ENODEV; 280 return -ENODEV;
283 } 281 }
@@ -291,7 +289,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
291 struct cfctrl *cfctrl = container_obj(layer); 289 struct cfctrl *cfctrl = container_obj(layer);
292 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 290 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
293 if (!pkt) { 291 if (!pkt) {
294 pr_warning("CAIF: %s(): Out of memory\n", __func__); 292 pr_warn("Out of memory\n");
295 return -ENOMEM; 293 return -ENOMEM;
296 } 294 }
297 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); 295 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
@@ -300,8 +298,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
300 ret = 298 ret =
301 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 299 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
302 if (ret < 0) { 300 if (ret < 0) {
303 pr_err("CAIF: %s(): Could not transmit link-down request\n", 301 pr_err("Could not transmit link-down request\n");
304 __func__);
305 cfpkt_destroy(pkt); 302 cfpkt_destroy(pkt);
306 } 303 }
307 return ret; 304 return ret;
@@ -313,7 +310,7 @@ void cfctrl_sleep_req(struct cflayer *layer)
313 struct cfctrl *cfctrl = container_obj(layer); 310 struct cfctrl *cfctrl = container_obj(layer);
314 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 311 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
315 if (!pkt) { 312 if (!pkt) {
316 pr_warning("CAIF: %s(): Out of memory\n", __func__); 313 pr_warn("Out of memory\n");
317 return; 314 return;
318 } 315 }
319 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP); 316 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
@@ -330,7 +327,7 @@ void cfctrl_wake_req(struct cflayer *layer)
330 struct cfctrl *cfctrl = container_obj(layer); 327 struct cfctrl *cfctrl = container_obj(layer);
331 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 328 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
332 if (!pkt) { 329 if (!pkt) {
333 pr_warning("CAIF: %s(): Out of memory\n", __func__); 330 pr_warn("Out of memory\n");
334 return; 331 return;
335 } 332 }
336 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE); 333 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
@@ -347,7 +344,7 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
347 struct cfctrl *cfctrl = container_obj(layer); 344 struct cfctrl *cfctrl = container_obj(layer);
348 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 345 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
349 if (!pkt) { 346 if (!pkt) {
350 pr_warning("CAIF: %s(): Out of memory\n", __func__); 347 pr_warn("Out of memory\n");
351 return; 348 return;
352 } 349 }
353 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON); 350 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
@@ -364,12 +361,11 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
364 struct cfctrl_request_info *p, *tmp; 361 struct cfctrl_request_info *p, *tmp;
365 struct cfctrl *ctrl = container_obj(layr); 362 struct cfctrl *ctrl = container_obj(layr);
366 spin_lock(&ctrl->info_list_lock); 363 spin_lock(&ctrl->info_list_lock);
367 pr_warning("CAIF: %s(): enter\n", __func__); 364 pr_warn("enter\n");
368 365
369 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 366 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
370 if (p->client_layer == adap_layer) { 367 if (p->client_layer == adap_layer) {
371 pr_warning("CAIF: %s(): cancel req :%d\n", __func__, 368 pr_warn("cancel req :%d\n", p->sequence_no);
372 p->sequence_no);
373 list_del(&p->list); 369 list_del(&p->list);
374 kfree(p); 370 kfree(p);
375 } 371 }
@@ -520,9 +516,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
520 cfpkt_extr_head(pkt, &param, len); 516 cfpkt_extr_head(pkt, &param, len);
521 break; 517 break;
522 default: 518 default:
523 pr_warning("CAIF: %s(): Request setup " 519 pr_warn("Request setup - invalid link type (%d)\n",
524 "- invalid link type (%d)", 520 serv);
525 __func__, serv);
526 goto error; 521 goto error;
527 } 522 }
528 523
@@ -532,9 +527,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
532 527
533 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || 528 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
534 cfpkt_erroneous(pkt)) { 529 cfpkt_erroneous(pkt)) {
535 pr_err("CAIF: %s(): Invalid O/E bit or parse " 530 pr_err("Invalid O/E bit or parse error on CAIF control channel\n");
536 "error on CAIF control channel",
537 __func__);
538 cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 531 cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
539 0, 532 0,
540 req ? req->client_layer 533 req ? req->client_layer
@@ -556,8 +549,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
556 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); 549 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
557 break; 550 break;
558 case CFCTRL_CMD_LINK_ERR: 551 case CFCTRL_CMD_LINK_ERR:
559 pr_err("CAIF: %s(): Frame Error Indication received\n", 552 pr_err("Frame Error Indication received\n");
560 __func__);
561 cfctrl->res.linkerror_ind(); 553 cfctrl->res.linkerror_ind();
562 break; 554 break;
563 case CFCTRL_CMD_ENUM: 555 case CFCTRL_CMD_ENUM:
@@ -576,7 +568,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
576 cfctrl->res.radioset_rsp(); 568 cfctrl->res.radioset_rsp();
577 break; 569 break;
578 default: 570 default:
579 pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__); 571 pr_err("Unrecognized Control Frame\n");
580 goto error; 572 goto error;
581 break; 573 break;
582 } 574 }
@@ -595,8 +587,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
595 case CAIF_CTRLCMD_FLOW_OFF_IND: 587 case CAIF_CTRLCMD_FLOW_OFF_IND:
596 spin_lock(&this->info_list_lock); 588 spin_lock(&this->info_list_lock);
597 if (!list_empty(&this->list)) { 589 if (!list_empty(&this->list)) {
598 pr_debug("CAIF: %s(): Received flow off in " 590 pr_debug("Received flow off in control layer\n");
599 "control layer", __func__);
600 } 591 }
601 spin_unlock(&this->info_list_lock); 592 spin_unlock(&this->info_list_lock);
602 break; 593 break;
@@ -620,7 +611,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
620 if (!ctrl->loop_linkused[linkid]) 611 if (!ctrl->loop_linkused[linkid])
621 goto found; 612 goto found;
622 spin_unlock(&ctrl->loop_linkid_lock); 613 spin_unlock(&ctrl->loop_linkid_lock);
623 pr_err("CAIF: %s(): Out of link-ids\n", __func__); 614 pr_err("Out of link-ids\n");
624 return -EINVAL; 615 return -EINVAL;
625found: 616found:
626 if (!ctrl->loop_linkused[linkid]) 617 if (!ctrl->loop_linkused[linkid])
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 676648cac8dd..496fda9ac66f 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -17,7 +19,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
17{ 19{
18 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 20 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
19 if (!dbg) { 21 if (!dbg) {
20 pr_warning("CAIF: %s(): Out of memory\n", __func__); 22 pr_warn("Out of memory\n");
21 return NULL; 23 return NULL;
22 } 24 }
23 caif_assert(offsetof(struct cfsrvl, layer) == 0); 25 caif_assert(offsetof(struct cfsrvl, layer) == 0);
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index ed9d53aff280..d3ed264ad6c4 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!dgm) { 30 if (!dgm) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -49,14 +51,14 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
49 caif_assert(layr->ctrlcmd != NULL); 51 caif_assert(layr->ctrlcmd != NULL);
50 52
51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 53 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
52 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 54 pr_err("Packet is erroneous!\n");
53 cfpkt_destroy(pkt); 55 cfpkt_destroy(pkt);
54 return -EPROTO; 56 return -EPROTO;
55 } 57 }
56 58
57 if ((cmd & DGM_CMD_BIT) == 0) { 59 if ((cmd & DGM_CMD_BIT) == 0) {
58 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) { 60 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
59 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 61 pr_err("Packet is erroneous!\n");
60 cfpkt_destroy(pkt); 62 cfpkt_destroy(pkt);
61 return -EPROTO; 63 return -EPROTO;
62 } 64 }
@@ -75,8 +77,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
75 return 0; 77 return 0;
76 default: 78 default:
77 cfpkt_destroy(pkt); 79 cfpkt_destroy(pkt);
78 pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n", 80 pr_info("Unknown datagram control %d (0x%x)\n", cmd, cmd);
79 __func__, cmd, cmd);
80 return -EPROTO; 81 return -EPROTO;
81 } 82 }
82} 83}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index e86a4ca3b217..a445043931ae 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -6,6 +6,8 @@
6 * License terms: GNU General Public License (GPL) version 2 6 * License terms: GNU General Public License (GPL) version 2
7 */ 7 */
8 8
9#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
10
9#include <linux/stddef.h> 11#include <linux/stddef.h>
10#include <linux/spinlock.h> 12#include <linux/spinlock.h>
11#include <linux/slab.h> 13#include <linux/slab.h>
@@ -32,7 +34,7 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
32{ 34{
33 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); 35 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
34 if (!this) { 36 if (!this) {
35 pr_warning("CAIF: %s(): Out of memory\n", __func__); 37 pr_warn("Out of memory\n");
36 return NULL; 38 return NULL;
37 } 39 }
38 caif_assert(offsetof(struct cffrml, layer) == 0); 40 caif_assert(offsetof(struct cffrml, layer) == 0);
@@ -83,7 +85,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
83 85
84 if (cfpkt_setlen(pkt, len) < 0) { 86 if (cfpkt_setlen(pkt, len) < 0) {
85 ++cffrml_rcv_error; 87 ++cffrml_rcv_error;
86 pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len); 88 pr_err("Framing length error (%d)\n", len);
87 cfpkt_destroy(pkt); 89 cfpkt_destroy(pkt);
88 return -EPROTO; 90 return -EPROTO;
89 } 91 }
@@ -99,14 +101,14 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
99 cfpkt_add_trail(pkt, &tmp, 2); 101 cfpkt_add_trail(pkt, &tmp, 2);
100 ++cffrml_rcv_error; 102 ++cffrml_rcv_error;
101 ++cffrml_rcv_checsum_error; 103 ++cffrml_rcv_checsum_error;
102 pr_info("CAIF: %s(): Frame checksum error " 104 pr_info("Frame checksum error (0x%x != 0x%x)\n",
103 "(0x%x != 0x%x)\n", __func__, hdrchks, pktchks); 105 hdrchks, pktchks);
104 return -EILSEQ; 106 return -EILSEQ;
105 } 107 }
106 } 108 }
107 if (cfpkt_erroneous(pkt)) { 109 if (cfpkt_erroneous(pkt)) {
108 ++cffrml_rcv_error; 110 ++cffrml_rcv_error;
109 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 111 pr_err("Packet is erroneous!\n");
110 cfpkt_destroy(pkt); 112 cfpkt_destroy(pkt);
111 return -EPROTO; 113 return -EPROTO;
112 } 114 }
@@ -132,7 +134,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
132 cfpkt_add_head(pkt, &tmp, 2); 134 cfpkt_add_head(pkt, &tmp, 2);
133 cfpkt_info(pkt)->hdr_len += 2; 135 cfpkt_info(pkt)->hdr_len += 2;
134 if (cfpkt_erroneous(pkt)) { 136 if (cfpkt_erroneous(pkt)) {
135 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 137 pr_err("Packet is erroneous!\n");
136 return -EPROTO; 138 return -EPROTO;
137 } 139 }
138 ret = layr->dn->transmit(layr->dn, pkt); 140 ret = layr->dn->transmit(layr->dn, pkt);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 80c8d332b258..46f34b2e0478 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/stddef.h> 9#include <linux/stddef.h>
7#include <linux/spinlock.h> 10#include <linux/spinlock.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -190,7 +193,7 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
190 u8 id; 193 u8 id;
191 struct cflayer *up; 194 struct cflayer *up;
192 if (cfpkt_extr_head(pkt, &id, 1) < 0) { 195 if (cfpkt_extr_head(pkt, &id, 1) < 0) {
193 pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__); 196 pr_err("erroneous Caif Packet\n");
194 cfpkt_destroy(pkt); 197 cfpkt_destroy(pkt);
195 return -EPROTO; 198 return -EPROTO;
196 } 199 }
@@ -199,8 +202,8 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
199 up = get_up(muxl, id); 202 up = get_up(muxl, id);
200 spin_unlock(&muxl->receive_lock); 203 spin_unlock(&muxl->receive_lock);
201 if (up == NULL) { 204 if (up == NULL) {
202 pr_info("CAIF: %s():Received data on unknown link ID = %d " 205 pr_info("Received data on unknown link ID = %d (0x%x) up == NULL",
203 "(0x%x) up == NULL", __func__, id, id); 206 id, id);
204 cfpkt_destroy(pkt); 207 cfpkt_destroy(pkt);
205 /* 208 /*
206 * Don't return ERROR, since modem misbehaves and sends out 209 * Don't return ERROR, since modem misbehaves and sends out
@@ -223,9 +226,8 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
223 struct caif_payload_info *info = cfpkt_info(pkt); 226 struct caif_payload_info *info = cfpkt_info(pkt);
224 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info); 227 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
225 if (dn == NULL) { 228 if (dn == NULL) {
226 pr_warning("CAIF: %s(): Send data on unknown phy " 229 pr_warn("Send data on unknown phy ID = %d (0x%x)\n",
227 "ID = %d (0x%x)\n", 230 info->dev_info->id, info->dev_info->id);
228 __func__, info->dev_info->id, info->dev_info->id);
229 return -ENOTCONN; 231 return -ENOTCONN;
230 } 232 }
231 info->hdr_len += 1; 233 info->hdr_len += 1;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index c49a6695793a..d7e865e2ff65 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/string.h> 9#include <linux/string.h>
8#include <linux/skbuff.h> 10#include <linux/skbuff.h>
9#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -12,11 +14,12 @@
12#define PKT_PREFIX 48 14#define PKT_PREFIX 48
13#define PKT_POSTFIX 2 15#define PKT_POSTFIX 2
14#define PKT_LEN_WHEN_EXTENDING 128 16#define PKT_LEN_WHEN_EXTENDING 128
15#define PKT_ERROR(pkt, errmsg) do { \ 17#define PKT_ERROR(pkt, errmsg) \
16 cfpkt_priv(pkt)->erronous = true; \ 18do { \
17 skb_reset_tail_pointer(&pkt->skb); \ 19 cfpkt_priv(pkt)->erronous = true; \
18 pr_warning("CAIF: " errmsg);\ 20 skb_reset_tail_pointer(&pkt->skb); \
19 } while (0) 21 pr_warn(errmsg); \
22} while (0)
20 23
21struct cfpktq { 24struct cfpktq {
22 struct sk_buff_head head; 25 struct sk_buff_head head;
@@ -130,13 +133,13 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
130 return -EPROTO; 133 return -EPROTO;
131 134
132 if (unlikely(len > skb->len)) { 135 if (unlikely(len > skb->len)) {
133 PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n"); 136 PKT_ERROR(pkt, "read beyond end of packet\n");
134 return -EPROTO; 137 return -EPROTO;
135 } 138 }
136 139
137 if (unlikely(len > skb_headlen(skb))) { 140 if (unlikely(len > skb_headlen(skb))) {
138 if (unlikely(skb_linearize(skb) != 0)) { 141 if (unlikely(skb_linearize(skb) != 0)) {
139 PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n"); 142 PKT_ERROR(pkt, "linearize failed\n");
140 return -EPROTO; 143 return -EPROTO;
141 } 144 }
142 } 145 }
@@ -156,11 +159,11 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
156 return -EPROTO; 159 return -EPROTO;
157 160
158 if (unlikely(skb_linearize(skb) != 0)) { 161 if (unlikely(skb_linearize(skb) != 0)) {
159 PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n"); 162 PKT_ERROR(pkt, "linearize failed\n");
160 return -EPROTO; 163 return -EPROTO;
161 } 164 }
162 if (unlikely(skb->data + len > skb_tail_pointer(skb))) { 165 if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
163 PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n"); 166 PKT_ERROR(pkt, "read beyond end of packet\n");
164 return -EPROTO; 167 return -EPROTO;
165 } 168 }
166 from = skb_tail_pointer(skb) - len; 169 from = skb_tail_pointer(skb) - len;
@@ -202,7 +205,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
202 205
203 /* Make sure data is writable */ 206 /* Make sure data is writable */
204 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) { 207 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
205 PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n"); 208 PKT_ERROR(pkt, "cow failed\n");
206 return -EPROTO; 209 return -EPROTO;
207 } 210 }
208 /* 211 /*
@@ -211,8 +214,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
211 * lengths of the top SKB. 214 * lengths of the top SKB.
212 */ 215 */
213 if (lastskb != skb) { 216 if (lastskb != skb) {
214 pr_warning("CAIF: %s(): Packet is non-linear\n", 217 pr_warn("Packet is non-linear\n");
215 __func__);
216 skb->len += len; 218 skb->len += len;
217 skb->data_len += len; 219 skb->data_len += len;
218 } 220 }
@@ -242,14 +244,14 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
242 if (unlikely(is_erronous(pkt))) 244 if (unlikely(is_erronous(pkt)))
243 return -EPROTO; 245 return -EPROTO;
244 if (unlikely(skb_headroom(skb) < len)) { 246 if (unlikely(skb_headroom(skb) < len)) {
245 PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n"); 247 PKT_ERROR(pkt, "no headroom\n");
246 return -EPROTO; 248 return -EPROTO;
247 } 249 }
248 250
249 /* Make sure data is writable */ 251 /* Make sure data is writable */
250 ret = skb_cow_data(skb, 0, &lastskb); 252 ret = skb_cow_data(skb, 0, &lastskb);
251 if (unlikely(ret < 0)) { 253 if (unlikely(ret < 0)) {
252 PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n"); 254 PKT_ERROR(pkt, "cow failed\n");
253 return ret; 255 return ret;
254 } 256 }
255 257
@@ -283,7 +285,7 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt,
283 if (unlikely(is_erronous(pkt))) 285 if (unlikely(is_erronous(pkt)))
284 return -EPROTO; 286 return -EPROTO;
285 if (unlikely(skb_linearize(&pkt->skb) != 0)) { 287 if (unlikely(skb_linearize(&pkt->skb) != 0)) {
286 PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n"); 288 PKT_ERROR(pkt, "linearize failed\n");
287 return -EPROTO; 289 return -EPROTO;
288 } 290 }
289 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); 291 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
@@ -309,7 +311,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
309 311
310 /* Need to expand SKB */ 312 /* Need to expand SKB */
311 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len))) 313 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len)))
312 PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n"); 314 PKT_ERROR(pkt, "skb_pad_trail failed\n");
313 315
314 return cfpkt_getlen(pkt); 316 return cfpkt_getlen(pkt);
315} 317}
@@ -380,8 +382,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
380 return NULL; 382 return NULL;
381 383
382 if (skb->data + pos > skb_tail_pointer(skb)) { 384 if (skb->data + pos > skb_tail_pointer(skb)) {
383 PKT_ERROR(pkt, 385 PKT_ERROR(pkt, "trying to split beyond end of packet\n");
384 "cfpkt_split: trying to split beyond end of packet");
385 return NULL; 386 return NULL;
386 } 387 }
387 388
@@ -455,17 +456,17 @@ int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
455 return -EPROTO; 456 return -EPROTO;
456 /* Make sure SKB is writable */ 457 /* Make sure SKB is writable */
457 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { 458 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
458 PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n"); 459 PKT_ERROR(pkt, "skb_cow_data failed\n");
459 return -EPROTO; 460 return -EPROTO;
460 } 461 }
461 462
462 if (unlikely(skb_linearize(skb) != 0)) { 463 if (unlikely(skb_linearize(skb) != 0)) {
463 PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n"); 464 PKT_ERROR(pkt, "linearize failed\n");
464 return -EPROTO; 465 return -EPROTO;
465 } 466 }
466 467
467 if (unlikely(skb_tailroom(skb) < buflen)) { 468 if (unlikely(skb_tailroom(skb) < buflen)) {
468 PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n"); 469 PKT_ERROR(pkt, "buffer too short - failed\n");
469 return -EPROTO; 470 return -EPROTO;
470 } 471 }
471 472
@@ -483,14 +484,13 @@ int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
483 return -EPROTO; 484 return -EPROTO;
484 485
485 if (unlikely(buflen > skb->len)) { 486 if (unlikely(buflen > skb->len)) {
486 PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large " 487 PKT_ERROR(pkt, "buflen too large - failed\n");
487 "- failed\n");
488 return -EPROTO; 488 return -EPROTO;
489 } 489 }
490 490
491 if (unlikely(buflen > skb_headlen(skb))) { 491 if (unlikely(buflen > skb_headlen(skb))) {
492 if (unlikely(skb_linearize(skb) != 0)) { 492 if (unlikely(skb_linearize(skb) != 0)) {
493 PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n"); 493 PKT_ERROR(pkt, "linearize failed\n");
494 return -EPROTO; 494 return -EPROTO;
495 } 495 }
496 } 496 }
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 9a699242d104..bde8481e8d25 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -48,7 +50,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
48 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); 50 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
49 51
50 if (!this) { 52 if (!this) {
51 pr_warning("CAIF: %s(): Out of memory\n", __func__); 53 pr_warn("Out of memory\n");
52 return NULL; 54 return NULL;
53 } 55 }
54 56
@@ -178,9 +180,7 @@ out:
178 cfpkt_destroy(rfml->incomplete_frm); 180 cfpkt_destroy(rfml->incomplete_frm);
179 rfml->incomplete_frm = NULL; 181 rfml->incomplete_frm = NULL;
180 182
181 pr_info("CAIF: %s(): " 183 pr_info("Connection error %d triggered on RFM link\n", err);
182 "Connection error %d triggered on RFM link\n",
183 __func__, err);
184 184
185 /* Trigger connection error upon failure.*/ 185 /* Trigger connection error upon failure.*/
186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
@@ -280,9 +280,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
280out: 280out:
281 281
282 if (err != 0) { 282 if (err != 0) {
283 pr_info("CAIF: %s(): " 283 pr_info("Connection error %d triggered on RFM link\n", err);
284 "Connection error %d triggered on RFM link\n",
285 __func__, err);
286 /* Trigger connection error upon failure.*/ 284 /* Trigger connection error upon failure.*/
287 285
288 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 286 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index a11fbd68a13d..9297f7dea9d8 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -34,7 +36,7 @@ struct cflayer *cfserl_create(int type, int instance, bool use_stx)
34{ 36{
35 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); 37 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
36 if (!this) { 38 if (!this) {
37 pr_warning("CAIF: %s(): Out of memory\n", __func__); 39 pr_warn("Out of memory\n");
38 return NULL; 40 return NULL;
39 } 41 }
40 caif_assert(offsetof(struct cfserl, layer) == 0); 42 caif_assert(offsetof(struct cfserl, layer) == 0);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index f40939a91211..ab5e542526bf 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/errno.h> 11#include <linux/errno.h>
@@ -79,8 +81,7 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
79 layr->up->ctrlcmd(layr->up, ctrl, phyid); 81 layr->up->ctrlcmd(layr->up, ctrl, phyid);
80 break; 82 break;
81 default: 83 default:
82 pr_warning("CAIF: %s(): " 84 pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl);
83 "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
84 /* We have both modem and phy flow on, send flow on */ 85 /* We have both modem and phy flow on, send flow on */
85 layr->up->ctrlcmd(layr->up, ctrl, phyid); 86 layr->up->ctrlcmd(layr->up, ctrl, phyid);
86 service->phy_flow_on = true; 87 service->phy_flow_on = true;
@@ -107,14 +108,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
107 u8 flow_on = SRVL_FLOW_ON; 108 u8 flow_on = SRVL_FLOW_ON;
108 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 109 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
109 if (!pkt) { 110 if (!pkt) {
110 pr_warning("CAIF: %s(): Out of memory\n", 111 pr_warn("Out of memory\n");
111 __func__);
112 return -ENOMEM; 112 return -ENOMEM;
113 } 113 }
114 114
115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { 115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
116 pr_err("CAIF: %s(): Packet is erroneous!\n", 116 pr_err("Packet is erroneous!\n");
117 __func__);
118 cfpkt_destroy(pkt); 117 cfpkt_destroy(pkt);
119 return -EPROTO; 118 return -EPROTO;
120 } 119 }
@@ -131,14 +130,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
131 u8 flow_off = SRVL_FLOW_OFF; 130 u8 flow_off = SRVL_FLOW_OFF;
132 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 131 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
133 if (!pkt) { 132 if (!pkt) {
134 pr_warning("CAIF: %s(): Out of memory\n", 133 pr_warn("Out of memory\n");
135 __func__);
136 return -ENOMEM; 134 return -ENOMEM;
137 } 135 }
138 136
139 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { 137 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
140 pr_err("CAIF: %s(): Packet is erroneous!\n", 138 pr_err("Packet is erroneous!\n");
141 __func__);
142 cfpkt_destroy(pkt); 139 cfpkt_destroy(pkt);
143 return -EPROTO; 140 return -EPROTO;
144 } 141 }
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 02795aff57a4..efad410e4c82 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!util) { 30 if (!util) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
47 caif_assert(layr->up->receive != NULL); 49 caif_assert(layr->up->receive != NULL);
48 caif_assert(layr->up->ctrlcmd != NULL); 50 caif_assert(layr->up->ctrlcmd != NULL);
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -64,16 +66,14 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
64 cfpkt_destroy(pkt); 66 cfpkt_destroy(pkt);
65 return 0; 67 return 0;
66 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */ 68 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */
67 pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n", 69 pr_err("REMOTE SHUTDOWN REQUEST RECEIVED\n");
68 __func__);
69 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0); 70 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
70 service->open = false; 71 service->open = false;
71 cfpkt_destroy(pkt); 72 cfpkt_destroy(pkt);
72 return 0; 73 return 0;
73 default: 74 default:
74 cfpkt_destroy(pkt); 75 cfpkt_destroy(pkt);
75 pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n", 76 pr_warn("Unknown service control %d (0x%x)\n", cmd, cmd);
76 __func__, cmd, cmd);
77 return -EPROTO; 77 return -EPROTO;
78 } 78 }
79} 79}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 77cc09faac9a..3b425b189a99 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -25,7 +27,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
25{ 27{
26 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 28 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
27 if (!vei) { 29 if (!vei) {
28 pr_warning("CAIF: %s(): Out of memory\n", __func__); 30 pr_warn("Out of memory\n");
29 return NULL; 31 return NULL;
30 } 32 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0); 33 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
47 49
48 50
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -67,8 +69,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
67 cfpkt_destroy(pkt); 69 cfpkt_destroy(pkt);
68 return 0; 70 return 0;
69 default: /* SET RS232 PIN */ 71 default: /* SET RS232 PIN */
70 pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n", 72 pr_warn("Unknown VEI control packet %d (0x%x)!\n", cmd, cmd);
71 __func__, cmd, cmd);
72 cfpkt_destroy(pkt); 73 cfpkt_destroy(pkt);
73 return -EPROTO; 74 return -EPROTO;
74 } 75 }
@@ -86,7 +87,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
86 caif_assert(layr->dn->transmit != NULL); 87 caif_assert(layr->dn->transmit != NULL);
87 88
88 if (cfpkt_add_head(pkt, &tmp, 1) < 0) { 89 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
89 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 90 pr_err("Packet is erroneous!\n");
90 return -EPROTO; 91 return -EPROTO;
91 } 92 }
92 93
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index ada6ee2d48f5..bf6fef2a0eff 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -21,7 +23,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
21{ 23{
22 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 24 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
23 if (!vid) { 25 if (!vid) {
24 pr_warning("CAIF: %s(): Out of memory\n", __func__); 26 pr_warn("Out of memory\n");
25 return NULL; 27 return NULL;
26 } 28 }
27 caif_assert(offsetof(struct cfsrvl, layer) == 0); 29 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -38,7 +40,7 @@ static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
38{ 40{
39 u32 videoheader; 41 u32 videoheader;
40 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) { 42 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
41 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 43 pr_err("Packet is erroneous!\n");
42 cfpkt_destroy(pkt); 44 cfpkt_destroy(pkt);
43 return -EPROTO; 45 return -EPROTO;
44 } 46 }
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 4293e190ec53..86aac24b0225 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -5,6 +5,8 @@
5 * License terms: GNU General Public License (GPL) version 2 5 * License terms: GNU General Public License (GPL) version 2
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
9
8#include <linux/version.h> 10#include <linux/version.h>
9#include <linux/fs.h> 11#include <linux/fs.h>
10#include <linux/init.h> 12#include <linux/init.h>
@@ -29,7 +31,7 @@
29#define CAIF_NET_DEFAULT_QUEUE_LEN 500 31#define CAIF_NET_DEFAULT_QUEUE_LEN 500
30 32
31#undef pr_debug 33#undef pr_debug
32#define pr_debug pr_warning 34#define pr_debug pr_warn
33 35
34/*This list is protected by the rtnl lock. */ 36/*This list is protected by the rtnl lock. */
35static LIST_HEAD(chnl_net_list); 37static LIST_HEAD(chnl_net_list);
@@ -142,8 +144,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
142 int phyid) 144 int phyid)
143{ 145{
144 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); 146 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
145 pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n", 147 pr_debug("NET flowctrl func called flow: %s\n",
146 __func__,
147 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : 148 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
148 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : 149 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
149 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : 150 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
@@ -196,12 +197,12 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
196 priv = netdev_priv(dev); 197 priv = netdev_priv(dev);
197 198
198 if (skb->len > priv->netdev->mtu) { 199 if (skb->len > priv->netdev->mtu) {
199 pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__); 200 pr_warn("Size of skb exceeded MTU\n");
200 return -ENOSPC; 201 return -ENOSPC;
201 } 202 }
202 203
203 if (!priv->flowenabled) { 204 if (!priv->flowenabled) {
204 pr_debug("CAIF: %s(): dropping packets flow off\n", __func__); 205 pr_debug("dropping packets flow off\n");
205 return NETDEV_TX_BUSY; 206 return NETDEV_TX_BUSY;
206 } 207 }
207 208
@@ -237,7 +238,7 @@ static int chnl_net_open(struct net_device *dev)
237 ASSERT_RTNL(); 238 ASSERT_RTNL();
238 priv = netdev_priv(dev); 239 priv = netdev_priv(dev);
239 if (!priv) { 240 if (!priv) {
240 pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__); 241 pr_debug("chnl_net_open: no priv\n");
241 return -ENODEV; 242 return -ENODEV;
242 } 243 }
243 244
@@ -246,18 +247,17 @@ static int chnl_net_open(struct net_device *dev)
246 result = caif_connect_client(&priv->conn_req, &priv->chnl, 247 result = caif_connect_client(&priv->conn_req, &priv->chnl,
247 &llifindex, &headroom, &tailroom); 248 &llifindex, &headroom, &tailroom);
248 if (result != 0) { 249 if (result != 0) {
249 pr_debug("CAIF: %s(): err: " 250 pr_debug("err: "
250 "Unable to register and open device," 251 "Unable to register and open device,"
251 " Err:%d\n", 252 " Err:%d\n",
252 __func__, 253 result);
253 result);
254 goto error; 254 goto error;
255 } 255 }
256 256
257 lldev = dev_get_by_index(dev_net(dev), llifindex); 257 lldev = dev_get_by_index(dev_net(dev), llifindex);
258 258
259 if (lldev == NULL) { 259 if (lldev == NULL) {
260 pr_debug("CAIF: %s(): no interface?\n", __func__); 260 pr_debug("no interface?\n");
261 result = -ENODEV; 261 result = -ENODEV;
262 goto error; 262 goto error;
263 } 263 }
@@ -279,9 +279,7 @@ static int chnl_net_open(struct net_device *dev)
279 dev_put(lldev); 279 dev_put(lldev);
280 280
281 if (mtu < 100) { 281 if (mtu < 100) {
282 pr_warning("CAIF: %s(): " 282 pr_warn("CAIF Interface MTU too small (%d)\n", mtu);
283 "CAIF Interface MTU too small (%d)\n",
284 __func__, mtu);
285 result = -ENODEV; 283 result = -ENODEV;
286 goto error; 284 goto error;
287 } 285 }
@@ -296,33 +294,32 @@ static int chnl_net_open(struct net_device *dev)
296 rtnl_lock(); 294 rtnl_lock();
297 295
298 if (result == -ERESTARTSYS) { 296 if (result == -ERESTARTSYS) {
299 pr_debug("CAIF: %s(): wait_event_interruptible" 297 pr_debug("wait_event_interruptible woken by a signal\n");
300 " woken by a signal\n", __func__);
301 result = -ERESTARTSYS; 298 result = -ERESTARTSYS;
302 goto error; 299 goto error;
303 } 300 }
304 301
305 if (result == 0) { 302 if (result == 0) {
306 pr_debug("CAIF: %s(): connect timeout\n", __func__); 303 pr_debug("connect timeout\n");
307 caif_disconnect_client(&priv->chnl); 304 caif_disconnect_client(&priv->chnl);
308 priv->state = CAIF_DISCONNECTED; 305 priv->state = CAIF_DISCONNECTED;
309 pr_debug("CAIF: %s(): state disconnected\n", __func__); 306 pr_debug("state disconnected\n");
310 result = -ETIMEDOUT; 307 result = -ETIMEDOUT;
311 goto error; 308 goto error;
312 } 309 }
313 310
314 if (priv->state != CAIF_CONNECTED) { 311 if (priv->state != CAIF_CONNECTED) {
315 pr_debug("CAIF: %s(): connect failed\n", __func__); 312 pr_debug("connect failed\n");
316 result = -ECONNREFUSED; 313 result = -ECONNREFUSED;
317 goto error; 314 goto error;
318 } 315 }
319 pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); 316 pr_debug("CAIF Netdevice connected\n");
320 return 0; 317 return 0;
321 318
322error: 319error:
323 caif_disconnect_client(&priv->chnl); 320 caif_disconnect_client(&priv->chnl);
324 priv->state = CAIF_DISCONNECTED; 321 priv->state = CAIF_DISCONNECTED;
325 pr_debug("CAIF: %s(): state disconnected\n", __func__); 322 pr_debug("state disconnected\n");
326 return result; 323 return result;
327 324
328} 325}
@@ -413,7 +410,7 @@ static void caif_netlink_parms(struct nlattr *data[],
413 struct caif_connect_request *conn_req) 410 struct caif_connect_request *conn_req)
414{ 411{
415 if (!data) { 412 if (!data) {
416 pr_warning("CAIF: %s: no params data found\n", __func__); 413 pr_warn("no params data found\n");
417 return; 414 return;
418 } 415 }
419 if (data[IFLA_CAIF_IPV4_CONNID]) 416 if (data[IFLA_CAIF_IPV4_CONNID])
@@ -442,8 +439,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
442 439
443 ret = register_netdevice(dev); 440 ret = register_netdevice(dev);
444 if (ret) 441 if (ret)
445 pr_warning("CAIF: %s(): device rtml registration failed\n", 442 pr_warn("device rtml registration failed\n");
446 __func__);
447 return ret; 443 return ret;
448} 444}
449 445
diff --git a/net/can/raw.c b/net/can/raw.c
index a10e3338f084..7d77e67e57af 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -647,12 +647,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
647 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 647 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
648 if (err < 0) 648 if (err < 0)
649 goto free_skb; 649 goto free_skb;
650 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 650 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
651 if (err < 0) 651 if (err < 0)
652 goto free_skb; 652 goto free_skb;
653 653
654 /* to be able to check the received tx sock reference in raw_rcv() */ 654 /* to be able to check the received tx sock reference in raw_rcv() */
655 skb_tx(skb)->prevent_sk_orphan = 1; 655 skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
656 656
657 skb->dev = dev; 657 skb->dev = dev;
658 skb->sk = sk; 658 skb->sk = sk;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 251997a95483..4df1b7a6c1bf 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -746,13 +746,12 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
746 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 746 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
747 mask |= POLLERR; 747 mask |= POLLERR;
748 if (sk->sk_shutdown & RCV_SHUTDOWN) 748 if (sk->sk_shutdown & RCV_SHUTDOWN)
749 mask |= POLLRDHUP; 749 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
750 if (sk->sk_shutdown == SHUTDOWN_MASK) 750 if (sk->sk_shutdown == SHUTDOWN_MASK)
751 mask |= POLLHUP; 751 mask |= POLLHUP;
752 752
753 /* readable? */ 753 /* readable? */
754 if (!skb_queue_empty(&sk->sk_receive_queue) || 754 if (!skb_queue_empty(&sk->sk_receive_queue))
755 (sk->sk_shutdown & RCV_SHUTDOWN))
756 mask |= POLLIN | POLLRDNORM; 755 mask |= POLLIN | POLLRDNORM;
757 756
758 /* Connection-based need to check for termination and startup */ 757 /* Connection-based need to check for termination and startup */
diff --git a/net/core/dev.c b/net/core/dev.c
index b9b22a3c4c8f..fc2dc933bee5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -371,6 +371,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
371 * --ANK (980803) 371 * --ANK (980803)
372 */ 372 */
373 373
374static inline struct list_head *ptype_head(const struct packet_type *pt)
375{
376 if (pt->type == htons(ETH_P_ALL))
377 return &ptype_all;
378 else
379 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
380}
381
374/** 382/**
375 * dev_add_pack - add packet handler 383 * dev_add_pack - add packet handler
376 * @pt: packet type declaration 384 * @pt: packet type declaration
@@ -386,16 +394,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
386 394
387void dev_add_pack(struct packet_type *pt) 395void dev_add_pack(struct packet_type *pt)
388{ 396{
389 int hash; 397 struct list_head *head = ptype_head(pt);
390 398
391 spin_lock_bh(&ptype_lock); 399 spin_lock(&ptype_lock);
392 if (pt->type == htons(ETH_P_ALL)) 400 list_add_rcu(&pt->list, head);
393 list_add_rcu(&pt->list, &ptype_all); 401 spin_unlock(&ptype_lock);
394 else {
395 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
396 list_add_rcu(&pt->list, &ptype_base[hash]);
397 }
398 spin_unlock_bh(&ptype_lock);
399} 402}
400EXPORT_SYMBOL(dev_add_pack); 403EXPORT_SYMBOL(dev_add_pack);
401 404
@@ -414,15 +417,10 @@ EXPORT_SYMBOL(dev_add_pack);
414 */ 417 */
415void __dev_remove_pack(struct packet_type *pt) 418void __dev_remove_pack(struct packet_type *pt)
416{ 419{
417 struct list_head *head; 420 struct list_head *head = ptype_head(pt);
418 struct packet_type *pt1; 421 struct packet_type *pt1;
419 422
420 spin_lock_bh(&ptype_lock); 423 spin_lock(&ptype_lock);
421
422 if (pt->type == htons(ETH_P_ALL))
423 head = &ptype_all;
424 else
425 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
426 424
427 list_for_each_entry(pt1, head, list) { 425 list_for_each_entry(pt1, head, list) {
428 if (pt == pt1) { 426 if (pt == pt1) {
@@ -433,7 +431,7 @@ void __dev_remove_pack(struct packet_type *pt)
433 431
434 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); 432 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
435out: 433out:
436 spin_unlock_bh(&ptype_lock); 434 spin_unlock(&ptype_lock);
437} 435}
438EXPORT_SYMBOL(__dev_remove_pack); 436EXPORT_SYMBOL(__dev_remove_pack);
439 437
@@ -1902,14 +1900,14 @@ static int dev_gso_segment(struct sk_buff *skb)
1902 1900
1903/* 1901/*
1904 * Try to orphan skb early, right before transmission by the device. 1902 * Try to orphan skb early, right before transmission by the device.
1905 * We cannot orphan skb if tx timestamp is requested, since 1903 * We cannot orphan skb if tx timestamp is requested or the sk-reference
1906 * drivers need to call skb_tstamp_tx() to send the timestamp. 1904 * is needed on driver level for other reasons, e.g. see net/can/raw.c
1907 */ 1905 */
1908static inline void skb_orphan_try(struct sk_buff *skb) 1906static inline void skb_orphan_try(struct sk_buff *skb)
1909{ 1907{
1910 struct sock *sk = skb->sk; 1908 struct sock *sk = skb->sk;
1911 1909
1912 if (sk && !skb_tx(skb)->flags) { 1910 if (sk && !skb_shinfo(skb)->tx_flags) {
1913 /* skb_tx_hash() wont be able to get sk. 1911 /* skb_tx_hash() wont be able to get sk.
1914 * We copy sk_hash into skb->rxhash 1912 * We copy sk_hash into skb->rxhash
1915 */ 1913 */
@@ -1930,7 +1928,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
1930 struct net_device *dev) 1928 struct net_device *dev)
1931{ 1929{
1932 return skb_is_nonlinear(skb) && 1930 return skb_is_nonlinear(skb) &&
1933 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || 1931 ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
1934 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || 1932 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
1935 illegal_highdma(dev, skb)))); 1933 illegal_highdma(dev, skb))));
1936} 1934}
@@ -2259,69 +2257,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2259 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2257 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2260} 2258}
2261 2259
2262#ifdef CONFIG_RPS
2263
2264/* One global table that all flow-based protocols share. */
2265struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2266EXPORT_SYMBOL(rps_sock_flow_table);
2267
2268/* 2260/*
2269 * get_rps_cpu is called from netif_receive_skb and returns the target 2261 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2270 * CPU from the RPS map of the receiving queue for a given skb. 2262 * and src/dst port numbers. Returns a non-zero hash number on success
2271 * rcu_read_lock must be held on entry. 2263 * and 0 on failure.
2272 */ 2264 */
2273static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2265__u32 __skb_get_rxhash(struct sk_buff *skb)
2274 struct rps_dev_flow **rflowp)
2275{ 2266{
2267 int nhoff, hash = 0, poff;
2276 struct ipv6hdr *ip6; 2268 struct ipv6hdr *ip6;
2277 struct iphdr *ip; 2269 struct iphdr *ip;
2278 struct netdev_rx_queue *rxqueue;
2279 struct rps_map *map;
2280 struct rps_dev_flow_table *flow_table;
2281 struct rps_sock_flow_table *sock_flow_table;
2282 int cpu = -1;
2283 u8 ip_proto; 2270 u8 ip_proto;
2284 u16 tcpu;
2285 u32 addr1, addr2, ihl; 2271 u32 addr1, addr2, ihl;
2286 union { 2272 union {
2287 u32 v32; 2273 u32 v32;
2288 u16 v16[2]; 2274 u16 v16[2];
2289 } ports; 2275 } ports;
2290 2276
2291 if (skb_rx_queue_recorded(skb)) { 2277 nhoff = skb_network_offset(skb);
2292 u16 index = skb_get_rx_queue(skb);
2293 if (unlikely(index >= dev->num_rx_queues)) {
2294 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2295 "on queue %u, but number of RX queues is %u\n",
2296 dev->name, index, dev->num_rx_queues);
2297 goto done;
2298 }
2299 rxqueue = dev->_rx + index;
2300 } else
2301 rxqueue = dev->_rx;
2302
2303 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2304 goto done;
2305
2306 if (skb->rxhash)
2307 goto got_hash; /* Skip hash computation on packet header */
2308 2278
2309 switch (skb->protocol) { 2279 switch (skb->protocol) {
2310 case __constant_htons(ETH_P_IP): 2280 case __constant_htons(ETH_P_IP):
2311 if (!pskb_may_pull(skb, sizeof(*ip))) 2281 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2312 goto done; 2282 goto done;
2313 2283
2314 ip = (struct iphdr *) skb->data; 2284 ip = (struct iphdr *) (skb->data + nhoff);
2315 ip_proto = ip->protocol; 2285 if (ip->frag_off & htons(IP_MF | IP_OFFSET))
2286 ip_proto = 0;
2287 else
2288 ip_proto = ip->protocol;
2316 addr1 = (__force u32) ip->saddr; 2289 addr1 = (__force u32) ip->saddr;
2317 addr2 = (__force u32) ip->daddr; 2290 addr2 = (__force u32) ip->daddr;
2318 ihl = ip->ihl; 2291 ihl = ip->ihl;
2319 break; 2292 break;
2320 case __constant_htons(ETH_P_IPV6): 2293 case __constant_htons(ETH_P_IPV6):
2321 if (!pskb_may_pull(skb, sizeof(*ip6))) 2294 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2322 goto done; 2295 goto done;
2323 2296
2324 ip6 = (struct ipv6hdr *) skb->data; 2297 ip6 = (struct ipv6hdr *) (skb->data + nhoff);
2325 ip_proto = ip6->nexthdr; 2298 ip_proto = ip6->nexthdr;
2326 addr1 = (__force u32) ip6->saddr.s6_addr32[3]; 2299 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2327 addr2 = (__force u32) ip6->daddr.s6_addr32[3]; 2300 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2330,33 +2303,80 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2330 default: 2303 default:
2331 goto done; 2304 goto done;
2332 } 2305 }
2333 switch (ip_proto) { 2306
2334 case IPPROTO_TCP: 2307 ports.v32 = 0;
2335 case IPPROTO_UDP: 2308 poff = proto_ports_offset(ip_proto);
2336 case IPPROTO_DCCP: 2309 if (poff >= 0) {
2337 case IPPROTO_ESP: 2310 nhoff += ihl * 4 + poff;
2338 case IPPROTO_AH: 2311 if (pskb_may_pull(skb, nhoff + 4)) {
2339 case IPPROTO_SCTP: 2312 ports.v32 = * (__force u32 *) (skb->data + nhoff);
2340 case IPPROTO_UDPLITE:
2341 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2342 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2343 if (ports.v16[1] < ports.v16[0]) 2313 if (ports.v16[1] < ports.v16[0])
2344 swap(ports.v16[0], ports.v16[1]); 2314 swap(ports.v16[0], ports.v16[1]);
2345 break;
2346 } 2315 }
2347 default:
2348 ports.v32 = 0;
2349 break;
2350 } 2316 }
2351 2317
2352 /* get a consistent hash (same value on both flow directions) */ 2318 /* get a consistent hash (same value on both flow directions) */
2353 if (addr2 < addr1) 2319 if (addr2 < addr1)
2354 swap(addr1, addr2); 2320 swap(addr1, addr2);
2355 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2356 if (!skb->rxhash)
2357 skb->rxhash = 1;
2358 2321
2359got_hash: 2322 hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2323 if (!hash)
2324 hash = 1;
2325
2326done:
2327 return hash;
2328}
2329EXPORT_SYMBOL(__skb_get_rxhash);
2330
2331#ifdef CONFIG_RPS
2332
2333/* One global table that all flow-based protocols share. */
2334struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2335EXPORT_SYMBOL(rps_sock_flow_table);
2336
2337/*
2338 * get_rps_cpu is called from netif_receive_skb and returns the target
2339 * CPU from the RPS map of the receiving queue for a given skb.
2340 * rcu_read_lock must be held on entry.
2341 */
2342static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2343 struct rps_dev_flow **rflowp)
2344{
2345 struct netdev_rx_queue *rxqueue;
2346 struct rps_map *map = NULL;
2347 struct rps_dev_flow_table *flow_table;
2348 struct rps_sock_flow_table *sock_flow_table;
2349 int cpu = -1;
2350 u16 tcpu;
2351
2352 if (skb_rx_queue_recorded(skb)) {
2353 u16 index = skb_get_rx_queue(skb);
2354 if (unlikely(index >= dev->num_rx_queues)) {
2355 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2356 "on queue %u, but number of RX queues is %u\n",
2357 dev->name, index, dev->num_rx_queues);
2358 goto done;
2359 }
2360 rxqueue = dev->_rx + index;
2361 } else
2362 rxqueue = dev->_rx;
2363
2364 if (rxqueue->rps_map) {
2365 map = rcu_dereference(rxqueue->rps_map);
2366 if (map && map->len == 1) {
2367 tcpu = map->cpus[0];
2368 if (cpu_online(tcpu))
2369 cpu = tcpu;
2370 goto done;
2371 }
2372 } else if (!rxqueue->rps_flow_table) {
2373 goto done;
2374 }
2375
2376 skb_reset_network_header(skb);
2377 if (!skb_get_rxhash(skb))
2378 goto done;
2379
2360 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2380 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2361 sock_flow_table = rcu_dereference(rps_sock_flow_table); 2381 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2362 if (flow_table && sock_flow_table) { 2382 if (flow_table && sock_flow_table) {
@@ -2396,7 +2416,6 @@ got_hash:
2396 } 2416 }
2397 } 2417 }
2398 2418
2399 map = rcu_dereference(rxqueue->rps_map);
2400 if (map) { 2419 if (map) {
2401 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 2420 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2402 2421
@@ -2828,8 +2847,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
2828 if (!netdev_tstamp_prequeue) 2847 if (!netdev_tstamp_prequeue)
2829 net_timestamp_check(skb); 2848 net_timestamp_check(skb);
2830 2849
2831 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2850 if (vlan_tx_tag_present(skb))
2832 return NET_RX_SUCCESS; 2851 vlan_hwaccel_do_receive(skb);
2833 2852
2834 /* if we've gotten here through NAPI, check netpoll */ 2853 /* if we've gotten here through NAPI, check netpoll */
2835 if (netpoll_receive_skb(skb)) 2854 if (netpoll_receive_skb(skb))
@@ -3050,7 +3069,7 @@ out:
3050 return netif_receive_skb(skb); 3069 return netif_receive_skb(skb);
3051} 3070}
3052 3071
3053static void napi_gro_flush(struct napi_struct *napi) 3072inline void napi_gro_flush(struct napi_struct *napi)
3054{ 3073{
3055 struct sk_buff *skb, *next; 3074 struct sk_buff *skb, *next;
3056 3075
@@ -3063,6 +3082,7 @@ static void napi_gro_flush(struct napi_struct *napi)
3063 napi->gro_count = 0; 3082 napi->gro_count = 0;
3064 napi->gro_list = NULL; 3083 napi->gro_list = NULL;
3065} 3084}
3085EXPORT_SYMBOL(napi_gro_flush);
3066 3086
3067enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3087enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3068{ 3088{
@@ -3077,7 +3097,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3077 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3097 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3078 goto normal; 3098 goto normal;
3079 3099
3080 if (skb_is_gso(skb) || skb_has_frags(skb)) 3100 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3081 goto normal; 3101 goto normal;
3082 3102
3083 rcu_read_lock(); 3103 rcu_read_lock();
@@ -3156,16 +3176,18 @@ normal:
3156} 3176}
3157EXPORT_SYMBOL(dev_gro_receive); 3177EXPORT_SYMBOL(dev_gro_receive);
3158 3178
3159static gro_result_t 3179static inline gro_result_t
3160__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3180__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3161{ 3181{
3162 struct sk_buff *p; 3182 struct sk_buff *p;
3163 3183
3164 for (p = napi->gro_list; p; p = p->next) { 3184 for (p = napi->gro_list; p; p = p->next) {
3165 NAPI_GRO_CB(p)->same_flow = 3185 unsigned long diffs;
3166 (p->dev == skb->dev) && 3186
3167 !compare_ether_header(skb_mac_header(p), 3187 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3188 diffs |= compare_ether_header(skb_mac_header(p),
3168 skb_gro_mac_header(skb)); 3189 skb_gro_mac_header(skb));
3190 NAPI_GRO_CB(p)->same_flow = !diffs;
3169 NAPI_GRO_CB(p)->flush = 0; 3191 NAPI_GRO_CB(p)->flush = 0;
3170 } 3192 }
3171 3193
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 7a85367b3c2f..970eb9817bbc 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -205,18 +205,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
205 struct ethtool_drvinfo info; 205 struct ethtool_drvinfo info;
206 const struct ethtool_ops *ops = dev->ethtool_ops; 206 const struct ethtool_ops *ops = dev->ethtool_ops;
207 207
208 if (!ops->get_drvinfo)
209 return -EOPNOTSUPP;
210
211 memset(&info, 0, sizeof(info)); 208 memset(&info, 0, sizeof(info));
212 info.cmd = ETHTOOL_GDRVINFO; 209 info.cmd = ETHTOOL_GDRVINFO;
213 ops->get_drvinfo(dev, &info); 210 if (ops && ops->get_drvinfo) {
211 ops->get_drvinfo(dev, &info);
212 } else if (dev->dev.parent && dev->dev.parent->driver) {
213 strlcpy(info.bus_info, dev_name(dev->dev.parent),
214 sizeof(info.bus_info));
215 strlcpy(info.driver, dev->dev.parent->driver->name,
216 sizeof(info.driver));
217 } else {
218 return -EOPNOTSUPP;
219 }
214 220
215 /* 221 /*
216 * this method of obtaining string set info is deprecated; 222 * this method of obtaining string set info is deprecated;
217 * Use ETHTOOL_GSSET_INFO instead. 223 * Use ETHTOOL_GSSET_INFO instead.
218 */ 224 */
219 if (ops->get_sset_count) { 225 if (ops && ops->get_sset_count) {
220 int rc; 226 int rc;
221 227
222 rc = ops->get_sset_count(dev, ETH_SS_TEST); 228 rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -229,9 +235,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
229 if (rc >= 0) 235 if (rc >= 0)
230 info.n_priv_flags = rc; 236 info.n_priv_flags = rc;
231 } 237 }
232 if (ops->get_regs_len) 238 if (ops && ops->get_regs_len)
233 info.regdump_len = ops->get_regs_len(dev); 239 info.regdump_len = ops->get_regs_len(dev);
234 if (ops->get_eeprom_len) 240 if (ops && ops->get_eeprom_len)
235 info.eedump_len = ops->get_eeprom_len(dev); 241 info.eedump_len = ops->get_eeprom_len(dev);
236 242
237 if (copy_to_user(useraddr, &info, sizeof(info))) 243 if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -1402,14 +1408,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1402 if (!dev || !netif_device_present(dev)) 1408 if (!dev || !netif_device_present(dev))
1403 return -ENODEV; 1409 return -ENODEV;
1404 1410
1405 if (!dev->ethtool_ops)
1406 return -EOPNOTSUPP;
1407
1408 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) 1411 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1409 return -EFAULT; 1412 return -EFAULT;
1410 1413
1414 if (!dev->ethtool_ops) {
1415 /* ETHTOOL_GDRVINFO does not require any driver support.
1416 * It is also unprivileged and does not change anything,
1417 * so we can take a shortcut to it. */
1418 if (ethcmd == ETHTOOL_GDRVINFO)
1419 return ethtool_get_drvinfo(dev, useraddr);
1420 else
1421 return -EOPNOTSUPP;
1422 }
1423
1411 /* Allow some commands to be done by anyone */ 1424 /* Allow some commands to be done by anyone */
1412 switch (ethcmd) { 1425 switch (ethcmd) {
1426 case ETHTOOL_GSET:
1413 case ETHTOOL_GDRVINFO: 1427 case ETHTOOL_GDRVINFO:
1414 case ETHTOOL_GMSGLVL: 1428 case ETHTOOL_GMSGLVL:
1415 case ETHTOOL_GCOALESCE: 1429 case ETHTOOL_GCOALESCE:
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 1cd98df412df..f4657c2127b4 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -41,7 +41,9 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
41 41
42 if (m->msg_namelen) { 42 if (m->msg_namelen) {
43 if (mode == VERIFY_READ) { 43 if (mode == VERIFY_READ) {
44 err = move_addr_to_kernel(m->msg_name, m->msg_namelen, 44 void __user *namep;
45 namep = (void __user __force *) m->msg_name;
46 err = move_addr_to_kernel(namep, m->msg_namelen,
45 address); 47 address);
46 if (err < 0) 48 if (err < 0)
47 return err; 49 return err;
@@ -52,7 +54,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
52 } 54 }
53 55
54 size = m->msg_iovlen * sizeof(struct iovec); 56 size = m->msg_iovlen * sizeof(struct iovec);
55 if (copy_from_user(iov, m->msg_iov, size)) 57 if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
56 return -EFAULT; 58 return -EFAULT;
57 59
58 m->msg_iov = iov; 60 m->msg_iov = iov;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index af4dfbadf2a0..76485a3f910b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -515,7 +515,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
515 return attribute->store(queue, attribute, buf, count); 515 return attribute->store(queue, attribute, buf, count);
516} 516}
517 517
518static struct sysfs_ops rx_queue_sysfs_ops = { 518static const struct sysfs_ops rx_queue_sysfs_ops = {
519 .show = rx_queue_attr_show, 519 .show = rx_queue_attr_show,
520 .store = rx_queue_attr_store, 520 .store = rx_queue_attr_store,
521}; 521};
@@ -789,12 +789,13 @@ static const void *net_netlink_ns(struct sock *sk)
789 return sock_net(sk); 789 return sock_net(sk);
790} 790}
791 791
792static struct kobj_ns_type_operations net_ns_type_operations = { 792struct kobj_ns_type_operations net_ns_type_operations = {
793 .type = KOBJ_NS_TYPE_NET, 793 .type = KOBJ_NS_TYPE_NET,
794 .current_ns = net_current_ns, 794 .current_ns = net_current_ns,
795 .netlink_ns = net_netlink_ns, 795 .netlink_ns = net_netlink_ns,
796 .initial_ns = net_initial_ns, 796 .initial_ns = net_initial_ns,
797}; 797};
798EXPORT_SYMBOL_GPL(net_ns_type_operations);
798 799
799static void net_kobj_ns_exit(struct net *net) 800static void net_kobj_ns_exit(struct net *net)
800{ 801{
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 10a1ea72010d..386c2283f14e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3907,8 +3907,6 @@ static void __exit pg_cleanup(void)
3907{ 3907{
3908 struct pktgen_thread *t; 3908 struct pktgen_thread *t;
3909 struct list_head *q, *n; 3909 struct list_head *q, *n;
3910 wait_queue_head_t queue;
3911 init_waitqueue_head(&queue);
3912 3910
3913 /* Stop all interfaces & threads */ 3911 /* Stop all interfaces & threads */
3914 3912
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f78d821bd935..b2a718dfd720 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -612,36 +612,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
612 612
613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) 613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
614{ 614{
615 struct rtnl_link_stats64 a; 615 memcpy(v, b, sizeof(*b));
616
617 a.rx_packets = b->rx_packets;
618 a.tx_packets = b->tx_packets;
619 a.rx_bytes = b->rx_bytes;
620 a.tx_bytes = b->tx_bytes;
621 a.rx_errors = b->rx_errors;
622 a.tx_errors = b->tx_errors;
623 a.rx_dropped = b->rx_dropped;
624 a.tx_dropped = b->tx_dropped;
625
626 a.multicast = b->multicast;
627 a.collisions = b->collisions;
628
629 a.rx_length_errors = b->rx_length_errors;
630 a.rx_over_errors = b->rx_over_errors;
631 a.rx_crc_errors = b->rx_crc_errors;
632 a.rx_frame_errors = b->rx_frame_errors;
633 a.rx_fifo_errors = b->rx_fifo_errors;
634 a.rx_missed_errors = b->rx_missed_errors;
635
636 a.tx_aborted_errors = b->tx_aborted_errors;
637 a.tx_carrier_errors = b->tx_carrier_errors;
638 a.tx_fifo_errors = b->tx_fifo_errors;
639 a.tx_heartbeat_errors = b->tx_heartbeat_errors;
640 a.tx_window_errors = b->tx_window_errors;
641
642 a.rx_compressed = b->rx_compressed;
643 a.tx_compressed = b->tx_compressed;
644 memcpy(v, &a, sizeof(a));
645} 616}
646 617
647/* All VF info */ 618/* All VF info */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c83b421341c0..752c1972b3a7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -202,8 +202,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
202 skb->data = data; 202 skb->data = data;
203 skb_reset_tail_pointer(skb); 203 skb_reset_tail_pointer(skb);
204 skb->end = skb->tail + size; 204 skb->end = skb->tail + size;
205 kmemcheck_annotate_bitfield(skb, flags1);
206 kmemcheck_annotate_bitfield(skb, flags2);
207#ifdef NET_SKBUFF_DATA_USES_OFFSET 205#ifdef NET_SKBUFF_DATA_USES_OFFSET
208 skb->mac_header = ~0U; 206 skb->mac_header = ~0U;
209#endif 207#endif
@@ -340,7 +338,7 @@ static void skb_release_data(struct sk_buff *skb)
340 put_page(skb_shinfo(skb)->frags[i].page); 338 put_page(skb_shinfo(skb)->frags[i].page);
341 } 339 }
342 340
343 if (skb_has_frags(skb)) 341 if (skb_has_frag_list(skb))
344 skb_drop_fraglist(skb); 342 skb_drop_fraglist(skb);
345 343
346 kfree(skb->head); 344 kfree(skb->head);
@@ -685,16 +683,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
685 683
686struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) 684struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
687{ 685{
688 int headerlen = skb->data - skb->head; 686 int headerlen = skb_headroom(skb);
689 /* 687 unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
690 * Allocate the copy buffer 688 struct sk_buff *n = alloc_skb(size, gfp_mask);
691 */ 689
692 struct sk_buff *n;
693#ifdef NET_SKBUFF_DATA_USES_OFFSET
694 n = alloc_skb(skb->end + skb->data_len, gfp_mask);
695#else
696 n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
697#endif
698 if (!n) 690 if (!n)
699 return NULL; 691 return NULL;
700 692
@@ -726,20 +718,14 @@ EXPORT_SYMBOL(skb_copy);
726 718
727struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) 719struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
728{ 720{
729 /* 721 unsigned int size = skb_end_pointer(skb) - skb->head;
730 * Allocate the copy buffer 722 struct sk_buff *n = alloc_skb(size, gfp_mask);
731 */ 723
732 struct sk_buff *n;
733#ifdef NET_SKBUFF_DATA_USES_OFFSET
734 n = alloc_skb(skb->end, gfp_mask);
735#else
736 n = alloc_skb(skb->end - skb->head, gfp_mask);
737#endif
738 if (!n) 724 if (!n)
739 goto out; 725 goto out;
740 726
741 /* Set the data pointer */ 727 /* Set the data pointer */
742 skb_reserve(n, skb->data - skb->head); 728 skb_reserve(n, skb_headroom(skb));
743 /* Set the tail pointer and length */ 729 /* Set the tail pointer and length */
744 skb_put(n, skb_headlen(skb)); 730 skb_put(n, skb_headlen(skb));
745 /* Copy the bytes */ 731 /* Copy the bytes */
@@ -759,7 +745,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
759 skb_shinfo(n)->nr_frags = i; 745 skb_shinfo(n)->nr_frags = i;
760 } 746 }
761 747
762 if (skb_has_frags(skb)) { 748 if (skb_has_frag_list(skb)) {
763 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 749 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
764 skb_clone_fraglist(n); 750 skb_clone_fraglist(n);
765 } 751 }
@@ -791,12 +777,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
791{ 777{
792 int i; 778 int i;
793 u8 *data; 779 u8 *data;
794#ifdef NET_SKBUFF_DATA_USES_OFFSET 780 int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
795 int size = nhead + skb->end + ntail;
796#else
797 int size = nhead + (skb->end - skb->head) + ntail;
798#endif
799 long off; 781 long off;
782 bool fastpath;
800 783
801 BUG_ON(nhead < 0); 784 BUG_ON(nhead < 0);
802 785
@@ -810,23 +793,36 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
810 goto nodata; 793 goto nodata;
811 794
812 /* Copy only real data... and, alas, header. This should be 795 /* Copy only real data... and, alas, header. This should be
813 * optimized for the cases when header is void. */ 796 * optimized for the cases when header is void.
814#ifdef NET_SKBUFF_DATA_USES_OFFSET 797 */
815 memcpy(data + nhead, skb->head, skb->tail); 798 memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
816#else 799
817 memcpy(data + nhead, skb->head, skb->tail - skb->head); 800 memcpy((struct skb_shared_info *)(data + size),
818#endif 801 skb_shinfo(skb),
819 memcpy(data + size, skb_end_pointer(skb),
820 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); 802 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
821 803
822 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 804 /* Check if we can avoid taking references on fragments if we own
823 get_page(skb_shinfo(skb)->frags[i].page); 805 * the last reference on skb->head. (see skb_release_data())
806 */
807 if (!skb->cloned)
808 fastpath = true;
809 else {
810 int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
824 811
825 if (skb_has_frags(skb)) 812 fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
826 skb_clone_fraglist(skb); 813 }
827 814
828 skb_release_data(skb); 815 if (fastpath) {
816 kfree(skb->head);
817 } else {
818 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
819 get_page(skb_shinfo(skb)->frags[i].page);
829 820
821 if (skb_has_frag_list(skb))
822 skb_clone_fraglist(skb);
823
824 skb_release_data(skb);
825 }
830 off = (data + nhead) - skb->head; 826 off = (data + nhead) - skb->head;
831 827
832 skb->head = data; 828 skb->head = data;
@@ -1099,7 +1095,7 @@ drop_pages:
1099 for (; i < nfrags; i++) 1095 for (; i < nfrags; i++)
1100 put_page(skb_shinfo(skb)->frags[i].page); 1096 put_page(skb_shinfo(skb)->frags[i].page);
1101 1097
1102 if (skb_has_frags(skb)) 1098 if (skb_has_frag_list(skb))
1103 skb_drop_fraglist(skb); 1099 skb_drop_fraglist(skb);
1104 goto done; 1100 goto done;
1105 } 1101 }
@@ -1194,7 +1190,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1194 /* Optimization: no fragments, no reasons to preestimate 1190 /* Optimization: no fragments, no reasons to preestimate
1195 * size of pulled pages. Superb. 1191 * size of pulled pages. Superb.
1196 */ 1192 */
1197 if (!skb_has_frags(skb)) 1193 if (!skb_has_frag_list(skb))
1198 goto pull_pages; 1194 goto pull_pages;
1199 1195
1200 /* Estimate size of pulled pages. */ 1196 /* Estimate size of pulled pages. */
@@ -2323,7 +2319,7 @@ next_skb:
2323 st->frag_data = NULL; 2319 st->frag_data = NULL;
2324 } 2320 }
2325 2321
2326 if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { 2322 if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
2327 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 2323 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2328 st->frag_idx = 0; 2324 st->frag_idx = 0;
2329 goto next_skb; 2325 goto next_skb;
@@ -2893,7 +2889,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2893 return -ENOMEM; 2889 return -ENOMEM;
2894 2890
2895 /* Easy case. Most of packets will go this way. */ 2891 /* Easy case. Most of packets will go this way. */
2896 if (!skb_has_frags(skb)) { 2892 if (!skb_has_frag_list(skb)) {
2897 /* A little of trouble, not enough of space for trailer. 2893 /* A little of trouble, not enough of space for trailer.
2898 * This should not happen, when stack is tuned to generate 2894 * This should not happen, when stack is tuned to generate
2899 * good frames. OK, on miss we reallocate and reserve even more 2895 * good frames. OK, on miss we reallocate and reserve even more
@@ -2928,7 +2924,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2928 2924
2929 if (skb1->next == NULL && tailbits) { 2925 if (skb1->next == NULL && tailbits) {
2930 if (skb_shinfo(skb1)->nr_frags || 2926 if (skb_shinfo(skb1)->nr_frags ||
2931 skb_has_frags(skb1) || 2927 skb_has_frag_list(skb1) ||
2932 skb_tailroom(skb1) < tailbits) 2928 skb_tailroom(skb1) < tailbits)
2933 ntail = tailbits + 128; 2929 ntail = tailbits + 128;
2934 } 2930 }
@@ -2937,7 +2933,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2937 skb_cloned(skb1) || 2933 skb_cloned(skb1) ||
2938 ntail || 2934 ntail ||
2939 skb_shinfo(skb1)->nr_frags || 2935 skb_shinfo(skb1)->nr_frags ||
2940 skb_has_frags(skb1)) { 2936 skb_has_frag_list(skb1)) {
2941 struct sk_buff *skb2; 2937 struct sk_buff *skb2;
2942 2938
2943 /* Fuck, we are miserable poor guys... */ 2939 /* Fuck, we are miserable poor guys... */
@@ -3020,7 +3016,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3020 } else { 3016 } else {
3021 /* 3017 /*
3022 * no hardware time stamps available, 3018 * no hardware time stamps available,
3023 * so keep the skb_shared_tx and only 3019 * so keep the shared tx_flags and only
3024 * store software time stamp 3020 * store software time stamp
3025 */ 3021 */
3026 skb->tstamp = ktime_get_real(); 3022 skb->tstamp = ktime_get_real();
diff --git a/net/core/sock.c b/net/core/sock.c
index b05b9b6ddb87..f3a06c40d5e0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1557,6 +1557,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1557EXPORT_SYMBOL(sock_alloc_send_skb); 1557EXPORT_SYMBOL(sock_alloc_send_skb);
1558 1558
1559static void __lock_sock(struct sock *sk) 1559static void __lock_sock(struct sock *sk)
1560 __releases(&sk->sk_lock.slock)
1561 __acquires(&sk->sk_lock.slock)
1560{ 1562{
1561 DEFINE_WAIT(wait); 1563 DEFINE_WAIT(wait);
1562 1564
@@ -1573,6 +1575,8 @@ static void __lock_sock(struct sock *sk)
1573} 1575}
1574 1576
1575static void __release_sock(struct sock *sk) 1577static void __release_sock(struct sock *sk)
1578 __releases(&sk->sk_lock.slock)
1579 __acquires(&sk->sk_lock.slock)
1576{ 1580{
1577 struct sk_buff *skb = sk->sk_backlog.head; 1581 struct sk_buff *skb = sk->sk_backlog.head;
1578 1582
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44e..0581143cb800 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
47 47
48 If in doubt, say N. 48 If in doubt, say N.
49 49
50config IP_DCCP_CCID3_RTO
51 int "Use higher bound for nofeedback timer"
52 default 100
53 depends on IP_DCCP_CCID3 && EXPERIMENTAL
54 ---help---
55 Use higher lower bound for nofeedback timer expiration.
56
57 The TFRC nofeedback timer normally expires after the maximum of 4
58 RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
59 with a small RTT this can mean a high processing load and reduced
60 performance, since then the nofeedback timer is triggered very
61 frequently.
62
63 This option enables to set a higher lower bound for the nofeedback
64 value. Values in units of milliseconds can be set here.
65
66 A value of 0 disables this feature by enforcing the value specified
67 in RFC 3448. The following values have been suggested as bounds for
68 experimental use:
69 * 16-20ms to match the typical multimedia inter-frame interval
70 * 100ms as a reasonable compromise [default]
71 * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
72
73 The default of 100ms is a compromise between a large value for
74 efficient DCCP implementations, and a small value to avoid disrupting
75 the network in times of congestion.
76
77 The purpose of the nofeedback timer is to slow DCCP down when there
78 is serious network congestion: experimenting with larger values should
79 therefore not be performed on WANs.
80
81config IP_DCCP_TFRC_LIB 50config IP_DCCP_TFRC_LIB
82 def_bool y if IP_DCCP_CCID3 51 def_bool y if IP_DCCP_CCID3
83 52
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be1..dc18172b1e59 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
25 */ 25 */
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include "../feat.h" 27#include "../feat.h"
28#include "../ccid.h"
29#include "../dccp.h"
30#include "ccid2.h" 28#include "ccid2.h"
31 29
32 30
33#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 31#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
34static int ccid2_debug; 32static int ccid2_debug;
35#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) 33#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
36
37static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
38{
39 int len = 0;
40 int pipe = 0;
41 struct ccid2_seq *seqp = hc->tx_seqh;
42
43 /* there is data in the chain */
44 if (seqp != hc->tx_seqt) {
45 seqp = seqp->ccid2s_prev;
46 len++;
47 if (!seqp->ccid2s_acked)
48 pipe++;
49
50 while (seqp != hc->tx_seqt) {
51 struct ccid2_seq *prev = seqp->ccid2s_prev;
52
53 len++;
54 if (!prev->ccid2s_acked)
55 pipe++;
56
57 /* packets are sent sequentially */
58 BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
59 prev->ccid2s_seq ) >= 0);
60 BUG_ON(time_before(seqp->ccid2s_sent,
61 prev->ccid2s_sent));
62
63 seqp = prev;
64 }
65 }
66
67 BUG_ON(pipe != hc->tx_pipe);
68 ccid2_pr_debug("len of chain=%d\n", len);
69
70 do {
71 seqp = seqp->ccid2s_prev;
72 len++;
73 } while (seqp != hc->tx_seqh);
74
75 ccid2_pr_debug("total len=%d\n", len);
76 BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
77}
78#else 34#else
79#define ccid2_pr_debug(format, a...) 35#define ccid2_pr_debug(format, a...)
80#define ccid2_hc_tx_check_sanity(hc)
81#endif 36#endif
82 37
83static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) 38static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -156,19 +111,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
156 dp->dccps_l_ack_ratio = val; 111 dp->dccps_l_ack_ratio = val;
157} 112}
158 113
159static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
160{
161 ccid2_pr_debug("change SRTT to %ld\n", val);
162 hc->tx_srtt = val;
163}
164
165static void ccid2_start_rto_timer(struct sock *sk);
166
167static void ccid2_hc_tx_rto_expire(unsigned long data) 114static void ccid2_hc_tx_rto_expire(unsigned long data)
168{ 115{
169 struct sock *sk = (struct sock *)data; 116 struct sock *sk = (struct sock *)data;
170 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 117 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
171 long s;
172 118
173 bh_lock_sock(sk); 119 bh_lock_sock(sk);
174 if (sock_owned_by_user(sk)) { 120 if (sock_owned_by_user(sk)) {
@@ -178,23 +124,19 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
178 124
179 ccid2_pr_debug("RTO_EXPIRE\n"); 125 ccid2_pr_debug("RTO_EXPIRE\n");
180 126
181 ccid2_hc_tx_check_sanity(hc);
182
183 /* back-off timer */ 127 /* back-off timer */
184 hc->tx_rto <<= 1; 128 hc->tx_rto <<= 1;
129 if (hc->tx_rto > DCCP_RTO_MAX)
130 hc->tx_rto = DCCP_RTO_MAX;
185 131
186 s = hc->tx_rto / HZ; 132 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
187 if (s > 60)
188 hc->tx_rto = 60 * HZ;
189
190 ccid2_start_rto_timer(sk);
191 133
192 /* adjust pipe, cwnd etc */ 134 /* adjust pipe, cwnd etc */
193 hc->tx_ssthresh = hc->tx_cwnd / 2; 135 hc->tx_ssthresh = hc->tx_cwnd / 2;
194 if (hc->tx_ssthresh < 2) 136 if (hc->tx_ssthresh < 2)
195 hc->tx_ssthresh = 2; 137 hc->tx_ssthresh = 2;
196 hc->tx_cwnd = 1; 138 hc->tx_cwnd = 1;
197 hc->tx_pipe = 0; 139 hc->tx_pipe = 0;
198 140
199 /* clear state about stuff we sent */ 141 /* clear state about stuff we sent */
200 hc->tx_seqt = hc->tx_seqh; 142 hc->tx_seqt = hc->tx_seqh;
@@ -204,22 +146,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
204 hc->tx_rpseq = 0; 146 hc->tx_rpseq = 0;
205 hc->tx_rpdupack = -1; 147 hc->tx_rpdupack = -1;
206 ccid2_change_l_ack_ratio(sk, 1); 148 ccid2_change_l_ack_ratio(sk, 1);
207 ccid2_hc_tx_check_sanity(hc);
208out: 149out:
209 bh_unlock_sock(sk); 150 bh_unlock_sock(sk);
210 sock_put(sk); 151 sock_put(sk);
211} 152}
212 153
213static void ccid2_start_rto_timer(struct sock *sk)
214{
215 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
216
217 ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
218
219 BUG_ON(timer_pending(&hc->tx_rtotimer));
220 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
221}
222
223static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) 154static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
224{ 155{
225 struct dccp_sock *dp = dccp_sk(sk); 156 struct dccp_sock *dp = dccp_sk(sk);
@@ -230,7 +161,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
230 161
231 hc->tx_seqh->ccid2s_seq = dp->dccps_gss; 162 hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
232 hc->tx_seqh->ccid2s_acked = 0; 163 hc->tx_seqh->ccid2s_acked = 0;
233 hc->tx_seqh->ccid2s_sent = jiffies; 164 hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
234 165
235 next = hc->tx_seqh->ccid2s_next; 166 next = hc->tx_seqh->ccid2s_next;
236 /* check if we need to alloc more space */ 167 /* check if we need to alloc more space */
@@ -296,23 +227,20 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
296 } 227 }
297#endif 228#endif
298 229
299 /* setup RTO timer */ 230 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
300 if (!timer_pending(&hc->tx_rtotimer))
301 ccid2_start_rto_timer(sk);
302 231
303#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 232#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
304 do { 233 do {
305 struct ccid2_seq *seqp = hc->tx_seqt; 234 struct ccid2_seq *seqp = hc->tx_seqt;
306 235
307 while (seqp != hc->tx_seqh) { 236 while (seqp != hc->tx_seqh) {
308 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", 237 ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
309 (unsigned long long)seqp->ccid2s_seq, 238 (unsigned long long)seqp->ccid2s_seq,
310 seqp->ccid2s_acked, seqp->ccid2s_sent); 239 seqp->ccid2s_acked, seqp->ccid2s_sent);
311 seqp = seqp->ccid2s_next; 240 seqp = seqp->ccid2s_next;
312 } 241 }
313 } while (0); 242 } while (0);
314 ccid2_pr_debug("=========\n"); 243 ccid2_pr_debug("=========\n");
315 ccid2_hc_tx_check_sanity(hc);
316#endif 244#endif
317} 245}
318 246
@@ -378,17 +306,87 @@ out_invalid_option:
378 return -1; 306 return -1;
379} 307}
380 308
381static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) 309/**
310 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
311 * This code is almost identical with TCP's tcp_rtt_estimator(), since
312 * - it has a higher sampling frequency (recommended by RFC 1323),
313 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
314 * - it is simple (cf. more complex proposals such as Eifel timer or research
315 * which suggests that the gain should be set according to window size),
316 * - in tests it was found to work well with CCID2 [gerrit].
317 */
318static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
382{ 319{
383 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 320 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
321 long m = mrtt ? : 1;
384 322
385 sk_stop_timer(sk, &hc->tx_rtotimer); 323 if (hc->tx_srtt == 0) {
386 ccid2_pr_debug("deleted RTO timer\n"); 324 /* First measurement m */
325 hc->tx_srtt = m << 3;
326 hc->tx_mdev = m << 1;
327
328 hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
329 hc->tx_rttvar = hc->tx_mdev_max;
330
331 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
332 } else {
333 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
334 m -= (hc->tx_srtt >> 3);
335 hc->tx_srtt += m;
336
337 /* Similarly, update scaled mdev with regard to |m| */
338 if (m < 0) {
339 m = -m;
340 m -= (hc->tx_mdev >> 2);
341 /*
342 * This neutralises RTO increase when RTT < SRTT - mdev
343 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
344 * in Linux TCP", USENIX 2002, pp. 49-62).
345 */
346 if (m > 0)
347 m >>= 3;
348 } else {
349 m -= (hc->tx_mdev >> 2);
350 }
351 hc->tx_mdev += m;
352
353 if (hc->tx_mdev > hc->tx_mdev_max) {
354 hc->tx_mdev_max = hc->tx_mdev;
355 if (hc->tx_mdev_max > hc->tx_rttvar)
356 hc->tx_rttvar = hc->tx_mdev_max;
357 }
358
359 /*
360 * Decay RTTVAR at most once per flight, exploiting that
361 * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
362 * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
363 * GAR is a useful bound for FlightSize = pipe.
364 * AWL is probably too low here, as it over-estimates pipe.
365 */
366 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
367 if (hc->tx_mdev_max < hc->tx_rttvar)
368 hc->tx_rttvar -= (hc->tx_rttvar -
369 hc->tx_mdev_max) >> 2;
370 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
371 hc->tx_mdev_max = tcp_rto_min(sk);
372 }
373 }
374
375 /*
376 * Set RTO from SRTT and RTTVAR
377 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
378 * This agrees with RFC 4341, 5:
379 * "Because DCCP does not retransmit data, DCCP does not require
380 * TCP's recommended minimum timeout of one second".
381 */
382 hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
383
384 if (hc->tx_rto > DCCP_RTO_MAX)
385 hc->tx_rto = DCCP_RTO_MAX;
387} 386}
388 387
389static inline void ccid2_new_ack(struct sock *sk, 388static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
390 struct ccid2_seq *seqp, 389 unsigned int *maxincr)
391 unsigned int *maxincr)
392{ 390{
393 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 391 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
394 392
@@ -402,93 +400,27 @@ static inline void ccid2_new_ack(struct sock *sk,
402 hc->tx_cwnd += 1; 400 hc->tx_cwnd += 1;
403 hc->tx_packets_acked = 0; 401 hc->tx_packets_acked = 0;
404 } 402 }
405 403 /*
406 /* update RTO */ 404 * FIXME: RTT is sampled several times per acknowledgment (for each
407 if (hc->tx_srtt == -1 || 405 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
408 time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { 406 * This causes the RTT to be over-estimated, since the older entries
409 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; 407 * in the Ack Vector have earlier sending times.
410 int s; 408 * The cleanest solution is to not use the ccid2s_sent field at all
411 409 * and instead use DCCP timestamps: requires changes in other places.
412 /* first measurement */ 410 */
413 if (hc->tx_srtt == -1) { 411 ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
414 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
415 r, jiffies,
416 (unsigned long long)seqp->ccid2s_seq);
417 ccid2_change_srtt(hc, r);
418 hc->tx_rttvar = r >> 1;
419 } else {
420 /* RTTVAR */
421 long tmp = hc->tx_srtt - r;
422 long srtt;
423
424 if (tmp < 0)
425 tmp *= -1;
426
427 tmp >>= 2;
428 hc->tx_rttvar *= 3;
429 hc->tx_rttvar >>= 2;
430 hc->tx_rttvar += tmp;
431
432 /* SRTT */
433 srtt = hc->tx_srtt;
434 srtt *= 7;
435 srtt >>= 3;
436 tmp = r >> 3;
437 srtt += tmp;
438 ccid2_change_srtt(hc, srtt);
439 }
440 s = hc->tx_rttvar << 2;
441 /* clock granularity is 1 when based on jiffies */
442 if (!s)
443 s = 1;
444 hc->tx_rto = hc->tx_srtt + s;
445
446 /* must be at least a second */
447 s = hc->tx_rto / HZ;
448 /* DCCP doesn't require this [but I like it cuz my code sux] */
449#if 1
450 if (s < 1)
451 hc->tx_rto = HZ;
452#endif
453 /* max 60 seconds */
454 if (s > 60)
455 hc->tx_rto = HZ * 60;
456
457 hc->tx_lastrtt = jiffies;
458
459 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
460 hc->tx_srtt, hc->tx_rttvar,
461 hc->tx_rto, HZ, r);
462 }
463
464 /* we got a new ack, so re-start RTO timer */
465 ccid2_hc_tx_kill_rto_timer(sk);
466 ccid2_start_rto_timer(sk);
467}
468
469static void ccid2_hc_tx_dec_pipe(struct sock *sk)
470{
471 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
472
473 if (hc->tx_pipe == 0)
474 DCCP_BUG("pipe == 0");
475 else
476 hc->tx_pipe--;
477
478 if (hc->tx_pipe == 0)
479 ccid2_hc_tx_kill_rto_timer(sk);
480} 412}
481 413
482static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) 414static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
483{ 415{
484 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 416 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
485 417
486 if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { 418 if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
487 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 419 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
488 return; 420 return;
489 } 421 }
490 422
491 hc->tx_last_cong = jiffies; 423 hc->tx_last_cong = ccid2_time_stamp;
492 424
493 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; 425 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
494 hc->tx_ssthresh = max(hc->tx_cwnd, 2U); 426 hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
@@ -510,7 +442,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
510 int done = 0; 442 int done = 0;
511 unsigned int maxincr = 0; 443 unsigned int maxincr = 0;
512 444
513 ccid2_hc_tx_check_sanity(hc);
514 /* check reverse path congestion */ 445 /* check reverse path congestion */
515 seqno = DCCP_SKB_CB(skb)->dccpd_seq; 446 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
516 447
@@ -620,7 +551,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
620 seqp->ccid2s_acked = 1; 551 seqp->ccid2s_acked = 1;
621 ccid2_pr_debug("Got ack for %llu\n", 552 ccid2_pr_debug("Got ack for %llu\n",
622 (unsigned long long)seqp->ccid2s_seq); 553 (unsigned long long)seqp->ccid2s_seq);
623 ccid2_hc_tx_dec_pipe(sk); 554 hc->tx_pipe--;
624 } 555 }
625 if (seqp == hc->tx_seqt) { 556 if (seqp == hc->tx_seqt) {
626 done = 1; 557 done = 1;
@@ -677,7 +608,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
677 * one ack vector. 608 * one ack vector.
678 */ 609 */
679 ccid2_congestion_event(sk, seqp); 610 ccid2_congestion_event(sk, seqp);
680 ccid2_hc_tx_dec_pipe(sk); 611 hc->tx_pipe--;
681 } 612 }
682 if (seqp == hc->tx_seqt) 613 if (seqp == hc->tx_seqt)
683 break; 614 break;
@@ -695,7 +626,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
695 hc->tx_seqt = hc->tx_seqt->ccid2s_next; 626 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
696 } 627 }
697 628
698 ccid2_hc_tx_check_sanity(hc); 629 /* restart RTO timer if not all outstanding data has been acked */
630 if (hc->tx_pipe == 0)
631 sk_stop_timer(sk, &hc->tx_rtotimer);
632 else
633 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
699} 634}
700 635
701static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 636static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +642,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
707 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ 642 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
708 hc->tx_ssthresh = ~0U; 643 hc->tx_ssthresh = ~0U;
709 644
710 /* 645 /* Use larger initial windows (RFC 4341, section 5). */
711 * RFC 4341, 5: "The cwnd parameter is initialized to at most four 646 hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
712 * packets for new connections, following the rules from [RFC3390]".
713 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
714 */
715 hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
716 647
717 /* Make sure that Ack Ratio is enabled and within bounds. */ 648 /* Make sure that Ack Ratio is enabled and within bounds. */
718 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); 649 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
@@ -723,15 +654,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
723 if (ccid2_hc_tx_alloc_seq(hc)) 654 if (ccid2_hc_tx_alloc_seq(hc))
724 return -ENOMEM; 655 return -ENOMEM;
725 656
726 hc->tx_rto = 3 * HZ; 657 hc->tx_rto = DCCP_TIMEOUT_INIT;
727 ccid2_change_srtt(hc, -1);
728 hc->tx_rttvar = -1;
729 hc->tx_rpdupack = -1; 658 hc->tx_rpdupack = -1;
730 hc->tx_last_cong = jiffies; 659 hc->tx_last_cong = ccid2_time_stamp;
731 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 660 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
732 (unsigned long)sk); 661 (unsigned long)sk);
733
734 ccid2_hc_tx_check_sanity(hc);
735 return 0; 662 return 0;
736} 663}
737 664
@@ -740,7 +667,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
740 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 667 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
741 int i; 668 int i;
742 669
743 ccid2_hc_tx_kill_rto_timer(sk); 670 sk_stop_timer(sk, &hc->tx_rtotimer);
744 671
745 for (i = 0; i < hc->tx_seqbufc; i++) 672 for (i = 0; i < hc->tx_seqbufc; i++)
746 kfree(hc->tx_seqbuf[i]); 673 kfree(hc->tx_seqbuf[i]);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103bb..9731c2dc1487 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
18#ifndef _DCCP_CCID2_H_ 18#ifndef _DCCP_CCID2_H_
19#define _DCCP_CCID2_H_ 19#define _DCCP_CCID2_H_
20 20
21#include <linux/dccp.h>
22#include <linux/timer.h> 21#include <linux/timer.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include "../ccid.h" 23#include "../ccid.h"
24#include "../dccp.h"
25
26/*
27 * CCID-2 timestamping faces the same issues as TCP timestamping.
28 * Hence we reuse/share as much of the code as possible.
29 */
30#define ccid2_time_stamp tcp_time_stamp
31
25/* NUMDUPACK parameter from RFC 4341, p. 6 */ 32/* NUMDUPACK parameter from RFC 4341, p. 6 */
26#define NUMDUPACK 3 33#define NUMDUPACK 3
27 34
28struct sock;
29
30struct ccid2_seq { 35struct ccid2_seq {
31 u64 ccid2s_seq; 36 u64 ccid2s_seq;
32 unsigned long ccid2s_sent; 37 u32 ccid2s_sent;
33 int ccid2s_acked; 38 int ccid2s_acked;
34 struct ccid2_seq *ccid2s_prev; 39 struct ccid2_seq *ccid2s_prev;
35 struct ccid2_seq *ccid2s_next; 40 struct ccid2_seq *ccid2s_next;
@@ -42,7 +47,12 @@ struct ccid2_seq {
42 * struct ccid2_hc_tx_sock - CCID2 TX half connection 47 * struct ccid2_hc_tx_sock - CCID2 TX half connection
43 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 48 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) 49 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
45 * @tx_lastrtt: time RTT was last measured 50 * @tx_srtt: smoothed RTT estimate, scaled by 2^3
51 * @tx_mdev: smoothed RTT variation, scaled by 2^2
52 * @tx_mdev_max: maximum of @mdev during one flight
53 * @tx_rttvar: moving average/maximum of @mdev_max
54 * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
55 * @tx_rtt_seq: to decay RTTVAR at most once per flight
46 * @tx_rpseq: last consecutive seqno 56 * @tx_rpseq: last consecutive seqno
47 * @tx_rpdupack: dupacks since rpseq 57 * @tx_rpdupack: dupacks since rpseq
48 */ 58 */
@@ -55,14 +65,19 @@ struct ccid2_hc_tx_sock {
55 int tx_seqbufc; 65 int tx_seqbufc;
56 struct ccid2_seq *tx_seqh; 66 struct ccid2_seq *tx_seqh;
57 struct ccid2_seq *tx_seqt; 67 struct ccid2_seq *tx_seqt;
58 long tx_rto; 68
59 long tx_srtt; 69 /* RTT measurement: variables/principles are the same as in TCP */
60 long tx_rttvar; 70 u32 tx_srtt,
61 unsigned long tx_lastrtt; 71 tx_mdev,
72 tx_mdev_max,
73 tx_rttvar,
74 tx_rto;
75 u64 tx_rtt_seq:48;
62 struct timer_list tx_rtotimer; 76 struct timer_list tx_rtotimer;
77
63 u64 tx_rpseq; 78 u64 tx_rpseq;
64 int tx_rpdupack; 79 int tx_rpdupack;
65 unsigned long tx_last_cong; 80 u32 tx_last_cong;
66 u64 tx_high_ack; 81 u64 tx_high_ack;
67}; 82};
68 83
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 95f752986497..278e17069322 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -218,9 +218,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
218 218
219 /* 219 /*
220 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 220 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
221 * RTO is 0 if and only if no feedback has been received yet.
221 */ 222 */
222 if (hc->tx_t_rto == 0 || /* no feedback received yet */ 223 if (hc->tx_t_rto == 0 || hc->tx_p == 0) {
223 hc->tx_p == 0) {
224 224
225 /* halve send rate directly */ 225 /* halve send rate directly */
226 hc->tx_x = max(hc->tx_x / 2, 226 hc->tx_x = max(hc->tx_x / 2,
@@ -256,7 +256,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
256 * Set new timeout for the nofeedback timer. 256 * Set new timeout for the nofeedback timer.
257 * See comments in packet_recv() regarding the value of t_RTO. 257 * See comments in packet_recv() regarding the value of t_RTO.
258 */ 258 */
259 if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ 259 if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */
260 t_nfb = TFRC_INITIAL_TIMEOUT; 260 t_nfb = TFRC_INITIAL_TIMEOUT;
261 else 261 else
262 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); 262 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
@@ -372,7 +372,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
372static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 372static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
373{ 373{
374 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 374 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
375 struct ccid3_options_received *opt_recv; 375 struct ccid3_options_received *opt_recv = &hc->tx_options_received;
376 ktime_t now; 376 ktime_t now;
377 unsigned long t_nfb; 377 unsigned long t_nfb;
378 u32 pinv, r_sample; 378 u32 pinv, r_sample;
@@ -386,7 +386,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
386 hc->tx_state != TFRC_SSTATE_NO_FBACK) 386 hc->tx_state != TFRC_SSTATE_NO_FBACK)
387 return; 387 return;
388 388
389 opt_recv = &hc->tx_options_received;
390 now = ktime_get_real(); 389 now = ktime_get_real();
391 390
392 /* Estimate RTT from history if ACK number is valid */ 391 /* Estimate RTT from history if ACK number is valid */
@@ -461,13 +460,12 @@ done_computing_x:
461 sk->sk_write_space(sk); 460 sk->sk_write_space(sk);
462 461
463 /* 462 /*
464 * Update timeout interval for the nofeedback timer. 463 * Update timeout interval for the nofeedback timer. In order to control
465 * We use a configuration option to increase the lower bound. 464 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
466 * This can help avoid triggering the nofeedback timer too 465 * tunable RTAX_RTO_MIN value as the lower bound.
467 * often ('spinning') on LANs with small RTTs.
468 */ 466 */
469 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * 467 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
470 (USEC_PER_SEC / 1000))); 468 USEC_PER_SEC/HZ * tcp_rto_min(sk));
471 /* 469 /*
472 * Schedule no feedback timer to expire in 470 * Schedule no feedback timer to expire in
473 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) 471 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -489,11 +487,9 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
489 int rc = 0; 487 int rc = 0;
490 const struct dccp_sock *dp = dccp_sk(sk); 488 const struct dccp_sock *dp = dccp_sk(sk);
491 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 489 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
492 struct ccid3_options_received *opt_recv; 490 struct ccid3_options_received *opt_recv = &hc->tx_options_received;
493 __be32 opt_val; 491 __be32 opt_val;
494 492
495 opt_recv = &hc->tx_options_received;
496
497 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { 493 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
498 opt_recv->ccid3or_seqno = dp->dccps_gsr; 494 opt_recv->ccid3or_seqno = dp->dccps_gsr;
499 opt_recv->ccid3or_loss_event_rate = ~0; 495 opt_recv->ccid3or_loss_event_rate = ~0;
@@ -567,34 +563,30 @@ static void ccid3_hc_tx_exit(struct sock *sk)
567 563
568static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 564static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
569{ 565{
570 struct ccid3_hc_tx_sock *hc; 566 info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
571 567 info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
572 /* Listen socks doesn't have a private CCID block */
573 if (sk->sk_state == DCCP_LISTEN)
574 return;
575
576 hc = ccid3_hc_tx_sk(sk);
577 info->tcpi_rto = hc->tx_t_rto;
578 info->tcpi_rtt = hc->tx_rtt;
579} 568}
580 569
581static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, 570static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
582 u32 __user *optval, int __user *optlen) 571 u32 __user *optval, int __user *optlen)
583{ 572{
584 const struct ccid3_hc_tx_sock *hc; 573 const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
574 struct tfrc_tx_info tfrc;
585 const void *val; 575 const void *val;
586 576
587 /* Listen socks doesn't have a private CCID block */
588 if (sk->sk_state == DCCP_LISTEN)
589 return -EINVAL;
590
591 hc = ccid3_hc_tx_sk(sk);
592 switch (optname) { 577 switch (optname) {
593 case DCCP_SOCKOPT_CCID_TX_INFO: 578 case DCCP_SOCKOPT_CCID_TX_INFO:
594 if (len < sizeof(hc->tx_tfrc)) 579 if (len < sizeof(tfrc))
595 return -EINVAL; 580 return -EINVAL;
596 len = sizeof(hc->tx_tfrc); 581 tfrc.tfrctx_x = hc->tx_x;
597 val = &hc->tx_tfrc; 582 tfrc.tfrctx_x_recv = hc->tx_x_recv;
583 tfrc.tfrctx_x_calc = hc->tx_x_calc;
584 tfrc.tfrctx_rtt = hc->tx_rtt;
585 tfrc.tfrctx_p = hc->tx_p;
586 tfrc.tfrctx_rto = hc->tx_t_rto;
587 tfrc.tfrctx_ipi = hc->tx_t_ipi;
588 len = sizeof(tfrc);
589 val = &tfrc;
598 break; 590 break;
599 default: 591 default:
600 return -ENOPROTOOPT; 592 return -ENOPROTOOPT;
@@ -701,14 +693,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
701 693
702static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) 694static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
703{ 695{
704 const struct ccid3_hc_rx_sock *hc; 696 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
705 __be32 x_recv, pinv; 697 __be32 x_recv, pinv;
706 698
707 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) 699 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
708 return 0; 700 return 0;
709 701
710 hc = ccid3_hc_rx_sk(sk);
711
712 if (dccp_packet_without_ack(skb)) 702 if (dccp_packet_without_ack(skb))
713 return 0; 703 return 0;
714 704
@@ -749,10 +739,11 @@ static u32 ccid3_first_li(struct sock *sk)
749 x_recv = scaled_div32(hc->rx_bytes_recv, delta); 739 x_recv = scaled_div32(hc->rx_bytes_recv, delta);
750 if (x_recv == 0) { /* would also trigger divide-by-zero */ 740 if (x_recv == 0) { /* would also trigger divide-by-zero */
751 DCCP_WARN("X_recv==0\n"); 741 DCCP_WARN("X_recv==0\n");
752 if ((x_recv = hc->rx_x_recv) == 0) { 742 if (hc->rx_x_recv == 0) {
753 DCCP_BUG("stored value of X_recv is zero"); 743 DCCP_BUG("stored value of X_recv is zero");
754 return ~0U; 744 return ~0U;
755 } 745 }
746 x_recv = hc->rx_x_recv;
756 } 747 }
757 748
758 fval = scaled_div(hc->rx_s, hc->rx_rtt); 749 fval = scaled_div(hc->rx_s, hc->rx_rtt);
@@ -870,30 +861,18 @@ static void ccid3_hc_rx_exit(struct sock *sk)
870 861
871static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 862static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
872{ 863{
873 const struct ccid3_hc_rx_sock *hc; 864 info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state;
874
875 /* Listen socks doesn't have a private CCID block */
876 if (sk->sk_state == DCCP_LISTEN)
877 return;
878
879 hc = ccid3_hc_rx_sk(sk);
880 info->tcpi_ca_state = hc->rx_state;
881 info->tcpi_options |= TCPI_OPT_TIMESTAMPS; 865 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
882 info->tcpi_rcv_rtt = hc->rx_rtt; 866 info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt;
883} 867}
884 868
885static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, 869static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
886 u32 __user *optval, int __user *optlen) 870 u32 __user *optval, int __user *optlen)
887{ 871{
888 const struct ccid3_hc_rx_sock *hc; 872 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
889 struct tfrc_rx_info rx_info; 873 struct tfrc_rx_info rx_info;
890 const void *val; 874 const void *val;
891 875
892 /* Listen socks doesn't have a private CCID block */
893 if (sk->sk_state == DCCP_LISTEN)
894 return -EINVAL;
895
896 hc = ccid3_hc_rx_sk(sk);
897 switch (optname) { 876 switch (optname) {
898 case DCCP_SOCKOPT_CCID_RX_INFO: 877 case DCCP_SOCKOPT_CCID_RX_INFO:
899 if (len < sizeof(rx_info)) 878 if (len < sizeof(rx_info))
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 032635776653..b7e569c22f36 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,7 +42,7 @@
42#include "lib/tfrc.h" 42#include "lib/tfrc.h"
43#include "../ccid.h" 43#include "../ccid.h"
44 44
45/* Two seconds as per RFC 3448 4.2 */ 45/* Two seconds as per RFC 5348, 4.2 */
46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
47 47
48/* In usecs - half the scheduling granularity as per RFC3448 4.6 */ 48/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
@@ -95,14 +95,13 @@ enum ccid3_hc_tx_states {
95 * @tx_options_received: Parsed set of retrieved options 95 * @tx_options_received: Parsed set of retrieved options
96 */ 96 */
97struct ccid3_hc_tx_sock { 97struct ccid3_hc_tx_sock {
98 struct tfrc_tx_info tx_tfrc; 98 u64 tx_x;
99#define tx_x tx_tfrc.tfrctx_x 99 u64 tx_x_recv;
100#define tx_x_recv tx_tfrc.tfrctx_x_recv 100 u32 tx_x_calc;
101#define tx_x_calc tx_tfrc.tfrctx_x_calc 101 u32 tx_rtt;
102#define tx_rtt tx_tfrc.tfrctx_rtt 102 u32 tx_p;
103#define tx_p tx_tfrc.tfrctx_p 103 u32 tx_t_rto;
104#define tx_t_rto tx_tfrc.tfrctx_rto 104 u32 tx_t_ipi;
105#define tx_t_ipi tx_tfrc.tfrctx_ipi
106 u16 tx_s; 105 u16 tx_s;
107 enum ccid3_hc_tx_states tx_state:8; 106 enum ccid3_hc_tx_states tx_state:8;
108 u8 tx_last_win_count; 107 u8 tx_last_win_count;
@@ -131,16 +130,12 @@ enum ccid3_hc_rx_states {
131 130
132/** 131/**
133 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket 132 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
134 * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3)
135 * @rx_rtt: Receiver estimate of rtt (non-standard)
136 * @rx_p: Current loss event rate (RFC 3448 5.4)
137 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) 133 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1)
138 * @rx_state: Receiver state, one of %ccid3_hc_rx_states 134 * @rx_state: Receiver state, one of %ccid3_hc_rx_states
139 * @rx_bytes_recv: Total sum of DCCP payload bytes 135 * @rx_bytes_recv: Total sum of DCCP payload bytes
140 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) 136 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3)
141 * @rx_rtt: Receiver estimate of RTT 137 * @rx_rtt: Receiver estimate of RTT
142 * @rx_tstamp_last_feedback: Time at which last feedback was sent 138 * @rx_tstamp_last_feedback: Time at which last feedback was sent
143 * @rx_tstamp_last_ack: Time at which last feedback was sent
144 * @rx_hist: Packet history (loss detection + RTT sampling) 139 * @rx_hist: Packet history (loss detection + RTT sampling)
145 * @rx_li_hist: Loss Interval database 140 * @rx_li_hist: Loss Interval database
146 * @rx_s: Received packet size in bytes 141 * @rx_s: Received packet size in bytes
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index baeb1eaf011b..2ef115277bea 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -693,22 +693,22 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
693 aux = scp->accessdata.acc_userl; 693 aux = scp->accessdata.acc_userl;
694 *skb_put(skb, 1) = aux; 694 *skb_put(skb, 1) = aux;
695 if (aux > 0) 695 if (aux > 0)
696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux); 696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
697 697
698 aux = scp->accessdata.acc_passl; 698 aux = scp->accessdata.acc_passl;
699 *skb_put(skb, 1) = aux; 699 *skb_put(skb, 1) = aux;
700 if (aux > 0) 700 if (aux > 0)
701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux); 701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
702 702
703 aux = scp->accessdata.acc_accl; 703 aux = scp->accessdata.acc_accl;
704 *skb_put(skb, 1) = aux; 704 *skb_put(skb, 1) = aux;
705 if (aux > 0) 705 if (aux > 0)
706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); 706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
707 707
708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); 708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
709 *skb_put(skb, 1) = aux; 709 *skb_put(skb, 1) = aux;
710 if (aux > 0) 710 if (aux > 0)
711 memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux); 711 memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux);
712 712
713 scp->persist = dn_nsp_persist(sk); 713 scp->persist = dn_nsp_persist(sk);
714 scp->persist_fxn = dn_nsp_retrans_conninit; 714 scp->persist_fxn = dn_nsp_retrans_conninit;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index dc54bd0d083b..baa98fb83552 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -1009,7 +1009,6 @@ static int __init aun_udp_initialise(void)
1009 struct sockaddr_in sin; 1009 struct sockaddr_in sin;
1010 1010
1011 skb_queue_head_init(&aun_queue); 1011 skb_queue_head_init(&aun_queue);
1012 spin_lock_init(&aun_queue_lock);
1013 setup_timer(&ab_cleanup_timer, ab_cleanup, 0); 1012 setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
1014 ab_cleanup_timer.expires = jiffies + (HZ*2); 1013 ab_cleanup_timer.expires = jiffies + (HZ*2);
1015 add_timer(&ab_cleanup_timer); 1014 add_timer(&ab_cleanup_timer);
@@ -1167,7 +1166,6 @@ static int __init econet_proto_init(void)
1167 goto out; 1166 goto out;
1168 sock_register(&econet_family_ops); 1167 sock_register(&econet_family_ops);
1169#ifdef CONFIG_ECONET_AUNUDP 1168#ifdef CONFIG_ECONET_AUNUDP
1170 spin_lock_init(&aun_queue_lock);
1171 aun_udp_initialise(); 1169 aun_udp_initialise();
1172#endif 1170#endif
1173#ifdef CONFIG_ECONET_NATIVE 1171#ifdef CONFIG_ECONET_NATIVE
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 215c83986a9d..85e7b4551326 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -367,7 +367,7 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
367EXPORT_SYMBOL(alloc_etherdev_mq); 367EXPORT_SYMBOL(alloc_etherdev_mq);
368 368
369static size_t _format_mac_addr(char *buf, int buflen, 369static size_t _format_mac_addr(char *buf, int buflen,
370 const unsigned char *addr, int len) 370 const unsigned char *addr, int len)
371{ 371{
372 int i; 372 int i;
373 char *cp = buf; 373 char *cp = buf;
@@ -376,7 +376,7 @@ static size_t _format_mac_addr(char *buf, int buflen,
376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]); 376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
377 if (i == len - 1) 377 if (i == len - 1)
378 break; 378 break;
379 cp += strlcpy(cp, ":", buflen - (cp - buf)); 379 cp += scnprintf(cp, buflen - (cp - buf), ":");
380 } 380 }
381 return cp - buf; 381 return cp - buf;
382} 382}
@@ -386,7 +386,7 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
386 size_t l; 386 size_t l;
387 387
388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len); 388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
389 l += strlcpy(buf + l, "\n", PAGE_SIZE - l); 389 l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
390 return ((ssize_t) l); 390 return ((ssize_t) l);
391} 391}
392EXPORT_SYMBOL(sysfs_format_mac); 392EXPORT_SYMBOL(sysfs_format_mac);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 571f8950ed06..5462e2d147a6 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -215,8 +215,15 @@ config NET_IPIP
215 be inserted in and removed from the running kernel whenever you 215 be inserted in and removed from the running kernel whenever you
216 want). Most people won't need this and can say N. 216 want). Most people won't need this and can say N.
217 217
218config NET_IPGRE_DEMUX
219 tristate "IP: GRE demultiplexer"
220 help
221 This is helper module to demultiplex GRE packets on GRE version field criteria.
222 Required by ip_gre and pptp modules.
223
218config NET_IPGRE 224config NET_IPGRE
219 tristate "IP: GRE tunnels over IP" 225 tristate "IP: GRE tunnels over IP"
226 depends on NET_IPGRE_DEMUX
220 help 227 help
221 Tunneling means encapsulating data of one protocol type within 228 Tunneling means encapsulating data of one protocol type within
222 another protocol and sending it over a channel that understands the 229 another protocol and sending it over a channel that understands the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 80ff87ce43aa..4978d22f9a75 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
21obj-$(CONFIG_IP_MROUTE) += ipmr.o 21obj-$(CONFIG_IP_MROUTE) += ipmr.o
22obj-$(CONFIG_NET_IPIP) += ipip.o 22obj-$(CONFIG_NET_IPIP) += ipip.o
23obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
23obj-$(CONFIG_NET_IPGRE) += ip_gre.o 24obj-$(CONFIG_NET_IPGRE) += ip_gre.o
24obj-$(CONFIG_SYN_COOKIES) += syncookies.o 25obj-$(CONFIG_SYN_COOKIES) += syncookies.o
25obj-$(CONFIG_INET_AH) += ah4.o 26obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6a1100c25a9f..f581f77d1097 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -227,18 +227,16 @@ EXPORT_SYMBOL(inet_ehash_secret);
227 227
228/* 228/*
229 * inet_ehash_secret must be set exactly once 229 * inet_ehash_secret must be set exactly once
230 * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
231 */ 230 */
232void build_ehash_secret(void) 231void build_ehash_secret(void)
233{ 232{
234 u32 rnd; 233 u32 rnd;
234
235 do { 235 do {
236 get_random_bytes(&rnd, sizeof(rnd)); 236 get_random_bytes(&rnd, sizeof(rnd));
237 } while (rnd == 0); 237 } while (rnd == 0);
238 spin_lock_bh(&inetsw_lock); 238
239 if (!inet_ehash_secret) 239 cmpxchg(&inet_ehash_secret, 0, rnd);
240 inet_ehash_secret = rnd;
241 spin_unlock_bh(&inetsw_lock);
242} 240}
243EXPORT_SYMBOL(build_ehash_secret); 241EXPORT_SYMBOL(build_ehash_secret);
244 242
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96c1955b3e2f..dcfe7e961c10 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -55,7 +55,7 @@
55 * Stuart Cheshire : Metricom and grat arp fixes 55 * Stuart Cheshire : Metricom and grat arp fixes
56 * *** FOR 2.1 clean this up *** 56 * *** FOR 2.1 clean this up ***
57 * Lawrence V. Stefani: (08/12/96) Added FDDI support. 57 * Lawrence V. Stefani: (08/12/96) Added FDDI support.
58 * Alan Cox : Took the AP1000 nasty FDDI hack and 58 * Alan Cox : Took the AP1000 nasty FDDI hack and
59 * folded into the mainstream FDDI code. 59 * folded into the mainstream FDDI code.
60 * Ack spit, Linus how did you allow that 60 * Ack spit, Linus how did you allow that
61 * one in... 61 * one in...
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(clip_tbl_hook);
120#endif 120#endif
121 121
122#include <asm/system.h> 122#include <asm/system.h>
123#include <asm/uaccess.h> 123#include <linux/uaccess.h>
124 124
125#include <linux/netfilter_arp.h> 125#include <linux/netfilter_arp.h>
126 126
@@ -173,32 +173,32 @@ const struct neigh_ops arp_broken_ops = {
173EXPORT_SYMBOL(arp_broken_ops); 173EXPORT_SYMBOL(arp_broken_ops);
174 174
175struct neigh_table arp_tbl = { 175struct neigh_table arp_tbl = {
176 .family = AF_INET, 176 .family = AF_INET,
177 .entry_size = sizeof(struct neighbour) + 4, 177 .entry_size = sizeof(struct neighbour) + 4,
178 .key_len = 4, 178 .key_len = 4,
179 .hash = arp_hash, 179 .hash = arp_hash,
180 .constructor = arp_constructor, 180 .constructor = arp_constructor,
181 .proxy_redo = parp_redo, 181 .proxy_redo = parp_redo,
182 .id = "arp_cache", 182 .id = "arp_cache",
183 .parms = { 183 .parms = {
184 .tbl = &arp_tbl, 184 .tbl = &arp_tbl,
185 .base_reachable_time = 30 * HZ, 185 .base_reachable_time = 30 * HZ,
186 .retrans_time = 1 * HZ, 186 .retrans_time = 1 * HZ,
187 .gc_staletime = 60 * HZ, 187 .gc_staletime = 60 * HZ,
188 .reachable_time = 30 * HZ, 188 .reachable_time = 30 * HZ,
189 .delay_probe_time = 5 * HZ, 189 .delay_probe_time = 5 * HZ,
190 .queue_len = 3, 190 .queue_len = 3,
191 .ucast_probes = 3, 191 .ucast_probes = 3,
192 .mcast_probes = 3, 192 .mcast_probes = 3,
193 .anycast_delay = 1 * HZ, 193 .anycast_delay = 1 * HZ,
194 .proxy_delay = (8 * HZ) / 10, 194 .proxy_delay = (8 * HZ) / 10,
195 .proxy_qlen = 64, 195 .proxy_qlen = 64,
196 .locktime = 1 * HZ, 196 .locktime = 1 * HZ,
197 }, 197 },
198 .gc_interval = 30 * HZ, 198 .gc_interval = 30 * HZ,
199 .gc_thresh1 = 128, 199 .gc_thresh1 = 128,
200 .gc_thresh2 = 512, 200 .gc_thresh2 = 512,
201 .gc_thresh3 = 1024, 201 .gc_thresh3 = 1024,
202}; 202};
203EXPORT_SYMBOL(arp_tbl); 203EXPORT_SYMBOL(arp_tbl);
204 204
@@ -233,7 +233,7 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev)
233 233
234static int arp_constructor(struct neighbour *neigh) 234static int arp_constructor(struct neighbour *neigh)
235{ 235{
236 __be32 addr = *(__be32*)neigh->primary_key; 236 __be32 addr = *(__be32 *)neigh->primary_key;
237 struct net_device *dev = neigh->dev; 237 struct net_device *dev = neigh->dev;
238 struct in_device *in_dev; 238 struct in_device *in_dev;
239 struct neigh_parms *parms; 239 struct neigh_parms *parms;
@@ -296,16 +296,19 @@ static int arp_constructor(struct neighbour *neigh)
296 neigh->ops = &arp_broken_ops; 296 neigh->ops = &arp_broken_ops;
297 neigh->output = neigh->ops->output; 297 neigh->output = neigh->ops->output;
298 return 0; 298 return 0;
299#else
300 break;
299#endif 301#endif
300 ;} 302 }
301#endif 303#endif
302 if (neigh->type == RTN_MULTICAST) { 304 if (neigh->type == RTN_MULTICAST) {
303 neigh->nud_state = NUD_NOARP; 305 neigh->nud_state = NUD_NOARP;
304 arp_mc_map(addr, neigh->ha, dev, 1); 306 arp_mc_map(addr, neigh->ha, dev, 1);
305 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { 307 } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
306 neigh->nud_state = NUD_NOARP; 308 neigh->nud_state = NUD_NOARP;
307 memcpy(neigh->ha, dev->dev_addr, dev->addr_len); 309 memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
308 } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) { 310 } else if (neigh->type == RTN_BROADCAST ||
311 (dev->flags & IFF_POINTOPOINT)) {
309 neigh->nud_state = NUD_NOARP; 312 neigh->nud_state = NUD_NOARP;
310 memcpy(neigh->ha, dev->broadcast, dev->addr_len); 313 memcpy(neigh->ha, dev->broadcast, dev->addr_len);
311 } 314 }
@@ -315,7 +318,7 @@ static int arp_constructor(struct neighbour *neigh)
315 else 318 else
316 neigh->ops = &arp_generic_ops; 319 neigh->ops = &arp_generic_ops;
317 320
318 if (neigh->nud_state&NUD_VALID) 321 if (neigh->nud_state & NUD_VALID)
319 neigh->output = neigh->ops->connected_output; 322 neigh->output = neigh->ops->connected_output;
320 else 323 else
321 neigh->output = neigh->ops->output; 324 neigh->output = neigh->ops->output;
@@ -334,7 +337,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
334 __be32 saddr = 0; 337 __be32 saddr = 0;
335 u8 *dst_ha = NULL; 338 u8 *dst_ha = NULL;
336 struct net_device *dev = neigh->dev; 339 struct net_device *dev = neigh->dev;
337 __be32 target = *(__be32*)neigh->primary_key; 340 __be32 target = *(__be32 *)neigh->primary_key;
338 int probes = atomic_read(&neigh->probes); 341 int probes = atomic_read(&neigh->probes);
339 struct in_device *in_dev; 342 struct in_device *in_dev;
340 343
@@ -347,7 +350,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
347 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 350 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
348 default: 351 default:
349 case 0: /* By default announce any local IP */ 352 case 0: /* By default announce any local IP */
350 if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL) 353 if (skb && inet_addr_type(dev_net(dev),
354 ip_hdr(skb)->saddr) == RTN_LOCAL)
351 saddr = ip_hdr(skb)->saddr; 355 saddr = ip_hdr(skb)->saddr;
352 break; 356 break;
353 case 1: /* Restrict announcements of saddr in same subnet */ 357 case 1: /* Restrict announcements of saddr in same subnet */
@@ -369,16 +373,21 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
369 if (!saddr) 373 if (!saddr)
370 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); 374 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
371 375
372 if ((probes -= neigh->parms->ucast_probes) < 0) { 376 probes -= neigh->parms->ucast_probes;
373 if (!(neigh->nud_state&NUD_VALID)) 377 if (probes < 0) {
374 printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n"); 378 if (!(neigh->nud_state & NUD_VALID))
379 printk(KERN_DEBUG
380 "trying to ucast probe in NUD_INVALID\n");
375 dst_ha = neigh->ha; 381 dst_ha = neigh->ha;
376 read_lock_bh(&neigh->lock); 382 read_lock_bh(&neigh->lock);
377 } else if ((probes -= neigh->parms->app_probes) < 0) { 383 } else {
384 probes -= neigh->parms->app_probes;
385 if (probes < 0) {
378#ifdef CONFIG_ARPD 386#ifdef CONFIG_ARPD
379 neigh_app_ns(neigh); 387 neigh_app_ns(neigh);
380#endif 388#endif
381 return; 389 return;
390 }
382 } 391 }
383 392
384 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, 393 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
@@ -451,7 +460,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
451 * is allowed to use this function, it is scheduled to be removed. --ANK 460 * is allowed to use this function, it is scheduled to be removed. --ANK
452 */ 461 */
453 462
454static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev) 463static int arp_set_predefined(int addr_hint, unsigned char *haddr,
464 __be32 paddr, struct net_device *dev)
455{ 465{
456 switch (addr_hint) { 466 switch (addr_hint) {
457 case RTN_LOCAL: 467 case RTN_LOCAL:
@@ -483,7 +493,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
483 493
484 paddr = skb_rtable(skb)->rt_gateway; 494 paddr = skb_rtable(skb)->rt_gateway;
485 495
486 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) 496 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
497 paddr, dev))
487 return 0; 498 return 0;
488 499
489 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); 500 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -515,13 +526,14 @@ int arp_bind_neighbour(struct dst_entry *dst)
515 return -EINVAL; 526 return -EINVAL;
516 if (n == NULL) { 527 if (n == NULL) {
517 __be32 nexthop = ((struct rtable *)dst)->rt_gateway; 528 __be32 nexthop = ((struct rtable *)dst)->rt_gateway;
518 if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) 529 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
519 nexthop = 0; 530 nexthop = 0;
520 n = __neigh_lookup_errno( 531 n = __neigh_lookup_errno(
521#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 532#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
522 dev->type == ARPHRD_ATM ? clip_tbl_hook : 533 dev->type == ARPHRD_ATM ?
534 clip_tbl_hook :
523#endif 535#endif
524 &arp_tbl, &nexthop, dev); 536 &arp_tbl, &nexthop, dev);
525 if (IS_ERR(n)) 537 if (IS_ERR(n))
526 return PTR_ERR(n); 538 return PTR_ERR(n);
527 dst->neighbour = n; 539 dst->neighbour = n;
@@ -543,8 +555,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
543 555
544 if (!IN_DEV_PROXY_ARP(in_dev)) 556 if (!IN_DEV_PROXY_ARP(in_dev))
545 return 0; 557 return 0;
546 558 imi = IN_DEV_MEDIUM_ID(in_dev);
547 if ((imi = IN_DEV_MEDIUM_ID(in_dev)) == 0) 559 if (imi == 0)
548 return 1; 560 return 1;
549 if (imi == -1) 561 if (imi == -1)
550 return 0; 562 return 0;
@@ -685,7 +697,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
685 arp->ar_pln = 4; 697 arp->ar_pln = 4;
686 arp->ar_op = htons(type); 698 arp->ar_op = htons(type);
687 699
688 arp_ptr=(unsigned char *)(arp+1); 700 arp_ptr = (unsigned char *)(arp + 1);
689 701
690 memcpy(arp_ptr, src_hw, dev->addr_len); 702 memcpy(arp_ptr, src_hw, dev->addr_len);
691 arp_ptr += dev->addr_len; 703 arp_ptr += dev->addr_len;
@@ -735,9 +747,8 @@ void arp_send(int type, int ptype, __be32 dest_ip,
735 747
736 skb = arp_create(type, ptype, dest_ip, dev, src_ip, 748 skb = arp_create(type, ptype, dest_ip, dev, src_ip,
737 dest_hw, src_hw, target_hw); 749 dest_hw, src_hw, target_hw);
738 if (skb == NULL) { 750 if (skb == NULL)
739 return; 751 return;
740 }
741 752
742 arp_xmit(skb); 753 arp_xmit(skb);
743} 754}
@@ -815,7 +826,7 @@ static int arp_process(struct sk_buff *skb)
815/* 826/*
816 * Extract fields 827 * Extract fields
817 */ 828 */
818 arp_ptr= (unsigned char *)(arp+1); 829 arp_ptr = (unsigned char *)(arp + 1);
819 sha = arp_ptr; 830 sha = arp_ptr;
820 arp_ptr += dev->addr_len; 831 arp_ptr += dev->addr_len;
821 memcpy(&sip, arp_ptr, 4); 832 memcpy(&sip, arp_ptr, 4);
@@ -869,16 +880,17 @@ static int arp_process(struct sk_buff *skb)
869 addr_type = rt->rt_type; 880 addr_type = rt->rt_type;
870 881
871 if (addr_type == RTN_LOCAL) { 882 if (addr_type == RTN_LOCAL) {
872 int dont_send = 0; 883 int dont_send;
873 884
874 if (!dont_send) 885 dont_send = arp_ignore(in_dev, sip, tip);
875 dont_send |= arp_ignore(in_dev,sip,tip);
876 if (!dont_send && IN_DEV_ARPFILTER(in_dev)) 886 if (!dont_send && IN_DEV_ARPFILTER(in_dev))
877 dont_send |= arp_filter(sip,tip,dev); 887 dont_send |= arp_filter(sip, tip, dev);
878 if (!dont_send) { 888 if (!dont_send) {
879 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 889 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
880 if (n) { 890 if (n) {
881 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 891 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
892 dev, tip, sha, dev->dev_addr,
893 sha);
882 neigh_release(n); 894 neigh_release(n);
883 } 895 }
884 } 896 }
@@ -887,8 +899,7 @@ static int arp_process(struct sk_buff *skb)
887 if (addr_type == RTN_UNICAST && 899 if (addr_type == RTN_UNICAST &&
888 (arp_fwd_proxy(in_dev, dev, rt) || 900 (arp_fwd_proxy(in_dev, dev, rt) ||
889 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || 901 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
890 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) 902 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
891 {
892 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 903 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
893 if (n) 904 if (n)
894 neigh_release(n); 905 neigh_release(n);
@@ -896,9 +907,12 @@ static int arp_process(struct sk_buff *skb)
896 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || 907 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
897 skb->pkt_type == PACKET_HOST || 908 skb->pkt_type == PACKET_HOST ||
898 in_dev->arp_parms->proxy_delay == 0) { 909 in_dev->arp_parms->proxy_delay == 0) {
899 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 910 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
911 dev, tip, sha, dev->dev_addr,
912 sha);
900 } else { 913 } else {
901 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); 914 pneigh_enqueue(&arp_tbl,
915 in_dev->arp_parms, skb);
902 return 0; 916 return 0;
903 } 917 }
904 goto out; 918 goto out;
@@ -939,7 +953,8 @@ static int arp_process(struct sk_buff *skb)
939 if (arp->ar_op != htons(ARPOP_REPLY) || 953 if (arp->ar_op != htons(ARPOP_REPLY) ||
940 skb->pkt_type != PACKET_HOST) 954 skb->pkt_type != PACKET_HOST)
941 state = NUD_STALE; 955 state = NUD_STALE;
942 neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0); 956 neigh_update(n, sha, state,
957 override ? NEIGH_UPDATE_F_OVERRIDE : 0);
943 neigh_release(n); 958 neigh_release(n);
944 } 959 }
945 960
@@ -975,7 +990,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
975 arp->ar_pln != 4) 990 arp->ar_pln != 4)
976 goto freeskb; 991 goto freeskb;
977 992
978 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 993 skb = skb_share_check(skb, GFP_ATOMIC);
994 if (skb == NULL)
979 goto out_of_mem; 995 goto out_of_mem;
980 996
981 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); 997 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
@@ -1019,7 +1035,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1019 return -EINVAL; 1035 return -EINVAL;
1020 if (!dev && (r->arp_flags & ATF_COM)) { 1036 if (!dev && (r->arp_flags & ATF_COM)) {
1021 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, 1037 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
1022 r->arp_ha.sa_data); 1038 r->arp_ha.sa_data);
1023 if (!dev) 1039 if (!dev)
1024 return -ENODEV; 1040 return -ENODEV;
1025 } 1041 }
@@ -1033,7 +1049,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1033} 1049}
1034 1050
1035static int arp_req_set(struct net *net, struct arpreq *r, 1051static int arp_req_set(struct net *net, struct arpreq *r,
1036 struct net_device * dev) 1052 struct net_device *dev)
1037{ 1053{
1038 __be32 ip; 1054 __be32 ip;
1039 struct neighbour *neigh; 1055 struct neighbour *neigh;
@@ -1046,10 +1062,11 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1046 if (r->arp_flags & ATF_PERM) 1062 if (r->arp_flags & ATF_PERM)
1047 r->arp_flags |= ATF_COM; 1063 r->arp_flags |= ATF_COM;
1048 if (dev == NULL) { 1064 if (dev == NULL) {
1049 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1065 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1050 .tos = RTO_ONLINK } } }; 1066 .tos = RTO_ONLINK } };
1051 struct rtable * rt; 1067 struct rtable *rt;
1052 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1068 err = ip_route_output_key(net, &rt, &fl);
1069 if (err != 0)
1053 return err; 1070 return err;
1054 dev = rt->dst.dev; 1071 dev = rt->dst.dev;
1055 ip_rt_put(rt); 1072 ip_rt_put(rt);
@@ -1083,9 +1100,9 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1083 unsigned state = NUD_STALE; 1100 unsigned state = NUD_STALE;
1084 if (r->arp_flags & ATF_PERM) 1101 if (r->arp_flags & ATF_PERM)
1085 state = NUD_PERMANENT; 1102 state = NUD_PERMANENT;
1086 err = neigh_update(neigh, (r->arp_flags&ATF_COM) ? 1103 err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
1087 r->arp_ha.sa_data : NULL, state, 1104 r->arp_ha.sa_data : NULL, state,
1088 NEIGH_UPDATE_F_OVERRIDE| 1105 NEIGH_UPDATE_F_OVERRIDE |
1089 NEIGH_UPDATE_F_ADMIN); 1106 NEIGH_UPDATE_F_ADMIN);
1090 neigh_release(neigh); 1107 neigh_release(neigh);
1091 } 1108 }
@@ -1094,12 +1111,12 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1094 1111
1095static unsigned arp_state_to_flags(struct neighbour *neigh) 1112static unsigned arp_state_to_flags(struct neighbour *neigh)
1096{ 1113{
1097 unsigned flags = 0;
1098 if (neigh->nud_state&NUD_PERMANENT) 1114 if (neigh->nud_state&NUD_PERMANENT)
1099 flags = ATF_PERM|ATF_COM; 1115 return ATF_PERM | ATF_COM;
1100 else if (neigh->nud_state&NUD_VALID) 1116 else if (neigh->nud_state&NUD_VALID)
1101 flags = ATF_COM; 1117 return ATF_COM;
1102 return flags; 1118 else
1119 return 0;
1103} 1120}
1104 1121
1105/* 1122/*
@@ -1142,7 +1159,7 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
1142} 1159}
1143 1160
1144static int arp_req_delete(struct net *net, struct arpreq *r, 1161static int arp_req_delete(struct net *net, struct arpreq *r,
1145 struct net_device * dev) 1162 struct net_device *dev)
1146{ 1163{
1147 int err; 1164 int err;
1148 __be32 ip; 1165 __be32 ip;
@@ -1153,10 +1170,11 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1153 1170
1154 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; 1171 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1155 if (dev == NULL) { 1172 if (dev == NULL) {
1156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1173 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1157 .tos = RTO_ONLINK } } }; 1174 .tos = RTO_ONLINK } };
1158 struct rtable * rt; 1175 struct rtable *rt;
1159 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1176 err = ip_route_output_key(net, &rt, &fl);
1177 if (err != 0)
1160 return err; 1178 return err;
1161 dev = rt->dst.dev; 1179 dev = rt->dst.dev;
1162 ip_rt_put(rt); 1180 ip_rt_put(rt);
@@ -1166,7 +1184,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1166 err = -ENXIO; 1184 err = -ENXIO;
1167 neigh = neigh_lookup(&arp_tbl, &ip, dev); 1185 neigh = neigh_lookup(&arp_tbl, &ip, dev);
1168 if (neigh) { 1186 if (neigh) {
1169 if (neigh->nud_state&~NUD_NOARP) 1187 if (neigh->nud_state & ~NUD_NOARP)
1170 err = neigh_update(neigh, NULL, NUD_FAILED, 1188 err = neigh_update(neigh, NULL, NUD_FAILED,
1171 NEIGH_UPDATE_F_OVERRIDE| 1189 NEIGH_UPDATE_F_OVERRIDE|
1172 NEIGH_UPDATE_F_ADMIN); 1190 NEIGH_UPDATE_F_ADMIN);
@@ -1186,24 +1204,24 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1186 struct net_device *dev = NULL; 1204 struct net_device *dev = NULL;
1187 1205
1188 switch (cmd) { 1206 switch (cmd) {
1189 case SIOCDARP: 1207 case SIOCDARP:
1190 case SIOCSARP: 1208 case SIOCSARP:
1191 if (!capable(CAP_NET_ADMIN)) 1209 if (!capable(CAP_NET_ADMIN))
1192 return -EPERM; 1210 return -EPERM;
1193 case SIOCGARP: 1211 case SIOCGARP:
1194 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1212 err = copy_from_user(&r, arg, sizeof(struct arpreq));
1195 if (err) 1213 if (err)
1196 return -EFAULT; 1214 return -EFAULT;
1197 break; 1215 break;
1198 default: 1216 default:
1199 return -EINVAL; 1217 return -EINVAL;
1200 } 1218 }
1201 1219
1202 if (r.arp_pa.sa_family != AF_INET) 1220 if (r.arp_pa.sa_family != AF_INET)
1203 return -EPFNOSUPPORT; 1221 return -EPFNOSUPPORT;
1204 1222
1205 if (!(r.arp_flags & ATF_PUBL) && 1223 if (!(r.arp_flags & ATF_PUBL) &&
1206 (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB))) 1224 (r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
1207 return -EINVAL; 1225 return -EINVAL;
1208 if (!(r.arp_flags & ATF_NETMASK)) 1226 if (!(r.arp_flags & ATF_NETMASK))
1209 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = 1227 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
@@ -1211,7 +1229,8 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1211 rtnl_lock(); 1229 rtnl_lock();
1212 if (r.arp_dev[0]) { 1230 if (r.arp_dev[0]) {
1213 err = -ENODEV; 1231 err = -ENODEV;
1214 if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL) 1232 dev = __dev_get_by_name(net, r.arp_dev);
1233 if (dev == NULL)
1215 goto out; 1234 goto out;
1216 1235
1217 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ 1236 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
@@ -1243,7 +1262,8 @@ out:
1243 return err; 1262 return err;
1244} 1263}
1245 1264
1246static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 1265static int arp_netdev_event(struct notifier_block *this, unsigned long event,
1266 void *ptr)
1247{ 1267{
1248 struct net_device *dev = ptr; 1268 struct net_device *dev = ptr;
1249 1269
@@ -1311,12 +1331,13 @@ static char *ax2asc2(ax25_address *a, char *buf)
1311 for (n = 0, s = buf; n < 6; n++) { 1331 for (n = 0, s = buf; n < 6; n++) {
1312 c = (a->ax25_call[n] >> 1) & 0x7F; 1332 c = (a->ax25_call[n] >> 1) & 0x7F;
1313 1333
1314 if (c != ' ') *s++ = c; 1334 if (c != ' ')
1335 *s++ = c;
1315 } 1336 }
1316 1337
1317 *s++ = '-'; 1338 *s++ = '-';
1318 1339 n = (a->ax25_call[6] >> 1) & 0x0F;
1319 if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) { 1340 if (n > 9) {
1320 *s++ = '1'; 1341 *s++ = '1';
1321 n -= 10; 1342 n -= 10;
1322 } 1343 }
@@ -1325,10 +1346,9 @@ static char *ax2asc2(ax25_address *a, char *buf)
1325 *s++ = '\0'; 1346 *s++ = '\0';
1326 1347
1327 if (*buf == '\0' || *buf == '-') 1348 if (*buf == '\0' || *buf == '-')
1328 return "*"; 1349 return "*";
1329 1350
1330 return buf; 1351 return buf;
1331
1332} 1352}
1333#endif /* CONFIG_AX25 */ 1353#endif /* CONFIG_AX25 */
1334 1354
@@ -1408,10 +1428,10 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
1408/* ------------------------------------------------------------------------ */ 1428/* ------------------------------------------------------------------------ */
1409 1429
1410static const struct seq_operations arp_seq_ops = { 1430static const struct seq_operations arp_seq_ops = {
1411 .start = arp_seq_start, 1431 .start = arp_seq_start,
1412 .next = neigh_seq_next, 1432 .next = neigh_seq_next,
1413 .stop = neigh_seq_stop, 1433 .stop = neigh_seq_stop,
1414 .show = arp_seq_show, 1434 .show = arp_seq_show,
1415}; 1435};
1416 1436
1417static int arp_seq_open(struct inode *inode, struct file *file) 1437static int arp_seq_open(struct inode *inode, struct file *file)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
new file mode 100644
index 000000000000..b546736da2e1
--- /dev/null
+++ b/net/ipv4/gre.c
@@ -0,0 +1,151 @@
1/*
2 * GRE over IPv4 demultiplexer driver
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/kmod.h>
16#include <linux/skbuff.h>
17#include <linux/in.h>
18#include <linux/netdevice.h>
19#include <linux/version.h>
20#include <linux/spinlock.h>
21#include <net/protocol.h>
22#include <net/gre.h>
23
24
/* Per-version table of registered GRE sub-protocol handlers.
 * Readers dereference entries under RCU (see gre_rcv()/gre_err());
 * writers are serialized by gre_proto_lock. */
const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
static DEFINE_SPINLOCK(gre_proto_lock);
27
28int gre_add_protocol(const struct gre_protocol *proto, u8 version)
29{
30 if (version >= GREPROTO_MAX)
31 goto err_out;
32
33 spin_lock(&gre_proto_lock);
34 if (gre_proto[version])
35 goto err_out_unlock;
36
37 rcu_assign_pointer(gre_proto[version], proto);
38 spin_unlock(&gre_proto_lock);
39 return 0;
40
41err_out_unlock:
42 spin_unlock(&gre_proto_lock);
43err_out:
44 return -1;
45}
46EXPORT_SYMBOL_GPL(gre_add_protocol);
47
48int gre_del_protocol(const struct gre_protocol *proto, u8 version)
49{
50 if (version >= GREPROTO_MAX)
51 goto err_out;
52
53 spin_lock(&gre_proto_lock);
54 if (gre_proto[version] != proto)
55 goto err_out_unlock;
56 rcu_assign_pointer(gre_proto[version], NULL);
57 spin_unlock(&gre_proto_lock);
58 synchronize_rcu();
59 return 0;
60
61err_out_unlock:
62 spin_unlock(&gre_proto_lock);
63err_out:
64 return -1;
65}
66EXPORT_SYMBOL_GPL(gre_del_protocol);
67
68static int gre_rcv(struct sk_buff *skb)
69{
70 const struct gre_protocol *proto;
71 u8 ver;
72 int ret;
73
74 if (!pskb_may_pull(skb, 12))
75 goto drop;
76
77 ver = skb->data[1]&0x7f;
78 if (ver >= GREPROTO_MAX)
79 goto drop;
80
81 rcu_read_lock();
82 proto = rcu_dereference(gre_proto[ver]);
83 if (!proto || !proto->handler)
84 goto drop_unlock;
85 ret = proto->handler(skb);
86 rcu_read_unlock();
87 return ret;
88
89drop_unlock:
90 rcu_read_unlock();
91drop:
92 kfree_skb(skb);
93 return NET_RX_DROP;
94}
95
96static void gre_err(struct sk_buff *skb, u32 info)
97{
98 const struct gre_protocol *proto;
99 u8 ver;
100
101 if (!pskb_may_pull(skb, 12))
102 goto drop;
103
104 ver = skb->data[1]&0x7f;
105 if (ver >= GREPROTO_MAX)
106 goto drop;
107
108 rcu_read_lock();
109 proto = rcu_dereference(gre_proto[ver]);
110 if (!proto || !proto->err_handler)
111 goto drop_unlock;
112 proto->err_handler(skb, info);
113 rcu_read_unlock();
114 return;
115
116drop_unlock:
117 rcu_read_unlock();
118drop:
119 kfree_skb(skb);
120}
121
/* Top-level IPv4 hook: all IPPROTO_GRE traffic and its ICMP errors
 * funnel through gre_rcv()/gre_err() before per-version dispatch. */
static const struct net_protocol net_gre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
	.netns_ok    = 1,
};
127
128static int __init gre_init(void)
129{
130 pr_info("GRE over IPv4 demultiplexor driver");
131
132 if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
133 pr_err("gre: can't add protocol\n");
134 return -EAGAIN;
135 }
136
137 return 0;
138}
139
/* Module unload: unhook the GRE demultiplexer from the IPv4 protocol
 * table; return value intentionally ignored on teardown. */
static void __exit gre_exit(void)
{
	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
}
144
145module_init(gre_init);
146module_exit(gre_exit);
147
148MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
149MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
150MODULE_LICENSE("GPL");
151
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a0d847c7cba5..96bc7f9475a3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
379 inet->tos = ip_hdr(skb)->tos; 379 inet->tos = ip_hdr(skb)->tos;
380 daddr = ipc.addr = rt->rt_src; 380 daddr = ipc.addr = rt->rt_src;
381 ipc.opt = NULL; 381 ipc.opt = NULL;
382 ipc.shtx.flags = 0; 382 ipc.tx_flags = 0;
383 if (icmp_param->replyopts.optlen) { 383 if (icmp_param->replyopts.optlen) {
384 ipc.opt = &icmp_param->replyopts; 384 ipc.opt = &icmp_param->replyopts;
385 if (ipc.opt->srr) 385 if (ipc.opt->srr)
@@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
538 inet_sk(sk)->tos = tos; 538 inet_sk(sk)->tos = tos;
539 ipc.addr = iph->saddr; 539 ipc.addr = iph->saddr;
540 ipc.opt = &icmp_param.replyopts; 540 ipc.opt = &icmp_param.replyopts;
541 ipc.shtx.flags = 0; 541 ipc.tx_flags = 0;
542 542
543 { 543 {
544 struct flowi fl = { 544 struct flowi fl = {
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b7c41654dde5..f4dc879e258e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
542 /* If the first fragment is fragmented itself, we split 542 /* If the first fragment is fragmented itself, we split
543 * it to two chunks: the first with data and paged part 543 * it to two chunks: the first with data and paged part
544 * and the second, holding only fragments. */ 544 * and the second, holding only fragments. */
545 if (skb_has_frags(head)) { 545 if (skb_has_frag_list(head)) {
546 struct sk_buff *clone; 546 struct sk_buff *clone;
547 int i, plen = 0; 547 int i, plen = 0;
548 548
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 945b20a5ad50..85176895495a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
45#include <net/netns/generic.h> 45#include <net/netns/generic.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/gre.h>
47 48
48#ifdef CONFIG_IPV6 49#ifdef CONFIG_IPV6
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -1278,10 +1279,9 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1278} 1279}
1279 1280
1280 1281
1281static const struct net_protocol ipgre_protocol = { 1282static const struct gre_protocol ipgre_protocol = {
1282 .handler = ipgre_rcv, 1283 .handler = ipgre_rcv,
1283 .err_handler = ipgre_err, 1284 .err_handler = ipgre_err,
1284 .netns_ok = 1,
1285}; 1285};
1286 1286
1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) 1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
@@ -1663,7 +1663,7 @@ static int __init ipgre_init(void)
1663 if (err < 0) 1663 if (err < 0)
1664 return err; 1664 return err;
1665 1665
1666 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); 1666 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1667 if (err < 0) { 1667 if (err < 0) {
1668 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1668 printk(KERN_INFO "ipgre init: can't add protocol\n");
1669 goto add_proto_failed; 1669 goto add_proto_failed;
@@ -1683,7 +1683,7 @@ out:
1683tap_ops_failed: 1683tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops); 1684 rtnl_link_unregister(&ipgre_link_ops);
1685rtnl_link_failed: 1685rtnl_link_failed:
1686 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1686 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1687add_proto_failed: 1687add_proto_failed:
1688 unregister_pernet_device(&ipgre_net_ops); 1688 unregister_pernet_device(&ipgre_net_ops);
1689 goto out; 1689 goto out;
@@ -1693,7 +1693,7 @@ static void __exit ipgre_fini(void)
1693{ 1693{
1694 rtnl_link_unregister(&ipgre_tap_ops); 1694 rtnl_link_unregister(&ipgre_tap_ops);
1695 rtnl_link_unregister(&ipgre_link_ops); 1695 rtnl_link_unregister(&ipgre_link_ops);
1696 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1696 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1697 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1697 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1698 unregister_pernet_device(&ipgre_net_ops); 1698 unregister_pernet_device(&ipgre_net_ops);
1699} 1699}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 04b69896df5f..e42762023c27 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
487 * LATER: this step can be merged to real generation of fragments, 487 * LATER: this step can be merged to real generation of fragments,
488 * we can switch to copy when see the first bad fragment. 488 * we can switch to copy when see the first bad fragment.
489 */ 489 */
490 if (skb_has_frags(skb)) { 490 if (skb_has_frag_list(skb)) {
491 struct sk_buff *frag; 491 struct sk_buff *frag;
492 int first_len = skb_pagelen(skb); 492 int first_len = skb_pagelen(skb);
493 int truesizes = 0; 493 int truesizes = 0;
@@ -837,10 +837,9 @@ int ip_append_data(struct sock *sk,
837 inet->cork.length = 0; 837 inet->cork.length = 0;
838 sk->sk_sndmsg_page = NULL; 838 sk->sk_sndmsg_page = NULL;
839 sk->sk_sndmsg_off = 0; 839 sk->sk_sndmsg_off = 0;
840 if ((exthdrlen = rt->dst.header_len) != 0) { 840 exthdrlen = rt->dst.header_len;
841 length += exthdrlen; 841 length += exthdrlen;
842 transhdrlen += exthdrlen; 842 transhdrlen += exthdrlen;
843 }
844 } else { 843 } else {
845 rt = (struct rtable *)inet->cork.dst; 844 rt = (struct rtable *)inet->cork.dst;
846 if (inet->cork.flags & IPCORK_OPT) 845 if (inet->cork.flags & IPCORK_OPT)
@@ -953,7 +952,7 @@ alloc_new_skb:
953 else 952 else
954 /* only the initial fragment is 953 /* only the initial fragment is
955 time stamped */ 954 time stamped */
956 ipc->shtx.flags = 0; 955 ipc->tx_flags = 0;
957 } 956 }
958 if (skb == NULL) 957 if (skb == NULL)
959 goto error; 958 goto error;
@@ -964,7 +963,7 @@ alloc_new_skb:
964 skb->ip_summed = csummode; 963 skb->ip_summed = csummode;
965 skb->csum = 0; 964 skb->csum = 0;
966 skb_reserve(skb, hh_len); 965 skb_reserve(skb, hh_len);
967 *skb_tx(skb) = ipc->shtx; 966 skb_shinfo(skb)->tx_flags = ipc->tx_flags;
968 967
969 /* 968 /*
970 * Find where to start putting bytes. 969 * Find where to start putting bytes.
@@ -1384,7 +1383,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1384 1383
1385 daddr = ipc.addr = rt->rt_src; 1384 daddr = ipc.addr = rt->rt_src;
1386 ipc.opt = NULL; 1385 ipc.opt = NULL;
1387 ipc.shtx.flags = 0; 1386 ipc.tx_flags = 0;
1388 1387
1389 if (replyopts.opt.optlen) { 1388 if (replyopts.opt.optlen) {
1390 ipc.opt = &replyopts.opt; 1389 ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ec036731a70b..3c6f8f3968a6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -744,7 +744,7 @@ static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
744 ipn->tunnels_wc[0] = tunnel; 744 ipn->tunnels_wc[0] = tunnel;
745} 745}
746 746
747static struct xfrm_tunnel ipip_handler = { 747static struct xfrm_tunnel ipip_handler __read_mostly = {
748 .handler = ipip_rcv, 748 .handler = ipip_rcv,
749 .err_handler = ipip_err, 749 .err_handler = ipip_err,
750 .priority = 1, 750 .priority = 1,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 3a43cf36db87..1e26a4897655 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,6 +29,7 @@
29#include <net/netfilter/nf_conntrack.h> 29#include <net/netfilter/nf_conntrack.h>
30#include <net/net_namespace.h> 30#include <net/net_namespace.h>
31#include <net/checksum.h> 31#include <net/checksum.h>
32#include <net/ip.h>
32 33
33#define CLUSTERIP_VERSION "0.8" 34#define CLUSTERIP_VERSION "0.8"
34 35
@@ -231,24 +232,22 @@ clusterip_hashfn(const struct sk_buff *skb,
231{ 232{
232 const struct iphdr *iph = ip_hdr(skb); 233 const struct iphdr *iph = ip_hdr(skb);
233 unsigned long hashval; 234 unsigned long hashval;
234 u_int16_t sport, dport; 235 u_int16_t sport = 0, dport = 0;
235 const u_int16_t *ports; 236 int poff;
236 237
237 switch (iph->protocol) { 238 poff = proto_ports_offset(iph->protocol);
238 case IPPROTO_TCP: 239 if (poff >= 0) {
239 case IPPROTO_UDP: 240 const u_int16_t *ports;
240 case IPPROTO_UDPLITE: 241 u16 _ports[2];
241 case IPPROTO_SCTP: 242
242 case IPPROTO_DCCP: 243 ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
243 case IPPROTO_ICMP: 244 if (ports) {
244 ports = (const void *)iph+iph->ihl*4; 245 sport = ports[0];
245 sport = ports[0]; 246 dport = ports[1];
246 dport = ports[1]; 247 }
247 break; 248 } else {
248 default:
249 if (net_ratelimit()) 249 if (net_ratelimit())
250 pr_info("unknown protocol %u\n", iph->protocol); 250 pr_info("unknown protocol %u\n", iph->protocol);
251 sport = dport = 0;
252 } 251 }
253 252
254 switch (config->hash_mode) { 253 switch (config->hash_mode) {
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index f2d297351405..65699c24411c 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,8 +28,7 @@
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <net/protocol.h> 29#include <net/protocol.h>
30 30
31const struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; 31const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly;
32static DEFINE_SPINLOCK(inet_proto_lock);
33 32
34/* 33/*
35 * Add a protocol handler to the hash tables 34 * Add a protocol handler to the hash tables
@@ -37,20 +36,9 @@ static DEFINE_SPINLOCK(inet_proto_lock);
37 36
38int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) 37int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
39{ 38{
40 int hash, ret; 39 int hash = protocol & (MAX_INET_PROTOS - 1);
41 40
42 hash = protocol & (MAX_INET_PROTOS - 1); 41 return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1;
43
44 spin_lock_bh(&inet_proto_lock);
45 if (inet_protos[hash]) {
46 ret = -1;
47 } else {
48 inet_protos[hash] = prot;
49 ret = 0;
50 }
51 spin_unlock_bh(&inet_proto_lock);
52
53 return ret;
54} 42}
55EXPORT_SYMBOL(inet_add_protocol); 43EXPORT_SYMBOL(inet_add_protocol);
56 44
@@ -60,18 +48,9 @@ EXPORT_SYMBOL(inet_add_protocol);
60 48
61int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) 49int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
62{ 50{
63 int hash, ret; 51 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
64
65 hash = protocol & (MAX_INET_PROTOS - 1);
66 52
67 spin_lock_bh(&inet_proto_lock); 53 ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1;
68 if (inet_protos[hash] == prot) {
69 inet_protos[hash] = NULL;
70 ret = 0;
71 } else {
72 ret = -1;
73 }
74 spin_unlock_bh(&inet_proto_lock);
75 54
76 synchronize_net(); 55 synchronize_net();
77 56
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 009a7b2aa1ef..1f85ef289895 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
505 505
506 ipc.addr = inet->inet_saddr; 506 ipc.addr = inet->inet_saddr;
507 ipc.opt = NULL; 507 ipc.opt = NULL;
508 ipc.shtx.flags = 0; 508 ipc.tx_flags = 0;
509 ipc.oif = sk->sk_bound_dev_if; 509 ipc.oif = sk->sk_bound_dev_if;
510 510
511 if (msg->msg_controllen) { 511 if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6298f75d5e93..e24d48dd99d3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1268,18 +1268,11 @@ skip_hashing:
1268 1268
1269void rt_bind_peer(struct rtable *rt, int create) 1269void rt_bind_peer(struct rtable *rt, int create)
1270{ 1270{
1271 static DEFINE_SPINLOCK(rt_peer_lock);
1272 struct inet_peer *peer; 1271 struct inet_peer *peer;
1273 1272
1274 peer = inet_getpeer(rt->rt_dst, create); 1273 peer = inet_getpeer(rt->rt_dst, create);
1275 1274
1276 spin_lock_bh(&rt_peer_lock); 1275 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1277 if (rt->peer == NULL) {
1278 rt->peer = peer;
1279 peer = NULL;
1280 }
1281 spin_unlock_bh(&rt_peer_lock);
1282 if (peer)
1283 inet_putpeer(peer); 1276 inet_putpeer(peer);
1284} 1277}
1285 1278
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3fb1428e526e..3e8a4dbc721b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2389,7 +2389,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2389 err = tp->af_specific->md5_parse(sk, optval, optlen); 2389 err = tp->af_specific->md5_parse(sk, optval, optlen);
2390 break; 2390 break;
2391#endif 2391#endif
2392 2392 case TCP_USER_TIMEOUT:
2393 /* Cap the max timeout in ms TCP will retry/retrans
2394 * before giving up and aborting (ETIMEDOUT) a connection.
2395 */
2396 icsk->icsk_user_timeout = msecs_to_jiffies(val);
2397 break;
2393 default: 2398 default:
2394 err = -ENOPROTOOPT; 2399 err = -ENOPROTOOPT;
2395 break; 2400 break;
@@ -2608,6 +2613,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2608 case TCP_THIN_DUPACK: 2613 case TCP_THIN_DUPACK:
2609 val = tp->thin_dupack; 2614 val = tp->thin_dupack;
2610 break; 2615 break;
2616
2617 case TCP_USER_TIMEOUT:
2618 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2619 break;
2611 default: 2620 default:
2612 return -ENOPROTOOPT; 2621 return -ENOPROTOOPT;
2613 } 2622 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e663b78a2ef6..1bc87a05c734 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk)
805 } 805 }
806} 806}
807 807
808/* Numbers are taken from RFC3390.
809 *
810 * John Heffner states:
811 *
812 * The RFC specifies a window of no more than 4380 bytes
813 * unless 2*MSS > 4380. Reading the pseudocode in the RFC
814 * is a bit misleading because they use a clamp at 4380 bytes
815 * rather than use a multiplier in the relevant range.
816 */
817__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) 808__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
818{ 809{
819 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); 810 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
820 811
821 if (!cwnd) { 812 if (!cwnd)
822 if (tp->mss_cache > 1460) 813 cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
823 cwnd = 2;
824 else
825 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
826 }
827 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 814 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
828} 815}
829 816
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 020766292bb0..a0232f3a358b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2571,7 +2571,6 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2571 2571
2572 return tcp_gro_receive(head, skb); 2572 return tcp_gro_receive(head, skb);
2573} 2573}
2574EXPORT_SYMBOL(tcp4_gro_receive);
2575 2574
2576int tcp4_gro_complete(struct sk_buff *skb) 2575int tcp4_gro_complete(struct sk_buff *skb)
2577{ 2576{
@@ -2584,7 +2583,6 @@ int tcp4_gro_complete(struct sk_buff *skb)
2584 2583
2585 return tcp_gro_complete(skb); 2584 return tcp_gro_complete(skb);
2586} 2585}
2587EXPORT_SYMBOL(tcp4_gro_complete);
2588 2586
2589struct proto tcp_prot = { 2587struct proto tcp_prot = {
2590 .name = "TCP", 2588 .name = "TCP",
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index de3bd8458588..ea09d2fd50c7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -224,16 +224,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
224 } 224 }
225 } 225 }
226 226
227 /* Set initial window to value enough for senders, 227 /* Set initial window to value enough for senders, following RFC5681. */
228 * following RFC2414. Senders, not following this RFC,
229 * will be satisfied with 2.
230 */
231 if (mss > (1 << *rcv_wscale)) { 228 if (mss > (1 << *rcv_wscale)) {
232 int init_cwnd = 4; 229 int init_cwnd = rfc3390_bytes_to_packets(mss);
233 if (mss > 1460 * 3) 230
234 init_cwnd = 2;
235 else if (mss > 1460)
236 init_cwnd = 3;
237 /* when initializing use the value from init_rcv_wnd 231 /* when initializing use the value from init_rcv_wnd
238 * rather than the default from above 232 * rather than the default from above
239 */ 233 */
@@ -2429,6 +2423,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2429 __u8 rcv_wscale; 2423 __u8 rcv_wscale;
2430 /* Set this up on the first call only */ 2424 /* Set this up on the first call only */
2431 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); 2425 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2426
2427 /* limit the window selection if the user enforce a smaller rx buffer */
2428 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2429 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
2430 req->window_clamp = tcp_full_space(sk);
2431
2432 /* tcp_full_space because it is guaranteed to be the first packet */ 2432 /* tcp_full_space because it is guaranteed to be the first packet */
2433 tcp_select_initial_window(tcp_full_space(sk), 2433 tcp_select_initial_window(tcp_full_space(sk),
2434 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 2434 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -2555,6 +2555,11 @@ static void tcp_connect_init(struct sock *sk)
2555 2555
2556 tcp_initialize_rcv_mss(sk); 2556 tcp_initialize_rcv_mss(sk);
2557 2557
2558 /* limit the window selection if the user enforce a smaller rx buffer */
2559 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2560 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2561 tp->window_clamp = tcp_full_space(sk);
2562
2558 tcp_select_initial_window(tcp_full_space(sk), 2563 tcp_select_initial_window(tcp_full_space(sk),
2559 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), 2564 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2560 &tp->rcv_wnd, 2565 &tp->rcv_wnd,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c35b469e851c..baea4a129022 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -138,10 +138,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
138 * retransmissions with an initial RTO of TCP_RTO_MIN. 138 * retransmissions with an initial RTO of TCP_RTO_MIN.
139 */ 139 */
140static bool retransmits_timed_out(struct sock *sk, 140static bool retransmits_timed_out(struct sock *sk,
141 unsigned int boundary) 141 unsigned int boundary,
142 unsigned int timeout)
142{ 143{
143 unsigned int timeout, linear_backoff_thresh; 144 unsigned int linear_backoff_thresh, start_ts;
144 unsigned int start_ts;
145 145
146 if (!inet_csk(sk)->icsk_retransmits) 146 if (!inet_csk(sk)->icsk_retransmits)
147 return false; 147 return false;
@@ -151,14 +151,15 @@ static bool retransmits_timed_out(struct sock *sk,
151 else 151 else
152 start_ts = tcp_sk(sk)->retrans_stamp; 152 start_ts = tcp_sk(sk)->retrans_stamp;
153 153
154 linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); 154 if (likely(timeout == 0)) {
155 155 linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
156 if (boundary <= linear_backoff_thresh)
157 timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
158 else
159 timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
160 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
161 156
157 if (boundary <= linear_backoff_thresh)
158 timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
159 else
160 timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
161 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
162 }
162 return (tcp_time_stamp - start_ts) >= timeout; 163 return (tcp_time_stamp - start_ts) >= timeout;
163} 164}
164 165
@@ -174,7 +175,7 @@ static int tcp_write_timeout(struct sock *sk)
174 dst_negative_advice(sk); 175 dst_negative_advice(sk);
175 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
176 } else { 177 } else {
177 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 178 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) {
178 /* Black hole detection */ 179 /* Black hole detection */
179 tcp_mtu_probing(icsk, sk); 180 tcp_mtu_probing(icsk, sk);
180 181
@@ -187,14 +188,16 @@ static int tcp_write_timeout(struct sock *sk)
187 188
188 retry_until = tcp_orphan_retries(sk, alive); 189 retry_until = tcp_orphan_retries(sk, alive);
189 do_reset = alive || 190 do_reset = alive ||
190 !retransmits_timed_out(sk, retry_until); 191 !retransmits_timed_out(sk, retry_until, 0);
191 192
192 if (tcp_out_of_resources(sk, do_reset)) 193 if (tcp_out_of_resources(sk, do_reset))
193 return 1; 194 return 1;
194 } 195 }
195 } 196 }
196 197
197 if (retransmits_timed_out(sk, retry_until)) { 198 if (retransmits_timed_out(sk, retry_until,
199 (1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) ? 0 :
200 icsk->icsk_user_timeout)) {
198 /* Has it gone just too far? */ 201 /* Has it gone just too far? */
199 tcp_write_err(sk); 202 tcp_write_err(sk);
200 return 1; 203 return 1;
@@ -436,7 +439,7 @@ out_reset_timer:
436 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 439 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
437 } 440 }
438 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 441 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
439 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) 442 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0))
440 __sk_dst_reset(sk); 443 __sk_dst_reset(sk);
441 444
442out:; 445out:;
@@ -556,7 +559,14 @@ static void tcp_keepalive_timer (unsigned long data)
556 elapsed = keepalive_time_elapsed(tp); 559 elapsed = keepalive_time_elapsed(tp);
557 560
558 if (elapsed >= keepalive_time_when(tp)) { 561 if (elapsed >= keepalive_time_when(tp)) {
559 if (icsk->icsk_probes_out >= keepalive_probes(tp)) { 562 /* If the TCP_USER_TIMEOUT option is enabled, use that
563 * to determine when to timeout instead.
564 */
565 if ((icsk->icsk_user_timeout != 0 &&
566 elapsed >= icsk->icsk_user_timeout &&
567 icsk->icsk_probes_out > 0) ||
568 (icsk->icsk_user_timeout == 0 &&
569 icsk->icsk_probes_out >= keepalive_probes(tp))) {
560 tcp_send_active_reset(sk, GFP_ATOMIC); 570 tcp_send_active_reset(sk, GFP_ATOMIC);
561 tcp_write_err(sk); 571 tcp_write_err(sk);
562 goto out; 572 goto out;
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 59186ca7808a..9a17bd2a0a37 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,8 +14,8 @@
14#include <net/protocol.h> 14#include <net/protocol.h>
15#include <net/xfrm.h> 15#include <net/xfrm.h>
16 16
17static struct xfrm_tunnel *tunnel4_handlers; 17static struct xfrm_tunnel *tunnel4_handlers __read_mostly;
18static struct xfrm_tunnel *tunnel64_handlers; 18static struct xfrm_tunnel *tunnel64_handlers __read_mostly;
19static DEFINE_MUTEX(tunnel4_mutex); 19static DEFINE_MUTEX(tunnel4_mutex);
20 20
21static inline struct xfrm_tunnel **fam_handlers(unsigned short family) 21static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
@@ -39,7 +39,7 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
39 } 39 }
40 40
41 handler->next = *pprev; 41 handler->next = *pprev;
42 *pprev = handler; 42 rcu_assign_pointer(*pprev, handler);
43 43
44 ret = 0; 44 ret = 0;
45 45
@@ -73,6 +73,11 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
73} 73}
74EXPORT_SYMBOL(xfrm4_tunnel_deregister); 74EXPORT_SYMBOL(xfrm4_tunnel_deregister);
75 75
76#define for_each_tunnel_rcu(head, handler) \
77 for (handler = rcu_dereference(head); \
78 handler != NULL; \
79 handler = rcu_dereference(handler->next)) \
80
76static int tunnel4_rcv(struct sk_buff *skb) 81static int tunnel4_rcv(struct sk_buff *skb)
77{ 82{
78 struct xfrm_tunnel *handler; 83 struct xfrm_tunnel *handler;
@@ -80,7 +85,7 @@ static int tunnel4_rcv(struct sk_buff *skb)
80 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 85 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
81 goto drop; 86 goto drop;
82 87
83 for (handler = tunnel4_handlers; handler; handler = handler->next) 88 for_each_tunnel_rcu(tunnel4_handlers, handler)
84 if (!handler->handler(skb)) 89 if (!handler->handler(skb))
85 return 0; 90 return 0;
86 91
@@ -99,7 +104,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
99 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 104 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
100 goto drop; 105 goto drop;
101 106
102 for (handler = tunnel64_handlers; handler; handler = handler->next) 107 for_each_tunnel_rcu(tunnel64_handlers, handler)
103 if (!handler->handler(skb)) 108 if (!handler->handler(skb))
104 return 0; 109 return 0;
105 110
@@ -115,7 +120,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
115{ 120{
116 struct xfrm_tunnel *handler; 121 struct xfrm_tunnel *handler;
117 122
118 for (handler = tunnel4_handlers; handler; handler = handler->next) 123 for_each_tunnel_rcu(tunnel4_handlers, handler)
119 if (!handler->err_handler(skb, info)) 124 if (!handler->err_handler(skb, info))
120 break; 125 break;
121} 126}
@@ -125,7 +130,7 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
125{ 130{
126 struct xfrm_tunnel *handler; 131 struct xfrm_tunnel *handler;
127 132
128 for (handler = tunnel64_handlers; handler; handler = handler->next) 133 for_each_tunnel_rcu(tunnel64_handlers, handler)
129 if (!handler->err_handler(skb, info)) 134 if (!handler->err_handler(skb, info))
130 break; 135 break;
131} 136}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fb23c2e63b52..b3f7e8cf18ac 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
797 return -EOPNOTSUPP; 797 return -EOPNOTSUPP;
798 798
799 ipc.opt = NULL; 799 ipc.opt = NULL;
800 ipc.shtx.flags = 0; 800 ipc.tx_flags = 0;
801 801
802 if (up->pending) { 802 if (up->pending) {
803 /* 803 /*
@@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
845 ipc.addr = inet->inet_saddr; 845 ipc.addr = inet->inet_saddr;
846 846
847 ipc.oif = sk->sk_bound_dev_if; 847 ipc.oif = sk->sk_bound_dev_if;
848 err = sock_tx_timestamp(msg, sk, &ipc.shtx); 848 err = sock_tx_timestamp(sk, &ipc.tx_flags);
849 if (err) 849 if (err)
850 return err; 850 return err;
851 if (msg->msg_controllen) { 851 if (msg->msg_controllen) {
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 41f5982d2087..82806455e859 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -58,14 +58,14 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
58 return -ENOENT; 58 return -ENOENT;
59} 59}
60 60
61static struct xfrm_tunnel xfrm_tunnel_handler = { 61static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
62 .handler = xfrm_tunnel_rcv, 62 .handler = xfrm_tunnel_rcv,
63 .err_handler = xfrm_tunnel_err, 63 .err_handler = xfrm_tunnel_err,
64 .priority = 2, 64 .priority = 2,
65}; 65};
66 66
67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
68static struct xfrm_tunnel xfrm64_tunnel_handler = { 68static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
69 .handler = xfrm_tunnel_rcv, 69 .handler = xfrm_tunnel_rcv,
70 .err_handler = xfrm_tunnel_err, 70 .err_handler = xfrm_tunnel_err,
71 .priority = 2, 71 .priority = 2,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ab70a3fbcafa..5bc893e28008 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2964,7 +2964,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2964 start sending router solicitations. 2964 start sending router solicitations.
2965 */ 2965 */
2966 2966
2967 if (ifp->idev->cnf.forwarding == 0 && 2967 if ((ifp->idev->cnf.forwarding == 0 ||
2968 ifp->idev->cnf.forwarding == 2) &&
2968 ifp->idev->cnf.rtr_solicits > 0 && 2969 ifp->idev->cnf.rtr_solicits > 0 &&
2969 (dev->flags&IFF_LOOPBACK) == 0 && 2970 (dev->flags&IFF_LOOPBACK) == 0 &&
2970 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { 2971 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d40b330c0ee6..1838927a2243 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -637,7 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
637 } 637 }
638 mtu -= hlen + sizeof(struct frag_hdr); 638 mtu -= hlen + sizeof(struct frag_hdr);
639 639
640 if (skb_has_frags(skb)) { 640 if (skb_has_frag_list(skb)) {
641 int first_len = skb_pagelen(skb); 641 int first_len = skb_pagelen(skb);
642 int truesizes = 0; 642 int truesizes = 0;
643 643
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0fd027f3f47e..29f99dd75bc6 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1372,13 +1372,13 @@ static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1372 ip6n->tnls_wc[0] = t; 1372 ip6n->tnls_wc[0] = t;
1373} 1373}
1374 1374
1375static struct xfrm6_tunnel ip4ip6_handler = { 1375static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
1376 .handler = ip4ip6_rcv, 1376 .handler = ip4ip6_rcv,
1377 .err_handler = ip4ip6_err, 1377 .err_handler = ip4ip6_err,
1378 .priority = 1, 1378 .priority = 1,
1379}; 1379};
1380 1380
1381static struct xfrm6_tunnel ip6ip6_handler = { 1381static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
1382 .handler = ip6ip6_rcv, 1382 .handler = ip6ip6_rcv,
1383 .err_handler = ip6ip6_err, 1383 .err_handler = ip6ip6_err,
1384 .priority = 1, 1384 .priority = 1,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 58841c4ae947..69a0051cea67 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1105,6 +1105,18 @@ errout:
1105 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); 1105 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1106} 1106}
1107 1107
1108static inline int accept_ra(struct inet6_dev *in6_dev)
1109{
1110 /*
1111 * If forwarding is enabled, RA are not accepted unless the special
1112 * hybrid mode (accept_ra=2) is enabled.
1113 */
1114 if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
1115 return 0;
1116
1117 return in6_dev->cnf.accept_ra;
1118}
1119
1108static void ndisc_router_discovery(struct sk_buff *skb) 1120static void ndisc_router_discovery(struct sk_buff *skb)
1109{ 1121{
1110 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); 1122 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1158,8 +1170,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1158 return; 1170 return;
1159 } 1171 }
1160 1172
1161 /* skip route and link configuration on routers */ 1173 if (!accept_ra(in6_dev))
1162 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1163 goto skip_linkparms; 1174 goto skip_linkparms;
1164 1175
1165#ifdef CONFIG_IPV6_NDISC_NODETYPE 1176#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1309,8 +1320,7 @@ skip_linkparms:
1309 NEIGH_UPDATE_F_ISROUTER); 1320 NEIGH_UPDATE_F_ISROUTER);
1310 } 1321 }
1311 1322
1312 /* skip route and link configuration on routers */ 1323 if (!accept_ra(in6_dev))
1313 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1314 goto out; 1324 goto out;
1315 1325
1316#ifdef CONFIG_IPV6_ROUTE_INFO 1326#ifdef CONFIG_IPV6_ROUTE_INFO
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 578f3c1a16db..138a8b362706 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -363,7 +363,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
363 /* If the first fragment is fragmented itself, we split 363 /* If the first fragment is fragmented itself, we split
364 * it to two chunks: the first with data and paged part 364 * it to two chunks: the first with data and paged part
365 * and the second, holding only fragments. */ 365 * and the second, holding only fragments. */
366 if (skb_has_frags(head)) { 366 if (skb_has_frag_list(head)) {
367 struct sk_buff *clone; 367 struct sk_buff *clone;
368 int i, plen = 0; 368 int i, plen = 0;
369 369
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 1fa3468f0f32..9bb936ae2452 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,28 +25,14 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <net/protocol.h> 26#include <net/protocol.h>
27 27
28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; 28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly;
29static DEFINE_SPINLOCK(inet6_proto_lock);
30
31 29
32int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
33{ 31{
34 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 32 int hash = protocol & (MAX_INET_PROTOS - 1);
35
36 spin_lock_bh(&inet6_proto_lock);
37
38 if (inet6_protos[hash]) {
39 ret = -1;
40 } else {
41 inet6_protos[hash] = prot;
42 ret = 0;
43 }
44
45 spin_unlock_bh(&inet6_proto_lock);
46 33
47 return ret; 34 return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1;
48} 35}
49
50EXPORT_SYMBOL(inet6_add_protocol); 36EXPORT_SYMBOL(inet6_add_protocol);
51 37
52/* 38/*
@@ -57,20 +43,10 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
57{ 43{
58 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 44 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
59 45
60 spin_lock_bh(&inet6_proto_lock); 46 ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1;
61
62 if (inet6_protos[hash] != prot) {
63 ret = -1;
64 } else {
65 inet6_protos[hash] = NULL;
66 ret = 0;
67 }
68
69 spin_unlock_bh(&inet6_proto_lock);
70 47
71 synchronize_net(); 48 synchronize_net();
72 49
73 return ret; 50 return ret;
74} 51}
75
76EXPORT_SYMBOL(inet6_del_protocol); 52EXPORT_SYMBOL(inet6_del_protocol);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 64cfef1b0a4c..c7ba3149633f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -458,7 +458,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
458 /* If the first fragment is fragmented itself, we split 458 /* If the first fragment is fragmented itself, we split
459 * it to two chunks: the first with data and paged part 459 * it to two chunks: the first with data and paged part
460 * and the second, holding only fragments. */ 460 * and the second, holding only fragments. */
461 if (skb_has_frags(head)) { 461 if (skb_has_frag_list(head)) {
462 struct sk_buff *clone; 462 struct sk_buff *clone;
463 int i, plen = 0; 463 int i, plen = 0;
464 464
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4699cd3c3118..86618eb30335 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1132,7 +1132,7 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1132 sitn->tunnels_wc[0] = tunnel; 1132 sitn->tunnels_wc[0] = tunnel;
1133} 1133}
1134 1134
1135static struct xfrm_tunnel sit_handler = { 1135static struct xfrm_tunnel sit_handler __read_mostly = {
1136 .handler = ipip6_rcv, 1136 .handler = ipip6_rcv,
1137 .err_handler = ipip6_err, 1137 .err_handler = ipip6_err,
1138 .priority = 1, 1138 .priority = 1,
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index fc3c86a47452..d9864725d0c6 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,8 +30,8 @@
30#include <net/protocol.h> 30#include <net/protocol.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32 32
33static struct xfrm6_tunnel *tunnel6_handlers; 33static struct xfrm6_tunnel *tunnel6_handlers __read_mostly;
34static struct xfrm6_tunnel *tunnel46_handlers; 34static struct xfrm6_tunnel *tunnel46_handlers __read_mostly;
35static DEFINE_MUTEX(tunnel6_mutex); 35static DEFINE_MUTEX(tunnel6_mutex);
36 36
37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) 37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
@@ -51,7 +51,7 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
51 } 51 }
52 52
53 handler->next = *pprev; 53 handler->next = *pprev;
54 *pprev = handler; 54 rcu_assign_pointer(*pprev, handler);
55 55
56 ret = 0; 56 ret = 0;
57 57
@@ -88,6 +88,11 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
88 88
89EXPORT_SYMBOL(xfrm6_tunnel_deregister); 89EXPORT_SYMBOL(xfrm6_tunnel_deregister);
90 90
91#define for_each_tunnel_rcu(head, handler) \
92 for (handler = rcu_dereference(head); \
93 handler != NULL; \
94 handler = rcu_dereference(handler->next)) \
95
91static int tunnel6_rcv(struct sk_buff *skb) 96static int tunnel6_rcv(struct sk_buff *skb)
92{ 97{
93 struct xfrm6_tunnel *handler; 98 struct xfrm6_tunnel *handler;
@@ -95,7 +100,7 @@ static int tunnel6_rcv(struct sk_buff *skb)
95 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 100 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
96 goto drop; 101 goto drop;
97 102
98 for (handler = tunnel6_handlers; handler; handler = handler->next) 103 for_each_tunnel_rcu(tunnel6_handlers, handler)
99 if (!handler->handler(skb)) 104 if (!handler->handler(skb))
100 return 0; 105 return 0;
101 106
@@ -113,7 +118,7 @@ static int tunnel46_rcv(struct sk_buff *skb)
113 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 118 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
114 goto drop; 119 goto drop;
115 120
116 for (handler = tunnel46_handlers; handler; handler = handler->next) 121 for_each_tunnel_rcu(tunnel46_handlers, handler)
117 if (!handler->handler(skb)) 122 if (!handler->handler(skb))
118 return 0; 123 return 0;
119 124
@@ -129,7 +134,7 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
129{ 134{
130 struct xfrm6_tunnel *handler; 135 struct xfrm6_tunnel *handler;
131 136
132 for (handler = tunnel6_handlers; handler; handler = handler->next) 137 for_each_tunnel_rcu(tunnel6_handlers, handler)
133 if (!handler->err_handler(skb, opt, type, code, offset, info)) 138 if (!handler->err_handler(skb, opt, type, code, offset, info))
134 break; 139 break;
135} 140}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 2ce3a8278f26..ac7584b946a5 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -317,13 +317,13 @@ static const struct xfrm_type xfrm6_tunnel_type = {
317 .output = xfrm6_tunnel_output, 317 .output = xfrm6_tunnel_output,
318}; 318};
319 319
320static struct xfrm6_tunnel xfrm6_tunnel_handler = { 320static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = {
321 .handler = xfrm6_tunnel_rcv, 321 .handler = xfrm6_tunnel_rcv,
322 .err_handler = xfrm6_tunnel_err, 322 .err_handler = xfrm6_tunnel_err,
323 .priority = 2, 323 .priority = 2,
324}; 324};
325 325
326static struct xfrm6_tunnel xfrm46_tunnel_handler = { 326static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = {
327 .handler = xfrm6_tunnel_rcv, 327 .handler = xfrm6_tunnel_rcv,
328 .err_handler = xfrm6_tunnel_err, 328 .err_handler = xfrm6_tunnel_err,
329 .priority = 2, 329 .priority = 2,
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 5bb8353105cc..8ee1ff6c742f 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev);
45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, 45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
46 struct net_device *dev); 46 struct net_device *dev);
47static void irlan_eth_set_multicast_list( struct net_device *dev); 47static void irlan_eth_set_multicast_list( struct net_device *dev);
48static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev);
49 48
50static const struct net_device_ops irlan_eth_netdev_ops = { 49static const struct net_device_ops irlan_eth_netdev_ops = {
51 .ndo_open = irlan_eth_open, 50 .ndo_open = irlan_eth_open,
52 .ndo_stop = irlan_eth_close, 51 .ndo_stop = irlan_eth_close,
53 .ndo_start_xmit = irlan_eth_xmit, 52 .ndo_start_xmit = irlan_eth_xmit,
54 .ndo_get_stats = irlan_eth_get_stats,
55 .ndo_set_multicast_list = irlan_eth_set_multicast_list, 53 .ndo_set_multicast_list = irlan_eth_set_multicast_list,
56 .ndo_change_mtu = eth_change_mtu, 54 .ndo_change_mtu = eth_change_mtu,
57 .ndo_validate_addr = eth_validate_addr, 55 .ndo_validate_addr = eth_validate_addr,
@@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
208 * tried :-) DB 206 * tried :-) DB
209 */ 207 */
210 /* irttp_data_request already free the packet */ 208 /* irttp_data_request already free the packet */
211 self->stats.tx_dropped++; 209 dev->stats.tx_dropped++;
212 } else { 210 } else {
213 self->stats.tx_packets++; 211 dev->stats.tx_packets++;
214 self->stats.tx_bytes += len; 212 dev->stats.tx_bytes += len;
215 } 213 }
216 214
217 return NETDEV_TX_OK; 215 return NETDEV_TX_OK;
@@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
226int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) 224int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
227{ 225{
228 struct irlan_cb *self = instance; 226 struct irlan_cb *self = instance;
227 struct net_device *dev = self->dev;
229 228
230 if (skb == NULL) { 229 if (skb == NULL) {
231 ++self->stats.rx_dropped; 230 dev->stats.rx_dropped++;
232 return 0; 231 return 0;
233 } 232 }
234 if (skb->len < ETH_HLEN) { 233 if (skb->len < ETH_HLEN) {
235 IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n", 234 IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n",
236 __func__, skb->len); 235 __func__, skb->len);
237 ++self->stats.rx_dropped; 236 dev->stats.rx_dropped++;
238 dev_kfree_skb(skb); 237 dev_kfree_skb(skb);
239 return 0; 238 return 0;
240 } 239 }
@@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
244 * might have been previously set by the low level IrDA network 243 * might have been previously set by the low level IrDA network
245 * device driver 244 * device driver
246 */ 245 */
247 skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */ 246 skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */
248 247
249 self->stats.rx_packets++; 248 dev->stats.rx_packets++;
250 self->stats.rx_bytes += skb->len; 249 dev->stats.rx_bytes += skb->len;
251 250
252 netif_rx(skb); /* Eat it! */ 251 netif_rx(skb); /* Eat it! */
253 252
@@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev)
348 else 347 else
349 irlan_set_broadcast_filter(self, FALSE); 348 irlan_set_broadcast_filter(self, FALSE);
350} 349}
351
352/*
353 * Function irlan_get_stats (dev)
354 *
355 * Get the current statistics for this device
356 *
357 */
358static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev)
359{
360 struct irlan_cb *self = netdev_priv(dev);
361
362 return &self->stats;
363}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 1ae697681bc7..8d9ce0accc98 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -144,7 +144,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
144 nf_reset(skb); 144 nf_reset(skb);
145 145
146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { 146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
147 dev->last_rx = jiffies;
148 dev->stats.rx_packets++; 147 dev->stats.rx_packets++;
149 dev->stats.rx_bytes += data_len; 148 dev->stats.rx_bytes += data_len;
150 } else 149 } else
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index a87cb3ba2df6..d2b03e0851ef 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -138,10 +138,8 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
138 struct crypto_cipher *tfm; 138 struct crypto_cipher *tfm;
139 139
140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
141 if (IS_ERR(tfm)) 141 if (!IS_ERR(tfm))
142 return NULL; 142 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
143
144 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
145 143
146 return tfm; 144 return tfm;
147} 145}
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index 3d097b3d7b62..b4d66cca76d6 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -119,10 +119,8 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
119 struct crypto_cipher *tfm; 119 struct crypto_cipher *tfm;
120 120
121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
122 if (IS_ERR(tfm)) 122 if (!IS_ERR(tfm))
123 return NULL; 123 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
124
125 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
126 124
127 return tfm; 125 return tfm;
128} 126}
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 965b272499fd..58eab9e8e4ee 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -86,6 +86,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
86 tid, 0, reason); 86 tid, 0, reason);
87 87
88 del_timer_sync(&tid_rx->session_timer); 88 del_timer_sync(&tid_rx->session_timer);
89 del_timer_sync(&tid_rx->reorder_timer);
89 90
90 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); 91 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
91} 92}
@@ -120,6 +121,20 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
120 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); 121 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
121} 122}
122 123
124static void sta_rx_agg_reorder_timer_expired(unsigned long data)
125{
126 u8 *ptid = (u8 *)data;
127 u8 *timer_to_id = ptid - *ptid;
128 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
129 timer_to_tid[0]);
130
131 rcu_read_lock();
132 spin_lock(&sta->lock);
133 ieee80211_release_reorder_timeout(sta, *ptid);
134 spin_unlock(&sta->lock);
135 rcu_read_unlock();
136}
137
123static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, 138static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
124 u8 dialog_token, u16 status, u16 policy, 139 u8 dialog_token, u16 status, u16 policy,
125 u16 buf_size, u16 timeout) 140 u16 buf_size, u16 timeout)
@@ -251,11 +266,18 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
251 goto end; 266 goto end;
252 } 267 }
253 268
269 spin_lock_init(&tid_agg_rx->reorder_lock);
270
254 /* rx timer */ 271 /* rx timer */
255 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; 272 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired;
256 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; 273 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
257 init_timer(&tid_agg_rx->session_timer); 274 init_timer(&tid_agg_rx->session_timer);
258 275
276 /* rx reorder timer */
277 tid_agg_rx->reorder_timer.function = sta_rx_agg_reorder_timer_expired;
278 tid_agg_rx->reorder_timer.data = (unsigned long)&sta->timer_to_tid[tid];
279 init_timer(&tid_agg_rx->reorder_timer);
280
259 /* prepare reordering buffer */ 281 /* prepare reordering buffer */
260 tid_agg_rx->reorder_buf = 282 tid_agg_rx->reorder_buf =
261 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); 283 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 29ac8e1a509e..5de1ca3f17b9 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -19,33 +19,6 @@
19#include "rate.h" 19#include "rate.h"
20#include "mesh.h" 20#include "mesh.h"
21 21
22static bool nl80211_type_check(enum nl80211_iftype type)
23{
24 switch (type) {
25 case NL80211_IFTYPE_ADHOC:
26 case NL80211_IFTYPE_STATION:
27 case NL80211_IFTYPE_MONITOR:
28#ifdef CONFIG_MAC80211_MESH
29 case NL80211_IFTYPE_MESH_POINT:
30#endif
31 case NL80211_IFTYPE_AP:
32 case NL80211_IFTYPE_AP_VLAN:
33 case NL80211_IFTYPE_WDS:
34 return true;
35 default:
36 return false;
37 }
38}
39
40static bool nl80211_params_check(enum nl80211_iftype type,
41 struct vif_params *params)
42{
43 if (!nl80211_type_check(type))
44 return false;
45
46 return true;
47}
48
49static int ieee80211_add_iface(struct wiphy *wiphy, char *name, 22static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
50 enum nl80211_iftype type, u32 *flags, 23 enum nl80211_iftype type, u32 *flags,
51 struct vif_params *params) 24 struct vif_params *params)
@@ -55,9 +28,6 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
55 struct ieee80211_sub_if_data *sdata; 28 struct ieee80211_sub_if_data *sdata;
56 int err; 29 int err;
57 30
58 if (!nl80211_params_check(type, params))
59 return -EINVAL;
60
61 err = ieee80211_if_add(local, name, &dev, type, params); 31 err = ieee80211_if_add(local, name, &dev, type, params);
62 if (err || type != NL80211_IFTYPE_MONITOR || !flags) 32 if (err || type != NL80211_IFTYPE_MONITOR || !flags)
63 return err; 33 return err;
@@ -82,12 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
82 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 52 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
83 int ret; 53 int ret;
84 54
85 if (ieee80211_sdata_running(sdata))
86 return -EBUSY;
87
88 if (!nl80211_params_check(type, params))
89 return -EINVAL;
90
91 ret = ieee80211_if_change_type(sdata, type); 55 ret = ieee80211_if_change_type(sdata, type);
92 if (ret) 56 if (ret)
93 return ret; 57 return ret;
@@ -114,44 +78,30 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
114 u8 key_idx, const u8 *mac_addr, 78 u8 key_idx, const u8 *mac_addr,
115 struct key_params *params) 79 struct key_params *params)
116{ 80{
117 struct ieee80211_sub_if_data *sdata; 81 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
118 struct sta_info *sta = NULL; 82 struct sta_info *sta = NULL;
119 enum ieee80211_key_alg alg;
120 struct ieee80211_key *key; 83 struct ieee80211_key *key;
121 int err; 84 int err;
122 85
123 if (!netif_running(dev)) 86 if (!ieee80211_sdata_running(sdata))
124 return -ENETDOWN; 87 return -ENETDOWN;
125 88
126 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 89 /* reject WEP and TKIP keys if WEP failed to initialize */
127
128 switch (params->cipher) { 90 switch (params->cipher) {
129 case WLAN_CIPHER_SUITE_WEP40: 91 case WLAN_CIPHER_SUITE_WEP40:
130 case WLAN_CIPHER_SUITE_WEP104:
131 alg = ALG_WEP;
132 break;
133 case WLAN_CIPHER_SUITE_TKIP: 92 case WLAN_CIPHER_SUITE_TKIP:
134 alg = ALG_TKIP; 93 case WLAN_CIPHER_SUITE_WEP104:
135 break; 94 if (IS_ERR(sdata->local->wep_tx_tfm))
136 case WLAN_CIPHER_SUITE_CCMP: 95 return -EINVAL;
137 alg = ALG_CCMP;
138 break;
139 case WLAN_CIPHER_SUITE_AES_CMAC:
140 alg = ALG_AES_CMAC;
141 break; 96 break;
142 default: 97 default:
143 return -EINVAL; 98 break;
144 } 99 }
145 100
146 /* reject WEP and TKIP keys if WEP failed to initialize */ 101 key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len,
147 if ((alg == ALG_WEP || alg == ALG_TKIP) && 102 params->key, params->seq_len, params->seq);
148 IS_ERR(sdata->local->wep_tx_tfm)) 103 if (IS_ERR(key))
149 return -EINVAL; 104 return PTR_ERR(key);
150
151 key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key,
152 params->seq_len, params->seq);
153 if (!key)
154 return -ENOMEM;
155 105
156 mutex_lock(&sdata->local->sta_mtx); 106 mutex_lock(&sdata->local->sta_mtx);
157 107
@@ -164,9 +114,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
164 } 114 }
165 } 115 }
166 116
167 ieee80211_key_link(key, sdata, sta); 117 err = ieee80211_key_link(key, sdata, sta);
118 if (err)
119 ieee80211_key_free(sdata->local, key);
168 120
169 err = 0;
170 out_unlock: 121 out_unlock:
171 mutex_unlock(&sdata->local->sta_mtx); 122 mutex_unlock(&sdata->local->sta_mtx);
172 123
@@ -247,10 +198,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
247 198
248 memset(&params, 0, sizeof(params)); 199 memset(&params, 0, sizeof(params));
249 200
250 switch (key->conf.alg) { 201 params.cipher = key->conf.cipher;
251 case ALG_TKIP:
252 params.cipher = WLAN_CIPHER_SUITE_TKIP;
253 202
203 switch (key->conf.cipher) {
204 case WLAN_CIPHER_SUITE_TKIP:
254 iv32 = key->u.tkip.tx.iv32; 205 iv32 = key->u.tkip.tx.iv32;
255 iv16 = key->u.tkip.tx.iv16; 206 iv16 = key->u.tkip.tx.iv16;
256 207
@@ -268,8 +219,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
268 params.seq = seq; 219 params.seq = seq;
269 params.seq_len = 6; 220 params.seq_len = 6;
270 break; 221 break;
271 case ALG_CCMP: 222 case WLAN_CIPHER_SUITE_CCMP:
272 params.cipher = WLAN_CIPHER_SUITE_CCMP;
273 seq[0] = key->u.ccmp.tx_pn[5]; 223 seq[0] = key->u.ccmp.tx_pn[5];
274 seq[1] = key->u.ccmp.tx_pn[4]; 224 seq[1] = key->u.ccmp.tx_pn[4];
275 seq[2] = key->u.ccmp.tx_pn[3]; 225 seq[2] = key->u.ccmp.tx_pn[3];
@@ -279,14 +229,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
279 params.seq = seq; 229 params.seq = seq;
280 params.seq_len = 6; 230 params.seq_len = 6;
281 break; 231 break;
282 case ALG_WEP: 232 case WLAN_CIPHER_SUITE_AES_CMAC:
283 if (key->conf.keylen == 5)
284 params.cipher = WLAN_CIPHER_SUITE_WEP40;
285 else
286 params.cipher = WLAN_CIPHER_SUITE_WEP104;
287 break;
288 case ALG_AES_CMAC:
289 params.cipher = WLAN_CIPHER_SUITE_AES_CMAC;
290 seq[0] = key->u.aes_cmac.tx_pn[5]; 233 seq[0] = key->u.aes_cmac.tx_pn[5];
291 seq[1] = key->u.aes_cmac.tx_pn[4]; 234 seq[1] = key->u.aes_cmac.tx_pn[4];
292 seq[2] = key->u.aes_cmac.tx_pn[3]; 235 seq[2] = key->u.aes_cmac.tx_pn[3];
@@ -1143,9 +1086,9 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
1143 p.uapsd = false; 1086 p.uapsd = false;
1144 1087
1145 if (drv_conf_tx(local, params->queue, &p)) { 1088 if (drv_conf_tx(local, params->queue, &p)) {
1146 printk(KERN_DEBUG "%s: failed to set TX queue " 1089 wiphy_debug(local->hw.wiphy,
1147 "parameters for queue %d\n", 1090 "failed to set TX queue parameters for queue %d\n",
1148 wiphy_name(local->hw.wiphy), params->queue); 1091 params->queue);
1149 return -EINVAL; 1092 return -EINVAL;
1150 } 1093 }
1151 1094
@@ -1541,11 +1484,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
1541 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); 1484 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
1542} 1485}
1543 1486
1544static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, 1487static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
1545 struct ieee80211_channel *chan, 1488 struct ieee80211_channel *chan,
1546 enum nl80211_channel_type channel_type, 1489 enum nl80211_channel_type channel_type,
1547 bool channel_type_valid, 1490 bool channel_type_valid,
1548 const u8 *buf, size_t len, u64 *cookie) 1491 const u8 *buf, size_t len, u64 *cookie)
1549{ 1492{
1550 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1493 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1551 struct ieee80211_local *local = sdata->local; 1494 struct ieee80211_local *local = sdata->local;
@@ -1575,8 +1518,6 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1575 return -ENOLINK; 1518 return -ENOLINK;
1576 break; 1519 break;
1577 case NL80211_IFTYPE_STATION: 1520 case NL80211_IFTYPE_STATION:
1578 if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED))
1579 flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
1580 break; 1521 break;
1581 default: 1522 default:
1582 return -EOPNOTSUPP; 1523 return -EOPNOTSUPP;
@@ -1647,6 +1588,6 @@ struct cfg80211_ops mac80211_config_ops = {
1647 .set_bitrate_mask = ieee80211_set_bitrate_mask, 1588 .set_bitrate_mask = ieee80211_set_bitrate_mask,
1648 .remain_on_channel = ieee80211_remain_on_channel, 1589 .remain_on_channel = ieee80211_remain_on_channel,
1649 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, 1590 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
1650 .action = ieee80211_action, 1591 .mgmt_tx = ieee80211_mgmt_tx,
1651 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, 1592 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
1652}; 1593};
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index a694c593ff6a..e81ef4e8cb32 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -85,13 +85,15 @@ static ssize_t tsf_write(struct file *file,
85 if (strncmp(buf, "reset", 5) == 0) { 85 if (strncmp(buf, "reset", 5) == 0) {
86 if (local->ops->reset_tsf) { 86 if (local->ops->reset_tsf) {
87 drv_reset_tsf(local); 87 drv_reset_tsf(local);
88 printk(KERN_INFO "%s: debugfs reset TSF\n", wiphy_name(local->hw.wiphy)); 88 wiphy_info(local->hw.wiphy, "debugfs reset TSF\n");
89 } 89 }
90 } else { 90 } else {
91 tsf = simple_strtoul(buf, NULL, 0); 91 tsf = simple_strtoul(buf, NULL, 0);
92 if (local->ops->set_tsf) { 92 if (local->ops->set_tsf) {
93 drv_set_tsf(local, tsf); 93 drv_set_tsf(local, tsf);
94 printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n", wiphy_name(local->hw.wiphy), tsf); 94 wiphy_info(local->hw.wiphy,
95 "debugfs set TSF to %#018llx\n", tsf);
96
95 } 97 }
96 } 98 }
97 99
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index fa5e76e658ef..1647f8dc5cda 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -64,26 +64,13 @@ static ssize_t key_algorithm_read(struct file *file,
64 char __user *userbuf, 64 char __user *userbuf,
65 size_t count, loff_t *ppos) 65 size_t count, loff_t *ppos)
66{ 66{
67 char *alg; 67 char buf[15];
68 struct ieee80211_key *key = file->private_data; 68 struct ieee80211_key *key = file->private_data;
69 u32 c = key->conf.cipher;
69 70
70 switch (key->conf.alg) { 71 sprintf(buf, "%.2x-%.2x-%.2x:%d\n",
71 case ALG_WEP: 72 c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff);
72 alg = "WEP\n"; 73 return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
73 break;
74 case ALG_TKIP:
75 alg = "TKIP\n";
76 break;
77 case ALG_CCMP:
78 alg = "CCMP\n";
79 break;
80 case ALG_AES_CMAC:
81 alg = "AES-128-CMAC\n";
82 break;
83 default:
84 return 0;
85 }
86 return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg));
87} 74}
88KEY_OPS(algorithm); 75KEY_OPS(algorithm);
89 76
@@ -95,21 +82,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
95 int len; 82 int len;
96 struct ieee80211_key *key = file->private_data; 83 struct ieee80211_key *key = file->private_data;
97 84
98 switch (key->conf.alg) { 85 switch (key->conf.cipher) {
99 case ALG_WEP: 86 case WLAN_CIPHER_SUITE_WEP40:
87 case WLAN_CIPHER_SUITE_WEP104:
100 len = scnprintf(buf, sizeof(buf), "\n"); 88 len = scnprintf(buf, sizeof(buf), "\n");
101 break; 89 break;
102 case ALG_TKIP: 90 case WLAN_CIPHER_SUITE_TKIP:
103 len = scnprintf(buf, sizeof(buf), "%08x %04x\n", 91 len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
104 key->u.tkip.tx.iv32, 92 key->u.tkip.tx.iv32,
105 key->u.tkip.tx.iv16); 93 key->u.tkip.tx.iv16);
106 break; 94 break;
107 case ALG_CCMP: 95 case WLAN_CIPHER_SUITE_CCMP:
108 tpn = key->u.ccmp.tx_pn; 96 tpn = key->u.ccmp.tx_pn;
109 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 97 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
110 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); 98 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
111 break; 99 break;
112 case ALG_AES_CMAC: 100 case WLAN_CIPHER_SUITE_AES_CMAC:
113 tpn = key->u.aes_cmac.tx_pn; 101 tpn = key->u.aes_cmac.tx_pn;
114 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 102 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
115 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], 103 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4],
@@ -130,11 +118,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
130 int i, len; 118 int i, len;
131 const u8 *rpn; 119 const u8 *rpn;
132 120
133 switch (key->conf.alg) { 121 switch (key->conf.cipher) {
134 case ALG_WEP: 122 case WLAN_CIPHER_SUITE_WEP40:
123 case WLAN_CIPHER_SUITE_WEP104:
135 len = scnprintf(buf, sizeof(buf), "\n"); 124 len = scnprintf(buf, sizeof(buf), "\n");
136 break; 125 break;
137 case ALG_TKIP: 126 case WLAN_CIPHER_SUITE_TKIP:
138 for (i = 0; i < NUM_RX_DATA_QUEUES; i++) 127 for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
139 p += scnprintf(p, sizeof(buf)+buf-p, 128 p += scnprintf(p, sizeof(buf)+buf-p,
140 "%08x %04x\n", 129 "%08x %04x\n",
@@ -142,7 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
142 key->u.tkip.rx[i].iv16); 131 key->u.tkip.rx[i].iv16);
143 len = p - buf; 132 len = p - buf;
144 break; 133 break;
145 case ALG_CCMP: 134 case WLAN_CIPHER_SUITE_CCMP:
146 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { 135 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) {
147 rpn = key->u.ccmp.rx_pn[i]; 136 rpn = key->u.ccmp.rx_pn[i];
148 p += scnprintf(p, sizeof(buf)+buf-p, 137 p += scnprintf(p, sizeof(buf)+buf-p,
@@ -152,7 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
152 } 141 }
153 len = p - buf; 142 len = p - buf;
154 break; 143 break;
155 case ALG_AES_CMAC: 144 case WLAN_CIPHER_SUITE_AES_CMAC:
156 rpn = key->u.aes_cmac.rx_pn; 145 rpn = key->u.aes_cmac.rx_pn;
157 p += scnprintf(p, sizeof(buf)+buf-p, 146 p += scnprintf(p, sizeof(buf)+buf-p,
158 "%02x%02x%02x%02x%02x%02x\n", 147 "%02x%02x%02x%02x%02x%02x\n",
@@ -174,11 +163,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf,
174 char buf[20]; 163 char buf[20];
175 int len; 164 int len;
176 165
177 switch (key->conf.alg) { 166 switch (key->conf.cipher) {
178 case ALG_CCMP: 167 case WLAN_CIPHER_SUITE_CCMP:
179 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); 168 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
180 break; 169 break;
181 case ALG_AES_CMAC: 170 case WLAN_CIPHER_SUITE_AES_CMAC:
182 len = scnprintf(buf, sizeof(buf), "%u\n", 171 len = scnprintf(buf, sizeof(buf), "%u\n",
183 key->u.aes_cmac.replays); 172 key->u.aes_cmac.replays);
184 break; 173 break;
@@ -196,8 +185,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf,
196 char buf[20]; 185 char buf[20];
197 int len; 186 int len;
198 187
199 switch (key->conf.alg) { 188 switch (key->conf.cipher) {
200 case ALG_AES_CMAC: 189 case WLAN_CIPHER_SUITE_AES_CMAC:
201 len = scnprintf(buf, sizeof(buf), "%u\n", 190 len = scnprintf(buf, sizeof(buf), "%u\n",
202 key->u.aes_cmac.icverrors); 191 key->u.aes_cmac.icverrors);
203 break; 192 break;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 14123dce544b..6064b7b09e01 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local,
54 return ret; 54 return ret;
55} 55}
56 56
57static inline int drv_change_interface(struct ieee80211_local *local,
58 struct ieee80211_sub_if_data *sdata,
59 enum nl80211_iftype type)
60{
61 int ret;
62
63 might_sleep();
64
65 trace_drv_change_interface(local, sdata, type);
66 ret = local->ops->change_interface(&local->hw, &sdata->vif, type);
67 trace_drv_return_int(local, ret);
68 return ret;
69}
70
57static inline void drv_remove_interface(struct ieee80211_local *local, 71static inline void drv_remove_interface(struct ieee80211_local *local,
58 struct ieee80211_vif *vif) 72 struct ieee80211_vif *vif)
59{ 73{
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 5d5d2a974668..f6f3d89e43fa 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -136,6 +136,31 @@ TRACE_EVENT(drv_add_interface,
136 ) 136 )
137); 137);
138 138
139TRACE_EVENT(drv_change_interface,
140 TP_PROTO(struct ieee80211_local *local,
141 struct ieee80211_sub_if_data *sdata,
142 enum nl80211_iftype type),
143
144 TP_ARGS(local, sdata, type),
145
146 TP_STRUCT__entry(
147 LOCAL_ENTRY
148 VIF_ENTRY
149 __field(u32, new_type)
150 ),
151
152 TP_fast_assign(
153 LOCAL_ASSIGN;
154 VIF_ASSIGN;
155 __entry->new_type = type;
156 ),
157
158 TP_printk(
159 LOCAL_PR_FMT VIF_PR_FMT " new type:%d",
160 LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type
161 )
162);
163
139TRACE_EVENT(drv_remove_interface, 164TRACE_EVENT(drv_remove_interface,
140 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), 165 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata),
141 166
@@ -336,7 +361,7 @@ TRACE_EVENT(drv_set_key,
336 LOCAL_ENTRY 361 LOCAL_ENTRY
337 VIF_ENTRY 362 VIF_ENTRY
338 STA_ENTRY 363 STA_ENTRY
339 __field(enum ieee80211_key_alg, alg) 364 __field(u32, cipher)
340 __field(u8, hw_key_idx) 365 __field(u8, hw_key_idx)
341 __field(u8, flags) 366 __field(u8, flags)
342 __field(s8, keyidx) 367 __field(s8, keyidx)
@@ -346,7 +371,7 @@ TRACE_EVENT(drv_set_key,
346 LOCAL_ASSIGN; 371 LOCAL_ASSIGN;
347 VIF_ASSIGN; 372 VIF_ASSIGN;
348 STA_ASSIGN; 373 STA_ASSIGN;
349 __entry->alg = key->alg; 374 __entry->cipher = key->cipher;
350 __entry->flags = key->flags; 375 __entry->flags = key->flags;
351 __entry->keyidx = key->keyidx; 376 __entry->keyidx = key->keyidx;
352 __entry->hw_key_idx = key->hw_key_idx; 377 __entry->hw_key_idx = key->hw_key_idx;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 9d101fb33861..11f74f5f7b2f 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -265,3 +265,31 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
265 265
266 return 0; 266 return 0;
267} 267}
268
269void ieee80211_request_smps_work(struct work_struct *work)
270{
271 struct ieee80211_sub_if_data *sdata =
272 container_of(work, struct ieee80211_sub_if_data,
273 u.mgd.request_smps_work);
274
275 mutex_lock(&sdata->u.mgd.mtx);
276 __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode);
277 mutex_unlock(&sdata->u.mgd.mtx);
278}
279
280void ieee80211_request_smps(struct ieee80211_vif *vif,
281 enum ieee80211_smps_mode smps_mode)
282{
283 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
284
285 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
286 return;
287
288 if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF))
289 smps_mode = IEEE80211_SMPS_AUTOMATIC;
290
291 ieee80211_queue_work(&sdata->local->hw,
292 &sdata->u.mgd.request_smps_work);
293}
294/* this might change ... don't want non-open drivers using it */
295EXPORT_SYMBOL_GPL(ieee80211_request_smps);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index c691780725a7..1a3aae54f0cf 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -427,8 +427,8 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
427 return NULL; 427 return NULL;
428 428
429#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 429#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
430 printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", 430 wiphy_debug(local->hw.wiphy, "Adding new IBSS station %pM (dev=%s)\n",
431 wiphy_name(local->hw.wiphy), addr, sdata->name); 431 addr, sdata->name);
432#endif 432#endif
433 433
434 sta = sta_info_alloc(sdata, addr, gfp); 434 sta = sta_info_alloc(sdata, addr, gfp);
@@ -920,12 +920,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); 920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN);
921 sdata->u.ibss.ssid_len = params->ssid_len; 921 sdata->u.ibss.ssid_len = params->ssid_len;
922 922
923 mutex_unlock(&sdata->u.ibss.mtx);
924
925 mutex_lock(&sdata->local->mtx);
923 ieee80211_recalc_idle(sdata->local); 926 ieee80211_recalc_idle(sdata->local);
927 mutex_unlock(&sdata->local->mtx);
924 928
925 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 929 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
926 930
927 mutex_unlock(&sdata->u.ibss.mtx);
928
929 return 0; 931 return 0;
930} 932}
931 933
@@ -980,7 +982,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
980 982
981 mutex_unlock(&sdata->u.ibss.mtx); 983 mutex_unlock(&sdata->u.ibss.mtx);
982 984
985 mutex_lock(&local->mtx);
983 ieee80211_recalc_idle(sdata->local); 986 ieee80211_recalc_idle(sdata->local);
987 mutex_unlock(&local->mtx);
984 988
985 return 0; 989 return 0;
986} 990}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 65e0ed6c2975..4e635e2fabdb 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
50 * increased memory use (about 2 kB of RAM per entry). */ 50 * increased memory use (about 2 kB of RAM per entry). */
51#define IEEE80211_FRAGMENT_MAX 4 51#define IEEE80211_FRAGMENT_MAX 4
52 52
53/*
54 * Time after which we ignore scan results and no longer report/use
55 * them in any way.
56 */
57#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
58
59#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) 53#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024))
60 54
61#define IEEE80211_DEFAULT_UAPSD_QUEUES \ 55#define IEEE80211_DEFAULT_UAPSD_QUEUES \
@@ -170,6 +164,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
170#define IEEE80211_RX_RA_MATCH BIT(1) 164#define IEEE80211_RX_RA_MATCH BIT(1)
171#define IEEE80211_RX_AMSDU BIT(2) 165#define IEEE80211_RX_AMSDU BIT(2)
172#define IEEE80211_RX_FRAGMENTED BIT(3) 166#define IEEE80211_RX_FRAGMENTED BIT(3)
167#define IEEE80211_MALFORMED_ACTION_FRM BIT(4)
173/* only add flags here that do not change with subframes of an aMPDU */ 168/* only add flags here that do not change with subframes of an aMPDU */
174 169
175struct ieee80211_rx_data { 170struct ieee80211_rx_data {
@@ -343,7 +338,10 @@ struct ieee80211_if_managed {
343 unsigned long timers_running; /* used for quiesce/restart */ 338 unsigned long timers_running; /* used for quiesce/restart */
344 bool powersave; /* powersave requested for this iface */ 339 bool powersave; /* powersave requested for this iface */
345 enum ieee80211_smps_mode req_smps, /* requested smps mode */ 340 enum ieee80211_smps_mode req_smps, /* requested smps mode */
346 ap_smps; /* smps mode AP thinks we're in */ 341 ap_smps, /* smps mode AP thinks we're in */
342 driver_smps_mode; /* smps mode request */
343
344 struct work_struct request_smps_work;
347 345
348 unsigned int flags; 346 unsigned int flags;
349 347
@@ -371,6 +369,13 @@ struct ieee80211_if_managed {
371 int ave_beacon_signal; 369 int ave_beacon_signal;
372 370
373 /* 371 /*
372 * Number of Beacon frames used in ave_beacon_signal. This can be used
373 * to avoid generating less reliable cqm events that would be based
374 * only on couple of received frames.
375 */
376 unsigned int count_beacon_signal;
377
378 /*
374 * Last Beacon frame signal strength average (ave_beacon_signal / 16) 379 * Last Beacon frame signal strength average (ave_beacon_signal / 16)
375 * that triggered a cqm event. 0 indicates that no event has been 380 * that triggered a cqm event. 0 indicates that no event has been
376 * generated for the current association. 381 * generated for the current association.
@@ -474,6 +479,19 @@ enum ieee80211_sub_if_data_flags {
474 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), 479 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
475}; 480};
476 481
482/**
483 * enum ieee80211_sdata_state_bits - virtual interface state bits
484 * @SDATA_STATE_RUNNING: virtual interface is up & running; this
485 * mirrors netif_running() but is separate for interface type
486 * change handling while the interface is up
487 * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel
488 * mode, so queues are stopped
489 */
490enum ieee80211_sdata_state_bits {
491 SDATA_STATE_RUNNING,
492 SDATA_STATE_OFFCHANNEL,
493};
494
477struct ieee80211_sub_if_data { 495struct ieee80211_sub_if_data {
478 struct list_head list; 496 struct list_head list;
479 497
@@ -487,6 +505,8 @@ struct ieee80211_sub_if_data {
487 505
488 unsigned int flags; 506 unsigned int flags;
489 507
508 unsigned long state;
509
490 int drop_unencrypted; 510 int drop_unencrypted;
491 511
492 char name[IFNAMSIZ]; 512 char name[IFNAMSIZ];
@@ -497,6 +517,9 @@ struct ieee80211_sub_if_data {
497 */ 517 */
498 bool ht_opmode_valid; 518 bool ht_opmode_valid;
499 519
520 /* to detect idle changes */
521 bool old_idle;
522
500 /* Fragment table for host-based reassembly */ 523 /* Fragment table for host-based reassembly */
501 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; 524 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
502 unsigned int fragment_next; 525 unsigned int fragment_next;
@@ -508,6 +531,8 @@ struct ieee80211_sub_if_data {
508 struct ieee80211_key *default_mgmt_key; 531 struct ieee80211_key *default_mgmt_key;
509 532
510 u16 sequence_number; 533 u16 sequence_number;
534 __be16 control_port_protocol;
535 bool control_port_no_encrypt;
511 536
512 struct work_struct work; 537 struct work_struct work;
513 struct sk_buff_head skb_queue; 538 struct sk_buff_head skb_queue;
@@ -595,11 +620,17 @@ enum queue_stop_reason {
595 * determine if we are on the operating channel or not 620 * determine if we are on the operating channel or not
596 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, 621 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning,
597 * gets only set in conjunction with SCAN_SW_SCANNING 622 * gets only set in conjunction with SCAN_SW_SCANNING
623 * @SCAN_COMPLETED: Set for our scan work function when the driver reported
624 * that the scan completed.
625 * @SCAN_ABORTED: Set for our scan work function when the driver reported
626 * a scan complete for an aborted scan.
598 */ 627 */
599enum { 628enum {
600 SCAN_SW_SCANNING, 629 SCAN_SW_SCANNING,
601 SCAN_HW_SCANNING, 630 SCAN_HW_SCANNING,
602 SCAN_OFF_CHANNEL, 631 SCAN_OFF_CHANNEL,
632 SCAN_COMPLETED,
633 SCAN_ABORTED,
603}; 634};
604 635
605/** 636/**
@@ -634,7 +665,6 @@ struct ieee80211_local {
634 /* 665 /*
635 * work stuff, potentially off-channel (in the future) 666 * work stuff, potentially off-channel (in the future)
636 */ 667 */
637 struct mutex work_mtx;
638 struct list_head work_list; 668 struct list_head work_list;
639 struct timer_list work_timer; 669 struct timer_list work_timer;
640 struct work_struct work_work; 670 struct work_struct work_work;
@@ -656,6 +686,8 @@ struct ieee80211_local {
656 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll; 686 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll;
657 unsigned int filter_flags; /* FIF_* */ 687 unsigned int filter_flags; /* FIF_* */
658 688
689 bool wiphy_ciphers_allocated;
690
659 /* protects the aggregated multicast list and filter calls */ 691 /* protects the aggregated multicast list and filter calls */
660 spinlock_t filter_lock; 692 spinlock_t filter_lock;
661 693
@@ -746,9 +778,10 @@ struct ieee80211_local {
746 */ 778 */
747 struct mutex key_mtx; 779 struct mutex key_mtx;
748 780
781 /* mutex for scan and work locking */
782 struct mutex mtx;
749 783
750 /* Scanning and BSS list */ 784 /* Scanning and BSS list */
751 struct mutex scan_mtx;
752 unsigned long scanning; 785 unsigned long scanning;
753 struct cfg80211_ssid scan_ssid; 786 struct cfg80211_ssid scan_ssid;
754 struct cfg80211_scan_request *int_scan_req; 787 struct cfg80211_scan_request *int_scan_req;
@@ -870,6 +903,11 @@ struct ieee80211_local {
870 struct dentry *keys; 903 struct dentry *keys;
871 } debugfs; 904 } debugfs;
872#endif 905#endif
906
907 /* dummy netdev for use w/ NAPI */
908 struct net_device napi_dev;
909
910 struct napi_struct napi;
873}; 911};
874 912
875static inline struct ieee80211_sub_if_data * 913static inline struct ieee80211_sub_if_data *
@@ -1071,7 +1109,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local);
1071 1109
1072static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) 1110static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
1073{ 1111{
1074 return netif_running(sdata->dev); 1112 return test_bit(SDATA_STATE_RUNNING, &sdata->state);
1075} 1113}
1076 1114
1077/* tx handling */ 1115/* tx handling */
@@ -1105,6 +1143,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
1105int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, 1143int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
1106 enum ieee80211_smps_mode smps, const u8 *da, 1144 enum ieee80211_smps_mode smps, const u8 *da,
1107 const u8 *bssid); 1145 const u8 *bssid);
1146void ieee80211_request_smps_work(struct work_struct *work);
1108 1147
1109void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 1148void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1110 u16 initiator, u16 reason); 1149 u16 initiator, u16 reason);
@@ -1131,6 +1170,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
1131void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); 1170void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
1132void ieee80211_ba_session_work(struct work_struct *work); 1171void ieee80211_ba_session_work(struct work_struct *work);
1133void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); 1172void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
1173void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
1134 1174
1135/* Spectrum management */ 1175/* Spectrum management */
1136void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 1176void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1146,6 +1186,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw);
1146 1186
1147static inline int __ieee80211_resume(struct ieee80211_hw *hw) 1187static inline int __ieee80211_resume(struct ieee80211_hw *hw)
1148{ 1188{
1189 struct ieee80211_local *local = hw_to_local(hw);
1190
1191 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
1192 "%s: resume with hardware scan still in progress\n",
1193 wiphy_name(hw->wiphy));
1194
1149 return ieee80211_reconfig(hw_to_local(hw)); 1195 return ieee80211_reconfig(hw_to_local(hw));
1150} 1196}
1151#else 1197#else
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ebbe264e2b0b..c1cc200ac81f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -94,21 +94,14 @@ static inline int identical_mac_addr_allowed(int type1, int type2)
94 type2 == NL80211_IFTYPE_AP_VLAN)); 94 type2 == NL80211_IFTYPE_AP_VLAN));
95} 95}
96 96
97static int ieee80211_open(struct net_device *dev) 97static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
98 enum nl80211_iftype iftype)
98{ 99{
99 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
100 struct ieee80211_sub_if_data *nsdata;
101 struct ieee80211_local *local = sdata->local; 100 struct ieee80211_local *local = sdata->local;
102 struct sta_info *sta; 101 struct ieee80211_sub_if_data *nsdata;
103 u32 changed = 0; 102 struct net_device *dev = sdata->dev;
104 int res;
105 u32 hw_reconf_flags = 0;
106 u8 null_addr[ETH_ALEN] = {0};
107 103
108 /* fail early if user set an invalid address */ 104 ASSERT_RTNL();
109 if (compare_ether_addr(dev->dev_addr, null_addr) &&
110 !is_valid_ether_addr(dev->dev_addr))
111 return -EADDRNOTAVAIL;
112 105
113 /* we hold the RTNL here so can safely walk the list */ 106 /* we hold the RTNL here so can safely walk the list */
114 list_for_each_entry(nsdata, &local->interfaces, list) { 107 list_for_each_entry(nsdata, &local->interfaces, list) {
@@ -125,7 +118,7 @@ static int ieee80211_open(struct net_device *dev)
125 * belonging to the same hardware. Then, however, we're 118 * belonging to the same hardware. Then, however, we're
126 * faced with having to adopt two different TSF timers... 119 * faced with having to adopt two different TSF timers...
127 */ 120 */
128 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 121 if (iftype == NL80211_IFTYPE_ADHOC &&
129 nsdata->vif.type == NL80211_IFTYPE_ADHOC) 122 nsdata->vif.type == NL80211_IFTYPE_ADHOC)
130 return -EBUSY; 123 return -EBUSY;
131 124
@@ -139,19 +132,36 @@ static int ieee80211_open(struct net_device *dev)
139 /* 132 /*
140 * check whether it may have the same address 133 * check whether it may have the same address
141 */ 134 */
142 if (!identical_mac_addr_allowed(sdata->vif.type, 135 if (!identical_mac_addr_allowed(iftype,
143 nsdata->vif.type)) 136 nsdata->vif.type))
144 return -ENOTUNIQ; 137 return -ENOTUNIQ;
145 138
146 /* 139 /*
147 * can only add VLANs to enabled APs 140 * can only add VLANs to enabled APs
148 */ 141 */
149 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && 142 if (iftype == NL80211_IFTYPE_AP_VLAN &&
150 nsdata->vif.type == NL80211_IFTYPE_AP) 143 nsdata->vif.type == NL80211_IFTYPE_AP)
151 sdata->bss = &nsdata->u.ap; 144 sdata->bss = &nsdata->u.ap;
152 } 145 }
153 } 146 }
154 147
148 return 0;
149}
150
151/*
152 * NOTE: Be very careful when changing this function, it must NOT return
153 * an error on interface type changes that have been pre-checked, so most
154 * checks should be in ieee80211_check_concurrent_iface.
155 */
156static int ieee80211_do_open(struct net_device *dev, bool coming_up)
157{
158 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
159 struct ieee80211_local *local = sdata->local;
160 struct sta_info *sta;
161 u32 changed = 0;
162 int res;
163 u32 hw_reconf_flags = 0;
164
155 switch (sdata->vif.type) { 165 switch (sdata->vif.type) {
156 case NL80211_IFTYPE_WDS: 166 case NL80211_IFTYPE_WDS:
157 if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) 167 if (!is_valid_ether_addr(sdata->u.wds.remote_addr))
@@ -177,7 +187,7 @@ static int ieee80211_open(struct net_device *dev)
177 /* no special treatment */ 187 /* no special treatment */
178 break; 188 break;
179 case NL80211_IFTYPE_UNSPECIFIED: 189 case NL80211_IFTYPE_UNSPECIFIED:
180 case __NL80211_IFTYPE_AFTER_LAST: 190 case NUM_NL80211_IFTYPES:
181 /* cannot happen */ 191 /* cannot happen */
182 WARN_ON(1); 192 WARN_ON(1);
183 break; 193 break;
@@ -187,39 +197,30 @@ static int ieee80211_open(struct net_device *dev)
187 res = drv_start(local); 197 res = drv_start(local);
188 if (res) 198 if (res)
189 goto err_del_bss; 199 goto err_del_bss;
200 if (local->ops->napi_poll)
201 napi_enable(&local->napi);
190 /* we're brought up, everything changes */ 202 /* we're brought up, everything changes */
191 hw_reconf_flags = ~0; 203 hw_reconf_flags = ~0;
192 ieee80211_led_radio(local, true); 204 ieee80211_led_radio(local, true);
193 } 205 }
194 206
195 /* 207 /*
196 * Check all interfaces and copy the hopefully now-present 208 * Copy the hopefully now-present MAC address to
197 * MAC address to those that have the special null one. 209 * this interface, if it has the special null one.
198 */ 210 */
199 list_for_each_entry(nsdata, &local->interfaces, list) { 211 if (is_zero_ether_addr(dev->dev_addr)) {
200 struct net_device *ndev = nsdata->dev; 212 memcpy(dev->dev_addr,
201 213 local->hw.wiphy->perm_addr,
202 /* 214 ETH_ALEN);
203 * No need to check running since we do not allow 215 memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN);
204 * it to start up with this invalid address. 216
205 */ 217 if (!is_valid_ether_addr(dev->dev_addr)) {
206 if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) { 218 if (!local->open_count)
207 memcpy(ndev->dev_addr, 219 drv_stop(local);
208 local->hw.wiphy->perm_addr, 220 return -EADDRNOTAVAIL;
209 ETH_ALEN);
210 memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN);
211 } 221 }
212 } 222 }
213 223
214 /*
215 * Validate the MAC address for this device.
216 */
217 if (!is_valid_ether_addr(dev->dev_addr)) {
218 if (!local->open_count)
219 drv_stop(local);
220 return -EADDRNOTAVAIL;
221 }
222
223 switch (sdata->vif.type) { 224 switch (sdata->vif.type) {
224 case NL80211_IFTYPE_AP_VLAN: 225 case NL80211_IFTYPE_AP_VLAN:
225 /* no need to tell driver */ 226 /* no need to tell driver */
@@ -253,9 +254,11 @@ static int ieee80211_open(struct net_device *dev)
253 netif_carrier_on(dev); 254 netif_carrier_on(dev);
254 break; 255 break;
255 default: 256 default:
256 res = drv_add_interface(local, &sdata->vif); 257 if (coming_up) {
257 if (res) 258 res = drv_add_interface(local, &sdata->vif);
258 goto err_stop; 259 if (res)
260 goto err_stop;
261 }
259 262
260 if (ieee80211_vif_is_mesh(&sdata->vif)) { 263 if (ieee80211_vif_is_mesh(&sdata->vif)) {
261 local->fif_other_bss++; 264 local->fif_other_bss++;
@@ -307,9 +310,13 @@ static int ieee80211_open(struct net_device *dev)
307 if (sdata->flags & IEEE80211_SDATA_PROMISC) 310 if (sdata->flags & IEEE80211_SDATA_PROMISC)
308 atomic_inc(&local->iff_promiscs); 311 atomic_inc(&local->iff_promiscs);
309 312
313 mutex_lock(&local->mtx);
310 hw_reconf_flags |= __ieee80211_recalc_idle(local); 314 hw_reconf_flags |= __ieee80211_recalc_idle(local);
315 mutex_unlock(&local->mtx);
316
317 if (coming_up)
318 local->open_count++;
311 319
312 local->open_count++;
313 if (hw_reconf_flags) { 320 if (hw_reconf_flags) {
314 ieee80211_hw_config(local, hw_reconf_flags); 321 ieee80211_hw_config(local, hw_reconf_flags);
315 /* 322 /*
@@ -324,6 +331,8 @@ static int ieee80211_open(struct net_device *dev)
324 331
325 netif_tx_start_all_queues(dev); 332 netif_tx_start_all_queues(dev);
326 333
334 set_bit(SDATA_STATE_RUNNING, &sdata->state);
335
327 return 0; 336 return 0;
328 err_del_interface: 337 err_del_interface:
329 drv_remove_interface(local, &sdata->vif); 338 drv_remove_interface(local, &sdata->vif);
@@ -337,19 +346,38 @@ static int ieee80211_open(struct net_device *dev)
337 return res; 346 return res;
338} 347}
339 348
340static int ieee80211_stop(struct net_device *dev) 349static int ieee80211_open(struct net_device *dev)
341{ 350{
342 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 351 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
352 int err;
353
354 /* fail early if user set an invalid address */
355 if (!is_zero_ether_addr(dev->dev_addr) &&
356 !is_valid_ether_addr(dev->dev_addr))
357 return -EADDRNOTAVAIL;
358
359 err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type);
360 if (err)
361 return err;
362
363 return ieee80211_do_open(dev, true);
364}
365
366static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
367 bool going_down)
368{
343 struct ieee80211_local *local = sdata->local; 369 struct ieee80211_local *local = sdata->local;
344 unsigned long flags; 370 unsigned long flags;
345 struct sk_buff *skb, *tmp; 371 struct sk_buff *skb, *tmp;
346 u32 hw_reconf_flags = 0; 372 u32 hw_reconf_flags = 0;
347 int i; 373 int i;
348 374
375 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
376
349 /* 377 /*
350 * Stop TX on this interface first. 378 * Stop TX on this interface first.
351 */ 379 */
352 netif_tx_stop_all_queues(dev); 380 netif_tx_stop_all_queues(sdata->dev);
353 381
354 /* 382 /*
355 * Purge work for this interface. 383 * Purge work for this interface.
@@ -366,12 +394,9 @@ static int ieee80211_stop(struct net_device *dev)
366 * (because if we remove a STA after ops->remove_interface() 394 * (because if we remove a STA after ops->remove_interface()
367 * the driver will have removed the vif info already!) 395 * the driver will have removed the vif info already!)
368 * 396 *
369 * We could relax this and only unlink the stations from the 397 * This is relevant only in AP, WDS and mesh modes, since in
370 * hash table and list but keep them on a per-sdata list that 398 * all other modes we've already removed all stations when
371 * will be inserted back again when the interface is brought 399 * disconnecting etc.
372 * up again, but I don't currently see a use case for that,
373 * except with WDS which gets a STA entry created when it is
374 * brought up.
375 */ 400 */
376 sta_info_flush(local, sdata); 401 sta_info_flush(local, sdata);
377 402
@@ -390,11 +415,12 @@ static int ieee80211_stop(struct net_device *dev)
390 if (sdata->vif.type == NL80211_IFTYPE_AP) 415 if (sdata->vif.type == NL80211_IFTYPE_AP)
391 local->fif_pspoll--; 416 local->fif_pspoll--;
392 417
393 netif_addr_lock_bh(dev); 418 netif_addr_lock_bh(sdata->dev);
394 spin_lock_bh(&local->filter_lock); 419 spin_lock_bh(&local->filter_lock);
395 __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); 420 __hw_addr_unsync(&local->mc_list, &sdata->dev->mc,
421 sdata->dev->addr_len);
396 spin_unlock_bh(&local->filter_lock); 422 spin_unlock_bh(&local->filter_lock);
397 netif_addr_unlock_bh(dev); 423 netif_addr_unlock_bh(sdata->dev);
398 424
399 ieee80211_configure_filter(local); 425 ieee80211_configure_filter(local);
400 426
@@ -406,11 +432,21 @@ static int ieee80211_stop(struct net_device *dev)
406 struct ieee80211_sub_if_data *vlan, *tmpsdata; 432 struct ieee80211_sub_if_data *vlan, *tmpsdata;
407 struct beacon_data *old_beacon = sdata->u.ap.beacon; 433 struct beacon_data *old_beacon = sdata->u.ap.beacon;
408 434
435 /* sdata_running will return false, so this will disable */
436 ieee80211_bss_info_change_notify(sdata,
437 BSS_CHANGED_BEACON_ENABLED);
438
409 /* remove beacon */ 439 /* remove beacon */
410 rcu_assign_pointer(sdata->u.ap.beacon, NULL); 440 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
411 synchronize_rcu(); 441 synchronize_rcu();
412 kfree(old_beacon); 442 kfree(old_beacon);
413 443
444 /* free all potentially still buffered bcast frames */
445 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
446 local->total_ps_buffered--;
447 dev_kfree_skb(skb);
448 }
449
414 /* down all dependent devices, that is VLANs */ 450 /* down all dependent devices, that is VLANs */
415 list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, 451 list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
416 u.vlan.list) 452 u.vlan.list)
@@ -418,7 +454,8 @@ static int ieee80211_stop(struct net_device *dev)
418 WARN_ON(!list_empty(&sdata->u.ap.vlans)); 454 WARN_ON(!list_empty(&sdata->u.ap.vlans));
419 } 455 }
420 456
421 local->open_count--; 457 if (going_down)
458 local->open_count--;
422 459
423 switch (sdata->vif.type) { 460 switch (sdata->vif.type) {
424 case NL80211_IFTYPE_AP_VLAN: 461 case NL80211_IFTYPE_AP_VLAN:
@@ -450,27 +487,6 @@ static int ieee80211_stop(struct net_device *dev)
450 487
451 ieee80211_configure_filter(local); 488 ieee80211_configure_filter(local);
452 break; 489 break;
453 case NL80211_IFTYPE_STATION:
454 del_timer_sync(&sdata->u.mgd.chswitch_timer);
455 del_timer_sync(&sdata->u.mgd.timer);
456 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
457 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
458 /*
459 * If any of the timers fired while we waited for it, it will
460 * have queued its work. Now the work will be running again
461 * but will not rearm the timer again because it checks
462 * whether the interface is running, which, at this point,
463 * it no longer is.
464 */
465 cancel_work_sync(&sdata->u.mgd.chswitch_work);
466 cancel_work_sync(&sdata->u.mgd.monitor_work);
467 cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work);
468
469 /* fall through */
470 case NL80211_IFTYPE_ADHOC:
471 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
472 del_timer_sync(&sdata->u.ibss.timer);
473 /* fall through */
474 case NL80211_IFTYPE_MESH_POINT: 490 case NL80211_IFTYPE_MESH_POINT:
475 if (ieee80211_vif_is_mesh(&sdata->vif)) { 491 if (ieee80211_vif_is_mesh(&sdata->vif)) {
476 /* other_bss and allmulti are always set on mesh 492 /* other_bss and allmulti are always set on mesh
@@ -498,27 +514,34 @@ static int ieee80211_stop(struct net_device *dev)
498 ieee80211_scan_cancel(local); 514 ieee80211_scan_cancel(local);
499 515
500 /* 516 /*
501 * Disable beaconing for AP and mesh, IBSS can't 517 * Disable beaconing here for mesh only, AP and IBSS
502 * still be joined to a network at this point. 518 * are already taken care of.
503 */ 519 */
504 if (sdata->vif.type == NL80211_IFTYPE_AP || 520 if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
505 sdata->vif.type == NL80211_IFTYPE_MESH_POINT) {
506 ieee80211_bss_info_change_notify(sdata, 521 ieee80211_bss_info_change_notify(sdata,
507 BSS_CHANGED_BEACON_ENABLED); 522 BSS_CHANGED_BEACON_ENABLED);
508 }
509 523
510 /* free all remaining keys, there shouldn't be any */ 524 /*
525 * Free all remaining keys, there shouldn't be any,
526 * except maybe group keys in AP more or WDS?
527 */
511 ieee80211_free_keys(sdata); 528 ieee80211_free_keys(sdata);
512 drv_remove_interface(local, &sdata->vif); 529
530 if (going_down)
531 drv_remove_interface(local, &sdata->vif);
513 } 532 }
514 533
515 sdata->bss = NULL; 534 sdata->bss = NULL;
516 535
536 mutex_lock(&local->mtx);
517 hw_reconf_flags |= __ieee80211_recalc_idle(local); 537 hw_reconf_flags |= __ieee80211_recalc_idle(local);
538 mutex_unlock(&local->mtx);
518 539
519 ieee80211_recalc_ps(local, -1); 540 ieee80211_recalc_ps(local, -1);
520 541
521 if (local->open_count == 0) { 542 if (local->open_count == 0) {
543 if (local->ops->napi_poll)
544 napi_disable(&local->napi);
522 ieee80211_clear_tx_pending(local); 545 ieee80211_clear_tx_pending(local);
523 ieee80211_stop_device(local); 546 ieee80211_stop_device(local);
524 547
@@ -541,6 +564,13 @@ static int ieee80211_stop(struct net_device *dev)
541 } 564 }
542 } 565 }
543 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 566 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
567}
568
569static int ieee80211_stop(struct net_device *dev)
570{
571 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
572
573 ieee80211_do_stop(sdata, true);
544 574
545 return 0; 575 return 0;
546} 576}
@@ -585,8 +615,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
585{ 615{
586 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 616 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
587 struct ieee80211_local *local = sdata->local; 617 struct ieee80211_local *local = sdata->local;
588 struct beacon_data *beacon;
589 struct sk_buff *skb;
590 int flushed; 618 int flushed;
591 int i; 619 int i;
592 620
@@ -599,37 +627,8 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
599 __skb_queue_purge(&sdata->fragments[i].skb_list); 627 __skb_queue_purge(&sdata->fragments[i].skb_list);
600 sdata->fragment_next = 0; 628 sdata->fragment_next = 0;
601 629
602 switch (sdata->vif.type) { 630 if (ieee80211_vif_is_mesh(&sdata->vif))
603 case NL80211_IFTYPE_AP: 631 mesh_rmc_free(sdata);
604 beacon = sdata->u.ap.beacon;
605 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
606 synchronize_rcu();
607 kfree(beacon);
608
609 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
610 local->total_ps_buffered--;
611 dev_kfree_skb(skb);
612 }
613
614 break;
615 case NL80211_IFTYPE_MESH_POINT:
616 if (ieee80211_vif_is_mesh(&sdata->vif))
617 mesh_rmc_free(sdata);
618 break;
619 case NL80211_IFTYPE_ADHOC:
620 if (WARN_ON(sdata->u.ibss.presp))
621 kfree_skb(sdata->u.ibss.presp);
622 break;
623 case NL80211_IFTYPE_STATION:
624 case NL80211_IFTYPE_WDS:
625 case NL80211_IFTYPE_AP_VLAN:
626 case NL80211_IFTYPE_MONITOR:
627 break;
628 case NL80211_IFTYPE_UNSPECIFIED:
629 case __NL80211_IFTYPE_AFTER_LAST:
630 BUG();
631 break;
632 }
633 632
634 flushed = sta_info_flush(local, sdata); 633 flushed = sta_info_flush(local, sdata);
635 WARN_ON(flushed); 634 WARN_ON(flushed);
@@ -847,6 +846,9 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
847 sdata->dev->netdev_ops = &ieee80211_dataif_ops; 846 sdata->dev->netdev_ops = &ieee80211_dataif_ops;
848 sdata->wdev.iftype = type; 847 sdata->wdev.iftype = type;
849 848
849 sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
850 sdata->control_port_no_encrypt = false;
851
850 /* only monitor differs */ 852 /* only monitor differs */
851 sdata->dev->type = ARPHRD_ETHER; 853 sdata->dev->type = ARPHRD_ETHER;
852 854
@@ -878,7 +880,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
878 case NL80211_IFTYPE_AP_VLAN: 880 case NL80211_IFTYPE_AP_VLAN:
879 break; 881 break;
880 case NL80211_IFTYPE_UNSPECIFIED: 882 case NL80211_IFTYPE_UNSPECIFIED:
881 case __NL80211_IFTYPE_AFTER_LAST: 883 case NUM_NL80211_IFTYPES:
882 BUG(); 884 BUG();
883 break; 885 break;
884 } 886 }
@@ -886,9 +888,72 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
886 ieee80211_debugfs_add_netdev(sdata); 888 ieee80211_debugfs_add_netdev(sdata);
887} 889}
888 890
891static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
892 enum nl80211_iftype type)
893{
894 struct ieee80211_local *local = sdata->local;
895 int ret, err;
896
897 ASSERT_RTNL();
898
899 if (!local->ops->change_interface)
900 return -EBUSY;
901
902 switch (sdata->vif.type) {
903 case NL80211_IFTYPE_AP:
904 case NL80211_IFTYPE_STATION:
905 case NL80211_IFTYPE_ADHOC:
906 /*
907 * Could maybe also all others here?
908 * Just not sure how that interacts
909 * with the RX/config path e.g. for
910 * mesh.
911 */
912 break;
913 default:
914 return -EBUSY;
915 }
916
917 switch (type) {
918 case NL80211_IFTYPE_AP:
919 case NL80211_IFTYPE_STATION:
920 case NL80211_IFTYPE_ADHOC:
921 /*
922 * Could probably support everything
923 * but WDS here (WDS do_open can fail
924 * under memory pressure, which this
925 * code isn't prepared to handle).
926 */
927 break;
928 default:
929 return -EBUSY;
930 }
931
932 ret = ieee80211_check_concurrent_iface(sdata, type);
933 if (ret)
934 return ret;
935
936 ieee80211_do_stop(sdata, false);
937
938 ieee80211_teardown_sdata(sdata->dev);
939
940 ret = drv_change_interface(local, sdata, type);
941 if (ret)
942 type = sdata->vif.type;
943
944 ieee80211_setup_sdata(sdata, type);
945
946 err = ieee80211_do_open(sdata->dev, false);
947 WARN(err, "type change: do_open returned %d", err);
948
949 return ret;
950}
951
889int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, 952int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
890 enum nl80211_iftype type) 953 enum nl80211_iftype type)
891{ 954{
955 int ret;
956
892 ASSERT_RTNL(); 957 ASSERT_RTNL();
893 958
894 if (type == sdata->vif.type) 959 if (type == sdata->vif.type)
@@ -899,18 +964,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
899 type == NL80211_IFTYPE_ADHOC) 964 type == NL80211_IFTYPE_ADHOC)
900 return -EOPNOTSUPP; 965 return -EOPNOTSUPP;
901 966
902 /* 967 if (ieee80211_sdata_running(sdata)) {
903 * We could, here, on changes between IBSS/STA/MESH modes, 968 ret = ieee80211_runtime_change_iftype(sdata, type);
904 * invoke an MLME function instead that disassociates etc. 969 if (ret)
905 * and goes into the requested mode. 970 return ret;
906 */ 971 } else {
907 972 /* Purge and reset type-dependent state. */
908 if (ieee80211_sdata_running(sdata)) 973 ieee80211_teardown_sdata(sdata->dev);
909 return -EBUSY; 974 ieee80211_setup_sdata(sdata, type);
910 975 }
911 /* Purge and reset type-dependent state. */
912 ieee80211_teardown_sdata(sdata->dev);
913 ieee80211_setup_sdata(sdata, type);
914 976
915 /* reset some values that shouldn't be kept across type changes */ 977 /* reset some values that shouldn't be kept across type changes */
916 sdata->vif.bss_conf.basic_rates = 978 sdata->vif.bss_conf.basic_rates =
@@ -1167,8 +1229,7 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local,
1167 return 0; 1229 return 0;
1168 1230
1169#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1231#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1170 printk(KERN_DEBUG "%s: device no longer idle - %s\n", 1232 wiphy_debug(local->hw.wiphy, "device no longer idle - %s\n", reason);
1171 wiphy_name(local->hw.wiphy), reason);
1172#endif 1233#endif
1173 1234
1174 local->hw.conf.flags &= ~IEEE80211_CONF_IDLE; 1235 local->hw.conf.flags &= ~IEEE80211_CONF_IDLE;
@@ -1181,8 +1242,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local)
1181 return 0; 1242 return 0;
1182 1243
1183#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1244#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1184 printk(KERN_DEBUG "%s: device now idle\n", 1245 wiphy_debug(local->hw.wiphy, "device now idle\n");
1185 wiphy_name(local->hw.wiphy));
1186#endif 1246#endif
1187 1247
1188 drv_flush(local, false); 1248 drv_flush(local, false);
@@ -1195,28 +1255,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
1195{ 1255{
1196 struct ieee80211_sub_if_data *sdata; 1256 struct ieee80211_sub_if_data *sdata;
1197 int count = 0; 1257 int count = 0;
1258 bool working = false, scanning = false;
1259 struct ieee80211_work *wk;
1198 1260
1199 if (!list_empty(&local->work_list)) 1261#ifdef CONFIG_PROVE_LOCKING
1200 return ieee80211_idle_off(local, "working"); 1262 WARN_ON(debug_locks && !lockdep_rtnl_is_held() &&
1201 1263 !lockdep_is_held(&local->iflist_mtx));
1202 if (local->scanning) 1264#endif
1203 return ieee80211_idle_off(local, "scanning"); 1265 lockdep_assert_held(&local->mtx);
1204 1266
1205 list_for_each_entry(sdata, &local->interfaces, list) { 1267 list_for_each_entry(sdata, &local->interfaces, list) {
1206 if (!ieee80211_sdata_running(sdata)) 1268 if (!ieee80211_sdata_running(sdata)) {
1269 sdata->vif.bss_conf.idle = true;
1207 continue; 1270 continue;
1271 }
1272
1273 sdata->old_idle = sdata->vif.bss_conf.idle;
1274
1208 /* do not count disabled managed interfaces */ 1275 /* do not count disabled managed interfaces */
1209 if (sdata->vif.type == NL80211_IFTYPE_STATION && 1276 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
1210 !sdata->u.mgd.associated) 1277 !sdata->u.mgd.associated) {
1278 sdata->vif.bss_conf.idle = true;
1211 continue; 1279 continue;
1280 }
1212 /* do not count unused IBSS interfaces */ 1281 /* do not count unused IBSS interfaces */
1213 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 1282 if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
1214 !sdata->u.ibss.ssid_len) 1283 !sdata->u.ibss.ssid_len) {
1284 sdata->vif.bss_conf.idle = true;
1215 continue; 1285 continue;
1286 }
1216 /* count everything else */ 1287 /* count everything else */
1217 count++; 1288 count++;
1218 } 1289 }
1219 1290
1291 list_for_each_entry(wk, &local->work_list, list) {
1292 working = true;
1293 wk->sdata->vif.bss_conf.idle = false;
1294 }
1295
1296 if (local->scan_sdata) {
1297 scanning = true;
1298 local->scan_sdata->vif.bss_conf.idle = false;
1299 }
1300
1301 list_for_each_entry(sdata, &local->interfaces, list) {
1302 if (sdata->old_idle == sdata->vif.bss_conf.idle)
1303 continue;
1304 if (!ieee80211_sdata_running(sdata))
1305 continue;
1306 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
1307 }
1308
1309 if (working)
1310 return ieee80211_idle_off(local, "working");
1311 if (scanning)
1312 return ieee80211_idle_off(local, "scanning");
1220 if (!count) 1313 if (!count)
1221 return ieee80211_idle_on(local); 1314 return ieee80211_idle_on(local);
1222 else 1315 else
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 1b9d87ed143a..3570f8c2bb40 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -60,7 +60,7 @@ static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
60 return NULL; 60 return NULL;
61} 61}
62 62
63static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) 63static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
64{ 64{
65 struct ieee80211_sub_if_data *sdata; 65 struct ieee80211_sub_if_data *sdata;
66 struct ieee80211_sta *sta; 66 struct ieee80211_sta *sta;
@@ -68,8 +68,10 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
68 68
69 might_sleep(); 69 might_sleep();
70 70
71 if (!key->local->ops->set_key) 71 if (!key->local->ops->set_key) {
72 return; 72 ret = -EOPNOTSUPP;
73 goto out_unsupported;
74 }
73 75
74 assert_key_lock(key->local); 76 assert_key_lock(key->local);
75 77
@@ -87,10 +89,27 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
87 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; 89 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
88 90
89 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) 91 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP)
90 printk(KERN_ERR "mac80211-%s: failed to set key " 92 wiphy_err(key->local->hw.wiphy,
91 "(%d, %pM) to hardware (%d)\n", 93 "failed to set key (%d, %pM) to hardware (%d)\n",
92 wiphy_name(key->local->hw.wiphy), 94 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
93 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); 95
96out_unsupported:
97 if (ret) {
98 switch (key->conf.cipher) {
99 case WLAN_CIPHER_SUITE_WEP40:
100 case WLAN_CIPHER_SUITE_WEP104:
101 case WLAN_CIPHER_SUITE_TKIP:
102 case WLAN_CIPHER_SUITE_CCMP:
103 case WLAN_CIPHER_SUITE_AES_CMAC:
104 /* all of these we can do in software */
105 ret = 0;
106 break;
107 default:
108 ret = -EINVAL;
109 }
110 }
111
112 return ret;
94} 113}
95 114
96static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) 115static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
@@ -121,10 +140,9 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
121 sta, &key->conf); 140 sta, &key->conf);
122 141
123 if (ret) 142 if (ret)
124 printk(KERN_ERR "mac80211-%s: failed to remove key " 143 wiphy_err(key->local->hw.wiphy,
125 "(%d, %pM) from hardware (%d)\n", 144 "failed to remove key (%d, %pM) from hardware (%d)\n",
126 wiphy_name(key->local->hw.wiphy), 145 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
127 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
128 146
129 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 147 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
130} 148}
@@ -227,20 +245,18 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
227 } 245 }
228} 246}
229 247
230struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, 248struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
231 int idx,
232 size_t key_len,
233 const u8 *key_data, 249 const u8 *key_data,
234 size_t seq_len, const u8 *seq) 250 size_t seq_len, const u8 *seq)
235{ 251{
236 struct ieee80211_key *key; 252 struct ieee80211_key *key;
237 int i, j; 253 int i, j, err;
238 254
239 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS); 255 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
240 256
241 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); 257 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
242 if (!key) 258 if (!key)
243 return NULL; 259 return ERR_PTR(-ENOMEM);
244 260
245 /* 261 /*
246 * Default to software encryption; we'll later upload the 262 * Default to software encryption; we'll later upload the
@@ -249,15 +265,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
249 key->conf.flags = 0; 265 key->conf.flags = 0;
250 key->flags = 0; 266 key->flags = 0;
251 267
252 key->conf.alg = alg; 268 key->conf.cipher = cipher;
253 key->conf.keyidx = idx; 269 key->conf.keyidx = idx;
254 key->conf.keylen = key_len; 270 key->conf.keylen = key_len;
255 switch (alg) { 271 switch (cipher) {
256 case ALG_WEP: 272 case WLAN_CIPHER_SUITE_WEP40:
273 case WLAN_CIPHER_SUITE_WEP104:
257 key->conf.iv_len = WEP_IV_LEN; 274 key->conf.iv_len = WEP_IV_LEN;
258 key->conf.icv_len = WEP_ICV_LEN; 275 key->conf.icv_len = WEP_ICV_LEN;
259 break; 276 break;
260 case ALG_TKIP: 277 case WLAN_CIPHER_SUITE_TKIP:
261 key->conf.iv_len = TKIP_IV_LEN; 278 key->conf.iv_len = TKIP_IV_LEN;
262 key->conf.icv_len = TKIP_ICV_LEN; 279 key->conf.icv_len = TKIP_ICV_LEN;
263 if (seq) { 280 if (seq) {
@@ -269,7 +286,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
269 } 286 }
270 } 287 }
271 break; 288 break;
272 case ALG_CCMP: 289 case WLAN_CIPHER_SUITE_CCMP:
273 key->conf.iv_len = CCMP_HDR_LEN; 290 key->conf.iv_len = CCMP_HDR_LEN;
274 key->conf.icv_len = CCMP_MIC_LEN; 291 key->conf.icv_len = CCMP_MIC_LEN;
275 if (seq) { 292 if (seq) {
@@ -278,42 +295,38 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
278 key->u.ccmp.rx_pn[i][j] = 295 key->u.ccmp.rx_pn[i][j] =
279 seq[CCMP_PN_LEN - j - 1]; 296 seq[CCMP_PN_LEN - j - 1];
280 } 297 }
281 break;
282 case ALG_AES_CMAC:
283 key->conf.iv_len = 0;
284 key->conf.icv_len = sizeof(struct ieee80211_mmie);
285 if (seq)
286 for (j = 0; j < 6; j++)
287 key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
288 break;
289 }
290 memcpy(key->conf.key, key_data, key_len);
291 INIT_LIST_HEAD(&key->list);
292
293 if (alg == ALG_CCMP) {
294 /* 298 /*
295 * Initialize AES key state here as an optimization so that 299 * Initialize AES key state here as an optimization so that
296 * it does not need to be initialized for every packet. 300 * it does not need to be initialized for every packet.
297 */ 301 */
298 key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); 302 key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data);
299 if (!key->u.ccmp.tfm) { 303 if (IS_ERR(key->u.ccmp.tfm)) {
304 err = PTR_ERR(key->u.ccmp.tfm);
300 kfree(key); 305 kfree(key);
301 return NULL; 306 key = ERR_PTR(err);
302 } 307 }
303 } 308 break;
304 309 case WLAN_CIPHER_SUITE_AES_CMAC:
305 if (alg == ALG_AES_CMAC) { 310 key->conf.iv_len = 0;
311 key->conf.icv_len = sizeof(struct ieee80211_mmie);
312 if (seq)
313 for (j = 0; j < 6; j++)
314 key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
306 /* 315 /*
307 * Initialize AES key state here as an optimization so that 316 * Initialize AES key state here as an optimization so that
308 * it does not need to be initialized for every packet. 317 * it does not need to be initialized for every packet.
309 */ 318 */
310 key->u.aes_cmac.tfm = 319 key->u.aes_cmac.tfm =
311 ieee80211_aes_cmac_key_setup(key_data); 320 ieee80211_aes_cmac_key_setup(key_data);
312 if (!key->u.aes_cmac.tfm) { 321 if (IS_ERR(key->u.aes_cmac.tfm)) {
322 err = PTR_ERR(key->u.aes_cmac.tfm);
313 kfree(key); 323 kfree(key);
314 return NULL; 324 key = ERR_PTR(err);
315 } 325 }
326 break;
316 } 327 }
328 memcpy(key->conf.key, key_data, key_len);
329 INIT_LIST_HEAD(&key->list);
317 330
318 return key; 331 return key;
319} 332}
@@ -326,9 +339,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
326 if (key->local) 339 if (key->local)
327 ieee80211_key_disable_hw_accel(key); 340 ieee80211_key_disable_hw_accel(key);
328 341
329 if (key->conf.alg == ALG_CCMP) 342 if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
330 ieee80211_aes_key_free(key->u.ccmp.tfm); 343 ieee80211_aes_key_free(key->u.ccmp.tfm);
331 if (key->conf.alg == ALG_AES_CMAC) 344 if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
332 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); 345 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
333 if (key->local) 346 if (key->local)
334 ieee80211_debugfs_key_remove(key); 347 ieee80211_debugfs_key_remove(key);
@@ -336,12 +349,12 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
336 kfree(key); 349 kfree(key);
337} 350}
338 351
339void ieee80211_key_link(struct ieee80211_key *key, 352int ieee80211_key_link(struct ieee80211_key *key,
340 struct ieee80211_sub_if_data *sdata, 353 struct ieee80211_sub_if_data *sdata,
341 struct sta_info *sta) 354 struct sta_info *sta)
342{ 355{
343 struct ieee80211_key *old_key; 356 struct ieee80211_key *old_key;
344 int idx; 357 int idx, ret;
345 358
346 BUG_ON(!sdata); 359 BUG_ON(!sdata);
347 BUG_ON(!key); 360 BUG_ON(!key);
@@ -396,9 +409,11 @@ void ieee80211_key_link(struct ieee80211_key *key,
396 409
397 ieee80211_debugfs_key_add(key); 410 ieee80211_debugfs_key_add(key);
398 411
399 ieee80211_key_enable_hw_accel(key); 412 ret = ieee80211_key_enable_hw_accel(key);
400 413
401 mutex_unlock(&sdata->local->key_mtx); 414 mutex_unlock(&sdata->local->key_mtx);
415
416 return ret;
402} 417}
403 418
404static void __ieee80211_key_free(struct ieee80211_key *key) 419static void __ieee80211_key_free(struct ieee80211_key *key)
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index b665bbb7a471..cb9a4a65cc68 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -123,18 +123,16 @@ struct ieee80211_key {
123 struct ieee80211_key_conf conf; 123 struct ieee80211_key_conf conf;
124}; 124};
125 125
126struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, 126struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
127 int idx,
128 size_t key_len,
129 const u8 *key_data, 127 const u8 *key_data,
130 size_t seq_len, const u8 *seq); 128 size_t seq_len, const u8 *seq);
131/* 129/*
132 * Insert a key into data structures (sdata, sta if necessary) 130 * Insert a key into data structures (sdata, sta if necessary)
133 * to make it used, free old key. 131 * to make it used, free old key.
134 */ 132 */
135void ieee80211_key_link(struct ieee80211_key *key, 133int __must_check ieee80211_key_link(struct ieee80211_key *key,
136 struct ieee80211_sub_if_data *sdata, 134 struct ieee80211_sub_if_data *sdata,
137 struct sta_info *sta); 135 struct sta_info *sta);
138void ieee80211_key_free(struct ieee80211_local *local, 136void ieee80211_key_free(struct ieee80211_local *local,
139 struct ieee80211_key *key); 137 struct ieee80211_key *key);
140void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); 138void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ded5c3843e06..4935b843bcca 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -99,11 +99,13 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
99 int ret = 0; 99 int ret = 0;
100 int power; 100 int power;
101 enum nl80211_channel_type channel_type; 101 enum nl80211_channel_type channel_type;
102 u32 offchannel_flag;
102 103
103 might_sleep(); 104 might_sleep();
104 105
105 scan_chan = local->scan_channel; 106 scan_chan = local->scan_channel;
106 107
108 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
107 if (scan_chan) { 109 if (scan_chan) {
108 chan = scan_chan; 110 chan = scan_chan;
109 channel_type = NL80211_CHAN_NO_HT; 111 channel_type = NL80211_CHAN_NO_HT;
@@ -117,8 +119,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
117 channel_type = local->_oper_channel_type; 119 channel_type = local->_oper_channel_type;
118 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; 120 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
119 } 121 }
122 offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
120 123
121 if (chan != local->hw.conf.channel || 124 if (offchannel_flag || chan != local->hw.conf.channel ||
122 channel_type != local->hw.conf.channel_type) { 125 channel_type != local->hw.conf.channel_type) {
123 local->hw.conf.channel = chan; 126 local->hw.conf.channel = chan;
124 local->hw.conf.channel_type = channel_type; 127 local->hw.conf.channel_type = channel_type;
@@ -302,7 +305,13 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
302 305
303 trace_api_restart_hw(local); 306 trace_api_restart_hw(local);
304 307
305 /* use this reason, __ieee80211_resume will unblock it */ 308 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
309 "%s called with hardware scan in progress\n", __func__);
310
311 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)))
312 ieee80211_scan_cancel(local);
313
314 /* use this reason, ieee80211_reconfig will unblock it */
306 ieee80211_stop_queues_by_reason(hw, 315 ieee80211_stop_queues_by_reason(hw,
307 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 316 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
308 317
@@ -336,9 +345,6 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
336 struct ieee80211_if_managed *ifmgd; 345 struct ieee80211_if_managed *ifmgd;
337 int c = 0; 346 int c = 0;
338 347
339 if (!netif_running(ndev))
340 return NOTIFY_DONE;
341
342 /* Make sure it's our interface that got changed */ 348 /* Make sure it's our interface that got changed */
343 if (!wdev) 349 if (!wdev)
344 return NOTIFY_DONE; 350 return NOTIFY_DONE;
@@ -349,6 +355,9 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
349 sdata = IEEE80211_DEV_TO_SUB_IF(ndev); 355 sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
350 bss_conf = &sdata->vif.bss_conf; 356 bss_conf = &sdata->vif.bss_conf;
351 357
358 if (!ieee80211_sdata_running(sdata))
359 return NOTIFY_DONE;
360
352 /* ARP filtering is only supported in managed mode */ 361 /* ARP filtering is only supported in managed mode */
353 if (sdata->vif.type != NL80211_IFTYPE_STATION) 362 if (sdata->vif.type != NL80211_IFTYPE_STATION)
354 return NOTIFY_DONE; 363 return NOTIFY_DONE;
@@ -390,6 +399,65 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
390} 399}
391#endif 400#endif
392 401
402static int ieee80211_napi_poll(struct napi_struct *napi, int budget)
403{
404 struct ieee80211_local *local =
405 container_of(napi, struct ieee80211_local, napi);
406
407 return local->ops->napi_poll(&local->hw, budget);
408}
409
410void ieee80211_napi_schedule(struct ieee80211_hw *hw)
411{
412 struct ieee80211_local *local = hw_to_local(hw);
413
414 napi_schedule(&local->napi);
415}
416EXPORT_SYMBOL(ieee80211_napi_schedule);
417
418void ieee80211_napi_complete(struct ieee80211_hw *hw)
419{
420 struct ieee80211_local *local = hw_to_local(hw);
421
422 napi_complete(&local->napi);
423}
424EXPORT_SYMBOL(ieee80211_napi_complete);
425
426/* There isn't a lot of sense in it, but you can transmit anything you like */
427static const struct ieee80211_txrx_stypes
428ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
429 [NL80211_IFTYPE_ADHOC] = {
430 .tx = 0xffff,
431 .rx = BIT(IEEE80211_STYPE_ACTION >> 4),
432 },
433 [NL80211_IFTYPE_STATION] = {
434 .tx = 0xffff,
435 .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
436 BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
437 },
438 [NL80211_IFTYPE_AP] = {
439 .tx = 0xffff,
440 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
441 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
442 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
443 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
444 BIT(IEEE80211_STYPE_AUTH >> 4) |
445 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
446 BIT(IEEE80211_STYPE_ACTION >> 4),
447 },
448 [NL80211_IFTYPE_AP_VLAN] = {
449 /* copy AP */
450 .tx = 0xffff,
451 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
452 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
453 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
454 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
455 BIT(IEEE80211_STYPE_AUTH >> 4) |
456 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
457 BIT(IEEE80211_STYPE_ACTION >> 4),
458 },
459};
460
393struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, 461struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
394 const struct ieee80211_ops *ops) 462 const struct ieee80211_ops *ops)
395{ 463{
@@ -419,6 +487,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
419 if (!wiphy) 487 if (!wiphy)
420 return NULL; 488 return NULL;
421 489
490 wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes;
491
422 wiphy->flags |= WIPHY_FLAG_NETNS_OK | 492 wiphy->flags |= WIPHY_FLAG_NETNS_OK |
423 WIPHY_FLAG_4ADDR_AP | 493 WIPHY_FLAG_4ADDR_AP |
424 WIPHY_FLAG_4ADDR_STATION; 494 WIPHY_FLAG_4ADDR_STATION;
@@ -455,7 +525,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
455 __hw_addr_init(&local->mc_list); 525 __hw_addr_init(&local->mc_list);
456 526
457 mutex_init(&local->iflist_mtx); 527 mutex_init(&local->iflist_mtx);
458 mutex_init(&local->scan_mtx); 528 mutex_init(&local->mtx);
459 529
460 mutex_init(&local->key_mtx); 530 mutex_init(&local->key_mtx);
461 spin_lock_init(&local->filter_lock); 531 spin_lock_init(&local->filter_lock);
@@ -494,6 +564,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
494 skb_queue_head_init(&local->skb_queue); 564 skb_queue_head_init(&local->skb_queue);
495 skb_queue_head_init(&local->skb_queue_unreliable); 565 skb_queue_head_init(&local->skb_queue_unreliable);
496 566
567 /* init dummy netdev for use w/ NAPI */
568 init_dummy_netdev(&local->napi_dev);
569
497 return local_to_hw(local); 570 return local_to_hw(local);
498} 571}
499EXPORT_SYMBOL(ieee80211_alloc_hw); 572EXPORT_SYMBOL(ieee80211_alloc_hw);
@@ -506,6 +579,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
506 int channels, max_bitrates; 579 int channels, max_bitrates;
507 bool supp_ht; 580 bool supp_ht;
508 static const u32 cipher_suites[] = { 581 static const u32 cipher_suites[] = {
582 /* keep WEP first, it may be removed below */
509 WLAN_CIPHER_SUITE_WEP40, 583 WLAN_CIPHER_SUITE_WEP40,
510 WLAN_CIPHER_SUITE_WEP104, 584 WLAN_CIPHER_SUITE_WEP104,
511 WLAN_CIPHER_SUITE_TKIP, 585 WLAN_CIPHER_SUITE_TKIP,
@@ -554,6 +628,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
554 /* mac80211 always supports monitor */ 628 /* mac80211 always supports monitor */
555 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); 629 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
556 630
631#ifndef CONFIG_MAC80211_MESH
632 /* mesh depends on Kconfig, but drivers should set it if they want */
633 local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT);
634#endif
635
636 /* mac80211 supports control port protocol changing */
637 local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
638
557 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 639 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
558 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; 640 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
559 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) 641 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
@@ -589,10 +671,41 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
589 if (local->hw.wiphy->max_scan_ie_len) 671 if (local->hw.wiphy->max_scan_ie_len)
590 local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; 672 local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
591 673
592 local->hw.wiphy->cipher_suites = cipher_suites; 674 /* Set up cipher suites unless driver already did */
593 local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); 675 if (!local->hw.wiphy->cipher_suites) {
594 if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) 676 local->hw.wiphy->cipher_suites = cipher_suites;
595 local->hw.wiphy->n_cipher_suites--; 677 local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
678 if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE))
679 local->hw.wiphy->n_cipher_suites--;
680 }
681 if (IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)) {
682 if (local->hw.wiphy->cipher_suites == cipher_suites) {
683 local->hw.wiphy->cipher_suites += 2;
684 local->hw.wiphy->n_cipher_suites -= 2;
685 } else {
686 u32 *suites;
687 int r, w = 0;
688
689 /* Filter out WEP */
690
691 suites = kmemdup(
692 local->hw.wiphy->cipher_suites,
693 sizeof(u32) * local->hw.wiphy->n_cipher_suites,
694 GFP_KERNEL);
695 if (!suites)
696 return -ENOMEM;
697 for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) {
698 u32 suite = local->hw.wiphy->cipher_suites[r];
699 if (suite == WLAN_CIPHER_SUITE_WEP40 ||
700 suite == WLAN_CIPHER_SUITE_WEP104)
701 continue;
702 suites[w++] = suite;
703 }
704 local->hw.wiphy->cipher_suites = suites;
705 local->hw.wiphy->n_cipher_suites = w;
706 local->wiphy_ciphers_allocated = true;
707 }
708 }
596 709
597 result = wiphy_register(local->hw.wiphy); 710 result = wiphy_register(local->hw.wiphy);
598 if (result < 0) 711 if (result < 0)
@@ -641,16 +754,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
641 754
642 result = ieee80211_wep_init(local); 755 result = ieee80211_wep_init(local);
643 if (result < 0) 756 if (result < 0)
644 printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", 757 wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
645 wiphy_name(local->hw.wiphy), result); 758 result);
646 759
647 rtnl_lock(); 760 rtnl_lock();
648 761
649 result = ieee80211_init_rate_ctrl_alg(local, 762 result = ieee80211_init_rate_ctrl_alg(local,
650 hw->rate_control_algorithm); 763 hw->rate_control_algorithm);
651 if (result < 0) { 764 if (result < 0) {
652 printk(KERN_DEBUG "%s: Failed to initialize rate control " 765 wiphy_debug(local->hw.wiphy,
653 "algorithm\n", wiphy_name(local->hw.wiphy)); 766 "Failed to initialize rate control algorithm\n");
654 goto fail_rate; 767 goto fail_rate;
655 } 768 }
656 769
@@ -659,8 +772,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
659 result = ieee80211_if_add(local, "wlan%d", NULL, 772 result = ieee80211_if_add(local, "wlan%d", NULL,
660 NL80211_IFTYPE_STATION, NULL); 773 NL80211_IFTYPE_STATION, NULL);
661 if (result) 774 if (result)
662 printk(KERN_WARNING "%s: Failed to add default virtual iface\n", 775 wiphy_warn(local->hw.wiphy,
663 wiphy_name(local->hw.wiphy)); 776 "Failed to add default virtual iface\n");
664 } 777 }
665 778
666 rtnl_unlock(); 779 rtnl_unlock();
@@ -683,6 +796,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
683 goto fail_ifa; 796 goto fail_ifa;
684#endif 797#endif
685 798
799 netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll,
800 local->hw.napi_weight);
801
686 return 0; 802 return 0;
687 803
688#ifdef CONFIG_INET 804#ifdef CONFIG_INET
@@ -703,6 +819,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
703 fail_workqueue: 819 fail_workqueue:
704 wiphy_unregister(local->hw.wiphy); 820 wiphy_unregister(local->hw.wiphy);
705 fail_wiphy_register: 821 fail_wiphy_register:
822 if (local->wiphy_ciphers_allocated)
823 kfree(local->hw.wiphy->cipher_suites);
706 kfree(local->int_scan_req); 824 kfree(local->int_scan_req);
707 return result; 825 return result;
708} 826}
@@ -738,6 +856,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
738 */ 856 */
739 del_timer_sync(&local->work_timer); 857 del_timer_sync(&local->work_timer);
740 858
859 cancel_work_sync(&local->restart_work);
741 cancel_work_sync(&local->reconfig_filter); 860 cancel_work_sync(&local->reconfig_filter);
742 861
743 ieee80211_clear_tx_pending(local); 862 ieee80211_clear_tx_pending(local);
@@ -746,8 +865,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
746 865
747 if (skb_queue_len(&local->skb_queue) || 866 if (skb_queue_len(&local->skb_queue) ||
748 skb_queue_len(&local->skb_queue_unreliable)) 867 skb_queue_len(&local->skb_queue_unreliable))
749 printk(KERN_WARNING "%s: skb_queue not empty\n", 868 wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
750 wiphy_name(local->hw.wiphy));
751 skb_queue_purge(&local->skb_queue); 869 skb_queue_purge(&local->skb_queue);
752 skb_queue_purge(&local->skb_queue_unreliable); 870 skb_queue_purge(&local->skb_queue_unreliable);
753 871
@@ -764,7 +882,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
764 struct ieee80211_local *local = hw_to_local(hw); 882 struct ieee80211_local *local = hw_to_local(hw);
765 883
766 mutex_destroy(&local->iflist_mtx); 884 mutex_destroy(&local->iflist_mtx);
767 mutex_destroy(&local->scan_mtx); 885 mutex_destroy(&local->mtx);
886
887 if (local->wiphy_ciphers_allocated)
888 kfree(local->hw.wiphy->cipher_suites);
768 889
769 wiphy_free(local->hw.wiphy); 890 wiphy_free(local->hw.wiphy);
770} 891}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b6c163ac22da..0cb822cc12e9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -54,6 +54,12 @@
54 */ 54 */
55#define IEEE80211_SIGNAL_AVE_WEIGHT 3 55#define IEEE80211_SIGNAL_AVE_WEIGHT 3
56 56
57/*
58 * How many Beacon frames need to have been used in average signal strength
59 * before starting to indicate signal change events.
60 */
61#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
62
57#define TMR_RUNNING_TIMER 0 63#define TMR_RUNNING_TIMER 0
58#define TMR_RUNNING_CHANSW 1 64#define TMR_RUNNING_CHANSW 1
59 65
@@ -778,16 +784,17 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
778 params.uapsd = uapsd; 784 params.uapsd = uapsd;
779 785
780#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 786#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
781 printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " 787 wiphy_debug(local->hw.wiphy,
782 "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", 788 "WMM queue=%d aci=%d acm=%d aifs=%d "
783 wiphy_name(local->hw.wiphy), queue, aci, acm, 789 "cWmin=%d cWmax=%d txop=%d uapsd=%d\n",
784 params.aifs, params.cw_min, params.cw_max, params.txop, 790 queue, aci, acm,
785 params.uapsd); 791 params.aifs, params.cw_min, params.cw_max,
792 params.txop, params.uapsd);
786#endif 793#endif
787 if (drv_conf_tx(local, queue, &params)) 794 if (drv_conf_tx(local, queue, &params))
788 printk(KERN_DEBUG "%s: failed to set TX queue " 795 wiphy_debug(local->hw.wiphy,
789 "parameters for queue %d\n", 796 "failed to set TX queue parameters for queue %d\n",
790 wiphy_name(local->hw.wiphy), queue); 797 queue);
791 } 798 }
792 799
793 /* enable WMM or activate new settings */ 800 /* enable WMM or activate new settings */
@@ -990,6 +997,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
990 997
991 if (remove_sta) 998 if (remove_sta)
992 sta_info_destroy_addr(sdata, bssid); 999 sta_info_destroy_addr(sdata, bssid);
1000
1001 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
1002 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
1003 del_timer_sync(&sdata->u.mgd.timer);
1004 del_timer_sync(&sdata->u.mgd.chswitch_timer);
993} 1005}
994 1006
995void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, 1007void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
@@ -1103,8 +1115,11 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
1103 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); 1115 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
1104 1116
1105 ieee80211_set_disassoc(sdata, true); 1117 ieee80211_set_disassoc(sdata, true);
1106 ieee80211_recalc_idle(local);
1107 mutex_unlock(&ifmgd->mtx); 1118 mutex_unlock(&ifmgd->mtx);
1119
1120 mutex_lock(&local->mtx);
1121 ieee80211_recalc_idle(local);
1122 mutex_unlock(&local->mtx);
1108 /* 1123 /*
1109 * must be outside lock due to cfg80211, 1124 * must be outside lock due to cfg80211,
1110 * but that's not a problem. 1125 * but that's not a problem.
@@ -1173,7 +1188,9 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
1173 sdata->name, bssid, reason_code); 1188 sdata->name, bssid, reason_code);
1174 1189
1175 ieee80211_set_disassoc(sdata, true); 1190 ieee80211_set_disassoc(sdata, true);
1191 mutex_lock(&sdata->local->mtx);
1176 ieee80211_recalc_idle(sdata->local); 1192 ieee80211_recalc_idle(sdata->local);
1193 mutex_unlock(&sdata->local->mtx);
1177 1194
1178 return RX_MGMT_CFG80211_DEAUTH; 1195 return RX_MGMT_CFG80211_DEAUTH;
1179} 1196}
@@ -1203,7 +1220,9 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
1203 sdata->name, mgmt->sa, reason_code); 1220 sdata->name, mgmt->sa, reason_code);
1204 1221
1205 ieee80211_set_disassoc(sdata, true); 1222 ieee80211_set_disassoc(sdata, true);
1223 mutex_lock(&sdata->local->mtx);
1206 ieee80211_recalc_idle(sdata->local); 1224 ieee80211_recalc_idle(sdata->local);
1225 mutex_unlock(&sdata->local->mtx);
1207 return RX_MGMT_CFG80211_DISASSOC; 1226 return RX_MGMT_CFG80211_DISASSOC;
1208} 1227}
1209 1228
@@ -1540,15 +1559,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1540 ifmgd->last_beacon_signal = rx_status->signal; 1559 ifmgd->last_beacon_signal = rx_status->signal;
1541 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) { 1560 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
1542 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE; 1561 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
1543 ifmgd->ave_beacon_signal = rx_status->signal; 1562 ifmgd->ave_beacon_signal = rx_status->signal * 16;
1544 ifmgd->last_cqm_event_signal = 0; 1563 ifmgd->last_cqm_event_signal = 0;
1564 ifmgd->count_beacon_signal = 1;
1545 } else { 1565 } else {
1546 ifmgd->ave_beacon_signal = 1566 ifmgd->ave_beacon_signal =
1547 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 + 1567 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
1548 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) * 1568 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
1549 ifmgd->ave_beacon_signal) / 16; 1569 ifmgd->ave_beacon_signal) / 16;
1570 ifmgd->count_beacon_signal++;
1550 } 1571 }
1551 if (bss_conf->cqm_rssi_thold && 1572 if (bss_conf->cqm_rssi_thold &&
1573 ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
1552 !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) { 1574 !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
1553 int sig = ifmgd->ave_beacon_signal / 16; 1575 int sig = ifmgd->ave_beacon_signal / 16;
1554 int last_event = ifmgd->last_cqm_event_signal; 1576 int last_event = ifmgd->last_cqm_event_signal;
@@ -1751,7 +1773,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1751 struct ieee80211_local *local = sdata->local; 1773 struct ieee80211_local *local = sdata->local;
1752 struct ieee80211_work *wk; 1774 struct ieee80211_work *wk;
1753 1775
1754 mutex_lock(&local->work_mtx); 1776 mutex_lock(&local->mtx);
1755 list_for_each_entry(wk, &local->work_list, list) { 1777 list_for_each_entry(wk, &local->work_list, list) {
1756 if (wk->sdata != sdata) 1778 if (wk->sdata != sdata)
1757 continue; 1779 continue;
@@ -1783,7 +1805,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1783 free_work(wk); 1805 free_work(wk);
1784 break; 1806 break;
1785 } 1807 }
1786 mutex_unlock(&local->work_mtx); 1808 mutex_unlock(&local->mtx);
1787 1809
1788 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); 1810 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len);
1789 } 1811 }
@@ -1840,8 +1862,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1840 " after %dms, disconnecting.\n", 1862 " after %dms, disconnecting.\n",
1841 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 1863 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
1842 ieee80211_set_disassoc(sdata, true); 1864 ieee80211_set_disassoc(sdata, true);
1843 ieee80211_recalc_idle(local);
1844 mutex_unlock(&ifmgd->mtx); 1865 mutex_unlock(&ifmgd->mtx);
1866 mutex_lock(&local->mtx);
1867 ieee80211_recalc_idle(local);
1868 mutex_unlock(&local->mtx);
1845 /* 1869 /*
1846 * must be outside lock due to cfg80211, 1870 * must be outside lock due to cfg80211,
1847 * but that's not a problem. 1871 * but that's not a problem.
@@ -1917,6 +1941,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
1917 * time -- the code here is properly synchronised. 1941 * time -- the code here is properly synchronised.
1918 */ 1942 */
1919 1943
1944 cancel_work_sync(&ifmgd->request_smps_work);
1945
1920 cancel_work_sync(&ifmgd->beacon_connection_loss_work); 1946 cancel_work_sync(&ifmgd->beacon_connection_loss_work);
1921 if (del_timer_sync(&ifmgd->timer)) 1947 if (del_timer_sync(&ifmgd->timer))
1922 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); 1948 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
@@ -1952,6 +1978,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
1952 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); 1978 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1953 INIT_WORK(&ifmgd->beacon_connection_loss_work, 1979 INIT_WORK(&ifmgd->beacon_connection_loss_work,
1954 ieee80211_beacon_connection_loss_work); 1980 ieee80211_beacon_connection_loss_work);
1981 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work);
1955 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 1982 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
1956 (unsigned long) sdata); 1983 (unsigned long) sdata);
1957 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 1984 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
@@ -2249,6 +2276,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2249 else 2276 else
2250 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT; 2277 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT;
2251 2278
2279 sdata->control_port_protocol = req->crypto.control_port_ethertype;
2280 sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt;
2281
2252 ieee80211_add_work(wk); 2282 ieee80211_add_work(wk);
2253 return 0; 2283 return 0;
2254} 2284}
@@ -2275,7 +2305,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2275 2305
2276 mutex_unlock(&ifmgd->mtx); 2306 mutex_unlock(&ifmgd->mtx);
2277 2307
2278 mutex_lock(&local->work_mtx); 2308 mutex_lock(&local->mtx);
2279 list_for_each_entry(wk, &local->work_list, list) { 2309 list_for_each_entry(wk, &local->work_list, list) {
2280 if (wk->sdata != sdata) 2310 if (wk->sdata != sdata)
2281 continue; 2311 continue;
@@ -2294,7 +2324,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2294 free_work(wk); 2324 free_work(wk);
2295 break; 2325 break;
2296 } 2326 }
2297 mutex_unlock(&local->work_mtx); 2327 mutex_unlock(&local->mtx);
2298 2328
2299 /* 2329 /*
2300 * If somebody requests authentication and we haven't 2330 * If somebody requests authentication and we haven't
@@ -2319,7 +2349,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2319 if (assoc_bss) 2349 if (assoc_bss)
2320 sta_info_destroy_addr(sdata, bssid); 2350 sta_info_destroy_addr(sdata, bssid);
2321 2351
2352 mutex_lock(&sdata->local->mtx);
2322 ieee80211_recalc_idle(sdata->local); 2353 ieee80211_recalc_idle(sdata->local);
2354 mutex_unlock(&sdata->local->mtx);
2323 2355
2324 return 0; 2356 return 0;
2325} 2357}
@@ -2357,7 +2389,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2357 cookie, !req->local_state_change); 2389 cookie, !req->local_state_change);
2358 sta_info_destroy_addr(sdata, bssid); 2390 sta_info_destroy_addr(sdata, bssid);
2359 2391
2392 mutex_lock(&sdata->local->mtx);
2360 ieee80211_recalc_idle(sdata->local); 2393 ieee80211_recalc_idle(sdata->local);
2394 mutex_unlock(&sdata->local->mtx);
2361 2395
2362 return 0; 2396 return 0;
2363} 2397}
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index c36b1911987a..eeacaa59380a 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -112,8 +112,10 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
112 * used from user space controlled off-channel operations. 112 * used from user space controlled off-channel operations.
113 */ 113 */
114 if (sdata->vif.type != NL80211_IFTYPE_STATION && 114 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
115 sdata->vif.type != NL80211_IFTYPE_MONITOR) 115 sdata->vif.type != NL80211_IFTYPE_MONITOR) {
116 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
116 netif_tx_stop_all_queues(sdata->dev); 117 netif_tx_stop_all_queues(sdata->dev);
118 }
117 } 119 }
118 mutex_unlock(&local->iflist_mtx); 120 mutex_unlock(&local->iflist_mtx);
119} 121}
@@ -131,6 +133,7 @@ void ieee80211_offchannel_stop_station(struct ieee80211_local *local)
131 continue; 133 continue;
132 134
133 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 135 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
136 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
134 netif_tx_stop_all_queues(sdata->dev); 137 netif_tx_stop_all_queues(sdata->dev);
135 if (sdata->u.mgd.associated) 138 if (sdata->u.mgd.associated)
136 ieee80211_offchannel_ps_enable(sdata); 139 ieee80211_offchannel_ps_enable(sdata);
@@ -155,8 +158,20 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
155 ieee80211_offchannel_ps_disable(sdata); 158 ieee80211_offchannel_ps_disable(sdata);
156 } 159 }
157 160
158 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) 161 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
162 clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
163 /*
164 * This may wake up queues even though the driver
165 * currently has them stopped. This is not very
166 * likely, since the driver won't have gotten any
167 * (or hardly any) new packets while we weren't
168 * on the right channel, and even if it happens
169 * it will at most lead to queueing up one more
170 * packet per queue in mac80211 rather than on
171 * the interface qdisc.
172 */
159 netif_tx_wake_all_queues(sdata->dev); 173 netif_tx_wake_all_queues(sdata->dev);
174 }
160 175
161 /* re-enable beaconing */ 176 /* re-enable beaconing */
162 if (enable_beaconing && 177 if (enable_beaconing &&
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d287fde0431d..ce671dfd238c 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -12,7 +12,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
12 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
13 struct sta_info *sta; 13 struct sta_info *sta;
14 14
15 ieee80211_scan_cancel(local); 15 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)))
16 ieee80211_scan_cancel(local);
16 17
17 ieee80211_stop_queues_by_reason(hw, 18 ieee80211_stop_queues_by_reason(hw,
18 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 19 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index be04d46110fe..4f772de2f213 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -368,8 +368,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
368 368
369 ref = rate_control_alloc(name, local); 369 ref = rate_control_alloc(name, local);
370 if (!ref) { 370 if (!ref) {
371 printk(KERN_WARNING "%s: Failed to select rate control " 371 wiphy_warn(local->hw.wiphy,
372 "algorithm\n", wiphy_name(local->hw.wiphy)); 372 "Failed to select rate control algorithm\n");
373 return -ENOENT; 373 return -ENOENT;
374 } 374 }
375 375
@@ -380,9 +380,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
380 sta_info_flush(local, NULL); 380 sta_info_flush(local, NULL);
381 } 381 }
382 382
383 printk(KERN_DEBUG "%s: Selected rate control " 383 wiphy_debug(local->hw.wiphy, "Selected rate control algorithm '%s'\n",
384 "algorithm '%s'\n", wiphy_name(local->hw.wiphy), 384 ref->ops->name);
385 ref->ops->name);
386 385
387 return 0; 386 return 0;
388} 387}
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index 47438b4a9af5..135f36fd4d5d 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -162,7 +162,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
162 file_info->next_entry = (file_info->next_entry + 1) % 162 file_info->next_entry = (file_info->next_entry + 1) %
163 RC_PID_EVENT_RING_SIZE; 163 RC_PID_EVENT_RING_SIZE;
164 164
165 /* Print information about the event. Note that userpace needs to 165 /* Print information about the event. Note that userspace needs to
166 * provide large enough buffers. */ 166 * provide large enough buffers. */
167 length = length < RC_PID_PRINT_BUF_SIZE ? 167 length = length < RC_PID_PRINT_BUF_SIZE ?
168 length : RC_PID_PRINT_BUF_SIZE; 168 length : RC_PID_PRINT_BUF_SIZE;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fa0f37e4afe4..ac205a33690f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -538,20 +538,12 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
538 int index, 538 int index,
539 struct sk_buff_head *frames) 539 struct sk_buff_head *frames)
540{ 540{
541 struct ieee80211_supported_band *sband;
542 struct ieee80211_rate *rate = NULL;
543 struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; 541 struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
544 struct ieee80211_rx_status *status;
545 542
546 if (!skb) 543 if (!skb)
547 goto no_frame; 544 goto no_frame;
548 545
549 status = IEEE80211_SKB_RXCB(skb); 546 /* release the frame from the reorder ring buffer */
550
551 /* release the reordered frames to stack */
552 sband = hw->wiphy->bands[status->band];
553 if (!(status->flag & RX_FLAG_HT))
554 rate = &sband->bitrates[status->rate_idx];
555 tid_agg_rx->stored_mpdu_num--; 547 tid_agg_rx->stored_mpdu_num--;
556 tid_agg_rx->reorder_buf[index] = NULL; 548 tid_agg_rx->reorder_buf[index] = NULL;
557 __skb_queue_tail(frames, skb); 549 __skb_queue_tail(frames, skb);
@@ -580,9 +572,78 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
580 * frames that have not yet been received are assumed to be lost and the skb 572 * frames that have not yet been received are assumed to be lost and the skb
581 * can be released for processing. This may also release other skb's from the 573 * can be released for processing. This may also release other skb's from the
582 * reorder buffer if there are no additional gaps between the frames. 574 * reorder buffer if there are no additional gaps between the frames.
575 *
576 * Callers must hold tid_agg_rx->reorder_lock.
583 */ 577 */
584#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) 578#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10)
585 579
580static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
581 struct tid_ampdu_rx *tid_agg_rx,
582 struct sk_buff_head *frames)
583{
584 int index, j;
585
586 /* release the buffer until next missing frame */
587 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
588 tid_agg_rx->buf_size;
589 if (!tid_agg_rx->reorder_buf[index] &&
590 tid_agg_rx->stored_mpdu_num > 1) {
591 /*
592 * No buffers ready to be released, but check whether any
593 * frames in the reorder buffer have timed out.
594 */
595 int skipped = 1;
596 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
597 j = (j + 1) % tid_agg_rx->buf_size) {
598 if (!tid_agg_rx->reorder_buf[j]) {
599 skipped++;
600 continue;
601 }
602 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
603 HT_RX_REORDER_BUF_TIMEOUT))
604 goto set_release_timer;
605
606#ifdef CONFIG_MAC80211_HT_DEBUG
607 if (net_ratelimit())
608 wiphy_debug(hw->wiphy,
609 "release an RX reorder frame due to timeout on earlier frames\n");
610#endif
611 ieee80211_release_reorder_frame(hw, tid_agg_rx,
612 j, frames);
613
614 /*
615 * Increment the head seq# also for the skipped slots.
616 */
617 tid_agg_rx->head_seq_num =
618 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
619 skipped = 0;
620 }
621 } else while (tid_agg_rx->reorder_buf[index]) {
622 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
623 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
624 tid_agg_rx->buf_size;
625 }
626
627 if (tid_agg_rx->stored_mpdu_num) {
628 j = index = seq_sub(tid_agg_rx->head_seq_num,
629 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
630
631 for (; j != (index - 1) % tid_agg_rx->buf_size;
632 j = (j + 1) % tid_agg_rx->buf_size) {
633 if (tid_agg_rx->reorder_buf[j])
634 break;
635 }
636
637 set_release_timer:
638
639 mod_timer(&tid_agg_rx->reorder_timer,
640 tid_agg_rx->reorder_time[j] +
641 HT_RX_REORDER_BUF_TIMEOUT);
642 } else {
643 del_timer(&tid_agg_rx->reorder_timer);
644 }
645}
646
586/* 647/*
587 * As this function belongs to the RX path it must be under 648 * As this function belongs to the RX path it must be under
588 * rcu_read_lock protection. It returns false if the frame 649 * rcu_read_lock protection. It returns false if the frame
@@ -598,14 +659,16 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
598 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; 659 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
599 u16 head_seq_num, buf_size; 660 u16 head_seq_num, buf_size;
600 int index; 661 int index;
662 bool ret = true;
601 663
602 buf_size = tid_agg_rx->buf_size; 664 buf_size = tid_agg_rx->buf_size;
603 head_seq_num = tid_agg_rx->head_seq_num; 665 head_seq_num = tid_agg_rx->head_seq_num;
604 666
667 spin_lock(&tid_agg_rx->reorder_lock);
605 /* frame with out of date sequence number */ 668 /* frame with out of date sequence number */
606 if (seq_less(mpdu_seq_num, head_seq_num)) { 669 if (seq_less(mpdu_seq_num, head_seq_num)) {
607 dev_kfree_skb(skb); 670 dev_kfree_skb(skb);
608 return true; 671 goto out;
609 } 672 }
610 673
611 /* 674 /*
@@ -626,7 +689,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
626 /* check if we already stored this frame */ 689 /* check if we already stored this frame */
627 if (tid_agg_rx->reorder_buf[index]) { 690 if (tid_agg_rx->reorder_buf[index]) {
628 dev_kfree_skb(skb); 691 dev_kfree_skb(skb);
629 return true; 692 goto out;
630 } 693 }
631 694
632 /* 695 /*
@@ -636,58 +699,19 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
636 if (mpdu_seq_num == tid_agg_rx->head_seq_num && 699 if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
637 tid_agg_rx->stored_mpdu_num == 0) { 700 tid_agg_rx->stored_mpdu_num == 0) {
638 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); 701 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
639 return false; 702 ret = false;
703 goto out;
640 } 704 }
641 705
642 /* put the frame in the reordering buffer */ 706 /* put the frame in the reordering buffer */
643 tid_agg_rx->reorder_buf[index] = skb; 707 tid_agg_rx->reorder_buf[index] = skb;
644 tid_agg_rx->reorder_time[index] = jiffies; 708 tid_agg_rx->reorder_time[index] = jiffies;
645 tid_agg_rx->stored_mpdu_num++; 709 tid_agg_rx->stored_mpdu_num++;
646 /* release the buffer until next missing frame */ 710 ieee80211_sta_reorder_release(hw, tid_agg_rx, frames);
647 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
648 tid_agg_rx->buf_size;
649 if (!tid_agg_rx->reorder_buf[index] &&
650 tid_agg_rx->stored_mpdu_num > 1) {
651 /*
652 * No buffers ready to be released, but check whether any
653 * frames in the reorder buffer have timed out.
654 */
655 int j;
656 int skipped = 1;
657 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
658 j = (j + 1) % tid_agg_rx->buf_size) {
659 if (!tid_agg_rx->reorder_buf[j]) {
660 skipped++;
661 continue;
662 }
663 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
664 HT_RX_REORDER_BUF_TIMEOUT))
665 break;
666 711
667#ifdef CONFIG_MAC80211_HT_DEBUG 712 out:
668 if (net_ratelimit()) 713 spin_unlock(&tid_agg_rx->reorder_lock);
669 printk(KERN_DEBUG "%s: release an RX reorder " 714 return ret;
670 "frame due to timeout on earlier "
671 "frames\n",
672 wiphy_name(hw->wiphy));
673#endif
674 ieee80211_release_reorder_frame(hw, tid_agg_rx,
675 j, frames);
676
677 /*
678 * Increment the head seq# also for the skipped slots.
679 */
680 tid_agg_rx->head_seq_num =
681 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
682 skipped = 0;
683 }
684 } else while (tid_agg_rx->reorder_buf[index]) {
685 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
686 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
687 tid_agg_rx->buf_size;
688 }
689
690 return true;
691} 715}
692 716
693/* 717/*
@@ -873,6 +897,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
873 897
874 if (!is_multicast_ether_addr(hdr->addr1) && stakey) { 898 if (!is_multicast_ether_addr(hdr->addr1) && stakey) {
875 rx->key = stakey; 899 rx->key = stakey;
900 if ((status->flag & RX_FLAG_DECRYPTED) &&
901 (status->flag & RX_FLAG_IV_STRIPPED))
902 return RX_CONTINUE;
876 /* Skip decryption if the frame is not protected. */ 903 /* Skip decryption if the frame is not protected. */
877 if (!ieee80211_has_protected(fc)) 904 if (!ieee80211_has_protected(fc))
878 return RX_CONTINUE; 905 return RX_CONTINUE;
@@ -935,7 +962,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
935 * pairwise or station-to-station keys, but for WEP we allow 962 * pairwise or station-to-station keys, but for WEP we allow
936 * using a key index as well. 963 * using a key index as well.
937 */ 964 */
938 if (rx->key && rx->key->conf.alg != ALG_WEP && 965 if (rx->key && rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
966 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
939 !is_multicast_ether_addr(hdr->addr1)) 967 !is_multicast_ether_addr(hdr->addr1))
940 rx->key = NULL; 968 rx->key = NULL;
941 } 969 }
@@ -951,8 +979,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
951 return RX_DROP_UNUSABLE; 979 return RX_DROP_UNUSABLE;
952 /* the hdr variable is invalid now! */ 980 /* the hdr variable is invalid now! */
953 981
954 switch (rx->key->conf.alg) { 982 switch (rx->key->conf.cipher) {
955 case ALG_WEP: 983 case WLAN_CIPHER_SUITE_WEP40:
984 case WLAN_CIPHER_SUITE_WEP104:
956 /* Check for weak IVs if possible */ 985 /* Check for weak IVs if possible */
957 if (rx->sta && ieee80211_is_data(fc) && 986 if (rx->sta && ieee80211_is_data(fc) &&
958 (!(status->flag & RX_FLAG_IV_STRIPPED) || 987 (!(status->flag & RX_FLAG_IV_STRIPPED) ||
@@ -962,15 +991,21 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
962 991
963 result = ieee80211_crypto_wep_decrypt(rx); 992 result = ieee80211_crypto_wep_decrypt(rx);
964 break; 993 break;
965 case ALG_TKIP: 994 case WLAN_CIPHER_SUITE_TKIP:
966 result = ieee80211_crypto_tkip_decrypt(rx); 995 result = ieee80211_crypto_tkip_decrypt(rx);
967 break; 996 break;
968 case ALG_CCMP: 997 case WLAN_CIPHER_SUITE_CCMP:
969 result = ieee80211_crypto_ccmp_decrypt(rx); 998 result = ieee80211_crypto_ccmp_decrypt(rx);
970 break; 999 break;
971 case ALG_AES_CMAC: 1000 case WLAN_CIPHER_SUITE_AES_CMAC:
972 result = ieee80211_crypto_aes_cmac_decrypt(rx); 1001 result = ieee80211_crypto_aes_cmac_decrypt(rx);
973 break; 1002 break;
1003 default:
1004 /*
1005 * We can reach here only with HW-only algorithms
1006 * but why didn't it decrypt the frame?!
1007 */
1008 return RX_DROP_UNUSABLE;
974 } 1009 }
975 1010
976 /* either the frame has been decrypted or will be dropped */ 1011 /* either the frame has been decrypted or will be dropped */
@@ -1265,7 +1300,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1265 /* This is the first fragment of a new frame. */ 1300 /* This is the first fragment of a new frame. */
1266 entry = ieee80211_reassemble_add(rx->sdata, frag, seq, 1301 entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
1267 rx->queue, &(rx->skb)); 1302 rx->queue, &(rx->skb));
1268 if (rx->key && rx->key->conf.alg == ALG_CCMP && 1303 if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP &&
1269 ieee80211_has_protected(fc)) { 1304 ieee80211_has_protected(fc)) {
1270 int queue = ieee80211_is_mgmt(fc) ? 1305 int queue = ieee80211_is_mgmt(fc) ?
1271 NUM_RX_DATA_QUEUES : rx->queue; 1306 NUM_RX_DATA_QUEUES : rx->queue;
@@ -1294,7 +1329,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1294 int i; 1329 int i;
1295 u8 pn[CCMP_PN_LEN], *rpn; 1330 u8 pn[CCMP_PN_LEN], *rpn;
1296 int queue; 1331 int queue;
1297 if (!rx->key || rx->key->conf.alg != ALG_CCMP) 1332 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP)
1298 return RX_DROP_UNUSABLE; 1333 return RX_DROP_UNUSABLE;
1299 memcpy(pn, entry->last_pn, CCMP_PN_LEN); 1334 memcpy(pn, entry->last_pn, CCMP_PN_LEN);
1300 for (i = CCMP_PN_LEN - 1; i >= 0; i--) { 1335 for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -1492,7 +1527,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
1492 * Allow EAPOL frames to us/the PAE group address regardless 1527 * Allow EAPOL frames to us/the PAE group address regardless
1493 * of whether the frame was encrypted or not. 1528 * of whether the frame was encrypted or not.
1494 */ 1529 */
1495 if (ehdr->h_proto == htons(ETH_P_PAE) && 1530 if (ehdr->h_proto == rx->sdata->control_port_protocol &&
1496 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 || 1531 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 ||
1497 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0)) 1532 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0))
1498 return true; 1533 return true;
@@ -1909,13 +1944,36 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
1909} 1944}
1910 1945
1911static ieee80211_rx_result debug_noinline 1946static ieee80211_rx_result debug_noinline
1947ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
1948{
1949 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1950
1951 /*
1952 * From here on, look only at management frames.
1953 * Data and control frames are already handled,
1954 * and unknown (reserved) frames are useless.
1955 */
1956 if (rx->skb->len < 24)
1957 return RX_DROP_MONITOR;
1958
1959 if (!ieee80211_is_mgmt(mgmt->frame_control))
1960 return RX_DROP_MONITOR;
1961
1962 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
1963 return RX_DROP_MONITOR;
1964
1965 if (ieee80211_drop_unencrypted_mgmt(rx))
1966 return RX_DROP_UNUSABLE;
1967
1968 return RX_CONTINUE;
1969}
1970
1971static ieee80211_rx_result debug_noinline
1912ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 1972ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1913{ 1973{
1914 struct ieee80211_local *local = rx->local; 1974 struct ieee80211_local *local = rx->local;
1915 struct ieee80211_sub_if_data *sdata = rx->sdata; 1975 struct ieee80211_sub_if_data *sdata = rx->sdata;
1916 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; 1976 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1917 struct sk_buff *nskb;
1918 struct ieee80211_rx_status *status;
1919 int len = rx->skb->len; 1977 int len = rx->skb->len;
1920 1978
1921 if (!ieee80211_is_action(mgmt->frame_control)) 1979 if (!ieee80211_is_action(mgmt->frame_control))
@@ -1931,9 +1989,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1931 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1989 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
1932 return RX_DROP_UNUSABLE; 1990 return RX_DROP_UNUSABLE;
1933 1991
1934 if (ieee80211_drop_unencrypted_mgmt(rx))
1935 return RX_DROP_UNUSABLE;
1936
1937 switch (mgmt->u.action.category) { 1992 switch (mgmt->u.action.category) {
1938 case WLAN_CATEGORY_BACK: 1993 case WLAN_CATEGORY_BACK:
1939 /* 1994 /*
@@ -2024,17 +2079,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2024 goto queue; 2079 goto queue;
2025 } 2080 }
2026 2081
2082 return RX_CONTINUE;
2083
2027 invalid: 2084 invalid:
2028 /* 2085 rx->flags |= IEEE80211_MALFORMED_ACTION_FRM;
2029 * For AP mode, hostapd is responsible for handling any action 2086 /* will return in the next handlers */
2030 * frames that we didn't handle, including returning unknown 2087 return RX_CONTINUE;
2031 * ones. For all other modes we will return them to the sender, 2088
2032 * setting the 0x80 bit in the action category, as required by 2089 handled:
2033 * 802.11-2007 7.3.1.11. 2090 if (rx->sta)
2034 */ 2091 rx->sta->rx_packets++;
2035 if (sdata->vif.type == NL80211_IFTYPE_AP || 2092 dev_kfree_skb(rx->skb);
2036 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 2093 return RX_QUEUED;
2037 return RX_DROP_MONITOR; 2094
2095 queue:
2096 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2097 skb_queue_tail(&sdata->skb_queue, rx->skb);
2098 ieee80211_queue_work(&local->hw, &sdata->work);
2099 if (rx->sta)
2100 rx->sta->rx_packets++;
2101 return RX_QUEUED;
2102}
2103
2104static ieee80211_rx_result debug_noinline
2105ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
2106{
2107 struct ieee80211_rx_status *status;
2108
2109 /* skip known-bad action frames and return them in the next handler */
2110 if (rx->flags & IEEE80211_MALFORMED_ACTION_FRM)
2111 return RX_CONTINUE;
2038 2112
2039 /* 2113 /*
2040 * Getting here means the kernel doesn't know how to handle 2114 * Getting here means the kernel doesn't know how to handle
@@ -2044,10 +2118,44 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2044 */ 2118 */
2045 status = IEEE80211_SKB_RXCB(rx->skb); 2119 status = IEEE80211_SKB_RXCB(rx->skb);
2046 2120
2047 if (cfg80211_rx_action(rx->sdata->dev, status->freq, 2121 if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq,
2048 rx->skb->data, rx->skb->len, 2122 rx->skb->data, rx->skb->len,
2049 GFP_ATOMIC)) 2123 GFP_ATOMIC)) {
2050 goto handled; 2124 if (rx->sta)
2125 rx->sta->rx_packets++;
2126 dev_kfree_skb(rx->skb);
2127 return RX_QUEUED;
2128 }
2129
2130
2131 return RX_CONTINUE;
2132}
2133
2134static ieee80211_rx_result debug_noinline
2135ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
2136{
2137 struct ieee80211_local *local = rx->local;
2138 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
2139 struct sk_buff *nskb;
2140 struct ieee80211_sub_if_data *sdata = rx->sdata;
2141
2142 if (!ieee80211_is_action(mgmt->frame_control))
2143 return RX_CONTINUE;
2144
2145 /*
2146 * For AP mode, hostapd is responsible for handling any action
2147 * frames that we didn't handle, including returning unknown
2148 * ones. For all other modes we will return them to the sender,
2149 * setting the 0x80 bit in the action category, as required by
2150 * 802.11-2007 7.3.1.11.
2151 * Newer versions of hostapd shall also use the management frame
2152 * registration mechanisms, but older ones still use cooked
2153 * monitor interfaces so push all frames there.
2154 */
2155 if (!(rx->flags & IEEE80211_MALFORMED_ACTION_FRM) &&
2156 (sdata->vif.type == NL80211_IFTYPE_AP ||
2157 sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
2158 return RX_DROP_MONITOR;
2051 2159
2052 /* do not return rejected action frames */ 2160 /* do not return rejected action frames */
2053 if (mgmt->u.action.category & 0x80) 2161 if (mgmt->u.action.category & 0x80)
@@ -2066,20 +2174,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2066 2174
2067 ieee80211_tx_skb(rx->sdata, nskb); 2175 ieee80211_tx_skb(rx->sdata, nskb);
2068 } 2176 }
2069
2070 handled:
2071 if (rx->sta)
2072 rx->sta->rx_packets++;
2073 dev_kfree_skb(rx->skb); 2177 dev_kfree_skb(rx->skb);
2074 return RX_QUEUED; 2178 return RX_QUEUED;
2075
2076 queue:
2077 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2078 skb_queue_tail(&sdata->skb_queue, rx->skb);
2079 ieee80211_queue_work(&local->hw, &sdata->work);
2080 if (rx->sta)
2081 rx->sta->rx_packets++;
2082 return RX_QUEUED;
2083} 2179}
2084 2180
2085static ieee80211_rx_result debug_noinline 2181static ieee80211_rx_result debug_noinline
@@ -2090,15 +2186,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
2090 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; 2186 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
2091 __le16 stype; 2187 __le16 stype;
2092 2188
2093 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
2094 return RX_DROP_MONITOR;
2095
2096 if (rx->skb->len < 24)
2097 return RX_DROP_MONITOR;
2098
2099 if (ieee80211_drop_unencrypted_mgmt(rx))
2100 return RX_DROP_UNUSABLE;
2101
2102 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); 2189 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb);
2103 if (rxs != RX_CONTINUE) 2190 if (rxs != RX_CONTINUE)
2104 return rxs; 2191 return rxs;
@@ -2267,19 +2354,46 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2267 dev_kfree_skb(skb); 2354 dev_kfree_skb(skb);
2268} 2355}
2269 2356
2357static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
2358 ieee80211_rx_result res)
2359{
2360 switch (res) {
2361 case RX_DROP_MONITOR:
2362 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2363 if (rx->sta)
2364 rx->sta->rx_dropped++;
2365 /* fall through */
2366 case RX_CONTINUE: {
2367 struct ieee80211_rate *rate = NULL;
2368 struct ieee80211_supported_band *sband;
2369 struct ieee80211_rx_status *status;
2370
2371 status = IEEE80211_SKB_RXCB((rx->skb));
2372
2373 sband = rx->local->hw.wiphy->bands[status->band];
2374 if (!(status->flag & RX_FLAG_HT))
2375 rate = &sband->bitrates[status->rate_idx];
2376
2377 ieee80211_rx_cooked_monitor(rx, rate);
2378 break;
2379 }
2380 case RX_DROP_UNUSABLE:
2381 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2382 if (rx->sta)
2383 rx->sta->rx_dropped++;
2384 dev_kfree_skb(rx->skb);
2385 break;
2386 case RX_QUEUED:
2387 I802_DEBUG_INC(rx->sdata->local->rx_handlers_queued);
2388 break;
2389 }
2390}
2270 2391
2271static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, 2392static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
2272 struct ieee80211_rx_data *rx, 2393 struct sk_buff_head *frames)
2273 struct sk_buff *skb,
2274 struct ieee80211_rate *rate)
2275{ 2394{
2276 struct sk_buff_head reorder_release;
2277 ieee80211_rx_result res = RX_DROP_MONITOR; 2395 ieee80211_rx_result res = RX_DROP_MONITOR;
2278 2396 struct sk_buff *skb;
2279 __skb_queue_head_init(&reorder_release);
2280
2281 rx->skb = skb;
2282 rx->sdata = sdata;
2283 2397
2284#define CALL_RXH(rxh) \ 2398#define CALL_RXH(rxh) \
2285 do { \ 2399 do { \
@@ -2288,17 +2402,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2288 goto rxh_next; \ 2402 goto rxh_next; \
2289 } while (0); 2403 } while (0);
2290 2404
2291 /* 2405 while ((skb = __skb_dequeue(frames))) {
2292 * NB: the rxh_next label works even if we jump
2293 * to it from here because then the list will
2294 * be empty, which is a trivial check
2295 */
2296 CALL_RXH(ieee80211_rx_h_passive_scan)
2297 CALL_RXH(ieee80211_rx_h_check)
2298
2299 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2300
2301 while ((skb = __skb_dequeue(&reorder_release))) {
2302 /* 2406 /*
2303 * all the other fields are valid across frames 2407 * all the other fields are valid across frames
2304 * that belong to an aMPDU since they are on the 2408 * that belong to an aMPDU since they are on the
@@ -2316,42 +2420,95 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2316 CALL_RXH(ieee80211_rx_h_remove_qos_control) 2420 CALL_RXH(ieee80211_rx_h_remove_qos_control)
2317 CALL_RXH(ieee80211_rx_h_amsdu) 2421 CALL_RXH(ieee80211_rx_h_amsdu)
2318#ifdef CONFIG_MAC80211_MESH 2422#ifdef CONFIG_MAC80211_MESH
2319 if (ieee80211_vif_is_mesh(&sdata->vif)) 2423 if (ieee80211_vif_is_mesh(&rx->sdata->vif))
2320 CALL_RXH(ieee80211_rx_h_mesh_fwding); 2424 CALL_RXH(ieee80211_rx_h_mesh_fwding);
2321#endif 2425#endif
2322 CALL_RXH(ieee80211_rx_h_data) 2426 CALL_RXH(ieee80211_rx_h_data)
2323 2427
2324 /* special treatment -- needs the queue */ 2428 /* special treatment -- needs the queue */
2325 res = ieee80211_rx_h_ctrl(rx, &reorder_release); 2429 res = ieee80211_rx_h_ctrl(rx, frames);
2326 if (res != RX_CONTINUE) 2430 if (res != RX_CONTINUE)
2327 goto rxh_next; 2431 goto rxh_next;
2328 2432
2433 CALL_RXH(ieee80211_rx_h_mgmt_check)
2329 CALL_RXH(ieee80211_rx_h_action) 2434 CALL_RXH(ieee80211_rx_h_action)
2435 CALL_RXH(ieee80211_rx_h_userspace_mgmt)
2436 CALL_RXH(ieee80211_rx_h_action_return)
2330 CALL_RXH(ieee80211_rx_h_mgmt) 2437 CALL_RXH(ieee80211_rx_h_mgmt)
2331 2438
2439 rxh_next:
2440 ieee80211_rx_handlers_result(rx, res);
2441
2332#undef CALL_RXH 2442#undef CALL_RXH
2443 }
2444}
2445
2446static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2447 struct ieee80211_rx_data *rx,
2448 struct sk_buff *skb)
2449{
2450 struct sk_buff_head reorder_release;
2451 ieee80211_rx_result res = RX_DROP_MONITOR;
2452
2453 __skb_queue_head_init(&reorder_release);
2454
2455 rx->skb = skb;
2456 rx->sdata = sdata;
2457
2458#define CALL_RXH(rxh) \
2459 do { \
2460 res = rxh(rx); \
2461 if (res != RX_CONTINUE) \
2462 goto rxh_next; \
2463 } while (0);
2464
2465 CALL_RXH(ieee80211_rx_h_passive_scan)
2466 CALL_RXH(ieee80211_rx_h_check)
2467
2468 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2469
2470 ieee80211_rx_handlers(rx, &reorder_release);
2471 return;
2333 2472
2334 rxh_next: 2473 rxh_next:
2335 switch (res) { 2474 ieee80211_rx_handlers_result(rx, res);
2336 case RX_DROP_MONITOR: 2475
2337 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2476#undef CALL_RXH
2338 if (rx->sta) 2477}
2339 rx->sta->rx_dropped++; 2478
2340 /* fall through */ 2479/*
2341 case RX_CONTINUE: 2480 * This function makes calls into the RX path. Therefore the
2342 ieee80211_rx_cooked_monitor(rx, rate); 2481 * caller must hold the sta_info->lock and everything has to
2343 break; 2482 * be under rcu_read_lock protection as well.
2344 case RX_DROP_UNUSABLE: 2483 */
2345 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2484void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
2346 if (rx->sta) 2485{
2347 rx->sta->rx_dropped++; 2486 struct sk_buff_head frames;
2348 dev_kfree_skb(rx->skb); 2487 struct ieee80211_rx_data rx = { };
2349 break; 2488 struct tid_ampdu_rx *tid_agg_rx;
2350 case RX_QUEUED: 2489
2351 I802_DEBUG_INC(sdata->local->rx_handlers_queued); 2490 tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
2352 break; 2491 if (!tid_agg_rx)
2353 } 2492 return;
2354 } 2493
2494 __skb_queue_head_init(&frames);
2495
2496 /* construct rx struct */
2497 rx.sta = sta;
2498 rx.sdata = sta->sdata;
2499 rx.local = sta->local;
2500 rx.queue = tid;
2501 rx.flags |= IEEE80211_RX_RA_MATCH;
2502
2503 if (unlikely(test_bit(SCAN_HW_SCANNING, &sta->local->scanning) ||
2504 test_bit(SCAN_OFF_CHANNEL, &sta->local->scanning)))
2505 rx.flags |= IEEE80211_RX_IN_SCAN;
2506
2507 spin_lock(&tid_agg_rx->reorder_lock);
2508 ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx, &frames);
2509 spin_unlock(&tid_agg_rx->reorder_lock);
2510
2511 ieee80211_rx_handlers(&rx, &frames);
2355} 2512}
2356 2513
2357/* main receive path */ 2514/* main receive path */
@@ -2433,7 +2590,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2433 break; 2590 break;
2434 case NL80211_IFTYPE_MONITOR: 2591 case NL80211_IFTYPE_MONITOR:
2435 case NL80211_IFTYPE_UNSPECIFIED: 2592 case NL80211_IFTYPE_UNSPECIFIED:
2436 case __NL80211_IFTYPE_AFTER_LAST: 2593 case NUM_NL80211_IFTYPES:
2437 /* should never get here */ 2594 /* should never get here */
2438 WARN_ON(1); 2595 WARN_ON(1);
2439 break; 2596 break;
@@ -2447,8 +2604,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2447 * be called with rcu_read_lock protection. 2604 * be called with rcu_read_lock protection.
2448 */ 2605 */
2449static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, 2606static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2450 struct sk_buff *skb, 2607 struct sk_buff *skb)
2451 struct ieee80211_rate *rate)
2452{ 2608{
2453 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2609 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2454 struct ieee80211_local *local = hw_to_local(hw); 2610 struct ieee80211_local *local = hw_to_local(hw);
@@ -2550,13 +2706,12 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2550 skb_new = skb_copy(skb, GFP_ATOMIC); 2706 skb_new = skb_copy(skb, GFP_ATOMIC);
2551 if (!skb_new) { 2707 if (!skb_new) {
2552 if (net_ratelimit()) 2708 if (net_ratelimit())
2553 printk(KERN_DEBUG "%s: failed to copy " 2709 wiphy_debug(local->hw.wiphy,
2554 "multicast frame for %s\n", 2710 "failed to copy multicast frame for %s\n",
2555 wiphy_name(local->hw.wiphy), 2711 prev->name);
2556 prev->name);
2557 goto next; 2712 goto next;
2558 } 2713 }
2559 ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate); 2714 ieee80211_invoke_rx_handlers(prev, &rx, skb_new);
2560next: 2715next:
2561 prev = sdata; 2716 prev = sdata;
2562 } 2717 }
@@ -2572,7 +2727,7 @@ next:
2572 } 2727 }
2573 } 2728 }
2574 if (prev) 2729 if (prev)
2575 ieee80211_invoke_rx_handlers(prev, &rx, skb, rate); 2730 ieee80211_invoke_rx_handlers(prev, &rx, skb);
2576 else 2731 else
2577 dev_kfree_skb(skb); 2732 dev_kfree_skb(skb);
2578} 2733}
@@ -2615,28 +2770,37 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
2615 if (WARN_ON(!local->started)) 2770 if (WARN_ON(!local->started))
2616 goto drop; 2771 goto drop;
2617 2772
2618 if (status->flag & RX_FLAG_HT) { 2773 if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) {
2619 /* 2774 /*
2620 * rate_idx is MCS index, which can be [0-76] as documented on: 2775 * Validate the rate, unless a PLCP error means that
2621 * 2776 * we probably can't have a valid rate here anyway.
2622 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
2623 *
2624 * Anything else would be some sort of driver or hardware error.
2625 * The driver should catch hardware errors.
2626 */ 2777 */
2627 if (WARN((status->rate_idx < 0 || 2778
2628 status->rate_idx > 76), 2779 if (status->flag & RX_FLAG_HT) {
2629 "Rate marked as an HT rate but passed " 2780 /*
2630 "status->rate_idx is not " 2781 * rate_idx is MCS index, which can be [0-76]
2631 "an MCS index [0-76]: %d (0x%02x)\n", 2782 * as documented on:
2632 status->rate_idx, 2783 *
2633 status->rate_idx)) 2784 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
2634 goto drop; 2785 *
2635 } else { 2786 * Anything else would be some sort of driver or
2636 if (WARN_ON(status->rate_idx < 0 || 2787 * hardware error. The driver should catch hardware
2637 status->rate_idx >= sband->n_bitrates)) 2788 * errors.
2638 goto drop; 2789 */
2639 rate = &sband->bitrates[status->rate_idx]; 2790 if (WARN((status->rate_idx < 0 ||
2791 status->rate_idx > 76),
2792 "Rate marked as an HT rate but passed "
2793 "status->rate_idx is not "
2794 "an MCS index [0-76]: %d (0x%02x)\n",
2795 status->rate_idx,
2796 status->rate_idx))
2797 goto drop;
2798 } else {
2799 if (WARN_ON(status->rate_idx < 0 ||
2800 status->rate_idx >= sband->n_bitrates))
2801 goto drop;
2802 rate = &sband->bitrates[status->rate_idx];
2803 }
2640 } 2804 }
2641 2805
2642 /* 2806 /*
@@ -2658,7 +2822,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
2658 return; 2822 return;
2659 } 2823 }
2660 2824
2661 __ieee80211_rx_handle_packet(hw, skb, rate); 2825 __ieee80211_rx_handle_packet(hw, skb);
2662 2826
2663 rcu_read_unlock(); 2827 rcu_read_unlock();
2664 2828
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 872d7b6ef6b3..d60389ba9b95 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -248,14 +248,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
248 return true; 248 return true;
249} 249}
250 250
251void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) 251static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
252{ 252{
253 struct ieee80211_local *local = hw_to_local(hw); 253 struct ieee80211_local *local = hw_to_local(hw);
254 bool was_hw_scan; 254 bool was_hw_scan;
255 255
256 trace_api_scan_completed(local, aborted); 256 mutex_lock(&local->mtx);
257
258 mutex_lock(&local->scan_mtx);
259 257
260 /* 258 /*
261 * It's ok to abort a not-yet-running scan (that 259 * It's ok to abort a not-yet-running scan (that
@@ -267,7 +265,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
267 aborted = true; 265 aborted = true;
268 266
269 if (WARN_ON(!local->scan_req)) { 267 if (WARN_ON(!local->scan_req)) {
270 mutex_unlock(&local->scan_mtx); 268 mutex_unlock(&local->mtx);
271 return; 269 return;
272 } 270 }
273 271
@@ -275,7 +273,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
275 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { 273 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) {
276 ieee80211_queue_delayed_work(&local->hw, 274 ieee80211_queue_delayed_work(&local->hw,
277 &local->scan_work, 0); 275 &local->scan_work, 0);
278 mutex_unlock(&local->scan_mtx); 276 mutex_unlock(&local->mtx);
279 return; 277 return;
280 } 278 }
281 279
@@ -291,7 +289,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
291 local->scan_channel = NULL; 289 local->scan_channel = NULL;
292 290
293 /* we only have to protect scan_req and hw/sw scan */ 291 /* we only have to protect scan_req and hw/sw scan */
294 mutex_unlock(&local->scan_mtx); 292 mutex_unlock(&local->mtx);
295 293
296 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 294 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
297 if (was_hw_scan) 295 if (was_hw_scan)
@@ -304,12 +302,26 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
304 ieee80211_offchannel_return(local, true); 302 ieee80211_offchannel_return(local, true);
305 303
306 done: 304 done:
305 mutex_lock(&local->mtx);
307 ieee80211_recalc_idle(local); 306 ieee80211_recalc_idle(local);
307 mutex_unlock(&local->mtx);
308 ieee80211_mlme_notify_scan_completed(local); 308 ieee80211_mlme_notify_scan_completed(local);
309 ieee80211_ibss_notify_scan_completed(local); 309 ieee80211_ibss_notify_scan_completed(local);
310 ieee80211_mesh_notify_scan_completed(local); 310 ieee80211_mesh_notify_scan_completed(local);
311 ieee80211_queue_work(&local->hw, &local->work_work); 311 ieee80211_queue_work(&local->hw, &local->work_work);
312} 312}
313
314void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
315{
316 struct ieee80211_local *local = hw_to_local(hw);
317
318 trace_api_scan_completed(local, aborted);
319
320 set_bit(SCAN_COMPLETED, &local->scanning);
321 if (aborted)
322 set_bit(SCAN_ABORTED, &local->scanning);
323 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
324}
313EXPORT_SYMBOL(ieee80211_scan_completed); 325EXPORT_SYMBOL(ieee80211_scan_completed);
314 326
315static int ieee80211_start_sw_scan(struct ieee80211_local *local) 327static int ieee80211_start_sw_scan(struct ieee80211_local *local)
@@ -447,7 +459,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
447 459
448 /* if no more bands/channels left, complete scan and advance to the idle state */ 460 /* if no more bands/channels left, complete scan and advance to the idle state */
449 if (local->scan_channel_idx >= local->scan_req->n_channels) { 461 if (local->scan_channel_idx >= local->scan_req->n_channels) {
450 ieee80211_scan_completed(&local->hw, false); 462 __ieee80211_scan_completed(&local->hw, false);
451 return 1; 463 return 1;
452 } 464 }
453 465
@@ -639,17 +651,25 @@ void ieee80211_scan_work(struct work_struct *work)
639 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 651 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
640 unsigned long next_delay = 0; 652 unsigned long next_delay = 0;
641 653
642 mutex_lock(&local->scan_mtx); 654 if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) {
655 bool aborted;
656
657 aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning);
658 __ieee80211_scan_completed(&local->hw, aborted);
659 return;
660 }
661
662 mutex_lock(&local->mtx);
643 if (!sdata || !local->scan_req) { 663 if (!sdata || !local->scan_req) {
644 mutex_unlock(&local->scan_mtx); 664 mutex_unlock(&local->mtx);
645 return; 665 return;
646 } 666 }
647 667
648 if (local->hw_scan_req) { 668 if (local->hw_scan_req) {
649 int rc = drv_hw_scan(local, sdata, local->hw_scan_req); 669 int rc = drv_hw_scan(local, sdata, local->hw_scan_req);
650 mutex_unlock(&local->scan_mtx); 670 mutex_unlock(&local->mtx);
651 if (rc) 671 if (rc)
652 ieee80211_scan_completed(&local->hw, true); 672 __ieee80211_scan_completed(&local->hw, true);
653 return; 673 return;
654 } 674 }
655 675
@@ -661,20 +681,20 @@ void ieee80211_scan_work(struct work_struct *work)
661 local->scan_sdata = NULL; 681 local->scan_sdata = NULL;
662 682
663 rc = __ieee80211_start_scan(sdata, req); 683 rc = __ieee80211_start_scan(sdata, req);
664 mutex_unlock(&local->scan_mtx); 684 mutex_unlock(&local->mtx);
665 685
666 if (rc) 686 if (rc)
667 ieee80211_scan_completed(&local->hw, true); 687 __ieee80211_scan_completed(&local->hw, true);
668 return; 688 return;
669 } 689 }
670 690
671 mutex_unlock(&local->scan_mtx); 691 mutex_unlock(&local->mtx);
672 692
673 /* 693 /*
674 * Avoid re-scheduling when the sdata is going away. 694 * Avoid re-scheduling when the sdata is going away.
675 */ 695 */
676 if (!ieee80211_sdata_running(sdata)) { 696 if (!ieee80211_sdata_running(sdata)) {
677 ieee80211_scan_completed(&local->hw, true); 697 __ieee80211_scan_completed(&local->hw, true);
678 return; 698 return;
679 } 699 }
680 700
@@ -711,9 +731,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
711{ 731{
712 int res; 732 int res;
713 733
714 mutex_lock(&sdata->local->scan_mtx); 734 mutex_lock(&sdata->local->mtx);
715 res = __ieee80211_start_scan(sdata, req); 735 res = __ieee80211_start_scan(sdata, req);
716 mutex_unlock(&sdata->local->scan_mtx); 736 mutex_unlock(&sdata->local->mtx);
717 737
718 return res; 738 return res;
719} 739}
@@ -726,7 +746,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
726 int ret = -EBUSY; 746 int ret = -EBUSY;
727 enum ieee80211_band band; 747 enum ieee80211_band band;
728 748
729 mutex_lock(&local->scan_mtx); 749 mutex_lock(&local->mtx);
730 750
731 /* busy scanning */ 751 /* busy scanning */
732 if (local->scan_req) 752 if (local->scan_req)
@@ -761,7 +781,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
761 781
762 ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req); 782 ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
763 unlock: 783 unlock:
764 mutex_unlock(&local->scan_mtx); 784 mutex_unlock(&local->mtx);
765 return ret; 785 return ret;
766} 786}
767 787
@@ -775,11 +795,11 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
775 * Only call this function when a scan can't be 795 * Only call this function when a scan can't be
776 * queued -- mostly at suspend under RTNL. 796 * queued -- mostly at suspend under RTNL.
777 */ 797 */
778 mutex_lock(&local->scan_mtx); 798 mutex_lock(&local->mtx);
779 abortscan = test_bit(SCAN_SW_SCANNING, &local->scanning) || 799 abortscan = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
780 (!local->scanning && local->scan_req); 800 (!local->scanning && local->scan_req);
781 mutex_unlock(&local->scan_mtx); 801 mutex_unlock(&local->mtx);
782 802
783 if (abortscan) 803 if (abortscan)
784 ieee80211_scan_completed(&local->hw, true); 804 __ieee80211_scan_completed(&local->hw, true);
785} 805}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6d86f0c1ad04..687077e49dc6 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -174,8 +174,7 @@ static void __sta_info_free(struct ieee80211_local *local,
174 } 174 }
175 175
176#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 176#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
177 printk(KERN_DEBUG "%s: Destroyed STA %pM\n", 177 wiphy_debug(local->hw.wiphy, "Destroyed STA %pM\n", sta->sta.addr);
178 wiphy_name(local->hw.wiphy), sta->sta.addr);
179#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 178#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
180 179
181 kfree(sta); 180 kfree(sta);
@@ -262,8 +261,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
262 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); 261 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
263 262
264#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 263#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
265 printk(KERN_DEBUG "%s: Allocated STA %pM\n", 264 wiphy_debug(local->hw.wiphy, "Allocated STA %pM\n", sta->sta.addr);
266 wiphy_name(local->hw.wiphy), sta->sta.addr);
267#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 265#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
268 266
269#ifdef CONFIG_MAC80211_MESH 267#ifdef CONFIG_MAC80211_MESH
@@ -300,8 +298,9 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
300 sta->uploaded = true; 298 sta->uploaded = true;
301#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 299#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
302 if (async) 300 if (async)
303 printk(KERN_DEBUG "%s: Finished adding IBSS STA %pM\n", 301 wiphy_debug(local->hw.wiphy,
304 wiphy_name(local->hw.wiphy), sta->sta.addr); 302 "Finished adding IBSS STA %pM\n",
303 sta->sta.addr);
305#endif 304#endif
306 } 305 }
307 306
@@ -411,8 +410,8 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
411 spin_unlock_irqrestore(&local->sta_lock, flags); 410 spin_unlock_irqrestore(&local->sta_lock, flags);
412 411
413#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 412#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
414 printk(KERN_DEBUG "%s: Added IBSS STA %pM\n", 413 wiphy_debug(local->hw.wiphy, "Added IBSS STA %pM\n",
415 wiphy_name(local->hw.wiphy), sta->sta.addr); 414 sta->sta.addr);
416#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 415#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
417 416
418 ieee80211_queue_work(&local->hw, &local->sta_finish_work); 417 ieee80211_queue_work(&local->hw, &local->sta_finish_work);
@@ -459,8 +458,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
459 } 458 }
460 459
461#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 460#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
462 printk(KERN_DEBUG "%s: Inserted STA %pM\n", 461 wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr);
463 wiphy_name(local->hw.wiphy), sta->sta.addr);
464#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 462#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
465 463
466 /* move reference to rcu-protected */ 464 /* move reference to rcu-protected */
@@ -690,8 +688,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
690#endif 688#endif
691 689
692#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 690#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
693 printk(KERN_DEBUG "%s: Removed STA %pM\n", 691 wiphy_debug(local->hw.wiphy, "Removed STA %pM\n", sta->sta.addr);
694 wiphy_name(local->hw.wiphy), sta->sta.addr);
695#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 692#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
696 cancel_work_sync(&sta->drv_unblock_wk); 693 cancel_work_sync(&sta->drv_unblock_wk);
697 694
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 54262e72376d..810c5ce98316 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -103,6 +103,7 @@ struct tid_ampdu_tx {
103 * @reorder_buf: buffer to reorder incoming aggregated MPDUs 103 * @reorder_buf: buffer to reorder incoming aggregated MPDUs
104 * @reorder_time: jiffies when skb was added 104 * @reorder_time: jiffies when skb was added
105 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) 105 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
106 * @reorder_timer: releases expired frames from the reorder buffer.
106 * @head_seq_num: head sequence number in reordering buffer. 107 * @head_seq_num: head sequence number in reordering buffer.
107 * @stored_mpdu_num: number of MPDUs in reordering buffer 108 * @stored_mpdu_num: number of MPDUs in reordering buffer
108 * @ssn: Starting Sequence Number expected to be aggregated. 109 * @ssn: Starting Sequence Number expected to be aggregated.
@@ -110,20 +111,25 @@ struct tid_ampdu_tx {
110 * @timeout: reset timer value (in TUs). 111 * @timeout: reset timer value (in TUs).
111 * @dialog_token: dialog token for aggregation session 112 * @dialog_token: dialog token for aggregation session
112 * @rcu_head: RCU head used for freeing this struct 113 * @rcu_head: RCU head used for freeing this struct
114 * @reorder_lock: serializes access to reorder buffer, see below.
113 * 115 *
114 * This structure is protected by RCU and the per-station 116 * This structure is protected by RCU and the per-station
115 * spinlock. Assignments to the array holding it must hold 117 * spinlock. Assignments to the array holding it must hold
116 * the spinlock, only the RX path can access it under RCU 118 * the spinlock.
117 * lock-free. The RX path, since it is single-threaded, 119 *
118 * can even modify the structure without locking since the 120 * The @reorder_lock is used to protect the variables and
119 * only other modifications to it are done when the struct 121 * arrays such as @reorder_buf, @reorder_time, @head_seq_num,
120 * can not yet or no longer be found by the RX path. 122 * @stored_mpdu_num and @reorder_time from being corrupted by
123 * concurrent access of the RX path and the expired frame
124 * release timer.
121 */ 125 */
122struct tid_ampdu_rx { 126struct tid_ampdu_rx {
123 struct rcu_head rcu_head; 127 struct rcu_head rcu_head;
128 spinlock_t reorder_lock;
124 struct sk_buff **reorder_buf; 129 struct sk_buff **reorder_buf;
125 unsigned long *reorder_time; 130 unsigned long *reorder_time;
126 struct timer_list session_timer; 131 struct timer_list session_timer;
132 struct timer_list reorder_timer;
127 u16 head_seq_num; 133 u16 head_seq_num;
128 u16 stored_mpdu_num; 134 u16 stored_mpdu_num;
129 u16 ssn; 135 u16 ssn;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 10caec5ea8fa..571b32bfc54c 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -114,11 +114,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
114 114
115#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 115#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
116 if (net_ratelimit()) 116 if (net_ratelimit())
117 printk(KERN_DEBUG "%s: dropped TX filtered frame, " 117 wiphy_debug(local->hw.wiphy,
118 "queue_len=%d PS=%d @%lu\n", 118 "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n",
119 wiphy_name(local->hw.wiphy), 119 skb_queue_len(&sta->tx_filtered),
120 skb_queue_len(&sta->tx_filtered), 120 !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
121 !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
122#endif 121#endif
123 dev_kfree_skb(skb); 122 dev_kfree_skb(skb);
124} 123}
@@ -296,7 +295,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
296 } 295 }
297 296
298 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) 297 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX)
299 cfg80211_action_tx_status( 298 cfg80211_mgmt_tx_status(
300 skb->dev, (unsigned long) skb, skb->data, skb->len, 299 skb->dev, (unsigned long) skb, skb->data, skb->len,
301 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); 300 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
302 301
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c54db966926b..ccf373788ce9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -351,8 +351,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
351 351
352 local->total_ps_buffered = total; 352 local->total_ps_buffered = total;
353#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 353#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
354 printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", 354 wiphy_debug(local->hw.wiphy, "PS buffers full - purged %d frames\n",
355 wiphy_name(local->hw.wiphy), purged); 355 purged);
356#endif 356#endif
357} 357}
358 358
@@ -509,6 +509,18 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
509} 509}
510 510
511static ieee80211_tx_result debug_noinline 511static ieee80211_tx_result debug_noinline
512ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
513{
514 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
515
516 if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol &&
517 tx->sdata->control_port_no_encrypt))
518 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
519
520 return TX_CONTINUE;
521}
522
523static ieee80211_tx_result debug_noinline
512ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) 524ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
513{ 525{
514 struct ieee80211_key *key = NULL; 526 struct ieee80211_key *key = NULL;
@@ -527,7 +539,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
527 else if ((key = rcu_dereference(tx->sdata->default_key))) 539 else if ((key = rcu_dereference(tx->sdata->default_key)))
528 tx->key = key; 540 tx->key = key;
529 else if (tx->sdata->drop_unencrypted && 541 else if (tx->sdata->drop_unencrypted &&
530 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && 542 (tx->skb->protocol != tx->sdata->control_port_protocol) &&
531 !(info->flags & IEEE80211_TX_CTL_INJECTED) && 543 !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
532 (!ieee80211_is_robust_mgmt_frame(hdr) || 544 (!ieee80211_is_robust_mgmt_frame(hdr) ||
533 (ieee80211_is_action(hdr->frame_control) && 545 (ieee80211_is_action(hdr->frame_control) &&
@@ -543,15 +555,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
543 tx->key->tx_rx_count++; 555 tx->key->tx_rx_count++;
544 /* TODO: add threshold stuff again */ 556 /* TODO: add threshold stuff again */
545 557
546 switch (tx->key->conf.alg) { 558 switch (tx->key->conf.cipher) {
547 case ALG_WEP: 559 case WLAN_CIPHER_SUITE_WEP40:
560 case WLAN_CIPHER_SUITE_WEP104:
548 if (ieee80211_is_auth(hdr->frame_control)) 561 if (ieee80211_is_auth(hdr->frame_control))
549 break; 562 break;
550 case ALG_TKIP: 563 case WLAN_CIPHER_SUITE_TKIP:
551 if (!ieee80211_is_data_present(hdr->frame_control)) 564 if (!ieee80211_is_data_present(hdr->frame_control))
552 tx->key = NULL; 565 tx->key = NULL;
553 break; 566 break;
554 case ALG_CCMP: 567 case WLAN_CIPHER_SUITE_CCMP:
555 if (!ieee80211_is_data_present(hdr->frame_control) && 568 if (!ieee80211_is_data_present(hdr->frame_control) &&
556 !ieee80211_use_mfp(hdr->frame_control, tx->sta, 569 !ieee80211_use_mfp(hdr->frame_control, tx->sta,
557 tx->skb)) 570 tx->skb))
@@ -561,7 +574,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
561 IEEE80211_KEY_FLAG_SW_MGMT) && 574 IEEE80211_KEY_FLAG_SW_MGMT) &&
562 ieee80211_is_mgmt(hdr->frame_control); 575 ieee80211_is_mgmt(hdr->frame_control);
563 break; 576 break;
564 case ALG_AES_CMAC: 577 case WLAN_CIPHER_SUITE_AES_CMAC:
565 if (!ieee80211_is_mgmt(hdr->frame_control)) 578 if (!ieee80211_is_mgmt(hdr->frame_control))
566 tx->key = NULL; 579 tx->key = NULL;
567 break; 580 break;
@@ -946,22 +959,31 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
946static ieee80211_tx_result debug_noinline 959static ieee80211_tx_result debug_noinline
947ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) 960ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
948{ 961{
962 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
963
949 if (!tx->key) 964 if (!tx->key)
950 return TX_CONTINUE; 965 return TX_CONTINUE;
951 966
952 switch (tx->key->conf.alg) { 967 switch (tx->key->conf.cipher) {
953 case ALG_WEP: 968 case WLAN_CIPHER_SUITE_WEP40:
969 case WLAN_CIPHER_SUITE_WEP104:
954 return ieee80211_crypto_wep_encrypt(tx); 970 return ieee80211_crypto_wep_encrypt(tx);
955 case ALG_TKIP: 971 case WLAN_CIPHER_SUITE_TKIP:
956 return ieee80211_crypto_tkip_encrypt(tx); 972 return ieee80211_crypto_tkip_encrypt(tx);
957 case ALG_CCMP: 973 case WLAN_CIPHER_SUITE_CCMP:
958 return ieee80211_crypto_ccmp_encrypt(tx); 974 return ieee80211_crypto_ccmp_encrypt(tx);
959 case ALG_AES_CMAC: 975 case WLAN_CIPHER_SUITE_AES_CMAC:
960 return ieee80211_crypto_aes_cmac_encrypt(tx); 976 return ieee80211_crypto_aes_cmac_encrypt(tx);
977 default:
978 /* handle hw-only algorithm */
979 if (info->control.hw_key) {
980 ieee80211_tx_set_protected(tx);
981 return TX_CONTINUE;
982 }
983 break;
984
961 } 985 }
962 986
963 /* not reached */
964 WARN_ON(1);
965 return TX_DROP; 987 return TX_DROP;
966} 988}
967 989
@@ -1339,6 +1361,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1339 CALL_TXH(ieee80211_tx_h_dynamic_ps); 1361 CALL_TXH(ieee80211_tx_h_dynamic_ps);
1340 CALL_TXH(ieee80211_tx_h_check_assoc); 1362 CALL_TXH(ieee80211_tx_h_check_assoc);
1341 CALL_TXH(ieee80211_tx_h_ps_buf); 1363 CALL_TXH(ieee80211_tx_h_ps_buf);
1364 CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
1342 CALL_TXH(ieee80211_tx_h_select_key); 1365 CALL_TXH(ieee80211_tx_h_select_key);
1343 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) 1366 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
1344 CALL_TXH(ieee80211_tx_h_rate_ctrl); 1367 CALL_TXH(ieee80211_tx_h_rate_ctrl);
@@ -1511,8 +1534,8 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
1511 I802_DEBUG_INC(local->tx_expand_skb_head); 1534 I802_DEBUG_INC(local->tx_expand_skb_head);
1512 1535
1513 if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) { 1536 if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) {
1514 printk(KERN_DEBUG "%s: failed to reallocate TX buffer\n", 1537 wiphy_debug(local->hw.wiphy,
1515 wiphy_name(local->hw.wiphy)); 1538 "failed to reallocate TX buffer\n");
1516 return -ENOMEM; 1539 return -ENOMEM;
1517 } 1540 }
1518 1541
@@ -1699,7 +1722,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1699 u16 ethertype, hdrlen, meshhdrlen = 0; 1722 u16 ethertype, hdrlen, meshhdrlen = 0;
1700 __le16 fc; 1723 __le16 fc;
1701 struct ieee80211_hdr hdr; 1724 struct ieee80211_hdr hdr;
1702 struct ieee80211s_hdr mesh_hdr; 1725 struct ieee80211s_hdr mesh_hdr __maybe_unused;
1703 const u8 *encaps_data; 1726 const u8 *encaps_data;
1704 int encaps_len, skip_header_bytes; 1727 int encaps_len, skip_header_bytes;
1705 int nh_pos, h_pos; 1728 int nh_pos, h_pos;
@@ -1816,7 +1839,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1816#endif 1839#endif
1817 case NL80211_IFTYPE_STATION: 1840 case NL80211_IFTYPE_STATION:
1818 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); 1841 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
1819 if (sdata->u.mgd.use_4addr && ethertype != ETH_P_PAE) { 1842 if (sdata->u.mgd.use_4addr &&
1843 cpu_to_be16(ethertype) != sdata->control_port_protocol) {
1820 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); 1844 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
1821 /* RA TA DA SA */ 1845 /* RA TA DA SA */
1822 memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); 1846 memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN);
@@ -1869,7 +1893,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1869 if (!ieee80211_vif_is_mesh(&sdata->vif) && 1893 if (!ieee80211_vif_is_mesh(&sdata->vif) &&
1870 unlikely(!is_multicast_ether_addr(hdr.addr1) && 1894 unlikely(!is_multicast_ether_addr(hdr.addr1) &&
1871 !(sta_flags & WLAN_STA_AUTHORIZED) && 1895 !(sta_flags & WLAN_STA_AUTHORIZED) &&
1872 !(ethertype == ETH_P_PAE && 1896 !(cpu_to_be16(ethertype) == sdata->control_port_protocol &&
1873 compare_ether_addr(sdata->vif.addr, 1897 compare_ether_addr(sdata->vif.addr,
1874 skb->data + ETH_ALEN) == 0))) { 1898 skb->data + ETH_ALEN) == 0))) {
1875#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1899#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -2068,8 +2092,7 @@ void ieee80211_tx_pending(unsigned long data)
2068 2092
2069 if (skb_queue_empty(&local->pending[i])) 2093 if (skb_queue_empty(&local->pending[i]))
2070 list_for_each_entry_rcu(sdata, &local->interfaces, list) 2094 list_for_each_entry_rcu(sdata, &local->interfaces, list)
2071 netif_tx_wake_queue( 2095 netif_wake_subqueue(sdata->dev, i);
2072 netdev_get_tx_queue(sdata->dev, i));
2073 } 2096 }
2074 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 2097 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
2075 2098
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 748387d45bc0..bd40b11d5ab9 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -283,8 +283,11 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
283 283
284 if (skb_queue_empty(&local->pending[queue])) { 284 if (skb_queue_empty(&local->pending[queue])) {
285 rcu_read_lock(); 285 rcu_read_lock();
286 list_for_each_entry_rcu(sdata, &local->interfaces, list) 286 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
287 netif_tx_wake_queue(netdev_get_tx_queue(sdata->dev, queue)); 287 if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
288 continue;
289 netif_wake_subqueue(sdata->dev, queue);
290 }
288 rcu_read_unlock(); 291 rcu_read_unlock();
289 } else 292 } else
290 tasklet_schedule(&local->tx_pending_tasklet); 293 tasklet_schedule(&local->tx_pending_tasklet);
@@ -323,7 +326,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
323 326
324 rcu_read_lock(); 327 rcu_read_lock();
325 list_for_each_entry_rcu(sdata, &local->interfaces, list) 328 list_for_each_entry_rcu(sdata, &local->interfaces, list)
326 netif_tx_stop_queue(netdev_get_tx_queue(sdata->dev, queue)); 329 netif_stop_subqueue(sdata->dev, queue);
327 rcu_read_unlock(); 330 rcu_read_unlock();
328} 331}
329 332
@@ -471,7 +474,7 @@ void ieee80211_iterate_active_interfaces(
471 474
472 list_for_each_entry(sdata, &local->interfaces, list) { 475 list_for_each_entry(sdata, &local->interfaces, list) {
473 switch (sdata->vif.type) { 476 switch (sdata->vif.type) {
474 case __NL80211_IFTYPE_AFTER_LAST: 477 case NUM_NL80211_IFTYPES:
475 case NL80211_IFTYPE_UNSPECIFIED: 478 case NL80211_IFTYPE_UNSPECIFIED:
476 case NL80211_IFTYPE_MONITOR: 479 case NL80211_IFTYPE_MONITOR:
477 case NL80211_IFTYPE_AP_VLAN: 480 case NL80211_IFTYPE_AP_VLAN:
@@ -505,7 +508,7 @@ void ieee80211_iterate_active_interfaces_atomic(
505 508
506 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 509 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
507 switch (sdata->vif.type) { 510 switch (sdata->vif.type) {
508 case __NL80211_IFTYPE_AFTER_LAST: 511 case NUM_NL80211_IFTYPES:
509 case NL80211_IFTYPE_UNSPECIFIED: 512 case NL80211_IFTYPE_UNSPECIFIED:
510 case NL80211_IFTYPE_MONITOR: 513 case NL80211_IFTYPE_MONITOR:
511 case NL80211_IFTYPE_AP_VLAN: 514 case NL80211_IFTYPE_AP_VLAN:
@@ -1189,7 +1192,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1189 /* ignore virtual */ 1192 /* ignore virtual */
1190 break; 1193 break;
1191 case NL80211_IFTYPE_UNSPECIFIED: 1194 case NL80211_IFTYPE_UNSPECIFIED:
1192 case __NL80211_IFTYPE_AFTER_LAST: 1195 case NUM_NL80211_IFTYPES:
1193 WARN_ON(1); 1196 WARN_ON(1);
1194 break; 1197 break;
1195 } 1198 }
@@ -1308,7 +1311,7 @@ void ieee80211_recalc_smps(struct ieee80211_local *local,
1308 */ 1311 */
1309 1312
1310 list_for_each_entry(sdata, &local->interfaces, list) { 1313 list_for_each_entry(sdata, &local->interfaces, list) {
1311 if (!netif_running(sdata->dev)) 1314 if (!ieee80211_sdata_running(sdata))
1312 continue; 1315 continue;
1313 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1316 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1314 goto set; 1317 goto set;
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 9ebc8d8a1f5b..f27484c22b9f 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -240,7 +240,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
240 240
241 keyidx = skb->data[hdrlen + 3] >> 6; 241 keyidx = skb->data[hdrlen + 3] >> 6;
242 242
243 if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) 243 if (!key || keyidx != key->conf.keyidx)
244 return -1; 244 return -1;
245 245
246 klen = 3 + key->conf.keylen; 246 klen = 3 + key->conf.keylen;
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 81d4ad64184a..ae344d1ba056 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -43,7 +43,7 @@ enum work_action {
43/* utils */ 43/* utils */
44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local) 44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local)
45{ 45{
46 WARN_ON(!mutex_is_locked(&local->work_mtx)); 46 lockdep_assert_held(&local->mtx);
47} 47}
48 48
49/* 49/*
@@ -757,7 +757,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
757 mgmt = (struct ieee80211_mgmt *) skb->data; 757 mgmt = (struct ieee80211_mgmt *) skb->data;
758 fc = le16_to_cpu(mgmt->frame_control); 758 fc = le16_to_cpu(mgmt->frame_control);
759 759
760 mutex_lock(&local->work_mtx); 760 mutex_lock(&local->mtx);
761 761
762 list_for_each_entry(wk, &local->work_list, list) { 762 list_for_each_entry(wk, &local->work_list, list) {
763 const u8 *bssid = NULL; 763 const u8 *bssid = NULL;
@@ -833,7 +833,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
833 WARN(1, "unexpected: %d", rma); 833 WARN(1, "unexpected: %d", rma);
834 } 834 }
835 835
836 mutex_unlock(&local->work_mtx); 836 mutex_unlock(&local->mtx);
837 837
838 if (rma != WORK_ACT_DONE) 838 if (rma != WORK_ACT_DONE)
839 goto out; 839 goto out;
@@ -845,9 +845,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
845 case WORK_DONE_REQUEUE: 845 case WORK_DONE_REQUEUE:
846 synchronize_rcu(); 846 synchronize_rcu();
847 wk->started = false; /* restart */ 847 wk->started = false; /* restart */
848 mutex_lock(&local->work_mtx); 848 mutex_lock(&local->mtx);
849 list_add_tail(&wk->list, &local->work_list); 849 list_add_tail(&wk->list, &local->work_list);
850 mutex_unlock(&local->work_mtx); 850 mutex_unlock(&local->mtx);
851 } 851 }
852 852
853 out: 853 out:
@@ -888,9 +888,9 @@ static void ieee80211_work_work(struct work_struct *work)
888 while ((skb = skb_dequeue(&local->work_skb_queue))) 888 while ((skb = skb_dequeue(&local->work_skb_queue)))
889 ieee80211_work_rx_queued_mgmt(local, skb); 889 ieee80211_work_rx_queued_mgmt(local, skb);
890 890
891 ieee80211_recalc_idle(local); 891 mutex_lock(&local->mtx);
892 892
893 mutex_lock(&local->work_mtx); 893 ieee80211_recalc_idle(local);
894 894
895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
896 bool started = wk->started; 896 bool started = wk->started;
@@ -995,20 +995,16 @@ static void ieee80211_work_work(struct work_struct *work)
995 run_again(local, jiffies + HZ/2); 995 run_again(local, jiffies + HZ/2);
996 } 996 }
997 997
998 mutex_lock(&local->scan_mtx);
999
1000 if (list_empty(&local->work_list) && local->scan_req && 998 if (list_empty(&local->work_list) && local->scan_req &&
1001 !local->scanning) 999 !local->scanning)
1002 ieee80211_queue_delayed_work(&local->hw, 1000 ieee80211_queue_delayed_work(&local->hw,
1003 &local->scan_work, 1001 &local->scan_work,
1004 round_jiffies_relative(0)); 1002 round_jiffies_relative(0));
1005 1003
1006 mutex_unlock(&local->scan_mtx);
1007
1008 mutex_unlock(&local->work_mtx);
1009
1010 ieee80211_recalc_idle(local); 1004 ieee80211_recalc_idle(local);
1011 1005
1006 mutex_unlock(&local->mtx);
1007
1012 list_for_each_entry_safe(wk, tmp, &free_work, list) { 1008 list_for_each_entry_safe(wk, tmp, &free_work, list) {
1013 wk->done(wk, NULL); 1009 wk->done(wk, NULL);
1014 list_del(&wk->list); 1010 list_del(&wk->list);
@@ -1035,16 +1031,15 @@ void ieee80211_add_work(struct ieee80211_work *wk)
1035 wk->started = false; 1031 wk->started = false;
1036 1032
1037 local = wk->sdata->local; 1033 local = wk->sdata->local;
1038 mutex_lock(&local->work_mtx); 1034 mutex_lock(&local->mtx);
1039 list_add_tail(&wk->list, &local->work_list); 1035 list_add_tail(&wk->list, &local->work_list);
1040 mutex_unlock(&local->work_mtx); 1036 mutex_unlock(&local->mtx);
1041 1037
1042 ieee80211_queue_work(&local->hw, &local->work_work); 1038 ieee80211_queue_work(&local->hw, &local->work_work);
1043} 1039}
1044 1040
1045void ieee80211_work_init(struct ieee80211_local *local) 1041void ieee80211_work_init(struct ieee80211_local *local)
1046{ 1042{
1047 mutex_init(&local->work_mtx);
1048 INIT_LIST_HEAD(&local->work_list); 1043 INIT_LIST_HEAD(&local->work_list);
1049 setup_timer(&local->work_timer, ieee80211_work_timer, 1044 setup_timer(&local->work_timer, ieee80211_work_timer,
1050 (unsigned long)local); 1045 (unsigned long)local);
@@ -1057,7 +1052,7 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1057 struct ieee80211_local *local = sdata->local; 1052 struct ieee80211_local *local = sdata->local;
1058 struct ieee80211_work *wk; 1053 struct ieee80211_work *wk;
1059 1054
1060 mutex_lock(&local->work_mtx); 1055 mutex_lock(&local->mtx);
1061 list_for_each_entry(wk, &local->work_list, list) { 1056 list_for_each_entry(wk, &local->work_list, list) {
1062 if (wk->sdata != sdata) 1057 if (wk->sdata != sdata)
1063 continue; 1058 continue;
@@ -1065,19 +1060,19 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1065 wk->started = true; 1060 wk->started = true;
1066 wk->timeout = jiffies; 1061 wk->timeout = jiffies;
1067 } 1062 }
1068 mutex_unlock(&local->work_mtx); 1063 mutex_unlock(&local->mtx);
1069 1064
1070 /* run cleanups etc. */ 1065 /* run cleanups etc. */
1071 ieee80211_work_work(&local->work_work); 1066 ieee80211_work_work(&local->work_work);
1072 1067
1073 mutex_lock(&local->work_mtx); 1068 mutex_lock(&local->mtx);
1074 list_for_each_entry(wk, &local->work_list, list) { 1069 list_for_each_entry(wk, &local->work_list, list) {
1075 if (wk->sdata != sdata) 1070 if (wk->sdata != sdata)
1076 continue; 1071 continue;
1077 WARN_ON(1); 1072 WARN_ON(1);
1078 break; 1073 break;
1079 } 1074 }
1080 mutex_unlock(&local->work_mtx); 1075 mutex_unlock(&local->mtx);
1081} 1076}
1082 1077
1083ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, 1078ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
@@ -1163,7 +1158,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1163 struct ieee80211_work *wk, *tmp; 1158 struct ieee80211_work *wk, *tmp;
1164 bool found = false; 1159 bool found = false;
1165 1160
1166 mutex_lock(&local->work_mtx); 1161 mutex_lock(&local->mtx);
1167 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 1162 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
1168 if ((unsigned long) wk == cookie) { 1163 if ((unsigned long) wk == cookie) {
1169 wk->timeout = jiffies; 1164 wk->timeout = jiffies;
@@ -1171,7 +1166,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1171 break; 1166 break;
1172 } 1167 }
1173 } 1168 }
1174 mutex_unlock(&local->work_mtx); 1169 mutex_unlock(&local->mtx);
1175 1170
1176 if (!found) 1171 if (!found)
1177 return -ENOENT; 1172 return -ENOENT;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 8d59d27d887e..43882b36da55 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
36 int tail; 36 int tail;
37 37
38 hdr = (struct ieee80211_hdr *)skb->data; 38 hdr = (struct ieee80211_hdr *)skb->data;
39 if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || 39 if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
40 !ieee80211_is_data_present(hdr->frame_control)) 40 skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control))
41 return TX_CONTINUE; 41 return TX_CONTINUE;
42 42
43 hdrlen = ieee80211_hdrlen(hdr->frame_control); 43 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
94 if (status->flag & RX_FLAG_MMIC_STRIPPED) 94 if (status->flag & RX_FLAG_MMIC_STRIPPED)
95 return RX_CONTINUE; 95 return RX_CONTINUE;
96 96
97 if (!rx->key || rx->key->conf.alg != ALG_TKIP || 97 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
98 !ieee80211_has_protected(hdr->frame_control) || 98 !ieee80211_has_protected(hdr->frame_control) ||
99 !ieee80211_is_data_present(hdr->frame_control)) 99 !ieee80211_is_data_present(hdr->frame_control))
100 return RX_CONTINUE; 100 return RX_CONTINUE;
@@ -221,19 +221,13 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
221 if (!rx->sta || skb->len - hdrlen < 12) 221 if (!rx->sta || skb->len - hdrlen < 12)
222 return RX_DROP_UNUSABLE; 222 return RX_DROP_UNUSABLE;
223 223
224 if (status->flag & RX_FLAG_DECRYPTED) { 224 /*
225 if (status->flag & RX_FLAG_IV_STRIPPED) { 225 * Let TKIP code verify IV, but skip decryption.
226 /* 226 * In the case where hardware checks the IV as well,
227 * Hardware took care of all processing, including 227 * we don't even get here, see ieee80211_rx_h_decrypt()
228 * replay protection, and stripped the ICV/IV so 228 */
229 * we cannot do any checks here. 229 if (status->flag & RX_FLAG_DECRYPTED)
230 */
231 return RX_CONTINUE;
232 }
233
234 /* let TKIP code verify IV, but skip decryption */
235 hwaccel = 1; 230 hwaccel = 1;
236 }
237 231
238 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, 232 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
239 key, skb->data + hdrlen, 233 key, skb->data + hdrlen,
@@ -447,10 +441,6 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
447 if (!rx->sta || data_len < 0) 441 if (!rx->sta || data_len < 0)
448 return RX_DROP_UNUSABLE; 442 return RX_DROP_UNUSABLE;
449 443
450 if ((status->flag & RX_FLAG_DECRYPTED) &&
451 (status->flag & RX_FLAG_IV_STRIPPED))
452 return RX_CONTINUE;
453
454 ccmp_hdr2pn(pn, skb->data + hdrlen); 444 ccmp_hdr2pn(pn, skb->data + hdrlen);
455 445
456 queue = ieee80211_is_mgmt(hdr->frame_control) ? 446 queue = ieee80211_is_mgmt(hdr->frame_control) ?
@@ -564,10 +554,6 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
564 if (!ieee80211_is_mgmt(hdr->frame_control)) 554 if (!ieee80211_is_mgmt(hdr->frame_control))
565 return RX_CONTINUE; 555 return RX_CONTINUE;
566 556
567 if ((status->flag & RX_FLAG_DECRYPTED) &&
568 (status->flag & RX_FLAG_IV_STRIPPED))
569 return RX_CONTINUE;
570
571 if (skb->len < 24 + sizeof(*mmie)) 557 if (skb->len < 24 + sizeof(*mmie))
572 return RX_DROP_UNUSABLE; 558 return RX_DROP_UNUSABLE;
573 559
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4c2f89df5cce..0c043b6ce65e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -40,6 +40,7 @@
40#include <net/udp.h> 40#include <net/udp.h>
41#include <net/icmp.h> /* for icmp_send */ 41#include <net/icmp.h> /* for icmp_send */
42#include <net/route.h> 42#include <net/route.h>
43#include <net/ip6_checksum.h>
43 44
44#include <linux/netfilter.h> 45#include <linux/netfilter.h>
45#include <linux/netfilter_ipv4.h> 46#include <linux/netfilter_ipv4.h>
@@ -637,10 +638,12 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
637 } 638 }
638 639
639 /* And finally the ICMP checksum */ 640 /* And finally the ICMP checksum */
640 icmph->icmp6_cksum = 0; 641 icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
641 /* TODO IPv6: is this correct for ICMPv6? */ 642 skb->len - icmp_offset,
642 ip_vs_checksum_complete(skb, icmp_offset); 643 IPPROTO_ICMPV6, 0);
643 skb->ip_summed = CHECKSUM_UNNECESSARY; 644 skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset;
645 skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
646 skb->ip_summed = CHECKSUM_PARTIAL;
644 647
645 if (inout) 648 if (inout)
646 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 649 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
@@ -1381,8 +1384,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1381 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1384 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1382 cp->protocol == IPPROTO_SCTP) { 1385 cp->protocol == IPPROTO_SCTP) {
1383 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1386 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1384 (atomic_read(&cp->in_pkts) % 1387 (pkts % sysctl_ip_vs_sync_threshold[1]
1385 sysctl_ip_vs_sync_threshold[1]
1386 == sysctl_ip_vs_sync_threshold[0])) || 1388 == sysctl_ip_vs_sync_threshold[0])) ||
1387 (cp->old_state != cp->state && 1389 (cp->old_state != cp->state &&
1388 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1390 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
@@ -1393,7 +1395,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1393 } 1395 }
1394 } 1396 }
1395 1397
1396 if (af == AF_INET && 1398 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1399 else if (af == AF_INET &&
1397 (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1400 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1398 (((cp->protocol != IPPROTO_TCP || 1401 (((cp->protocol != IPPROTO_TCP ||
1399 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1402 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 0f0c079c422a..ca8ec8c4f311 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -61,7 +61,7 @@ static DEFINE_RWLOCK(__ip_vs_svc_lock);
61static DEFINE_RWLOCK(__ip_vs_rs_lock); 61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62 62
63/* lock for state and timeout tables */ 63/* lock for state and timeout tables */
64static DEFINE_RWLOCK(__ip_vs_securetcp_lock); 64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65 65
66/* lock for drop entry handling */ 66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); 67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
@@ -204,7 +204,7 @@ static void update_defense_level(void)
204 spin_unlock(&__ip_vs_droppacket_lock); 204 spin_unlock(&__ip_vs_droppacket_lock);
205 205
206 /* secure_tcp */ 206 /* secure_tcp */
207 write_lock(&__ip_vs_securetcp_lock); 207 spin_lock(&ip_vs_securetcp_lock);
208 switch (sysctl_ip_vs_secure_tcp) { 208 switch (sysctl_ip_vs_secure_tcp) {
209 case 0: 209 case 0:
210 if (old_secure_tcp >= 2) 210 if (old_secure_tcp >= 2)
@@ -238,7 +238,7 @@ static void update_defense_level(void)
238 old_secure_tcp = sysctl_ip_vs_secure_tcp; 238 old_secure_tcp = sysctl_ip_vs_secure_tcp;
239 if (to_change >= 0) 239 if (to_change >= 0)
240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
241 write_unlock(&__ip_vs_securetcp_lock); 241 spin_unlock(&ip_vs_securetcp_lock);
242 242
243 local_bh_enable(); 243 local_bh_enable();
244} 244}
@@ -843,7 +843,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843 return -EINVAL; 843 return -EINVAL;
844 } 844 }
845 845
846 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); 846 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
847 if (dest == NULL) { 847 if (dest == NULL) {
848 pr_err("%s(): no memory.\n", __func__); 848 pr_err("%s(): no memory.\n", __func__);
849 return -ENOMEM; 849 return -ENOMEM;
@@ -1177,7 +1177,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1177 } 1177 }
1178#endif 1178#endif
1179 1179
1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); 1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1181 if (svc == NULL) { 1181 if (svc == NULL) {
1182 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1182 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1183 ret = -ENOMEM; 1183 ret = -ENOMEM;
@@ -2155,7 +2155,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2155 if (cmd != IP_VS_SO_SET_ADD 2155 if (cmd != IP_VS_SO_SET_ADD
2156 && (svc == NULL || svc->protocol != usvc.protocol)) { 2156 && (svc == NULL || svc->protocol != usvc.protocol)) {
2157 ret = -ESRCH; 2157 ret = -ESRCH;
2158 goto out_unlock; 2158 goto out_drop_service;
2159 } 2159 }
2160 2160
2161 switch (cmd) { 2161 switch (cmd) {
@@ -2189,6 +2189,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2189 ret = -EINVAL; 2189 ret = -EINVAL;
2190 } 2190 }
2191 2191
2192out_drop_service:
2192 if (svc) 2193 if (svc)
2193 ip_vs_service_put(svc); 2194 ip_vs_service_put(svc);
2194 2195
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index bbc1ac795952..727e45b66953 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,7 +35,7 @@
35static LIST_HEAD(ip_vs_schedulers); 35static LIST_HEAD(ip_vs_schedulers);
36 36
37/* lock for service table */ 37/* lock for service table */
38static DEFINE_RWLOCK(__ip_vs_sched_lock); 38static DEFINE_SPINLOCK(ip_vs_sched_lock);
39 39
40 40
41/* 41/*
@@ -108,7 +108,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
108 108
109 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); 109 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
110 110
111 read_lock_bh(&__ip_vs_sched_lock); 111 spin_lock_bh(&ip_vs_sched_lock);
112 112
113 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 113 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
114 /* 114 /*
@@ -122,14 +122,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
122 } 122 }
123 if (strcmp(sched_name, sched->name)==0) { 123 if (strcmp(sched_name, sched->name)==0) {
124 /* HIT */ 124 /* HIT */
125 read_unlock_bh(&__ip_vs_sched_lock); 125 spin_unlock_bh(&ip_vs_sched_lock);
126 return sched; 126 return sched;
127 } 127 }
128 if (sched->module) 128 if (sched->module)
129 module_put(sched->module); 129 module_put(sched->module);
130 } 130 }
131 131
132 read_unlock_bh(&__ip_vs_sched_lock); 132 spin_unlock_bh(&ip_vs_sched_lock);
133 return NULL; 133 return NULL;
134} 134}
135 135
@@ -184,10 +184,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
184 /* increase the module use count */ 184 /* increase the module use count */
185 ip_vs_use_count_inc(); 185 ip_vs_use_count_inc();
186 186
187 write_lock_bh(&__ip_vs_sched_lock); 187 spin_lock_bh(&ip_vs_sched_lock);
188 188
189 if (!list_empty(&scheduler->n_list)) { 189 if (!list_empty(&scheduler->n_list)) {
190 write_unlock_bh(&__ip_vs_sched_lock); 190 spin_unlock_bh(&ip_vs_sched_lock);
191 ip_vs_use_count_dec(); 191 ip_vs_use_count_dec();
192 pr_err("%s(): [%s] scheduler already linked\n", 192 pr_err("%s(): [%s] scheduler already linked\n",
193 __func__, scheduler->name); 193 __func__, scheduler->name);
@@ -200,7 +200,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
200 */ 200 */
201 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 201 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
202 if (strcmp(scheduler->name, sched->name) == 0) { 202 if (strcmp(scheduler->name, sched->name) == 0) {
203 write_unlock_bh(&__ip_vs_sched_lock); 203 spin_unlock_bh(&ip_vs_sched_lock);
204 ip_vs_use_count_dec(); 204 ip_vs_use_count_dec();
205 pr_err("%s(): [%s] scheduler already existed " 205 pr_err("%s(): [%s] scheduler already existed "
206 "in the system\n", __func__, scheduler->name); 206 "in the system\n", __func__, scheduler->name);
@@ -211,7 +211,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
211 * Add it into the d-linked scheduler list 211 * Add it into the d-linked scheduler list
212 */ 212 */
213 list_add(&scheduler->n_list, &ip_vs_schedulers); 213 list_add(&scheduler->n_list, &ip_vs_schedulers);
214 write_unlock_bh(&__ip_vs_sched_lock); 214 spin_unlock_bh(&ip_vs_sched_lock);
215 215
216 pr_info("[%s] scheduler registered.\n", scheduler->name); 216 pr_info("[%s] scheduler registered.\n", scheduler->name);
217 217
@@ -229,9 +229,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
229 return -EINVAL; 229 return -EINVAL;
230 } 230 }
231 231
232 write_lock_bh(&__ip_vs_sched_lock); 232 spin_lock_bh(&ip_vs_sched_lock);
233 if (list_empty(&scheduler->n_list)) { 233 if (list_empty(&scheduler->n_list)) {
234 write_unlock_bh(&__ip_vs_sched_lock); 234 spin_unlock_bh(&ip_vs_sched_lock);
235 pr_err("%s(): [%s] scheduler is not in the list. failed\n", 235 pr_err("%s(): [%s] scheduler is not in the list. failed\n",
236 __func__, scheduler->name); 236 __func__, scheduler->name);
237 return -EINVAL; 237 return -EINVAL;
@@ -241,7 +241,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
241 * Remove it from the d-linked scheduler list 241 * Remove it from the d-linked scheduler list
242 */ 242 */
243 list_del(&scheduler->n_list); 243 list_del(&scheduler->n_list);
244 write_unlock_bh(&__ip_vs_sched_lock); 244 spin_unlock_bh(&ip_vs_sched_lock);
245 245
246 /* decrease the module use count */ 246 /* decrease the module use count */
247 ip_vs_use_count_dec(); 247 ip_vs_use_count_dec();
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b46a8390896d..9228ee0dc11a 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -448,6 +448,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
448{ 448{
449 __be16 _ports[2], *ports; 449 __be16 _ports[2], *ports;
450 u8 nexthdr; 450 u8 nexthdr;
451 int poff;
451 452
452 memset(dst, 0, sizeof(*dst)); 453 memset(dst, 0, sizeof(*dst));
453 454
@@ -492,19 +493,13 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
492 return 0; 493 return 0;
493 } 494 }
494 495
495 switch (nexthdr) { 496 poff = proto_ports_offset(nexthdr);
496 case IPPROTO_TCP: 497 if (poff >= 0) {
497 case IPPROTO_UDP: 498 ports = skb_header_pointer(skb, protoff + poff, sizeof(_ports),
498 case IPPROTO_UDPLITE:
499 case IPPROTO_SCTP:
500 case IPPROTO_DCCP:
501 ports = skb_header_pointer(skb, protoff, sizeof(_ports),
502 &_ports); 499 &_ports);
503 break; 500 } else {
504 default:
505 _ports[0] = _ports[1] = 0; 501 _ports[0] = _ports[1] = 0;
506 ports = _ports; 502 ports = _ports;
507 break;
508 } 503 }
509 if (!ports) 504 if (!ports)
510 return -1; 505 return -1;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9a17f28b1253..3616f27b9d46 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -488,7 +488,7 @@ retry:
488 skb->dev = dev; 488 skb->dev = dev;
489 skb->priority = sk->sk_priority; 489 skb->priority = sk->sk_priority;
490 skb->mark = sk->sk_mark; 490 skb->mark = sk->sk_mark;
491 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 491 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
492 if (err < 0) 492 if (err < 0)
493 goto out_unlock; 493 goto out_unlock;
494 494
@@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock,
1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); 1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1210 if (err) 1210 if (err)
1211 goto out_free; 1211 goto out_free;
1212 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 1212 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1213 if (err < 0) 1213 if (err < 0)
1214 goto out_free; 1214 goto out_free;
1215 1215
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index b2a3ae6cad78..04e34196c9de 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -834,6 +834,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
834{ 834{
835 struct pep_sock *pn = pep_sk(sk); 835 struct pep_sock *pn = pep_sk(sk);
836 struct pnpipehdr *ph; 836 struct pnpipehdr *ph;
837 int err;
837 838
838 if (pn_flow_safe(pn->tx_fc) && 839 if (pn_flow_safe(pn->tx_fc) &&
839 !atomic_add_unless(&pn->tx_credits, -1, 0)) { 840 !atomic_add_unless(&pn->tx_credits, -1, 0)) {
@@ -852,7 +853,10 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
852 ph->message_id = PNS_PIPE_DATA; 853 ph->message_id = PNS_PIPE_DATA;
853 ph->pipe_handle = pn->pipe_handle; 854 ph->pipe_handle = pn->pipe_handle;
854 855
855 return pn_skb_send(sk, skb, &pipe_srv); 856 err = pn_skb_send(sk, skb, &pipe_srv);
857 if (err && pn_flow_safe(pn->tx_fc))
858 atomic_inc(&pn->tx_credits);
859 return err;
856} 860}
857 861
858static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, 862static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -872,7 +876,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
872 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, 876 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len,
873 flags & MSG_DONTWAIT, &err); 877 flags & MSG_DONTWAIT, &err);
874 if (!skb) 878 if (!skb)
875 return -ENOBUFS; 879 return err;
876 880
877 skb_reserve(skb, MAX_PHONET_HEADER + 3); 881 skb_reserve(skb, MAX_PHONET_HEADER + 3);
878 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 882 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index b18e48fae975..d0a429459370 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -292,8 +292,7 @@ static void phonet_route_autodel(struct net_device *dev)
292 if (bitmap_empty(deleted, 64)) 292 if (bitmap_empty(deleted, 64))
293 return; /* short-circuit RCU */ 293 return; /* short-circuit RCU */
294 synchronize_rcu(); 294 synchronize_rcu();
295 for (i = find_first_bit(deleted, 64); i < 64; 295 for_each_set_bit(i, deleted, 64) {
296 i = find_next_bit(deleted, 64, i + 1)) {
297 rtm_phonet_notify(RTM_DELROUTE, dev, i); 296 rtm_phonet_notify(RTM_DELROUTE, dev, i);
298 dev_put(dev); 297 dev_put(dev);
299 } 298 }
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 6e9848bf0370..7c91f739f138 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -281,7 +281,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
281 if (!mask && sk->sk_state == TCP_CLOSE_WAIT) 281 if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
282 return POLLHUP; 282 return POLLHUP;
283 283
284 if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits)) 284 if (sk->sk_state == TCP_ESTABLISHED &&
285 atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
286 atomic_read(&pn->tx_credits))
285 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 287 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
286 288
287 return mask; 289 return mask;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index aebfecbdb841..bb6ad81b671d 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -39,7 +39,15 @@
39#include <net/sock.h> 39#include <net/sock.h>
40 40
41#include "rds.h" 41#include "rds.h"
42#include "rdma.h" 42
43char *rds_str_array(char **array, size_t elements, size_t index)
44{
45 if ((index < elements) && array[index])
46 return array[index];
47 else
48 return "unknown";
49}
50EXPORT_SYMBOL(rds_str_array);
43 51
44/* this is just used for stats gathering :/ */ 52/* this is just used for stats gathering :/ */
45static DEFINE_SPINLOCK(rds_sock_lock); 53static DEFINE_SPINLOCK(rds_sock_lock);
@@ -62,7 +70,7 @@ static int rds_release(struct socket *sock)
62 struct rds_sock *rs; 70 struct rds_sock *rs;
63 unsigned long flags; 71 unsigned long flags;
64 72
65 if (sk == NULL) 73 if (!sk)
66 goto out; 74 goto out;
67 75
68 rs = rds_sk_to_rs(sk); 76 rs = rds_sk_to_rs(sk);
@@ -73,7 +81,15 @@ static int rds_release(struct socket *sock)
73 * with the socket. */ 81 * with the socket. */
74 rds_clear_recv_queue(rs); 82 rds_clear_recv_queue(rs);
75 rds_cong_remove_socket(rs); 83 rds_cong_remove_socket(rs);
84
85 /*
86 * the binding lookup hash uses rcu, we need to
87 * make sure we sychronize_rcu before we free our
88 * entry
89 */
76 rds_remove_bound(rs); 90 rds_remove_bound(rs);
91 synchronize_rcu();
92
77 rds_send_drop_to(rs, NULL); 93 rds_send_drop_to(rs, NULL);
78 rds_rdma_drop_keys(rs); 94 rds_rdma_drop_keys(rs);
79 rds_notify_queue_get(rs, NULL); 95 rds_notify_queue_get(rs, NULL);
@@ -83,6 +99,8 @@ static int rds_release(struct socket *sock)
83 rds_sock_count--; 99 rds_sock_count--;
84 spin_unlock_irqrestore(&rds_sock_lock, flags); 100 spin_unlock_irqrestore(&rds_sock_lock, flags);
85 101
102 rds_trans_put(rs->rs_transport);
103
86 sock->sk = NULL; 104 sock->sk = NULL;
87 sock_put(sk); 105 sock_put(sk);
88out: 106out:
@@ -514,7 +532,7 @@ out:
514 spin_unlock_irqrestore(&rds_sock_lock, flags); 532 spin_unlock_irqrestore(&rds_sock_lock, flags);
515} 533}
516 534
517static void __exit rds_exit(void) 535static void rds_exit(void)
518{ 536{
519 sock_unregister(rds_family_ops.family); 537 sock_unregister(rds_family_ops.family);
520 proto_unregister(&rds_proto); 538 proto_unregister(&rds_proto);
@@ -529,7 +547,7 @@ static void __exit rds_exit(void)
529} 547}
530module_exit(rds_exit); 548module_exit(rds_exit);
531 549
532static int __init rds_init(void) 550static int rds_init(void)
533{ 551{
534 int ret; 552 int ret;
535 553
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 5d95fc007f1a..2f6b3fcc79f8 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -34,45 +34,52 @@
34#include <net/sock.h> 34#include <net/sock.h>
35#include <linux/in.h> 35#include <linux/in.h>
36#include <linux/if_arp.h> 36#include <linux/if_arp.h>
37#include <linux/jhash.h>
37#include "rds.h" 38#include "rds.h"
38 39
39/* 40#define BIND_HASH_SIZE 1024
40 * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't 41static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
41 * particularly zippy.
42 *
43 * This is now called for every incoming frame so we arguably care much more
44 * about it than we used to.
45 */
46static DEFINE_SPINLOCK(rds_bind_lock); 42static DEFINE_SPINLOCK(rds_bind_lock);
47static struct rb_root rds_bind_tree = RB_ROOT;
48 43
49static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port, 44static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
50 struct rds_sock *insert) 45{
46 return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
47 (BIND_HASH_SIZE - 1));
48}
49
50static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
51 struct rds_sock *insert)
51{ 52{
52 struct rb_node **p = &rds_bind_tree.rb_node;
53 struct rb_node *parent = NULL;
54 struct rds_sock *rs; 53 struct rds_sock *rs;
54 struct hlist_node *node;
55 struct hlist_head *head = hash_to_bucket(addr, port);
55 u64 cmp; 56 u64 cmp;
56 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); 57 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
57 58
58 while (*p) { 59 rcu_read_lock();
59 parent = *p; 60 hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
60 rs = rb_entry(parent, struct rds_sock, rs_bound_node);
61
62 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | 61 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
63 be16_to_cpu(rs->rs_bound_port); 62 be16_to_cpu(rs->rs_bound_port);
64 63
65 if (needle < cmp) 64 if (cmp == needle) {
66 p = &(*p)->rb_left; 65 rcu_read_unlock();
67 else if (needle > cmp)
68 p = &(*p)->rb_right;
69 else
70 return rs; 66 return rs;
67 }
71 } 68 }
69 rcu_read_unlock();
72 70
73 if (insert) { 71 if (insert) {
74 rb_link_node(&insert->rs_bound_node, parent, p); 72 /*
75 rb_insert_color(&insert->rs_bound_node, &rds_bind_tree); 73 * make sure our addr and port are set before
74 * we are added to the list, other people
75 * in rcu will find us as soon as the
76 * hlist_add_head_rcu is done
77 */
78 insert->rs_bound_addr = addr;
79 insert->rs_bound_port = port;
80 rds_sock_addref(insert);
81
82 hlist_add_head_rcu(&insert->rs_bound_node, head);
76 } 83 }
77 return NULL; 84 return NULL;
78} 85}
@@ -86,15 +93,13 @@ static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
86struct rds_sock *rds_find_bound(__be32 addr, __be16 port) 93struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
87{ 94{
88 struct rds_sock *rs; 95 struct rds_sock *rs;
89 unsigned long flags;
90 96
91 spin_lock_irqsave(&rds_bind_lock, flags); 97 rs = rds_bind_lookup(addr, port, NULL);
92 rs = rds_bind_tree_walk(addr, port, NULL); 98
93 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) 99 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
94 rds_sock_addref(rs); 100 rds_sock_addref(rs);
95 else 101 else
96 rs = NULL; 102 rs = NULL;
97 spin_unlock_irqrestore(&rds_bind_lock, flags);
98 103
99 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, 104 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
100 ntohs(port)); 105 ntohs(port));
@@ -121,22 +126,15 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
121 do { 126 do {
122 if (rover == 0) 127 if (rover == 0)
123 rover++; 128 rover++;
124 if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) { 129 if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
125 *port = cpu_to_be16(rover); 130 *port = rs->rs_bound_port;
126 ret = 0; 131 ret = 0;
132 rdsdebug("rs %p binding to %pI4:%d\n",
133 rs, &addr, (int)ntohs(*port));
127 break; 134 break;
128 } 135 }
129 } while (rover++ != last); 136 } while (rover++ != last);
130 137
131 if (ret == 0) {
132 rs->rs_bound_addr = addr;
133 rs->rs_bound_port = *port;
134 rds_sock_addref(rs);
135
136 rdsdebug("rs %p binding to %pI4:%d\n",
137 rs, &addr, (int)ntohs(*port));
138 }
139
140 spin_unlock_irqrestore(&rds_bind_lock, flags); 138 spin_unlock_irqrestore(&rds_bind_lock, flags);
141 139
142 return ret; 140 return ret;
@@ -153,7 +151,7 @@ void rds_remove_bound(struct rds_sock *rs)
153 rs, &rs->rs_bound_addr, 151 rs, &rs->rs_bound_addr,
154 ntohs(rs->rs_bound_port)); 152 ntohs(rs->rs_bound_port));
155 153
156 rb_erase(&rs->rs_bound_node, &rds_bind_tree); 154 hlist_del_init_rcu(&rs->rs_bound_node);
157 rds_sock_put(rs); 155 rds_sock_put(rs);
158 rs->rs_bound_addr = 0; 156 rs->rs_bound_addr = 0;
159 } 157 }
@@ -184,7 +182,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
184 goto out; 182 goto out;
185 183
186 trans = rds_trans_get_preferred(sin->sin_addr.s_addr); 184 trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
187 if (trans == NULL) { 185 if (!trans) {
188 ret = -EADDRNOTAVAIL; 186 ret = -EADDRNOTAVAIL;
189 rds_remove_bound(rs); 187 rds_remove_bound(rs);
190 if (printk_ratelimit()) 188 if (printk_ratelimit())
@@ -198,5 +196,9 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
198 196
199out: 197out:
200 release_sock(sk); 198 release_sock(sk);
199
200 /* we might have called rds_remove_bound on error */
201 if (ret)
202 synchronize_rcu();
201 return ret; 203 return ret;
202} 204}
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 0871a29f0780..75ea686f27d5 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -141,7 +141,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
141 unsigned long flags; 141 unsigned long flags;
142 142
143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); 143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
144 if (map == NULL) 144 if (!map)
145 return NULL; 145 return NULL;
146 146
147 map->m_addr = addr; 147 map->m_addr = addr;
@@ -159,7 +159,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
159 ret = rds_cong_tree_walk(addr, map); 159 ret = rds_cong_tree_walk(addr, map);
160 spin_unlock_irqrestore(&rds_cong_lock, flags); 160 spin_unlock_irqrestore(&rds_cong_lock, flags);
161 161
162 if (ret == NULL) { 162 if (!ret) {
163 ret = map; 163 ret = map;
164 map = NULL; 164 map = NULL;
165 } 165 }
@@ -205,7 +205,7 @@ int rds_cong_get_maps(struct rds_connection *conn)
205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr); 205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr); 206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
207 207
208 if (conn->c_lcong == NULL || conn->c_fcong == NULL) 208 if (!(conn->c_lcong && conn->c_fcong))
209 return -ENOMEM; 209 return -ENOMEM;
210 210
211 return 0; 211 return 0;
@@ -221,7 +221,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) { 221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (!test_and_set_bit(0, &conn->c_map_queued)) { 222 if (!test_and_set_bit(0, &conn->c_map_queued)) {
223 rds_stats_inc(s_cong_update_queued); 223 rds_stats_inc(s_cong_update_queued);
224 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 224 rds_send_xmit(conn);
225 } 225 }
226 } 226 }
227 227
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7619b671ca28..870992e08cae 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -37,7 +37,6 @@
37 37
38#include "rds.h" 38#include "rds.h"
39#include "loop.h" 39#include "loop.h"
40#include "rdma.h"
41 40
42#define RDS_CONNECTION_HASH_BITS 12 41#define RDS_CONNECTION_HASH_BITS 12
43#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) 42#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
@@ -63,18 +62,7 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
63 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ 62 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \
64} while (0) 63} while (0)
65 64
66static inline int rds_conn_is_sending(struct rds_connection *conn) 65/* rcu read lock must be held or the connection spinlock */
67{
68 int ret = 0;
69
70 if (!mutex_trylock(&conn->c_send_lock))
71 ret = 1;
72 else
73 mutex_unlock(&conn->c_send_lock);
74
75 return ret;
76}
77
78static struct rds_connection *rds_conn_lookup(struct hlist_head *head, 66static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
79 __be32 laddr, __be32 faddr, 67 __be32 laddr, __be32 faddr,
80 struct rds_transport *trans) 68 struct rds_transport *trans)
@@ -82,7 +70,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
82 struct rds_connection *conn, *ret = NULL; 70 struct rds_connection *conn, *ret = NULL;
83 struct hlist_node *pos; 71 struct hlist_node *pos;
84 72
85 hlist_for_each_entry(conn, pos, head, c_hash_node) { 73 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
86 if (conn->c_faddr == faddr && conn->c_laddr == laddr && 74 if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
87 conn->c_trans == trans) { 75 conn->c_trans == trans) {
88 ret = conn; 76 ret = conn;
@@ -129,10 +117,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
129{ 117{
130 struct rds_connection *conn, *parent = NULL; 118 struct rds_connection *conn, *parent = NULL;
131 struct hlist_head *head = rds_conn_bucket(laddr, faddr); 119 struct hlist_head *head = rds_conn_bucket(laddr, faddr);
120 struct rds_transport *loop_trans;
132 unsigned long flags; 121 unsigned long flags;
133 int ret; 122 int ret;
134 123
135 spin_lock_irqsave(&rds_conn_lock, flags); 124 rcu_read_lock();
136 conn = rds_conn_lookup(head, laddr, faddr, trans); 125 conn = rds_conn_lookup(head, laddr, faddr, trans);
137 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && 126 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
138 !is_outgoing) { 127 !is_outgoing) {
@@ -143,12 +132,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
143 parent = conn; 132 parent = conn;
144 conn = parent->c_passive; 133 conn = parent->c_passive;
145 } 134 }
146 spin_unlock_irqrestore(&rds_conn_lock, flags); 135 rcu_read_unlock();
147 if (conn) 136 if (conn)
148 goto out; 137 goto out;
149 138
150 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 139 conn = kmem_cache_zalloc(rds_conn_slab, gfp);
151 if (conn == NULL) { 140 if (!conn) {
152 conn = ERR_PTR(-ENOMEM); 141 conn = ERR_PTR(-ENOMEM);
153 goto out; 142 goto out;
154 } 143 }
@@ -159,7 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
159 spin_lock_init(&conn->c_lock); 148 spin_lock_init(&conn->c_lock);
160 conn->c_next_tx_seq = 1; 149 conn->c_next_tx_seq = 1;
161 150
162 mutex_init(&conn->c_send_lock); 151 init_waitqueue_head(&conn->c_waitq);
163 INIT_LIST_HEAD(&conn->c_send_queue); 152 INIT_LIST_HEAD(&conn->c_send_queue);
164 INIT_LIST_HEAD(&conn->c_retrans); 153 INIT_LIST_HEAD(&conn->c_retrans);
165 154
@@ -175,7 +164,9 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
175 * can bind to the destination address then we'd rather the messages 164 * can bind to the destination address then we'd rather the messages
176 * flow through loopback rather than either transport. 165 * flow through loopback rather than either transport.
177 */ 166 */
178 if (rds_trans_get_preferred(faddr)) { 167 loop_trans = rds_trans_get_preferred(faddr);
168 if (loop_trans) {
169 rds_trans_put(loop_trans);
179 conn->c_loopback = 1; 170 conn->c_loopback = 1;
180 if (is_outgoing && trans->t_prefer_loopback) { 171 if (is_outgoing && trans->t_prefer_loopback) {
181 /* "outgoing" connection - and the transport 172 /* "outgoing" connection - and the transport
@@ -238,7 +229,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
238 kmem_cache_free(rds_conn_slab, conn); 229 kmem_cache_free(rds_conn_slab, conn);
239 conn = found; 230 conn = found;
240 } else { 231 } else {
241 hlist_add_head(&conn->c_hash_node, head); 232 hlist_add_head_rcu(&conn->c_hash_node, head);
242 rds_cong_add_conn(conn); 233 rds_cong_add_conn(conn);
243 rds_conn_count++; 234 rds_conn_count++;
244 } 235 }
@@ -263,21 +254,91 @@ struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
263} 254}
264EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); 255EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
265 256
257void rds_conn_shutdown(struct rds_connection *conn)
258{
259 /* shut it down unless it's down already */
260 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
261 /*
262 * Quiesce the connection mgmt handlers before we start tearing
263 * things down. We don't hold the mutex for the entire
264 * duration of the shutdown operation, else we may be
265 * deadlocking with the CM handler. Instead, the CM event
266 * handler is supposed to check for state DISCONNECTING
267 */
268 mutex_lock(&conn->c_cm_lock);
269 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
270 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
271 rds_conn_error(conn, "shutdown called in state %d\n",
272 atomic_read(&conn->c_state));
273 mutex_unlock(&conn->c_cm_lock);
274 return;
275 }
276 mutex_unlock(&conn->c_cm_lock);
277
278 wait_event(conn->c_waitq,
279 !test_bit(RDS_IN_XMIT, &conn->c_flags));
280
281 conn->c_trans->conn_shutdown(conn);
282 rds_conn_reset(conn);
283
284 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
285 /* This can happen - eg when we're in the middle of tearing
286 * down the connection, and someone unloads the rds module.
287 * Quite reproduceable with loopback connections.
288 * Mostly harmless.
289 */
290 rds_conn_error(conn,
291 "%s: failed to transition to state DOWN, "
292 "current state is %d\n",
293 __func__,
294 atomic_read(&conn->c_state));
295 return;
296 }
297 }
298
299 /* Then reconnect if it's still live.
300 * The passive side of an IB loopback connection is never added
301 * to the conn hash, so we never trigger a reconnect on this
302 * conn - the reconnect is always triggered by the active peer. */
303 cancel_delayed_work_sync(&conn->c_conn_w);
304 rcu_read_lock();
305 if (!hlist_unhashed(&conn->c_hash_node)) {
306 rcu_read_unlock();
307 rds_queue_reconnect(conn);
308 } else {
309 rcu_read_unlock();
310 }
311}
312
313/*
314 * Stop and free a connection.
315 *
316 * This can only be used in very limited circumstances. It assumes that once
317 * the conn has been shutdown that no one else is referencing the connection.
318 * We can only ensure this in the rmmod path in the current code.
319 */
266void rds_conn_destroy(struct rds_connection *conn) 320void rds_conn_destroy(struct rds_connection *conn)
267{ 321{
268 struct rds_message *rm, *rtmp; 322 struct rds_message *rm, *rtmp;
323 unsigned long flags;
269 324
270 rdsdebug("freeing conn %p for %pI4 -> " 325 rdsdebug("freeing conn %p for %pI4 -> "
271 "%pI4\n", conn, &conn->c_laddr, 326 "%pI4\n", conn, &conn->c_laddr,
272 &conn->c_faddr); 327 &conn->c_faddr);
273 328
274 hlist_del_init(&conn->c_hash_node); 329 /* Ensure conn will not be scheduled for reconnect */
330 spin_lock_irq(&rds_conn_lock);
331 hlist_del_init_rcu(&conn->c_hash_node);
332 spin_unlock_irq(&rds_conn_lock);
333 synchronize_rcu();
275 334
276 /* wait for the rds thread to shut it down */ 335 /* shut the connection down */
277 atomic_set(&conn->c_state, RDS_CONN_ERROR); 336 rds_conn_drop(conn);
278 cancel_delayed_work(&conn->c_conn_w); 337 flush_work(&conn->c_down_w);
279 queue_work(rds_wq, &conn->c_down_w); 338
280 flush_workqueue(rds_wq); 339 /* make sure lingering queued work won't try to ref the conn */
340 cancel_delayed_work_sync(&conn->c_send_w);
341 cancel_delayed_work_sync(&conn->c_recv_w);
281 342
282 /* tear down queued messages */ 343 /* tear down queued messages */
283 list_for_each_entry_safe(rm, rtmp, 344 list_for_each_entry_safe(rm, rtmp,
@@ -302,7 +363,9 @@ void rds_conn_destroy(struct rds_connection *conn)
302 BUG_ON(!list_empty(&conn->c_retrans)); 363 BUG_ON(!list_empty(&conn->c_retrans));
303 kmem_cache_free(rds_conn_slab, conn); 364 kmem_cache_free(rds_conn_slab, conn);
304 365
366 spin_lock_irqsave(&rds_conn_lock, flags);
305 rds_conn_count--; 367 rds_conn_count--;
368 spin_unlock_irqrestore(&rds_conn_lock, flags);
306} 369}
307EXPORT_SYMBOL_GPL(rds_conn_destroy); 370EXPORT_SYMBOL_GPL(rds_conn_destroy);
308 371
@@ -316,23 +379,23 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
316 struct list_head *list; 379 struct list_head *list;
317 struct rds_connection *conn; 380 struct rds_connection *conn;
318 struct rds_message *rm; 381 struct rds_message *rm;
319 unsigned long flags;
320 unsigned int total = 0; 382 unsigned int total = 0;
383 unsigned long flags;
321 size_t i; 384 size_t i;
322 385
323 len /= sizeof(struct rds_info_message); 386 len /= sizeof(struct rds_info_message);
324 387
325 spin_lock_irqsave(&rds_conn_lock, flags); 388 rcu_read_lock();
326 389
327 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
328 i++, head++) { 391 i++, head++) {
329 hlist_for_each_entry(conn, pos, head, c_hash_node) { 392 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
330 if (want_send) 393 if (want_send)
331 list = &conn->c_send_queue; 394 list = &conn->c_send_queue;
332 else 395 else
333 list = &conn->c_retrans; 396 list = &conn->c_retrans;
334 397
335 spin_lock(&conn->c_lock); 398 spin_lock_irqsave(&conn->c_lock, flags);
336 399
337 /* XXX too lazy to maintain counts.. */ 400 /* XXX too lazy to maintain counts.. */
338 list_for_each_entry(rm, list, m_conn_item) { 401 list_for_each_entry(rm, list, m_conn_item) {
@@ -343,11 +406,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
343 conn->c_faddr, 0); 406 conn->c_faddr, 0);
344 } 407 }
345 408
346 spin_unlock(&conn->c_lock); 409 spin_unlock_irqrestore(&conn->c_lock, flags);
347 } 410 }
348 } 411 }
349 412 rcu_read_unlock();
350 spin_unlock_irqrestore(&rds_conn_lock, flags);
351 413
352 lens->nr = total; 414 lens->nr = total;
353 lens->each = sizeof(struct rds_info_message); 415 lens->each = sizeof(struct rds_info_message);
@@ -377,19 +439,17 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
377 uint64_t buffer[(item_len + 7) / 8]; 439 uint64_t buffer[(item_len + 7) / 8];
378 struct hlist_head *head; 440 struct hlist_head *head;
379 struct hlist_node *pos; 441 struct hlist_node *pos;
380 struct hlist_node *tmp;
381 struct rds_connection *conn; 442 struct rds_connection *conn;
382 unsigned long flags;
383 size_t i; 443 size_t i;
384 444
385 spin_lock_irqsave(&rds_conn_lock, flags); 445 rcu_read_lock();
386 446
387 lens->nr = 0; 447 lens->nr = 0;
388 lens->each = item_len; 448 lens->each = item_len;
389 449
390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 450 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
391 i++, head++) { 451 i++, head++) {
392 hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { 452 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
393 453
394 /* XXX no c_lock usage.. */ 454 /* XXX no c_lock usage.. */
395 if (!visitor(conn, buffer)) 455 if (!visitor(conn, buffer))
@@ -405,8 +465,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
405 lens->nr++; 465 lens->nr++;
406 } 466 }
407 } 467 }
408 468 rcu_read_unlock();
409 spin_unlock_irqrestore(&rds_conn_lock, flags);
410} 469}
411EXPORT_SYMBOL_GPL(rds_for_each_conn_info); 470EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
412 471
@@ -423,8 +482,8 @@ static int rds_conn_info_visitor(struct rds_connection *conn,
423 sizeof(cinfo->transport)); 482 sizeof(cinfo->transport));
424 cinfo->flags = 0; 483 cinfo->flags = 0;
425 484
426 rds_conn_info_set(cinfo->flags, 485 rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
427 rds_conn_is_sending(conn), SENDING); 486 SENDING);
428 /* XXX Future: return the state rather than these funky bits */ 487 /* XXX Future: return the state rather than these funky bits */
429 rds_conn_info_set(cinfo->flags, 488 rds_conn_info_set(cinfo->flags,
430 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, 489 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
@@ -444,12 +503,12 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
444 sizeof(struct rds_info_connection)); 503 sizeof(struct rds_info_connection));
445} 504}
446 505
447int __init rds_conn_init(void) 506int rds_conn_init(void)
448{ 507{
449 rds_conn_slab = kmem_cache_create("rds_connection", 508 rds_conn_slab = kmem_cache_create("rds_connection",
450 sizeof(struct rds_connection), 509 sizeof(struct rds_connection),
451 0, 0, NULL); 510 0, 0, NULL);
452 if (rds_conn_slab == NULL) 511 if (!rds_conn_slab)
453 return -ENOMEM; 512 return -ENOMEM;
454 513
455 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 514 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
@@ -487,6 +546,18 @@ void rds_conn_drop(struct rds_connection *conn)
487EXPORT_SYMBOL_GPL(rds_conn_drop); 546EXPORT_SYMBOL_GPL(rds_conn_drop);
488 547
489/* 548/*
549 * If the connection is down, trigger a connect. We may have scheduled a
550 * delayed reconnect however - in this case we should not interfere.
551 */
552void rds_conn_connect_if_down(struct rds_connection *conn)
553{
554 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
555 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
556 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
557}
558EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
559
560/*
490 * An error occurred on the connection 561 * An error occurred on the connection
491 */ 562 */
492void 563void
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 8f2d6dd7700a..b12a3951167d 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -53,12 +53,71 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
53module_param(rds_ib_retry_count, int, 0444); 53module_param(rds_ib_retry_count, int, 0444);
54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
55 55
56/*
57 * we have a clumsy combination of RCU and a rwsem protecting this list
58 * because it is used both in the get_mr fast path and while blocking in
59 * the FMR flushing path.
60 */
61DECLARE_RWSEM(rds_ib_devices_lock);
56struct list_head rds_ib_devices; 62struct list_head rds_ib_devices;
57 63
58/* NOTE: if also grabbing ibdev lock, grab this first */ 64/* NOTE: if also grabbing ibdev lock, grab this first */
59DEFINE_SPINLOCK(ib_nodev_conns_lock); 65DEFINE_SPINLOCK(ib_nodev_conns_lock);
60LIST_HEAD(ib_nodev_conns); 66LIST_HEAD(ib_nodev_conns);
61 67
68void rds_ib_nodev_connect(void)
69{
70 struct rds_ib_connection *ic;
71
72 spin_lock(&ib_nodev_conns_lock);
73 list_for_each_entry(ic, &ib_nodev_conns, ib_node)
74 rds_conn_connect_if_down(ic->conn);
75 spin_unlock(&ib_nodev_conns_lock);
76}
77
78void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev)
79{
80 struct rds_ib_connection *ic;
81 unsigned long flags;
82
83 spin_lock_irqsave(&rds_ibdev->spinlock, flags);
84 list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
85 rds_conn_drop(ic->conn);
86 spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
87}
88
89/*
90 * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references
91 * from interrupt context so we push freing off into a work struct in krdsd.
92 */
93static void rds_ib_dev_free(struct work_struct *work)
94{
95 struct rds_ib_ipaddr *i_ipaddr, *i_next;
96 struct rds_ib_device *rds_ibdev = container_of(work,
97 struct rds_ib_device, free_work);
98
99 if (rds_ibdev->mr_pool)
100 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
101 if (rds_ibdev->mr)
102 ib_dereg_mr(rds_ibdev->mr);
103 if (rds_ibdev->pd)
104 ib_dealloc_pd(rds_ibdev->pd);
105
106 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
107 list_del(&i_ipaddr->list);
108 kfree(i_ipaddr);
109 }
110
111 kfree(rds_ibdev);
112}
113
114void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
115{
116 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0);
117 if (atomic_dec_and_test(&rds_ibdev->refcount))
118 queue_work(rds_wq, &rds_ibdev->free_work);
119}
120
62void rds_ib_add_one(struct ib_device *device) 121void rds_ib_add_one(struct ib_device *device)
63{ 122{
64 struct rds_ib_device *rds_ibdev; 123 struct rds_ib_device *rds_ibdev;
@@ -77,11 +136,14 @@ void rds_ib_add_one(struct ib_device *device)
77 goto free_attr; 136 goto free_attr;
78 } 137 }
79 138
80 rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL); 139 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
140 ibdev_to_node(device));
81 if (!rds_ibdev) 141 if (!rds_ibdev)
82 goto free_attr; 142 goto free_attr;
83 143
84 spin_lock_init(&rds_ibdev->spinlock); 144 spin_lock_init(&rds_ibdev->spinlock);
145 atomic_set(&rds_ibdev->refcount, 1);
146 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
85 147
86 rds_ibdev->max_wrs = dev_attr->max_qp_wr; 148 rds_ibdev->max_wrs = dev_attr->max_qp_wr;
87 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); 149 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
@@ -91,68 +153,107 @@ void rds_ib_add_one(struct ib_device *device)
91 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : 153 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) :
92 fmr_pool_size; 154 fmr_pool_size;
93 155
156 rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
157 rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
158
94 rds_ibdev->dev = device; 159 rds_ibdev->dev = device;
95 rds_ibdev->pd = ib_alloc_pd(device); 160 rds_ibdev->pd = ib_alloc_pd(device);
96 if (IS_ERR(rds_ibdev->pd)) 161 if (IS_ERR(rds_ibdev->pd)) {
97 goto free_dev; 162 rds_ibdev->pd = NULL;
163 goto put_dev;
164 }
98 165
99 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, 166 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE);
100 IB_ACCESS_LOCAL_WRITE); 167 if (IS_ERR(rds_ibdev->mr)) {
101 if (IS_ERR(rds_ibdev->mr)) 168 rds_ibdev->mr = NULL;
102 goto err_pd; 169 goto put_dev;
170 }
103 171
104 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); 172 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev);
105 if (IS_ERR(rds_ibdev->mr_pool)) { 173 if (IS_ERR(rds_ibdev->mr_pool)) {
106 rds_ibdev->mr_pool = NULL; 174 rds_ibdev->mr_pool = NULL;
107 goto err_mr; 175 goto put_dev;
108 } 176 }
109 177
110 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 178 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
111 INIT_LIST_HEAD(&rds_ibdev->conn_list); 179 INIT_LIST_HEAD(&rds_ibdev->conn_list);
112 list_add_tail(&rds_ibdev->list, &rds_ib_devices); 180
181 down_write(&rds_ib_devices_lock);
182 list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
183 up_write(&rds_ib_devices_lock);
184 atomic_inc(&rds_ibdev->refcount);
113 185
114 ib_set_client_data(device, &rds_ib_client, rds_ibdev); 186 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
187 atomic_inc(&rds_ibdev->refcount);
115 188
116 goto free_attr; 189 rds_ib_nodev_connect();
117 190
118err_mr: 191put_dev:
119 ib_dereg_mr(rds_ibdev->mr); 192 rds_ib_dev_put(rds_ibdev);
120err_pd:
121 ib_dealloc_pd(rds_ibdev->pd);
122free_dev:
123 kfree(rds_ibdev);
124free_attr: 193free_attr:
125 kfree(dev_attr); 194 kfree(dev_attr);
126} 195}
127 196
197/*
198 * New connections use this to find the device to associate with the
199 * connection. It's not in the fast path so we're not concerned about the
200 * performance of the IB call. (As of this writing, it uses an interrupt
201 * blocking spinlock to serialize walking a per-device list of all registered
202 * clients.)
203 *
204 * RCU is used to handle incoming connections racing with device teardown.
205 * Rather than use a lock to serialize removal from the client_data and
206 * getting a new reference, we use an RCU grace period. The destruction
207 * path removes the device from client_data and then waits for all RCU
208 * readers to finish.
209 *
210 * A new connection can get NULL from this if its arriving on a
211 * device that is in the process of being removed.
212 */
213struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
214{
215 struct rds_ib_device *rds_ibdev;
216
217 rcu_read_lock();
218 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
219 if (rds_ibdev)
220 atomic_inc(&rds_ibdev->refcount);
221 rcu_read_unlock();
222 return rds_ibdev;
223}
224
225/*
226 * The IB stack is letting us know that a device is going away. This can
227 * happen if the underlying HCA driver is removed or if PCI hotplug is removing
228 * the pci function, for example.
229 *
230 * This can be called at any time and can be racing with any other RDS path.
231 */
128void rds_ib_remove_one(struct ib_device *device) 232void rds_ib_remove_one(struct ib_device *device)
129{ 233{
130 struct rds_ib_device *rds_ibdev; 234 struct rds_ib_device *rds_ibdev;
131 struct rds_ib_ipaddr *i_ipaddr, *i_next;
132 235
133 rds_ibdev = ib_get_client_data(device, &rds_ib_client); 236 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
134 if (!rds_ibdev) 237 if (!rds_ibdev)
135 return; 238 return;
136 239
137 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { 240 rds_ib_dev_shutdown(rds_ibdev);
138 list_del(&i_ipaddr->list);
139 kfree(i_ipaddr);
140 }
141 241
142 rds_ib_destroy_conns(rds_ibdev); 242 /* stop connection attempts from getting a reference to this device. */
243 ib_set_client_data(device, &rds_ib_client, NULL);
143 244
144 if (rds_ibdev->mr_pool) 245 down_write(&rds_ib_devices_lock);
145 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); 246 list_del_rcu(&rds_ibdev->list);
146 247 up_write(&rds_ib_devices_lock);
147 ib_dereg_mr(rds_ibdev->mr);
148
149 while (ib_dealloc_pd(rds_ibdev->pd)) {
150 rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd);
151 msleep(1);
152 }
153 248
154 list_del(&rds_ibdev->list); 249 /*
155 kfree(rds_ibdev); 250 * This synchronize rcu is waiting for readers of both the ib
251 * client data and the devices list to finish before we drop
252 * both of those references.
253 */
254 synchronize_rcu();
255 rds_ib_dev_put(rds_ibdev);
256 rds_ib_dev_put(rds_ibdev);
156} 257}
157 258
158struct ib_client rds_ib_client = { 259struct ib_client rds_ib_client = {
@@ -186,7 +287,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
186 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 287 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
187 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 288 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
188 289
189 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 290 rds_ibdev = ic->rds_ibdev;
190 iinfo->max_send_wr = ic->i_send_ring.w_nr; 291 iinfo->max_send_wr = ic->i_send_ring.w_nr;
191 iinfo->max_recv_wr = ic->i_recv_ring.w_nr; 292 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
192 iinfo->max_send_sge = rds_ibdev->max_sge; 293 iinfo->max_send_sge = rds_ibdev->max_sge;
@@ -248,29 +349,36 @@ static int rds_ib_laddr_check(__be32 addr)
248 return ret; 349 return ret;
249} 350}
250 351
352static void rds_ib_unregister_client(void)
353{
354 ib_unregister_client(&rds_ib_client);
355 /* wait for rds_ib_dev_free() to complete */
356 flush_workqueue(rds_wq);
357}
358
251void rds_ib_exit(void) 359void rds_ib_exit(void)
252{ 360{
253 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 361 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
362 rds_ib_unregister_client();
254 rds_ib_destroy_nodev_conns(); 363 rds_ib_destroy_nodev_conns();
255 ib_unregister_client(&rds_ib_client);
256 rds_ib_sysctl_exit(); 364 rds_ib_sysctl_exit();
257 rds_ib_recv_exit(); 365 rds_ib_recv_exit();
258 rds_trans_unregister(&rds_ib_transport); 366 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit();
259} 368}
260 369
261struct rds_transport rds_ib_transport = { 370struct rds_transport rds_ib_transport = {
262 .laddr_check = rds_ib_laddr_check, 371 .laddr_check = rds_ib_laddr_check,
263 .xmit_complete = rds_ib_xmit_complete, 372 .xmit_complete = rds_ib_xmit_complete,
264 .xmit = rds_ib_xmit, 373 .xmit = rds_ib_xmit,
265 .xmit_cong_map = NULL,
266 .xmit_rdma = rds_ib_xmit_rdma, 374 .xmit_rdma = rds_ib_xmit_rdma,
375 .xmit_atomic = rds_ib_xmit_atomic,
267 .recv = rds_ib_recv, 376 .recv = rds_ib_recv,
268 .conn_alloc = rds_ib_conn_alloc, 377 .conn_alloc = rds_ib_conn_alloc,
269 .conn_free = rds_ib_conn_free, 378 .conn_free = rds_ib_conn_free,
270 .conn_connect = rds_ib_conn_connect, 379 .conn_connect = rds_ib_conn_connect,
271 .conn_shutdown = rds_ib_conn_shutdown, 380 .conn_shutdown = rds_ib_conn_shutdown,
272 .inc_copy_to_user = rds_ib_inc_copy_to_user, 381 .inc_copy_to_user = rds_ib_inc_copy_to_user,
273 .inc_purge = rds_ib_inc_purge,
274 .inc_free = rds_ib_inc_free, 382 .inc_free = rds_ib_inc_free,
275 .cm_initiate_connect = rds_ib_cm_initiate_connect, 383 .cm_initiate_connect = rds_ib_cm_initiate_connect,
276 .cm_handle_connect = rds_ib_cm_handle_connect, 384 .cm_handle_connect = rds_ib_cm_handle_connect,
@@ -286,16 +394,20 @@ struct rds_transport rds_ib_transport = {
286 .t_type = RDS_TRANS_IB 394 .t_type = RDS_TRANS_IB
287}; 395};
288 396
289int __init rds_ib_init(void) 397int rds_ib_init(void)
290{ 398{
291 int ret; 399 int ret;
292 400
293 INIT_LIST_HEAD(&rds_ib_devices); 401 INIT_LIST_HEAD(&rds_ib_devices);
294 402
295 ret = ib_register_client(&rds_ib_client); 403 ret = rds_ib_fmr_init();
296 if (ret) 404 if (ret)
297 goto out; 405 goto out;
298 406
407 ret = ib_register_client(&rds_ib_client);
408 if (ret)
409 goto out_fmr_exit;
410
299 ret = rds_ib_sysctl_init(); 411 ret = rds_ib_sysctl_init();
300 if (ret) 412 if (ret)
301 goto out_ibreg; 413 goto out_ibreg;
@@ -317,7 +429,9 @@ out_recv:
317out_sysctl: 429out_sysctl:
318 rds_ib_sysctl_exit(); 430 rds_ib_sysctl_exit();
319out_ibreg: 431out_ibreg:
320 ib_unregister_client(&rds_ib_client); 432 rds_ib_unregister_client();
433out_fmr_exit:
434 rds_ib_fmr_exit();
321out: 435out:
322 return ret; 436 return ret;
323} 437}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 64df4e79b29f..7ad3d57e06a5 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -3,11 +3,13 @@
3 3
4#include <rdma/ib_verbs.h> 4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h> 5#include <rdma/rdma_cm.h>
6#include <linux/pci.h>
7#include <linux/slab.h>
6#include "rds.h" 8#include "rds.h"
7#include "rdma_transport.h" 9#include "rdma_transport.h"
8 10
9#define RDS_FMR_SIZE 256 11#define RDS_FMR_SIZE 256
10#define RDS_FMR_POOL_SIZE 4096 12#define RDS_FMR_POOL_SIZE 8192
11 13
12#define RDS_IB_MAX_SGE 8 14#define RDS_IB_MAX_SGE 8
13#define RDS_IB_RECV_SGE 2 15#define RDS_IB_RECV_SGE 2
@@ -19,6 +21,9 @@
19 21
20#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ 22#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
21 23
24#define RDS_IB_RECYCLE_BATCH_COUNT 32
25
26extern struct rw_semaphore rds_ib_devices_lock;
22extern struct list_head rds_ib_devices; 27extern struct list_head rds_ib_devices;
23 28
24/* 29/*
@@ -26,20 +31,29 @@ extern struct list_head rds_ib_devices;
26 * try and minimize the amount of memory tied up both the device and 31 * try and minimize the amount of memory tied up both the device and
27 * socket receive queues. 32 * socket receive queues.
28 */ 33 */
29/* page offset of the final full frag that fits in the page */
30#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
31struct rds_page_frag { 34struct rds_page_frag {
32 struct list_head f_item; 35 struct list_head f_item;
33 struct page *f_page; 36 struct list_head f_cache_entry;
34 unsigned long f_offset; 37 struct scatterlist f_sg;
35 dma_addr_t f_mapped;
36}; 38};
37 39
38struct rds_ib_incoming { 40struct rds_ib_incoming {
39 struct list_head ii_frags; 41 struct list_head ii_frags;
42 struct list_head ii_cache_entry;
40 struct rds_incoming ii_inc; 43 struct rds_incoming ii_inc;
41}; 44};
42 45
46struct rds_ib_cache_head {
47 struct list_head *first;
48 unsigned long count;
49};
50
51struct rds_ib_refill_cache {
52 struct rds_ib_cache_head *percpu;
53 struct list_head *xfer;
54 struct list_head *ready;
55};
56
43struct rds_ib_connect_private { 57struct rds_ib_connect_private {
44 /* Add new fields at the end, and don't permute existing fields. */ 58 /* Add new fields at the end, and don't permute existing fields. */
45 __be32 dp_saddr; 59 __be32 dp_saddr;
@@ -53,8 +67,7 @@ struct rds_ib_connect_private {
53}; 67};
54 68
55struct rds_ib_send_work { 69struct rds_ib_send_work {
56 struct rds_message *s_rm; 70 void *s_op;
57 struct rds_rdma_op *s_op;
58 struct ib_send_wr s_wr; 71 struct ib_send_wr s_wr;
59 struct ib_sge s_sge[RDS_IB_MAX_SGE]; 72 struct ib_sge s_sge[RDS_IB_MAX_SGE];
60 unsigned long s_queued; 73 unsigned long s_queued;
@@ -92,10 +105,11 @@ struct rds_ib_connection {
92 105
93 /* tx */ 106 /* tx */
94 struct rds_ib_work_ring i_send_ring; 107 struct rds_ib_work_ring i_send_ring;
95 struct rds_message *i_rm; 108 struct rm_data_op *i_data_op;
96 struct rds_header *i_send_hdrs; 109 struct rds_header *i_send_hdrs;
97 u64 i_send_hdrs_dma; 110 u64 i_send_hdrs_dma;
98 struct rds_ib_send_work *i_sends; 111 struct rds_ib_send_work *i_sends;
112 atomic_t i_signaled_sends;
99 113
100 /* rx */ 114 /* rx */
101 struct tasklet_struct i_recv_tasklet; 115 struct tasklet_struct i_recv_tasklet;
@@ -106,8 +120,9 @@ struct rds_ib_connection {
106 struct rds_header *i_recv_hdrs; 120 struct rds_header *i_recv_hdrs;
107 u64 i_recv_hdrs_dma; 121 u64 i_recv_hdrs_dma;
108 struct rds_ib_recv_work *i_recvs; 122 struct rds_ib_recv_work *i_recvs;
109 struct rds_page_frag i_frag;
110 u64 i_ack_recv; /* last ACK received */ 123 u64 i_ack_recv; /* last ACK received */
124 struct rds_ib_refill_cache i_cache_incs;
125 struct rds_ib_refill_cache i_cache_frags;
111 126
112 /* sending acks */ 127 /* sending acks */
113 unsigned long i_ack_flags; 128 unsigned long i_ack_flags;
@@ -138,7 +153,6 @@ struct rds_ib_connection {
138 153
139 /* Batched completions */ 154 /* Batched completions */
140 unsigned int i_unsignaled_wrs; 155 unsigned int i_unsignaled_wrs;
141 long i_unsignaled_bytes;
142}; 156};
143 157
144/* This assumes that atomic_t is at least 32 bits */ 158/* This assumes that atomic_t is at least 32 bits */
@@ -164,9 +178,17 @@ struct rds_ib_device {
164 unsigned int max_fmrs; 178 unsigned int max_fmrs;
165 int max_sge; 179 int max_sge;
166 unsigned int max_wrs; 180 unsigned int max_wrs;
181 unsigned int max_initiator_depth;
182 unsigned int max_responder_resources;
167 spinlock_t spinlock; /* protect the above */ 183 spinlock_t spinlock; /* protect the above */
184 atomic_t refcount;
185 struct work_struct free_work;
168}; 186};
169 187
188#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
189#define ibdev_to_node(ibdev) pcidev_to_node(to_pci_dev(ibdev->dma_device))
190#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
191
170/* bits for i_ack_flags */ 192/* bits for i_ack_flags */
171#define IB_ACK_IN_FLIGHT 0 193#define IB_ACK_IN_FLIGHT 0
172#define IB_ACK_REQUESTED 1 194#define IB_ACK_REQUESTED 1
@@ -202,6 +224,8 @@ struct rds_ib_statistics {
202 uint64_t s_ib_rdma_mr_pool_flush; 224 uint64_t s_ib_rdma_mr_pool_flush;
203 uint64_t s_ib_rdma_mr_pool_wait; 225 uint64_t s_ib_rdma_mr_pool_wait;
204 uint64_t s_ib_rdma_mr_pool_depleted; 226 uint64_t s_ib_rdma_mr_pool_depleted;
227 uint64_t s_ib_atomic_cswp;
228 uint64_t s_ib_atomic_fadd;
205}; 229};
206 230
207extern struct workqueue_struct *rds_ib_wq; 231extern struct workqueue_struct *rds_ib_wq;
@@ -243,6 +267,8 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
243extern struct rds_transport rds_ib_transport; 267extern struct rds_transport rds_ib_transport;
244extern void rds_ib_add_one(struct ib_device *device); 268extern void rds_ib_add_one(struct ib_device *device);
245extern void rds_ib_remove_one(struct ib_device *device); 269extern void rds_ib_remove_one(struct ib_device *device);
270struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
271void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
246extern struct ib_client rds_ib_client; 272extern struct ib_client rds_ib_client;
247 273
248extern unsigned int fmr_pool_size; 274extern unsigned int fmr_pool_size;
@@ -258,7 +284,7 @@ void rds_ib_conn_free(void *arg);
258int rds_ib_conn_connect(struct rds_connection *conn); 284int rds_ib_conn_connect(struct rds_connection *conn);
259void rds_ib_conn_shutdown(struct rds_connection *conn); 285void rds_ib_conn_shutdown(struct rds_connection *conn);
260void rds_ib_state_change(struct sock *sk); 286void rds_ib_state_change(struct sock *sk);
261int __init rds_ib_listen_init(void); 287int rds_ib_listen_init(void);
262void rds_ib_listen_stop(void); 288void rds_ib_listen_stop(void);
263void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); 289void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
264int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, 290int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -275,15 +301,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
275int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 301int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
276void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 302void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
277void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 303void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
278void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock); 304void rds_ib_destroy_nodev_conns(void);
279static inline void rds_ib_destroy_nodev_conns(void)
280{
281 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
282}
283static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
284{
285 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
286}
287struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 305struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
288void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 306void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
289void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 307void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -292,14 +310,16 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
292void rds_ib_sync_mr(void *trans_private, int dir); 310void rds_ib_sync_mr(void *trans_private, int dir);
293void rds_ib_free_mr(void *trans_private, int invalidate); 311void rds_ib_free_mr(void *trans_private, int invalidate);
294void rds_ib_flush_mrs(void); 312void rds_ib_flush_mrs(void);
313int rds_ib_fmr_init(void);
314void rds_ib_fmr_exit(void);
295 315
296/* ib_recv.c */ 316/* ib_recv.c */
297int __init rds_ib_recv_init(void); 317int rds_ib_recv_init(void);
298void rds_ib_recv_exit(void); 318void rds_ib_recv_exit(void);
299int rds_ib_recv(struct rds_connection *conn); 319int rds_ib_recv(struct rds_connection *conn);
300int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 320int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
301 gfp_t page_gfp, int prefill); 321void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
302void rds_ib_inc_purge(struct rds_incoming *inc); 322void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
303void rds_ib_inc_free(struct rds_incoming *inc); 323void rds_ib_inc_free(struct rds_incoming *inc);
304int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 324int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
305 size_t size); 325 size_t size);
@@ -325,17 +345,19 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
325extern wait_queue_head_t rds_ib_ring_empty_wait; 345extern wait_queue_head_t rds_ib_ring_empty_wait;
326 346
327/* ib_send.c */ 347/* ib_send.c */
348char *rds_ib_wc_status_str(enum ib_wc_status status);
328void rds_ib_xmit_complete(struct rds_connection *conn); 349void rds_ib_xmit_complete(struct rds_connection *conn);
329int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, 350int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
330 unsigned int hdr_off, unsigned int sg, unsigned int off); 351 unsigned int hdr_off, unsigned int sg, unsigned int off);
331void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); 352void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
332void rds_ib_send_init_ring(struct rds_ib_connection *ic); 353void rds_ib_send_init_ring(struct rds_ib_connection *ic);
333void rds_ib_send_clear_ring(struct rds_ib_connection *ic); 354void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
334int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 355int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
335void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); 356void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
336void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); 357void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
337int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, 358int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
338 u32 *adv_credits, int need_posted, int max_posted); 359 u32 *adv_credits, int need_posted, int max_posted);
360int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);
339 361
340/* ib_stats.c */ 362/* ib_stats.c */
341DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); 363DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
@@ -344,7 +366,7 @@ unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
344 unsigned int avail); 366 unsigned int avail);
345 367
346/* ib_sysctl.c */ 368/* ib_sysctl.c */
347int __init rds_ib_sysctl_init(void); 369int rds_ib_sysctl_init(void);
348void rds_ib_sysctl_exit(void); 370void rds_ib_sysctl_exit(void);
349extern unsigned long rds_ib_sysctl_max_send_wr; 371extern unsigned long rds_ib_sysctl_max_send_wr;
350extern unsigned long rds_ib_sysctl_max_recv_wr; 372extern unsigned long rds_ib_sysctl_max_recv_wr;
@@ -354,28 +376,4 @@ extern unsigned long rds_ib_sysctl_max_recv_allocation;
354extern unsigned int rds_ib_sysctl_flow_control; 376extern unsigned int rds_ib_sysctl_flow_control;
355extern ctl_table rds_ib_sysctl_table[]; 377extern ctl_table rds_ib_sysctl_table[];
356 378
357/*
358 * Helper functions for getting/setting the header and data SGEs in
359 * RDS packets (not RDMA)
360 *
361 * From version 3.1 onwards, header is in front of data in the sge.
362 */
363static inline struct ib_sge *
364rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
365{
366 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
367 return &sge[0];
368 else
369 return &sge[1];
370}
371
372static inline struct ib_sge *
373rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
374{
375 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
376 return &sge[1];
377 else
378 return &sge[0];
379}
380
381#endif 379#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index f68832798db2..bc3dbc1ba61f 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -38,6 +38,36 @@
38#include "rds.h" 38#include "rds.h"
39#include "ib.h" 39#include "ib.h"
40 40
41static char *rds_ib_event_type_strings[] = {
42#define RDS_IB_EVENT_STRING(foo) \
43 [IB_EVENT_##foo] = __stringify(IB_EVENT_##foo)
44 RDS_IB_EVENT_STRING(CQ_ERR),
45 RDS_IB_EVENT_STRING(QP_FATAL),
46 RDS_IB_EVENT_STRING(QP_REQ_ERR),
47 RDS_IB_EVENT_STRING(QP_ACCESS_ERR),
48 RDS_IB_EVENT_STRING(COMM_EST),
49 RDS_IB_EVENT_STRING(SQ_DRAINED),
50 RDS_IB_EVENT_STRING(PATH_MIG),
51 RDS_IB_EVENT_STRING(PATH_MIG_ERR),
52 RDS_IB_EVENT_STRING(DEVICE_FATAL),
53 RDS_IB_EVENT_STRING(PORT_ACTIVE),
54 RDS_IB_EVENT_STRING(PORT_ERR),
55 RDS_IB_EVENT_STRING(LID_CHANGE),
56 RDS_IB_EVENT_STRING(PKEY_CHANGE),
57 RDS_IB_EVENT_STRING(SM_CHANGE),
58 RDS_IB_EVENT_STRING(SRQ_ERR),
59 RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED),
60 RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED),
61 RDS_IB_EVENT_STRING(CLIENT_REREGISTER),
62#undef RDS_IB_EVENT_STRING
63};
64
65static char *rds_ib_event_str(enum ib_event_type type)
66{
67 return rds_str_array(rds_ib_event_type_strings,
68 ARRAY_SIZE(rds_ib_event_type_strings), type);
69};
70
41/* 71/*
42 * Set the selected protocol version 72 * Set the selected protocol version
43 */ 73 */
@@ -95,7 +125,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
95{ 125{
96 const struct rds_ib_connect_private *dp = NULL; 126 const struct rds_ib_connect_private *dp = NULL;
97 struct rds_ib_connection *ic = conn->c_transport_data; 127 struct rds_ib_connection *ic = conn->c_transport_data;
98 struct rds_ib_device *rds_ibdev;
99 struct ib_qp_attr qp_attr; 128 struct ib_qp_attr qp_attr;
100 int err; 129 int err;
101 130
@@ -111,11 +140,21 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
111 } 140 }
112 } 141 }
113 142
114 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", 143 if (conn->c_version < RDS_PROTOCOL(3,1)) {
115 &conn->c_faddr, 144 printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
116 RDS_PROTOCOL_MAJOR(conn->c_version), 145 " no longer supported\n",
117 RDS_PROTOCOL_MINOR(conn->c_version), 146 &conn->c_faddr,
118 ic->i_flowctl ? ", flow control" : ""); 147 RDS_PROTOCOL_MAJOR(conn->c_version),
148 RDS_PROTOCOL_MINOR(conn->c_version));
149 rds_conn_destroy(conn);
150 return;
151 } else {
152 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
153 &conn->c_faddr,
154 RDS_PROTOCOL_MAJOR(conn->c_version),
155 RDS_PROTOCOL_MINOR(conn->c_version),
156 ic->i_flowctl ? ", flow control" : "");
157 }
119 158
120 /* 159 /*
121 * Init rings and fill recv. this needs to wait until protocol negotiation 160 * Init rings and fill recv. this needs to wait until protocol negotiation
@@ -125,7 +164,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
125 rds_ib_recv_init_ring(ic); 164 rds_ib_recv_init_ring(ic);
126 /* Post receive buffers - as a side effect, this will update 165 /* Post receive buffers - as a side effect, this will update
127 * the posted credit count. */ 166 * the posted credit count. */
128 rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); 167 rds_ib_recv_refill(conn, 1);
129 168
130 /* Tune RNR behavior */ 169 /* Tune RNR behavior */
131 rds_ib_tune_rnr(ic, &qp_attr); 170 rds_ib_tune_rnr(ic, &qp_attr);
@@ -135,12 +174,11 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
135 if (err) 174 if (err)
136 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); 175 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
137 176
138 /* update ib_device with this local ipaddr & conn */ 177 /* update ib_device with this local ipaddr */
139 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 178 err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
140 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
141 if (err) 179 if (err)
142 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 180 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
143 rds_ib_add_conn(rds_ibdev, conn); 181 err);
144 182
145 /* If the peer gave us the last packet it saw, process this as if 183 /* If the peer gave us the last packet it saw, process this as if
146 * we had received a regular ACK. */ 184 * we had received a regular ACK. */
@@ -153,18 +191,23 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
153static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, 191static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
154 struct rdma_conn_param *conn_param, 192 struct rdma_conn_param *conn_param,
155 struct rds_ib_connect_private *dp, 193 struct rds_ib_connect_private *dp,
156 u32 protocol_version) 194 u32 protocol_version,
195 u32 max_responder_resources,
196 u32 max_initiator_depth)
157{ 197{
198 struct rds_ib_connection *ic = conn->c_transport_data;
199 struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
200
158 memset(conn_param, 0, sizeof(struct rdma_conn_param)); 201 memset(conn_param, 0, sizeof(struct rdma_conn_param));
159 /* XXX tune these? */ 202
160 conn_param->responder_resources = 1; 203 conn_param->responder_resources =
161 conn_param->initiator_depth = 1; 204 min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
205 conn_param->initiator_depth =
206 min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
162 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7); 207 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
163 conn_param->rnr_retry_count = 7; 208 conn_param->rnr_retry_count = 7;
164 209
165 if (dp) { 210 if (dp) {
166 struct rds_ib_connection *ic = conn->c_transport_data;
167
168 memset(dp, 0, sizeof(*dp)); 211 memset(dp, 0, sizeof(*dp));
169 dp->dp_saddr = conn->c_laddr; 212 dp->dp_saddr = conn->c_laddr;
170 dp->dp_daddr = conn->c_faddr; 213 dp->dp_daddr = conn->c_faddr;
@@ -189,7 +232,8 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
189 232
190static void rds_ib_cq_event_handler(struct ib_event *event, void *data) 233static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
191{ 234{
192 rdsdebug("event %u data %p\n", event->event, data); 235 rdsdebug("event %u (%s) data %p\n",
236 event->event, rds_ib_event_str(event->event), data);
193} 237}
194 238
195static void rds_ib_qp_event_handler(struct ib_event *event, void *data) 239static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
@@ -197,16 +241,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
197 struct rds_connection *conn = data; 241 struct rds_connection *conn = data;
198 struct rds_ib_connection *ic = conn->c_transport_data; 242 struct rds_ib_connection *ic = conn->c_transport_data;
199 243
200 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); 244 rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
245 rds_ib_event_str(event->event));
201 246
202 switch (event->event) { 247 switch (event->event) {
203 case IB_EVENT_COMM_EST: 248 case IB_EVENT_COMM_EST:
204 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); 249 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
205 break; 250 break;
206 default: 251 default:
207 rdsdebug("Fatal QP Event %u " 252 rdsdebug("Fatal QP Event %u (%s) "
208 "- connection %pI4->%pI4, reconnecting\n", 253 "- connection %pI4->%pI4, reconnecting\n",
209 event->event, &conn->c_laddr, &conn->c_faddr); 254 event->event, rds_ib_event_str(event->event),
255 &conn->c_laddr, &conn->c_faddr);
210 rds_conn_drop(conn); 256 rds_conn_drop(conn);
211 break; 257 break;
212 } 258 }
@@ -224,18 +270,16 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
224 struct rds_ib_device *rds_ibdev; 270 struct rds_ib_device *rds_ibdev;
225 int ret; 271 int ret;
226 272
227 /* rds_ib_add_one creates a rds_ib_device object per IB device, 273 /*
228 * and allocates a protection domain, memory range and FMR pool 274 * It's normal to see a null device if an incoming connection races
229 * for each. If that fails for any reason, it will not register 275 * with device removal, so we don't print a warning.
230 * the rds_ibdev at all.
231 */ 276 */
232 rds_ibdev = ib_get_client_data(dev, &rds_ib_client); 277 rds_ibdev = rds_ib_get_client_data(dev);
233 if (rds_ibdev == NULL) { 278 if (!rds_ibdev)
234 if (printk_ratelimit())
235 printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
236 dev->name);
237 return -EOPNOTSUPP; 279 return -EOPNOTSUPP;
238 } 280
281 /* add the conn now so that connection establishment has the dev */
282 rds_ib_add_conn(rds_ibdev, conn);
239 283
240 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) 284 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
241 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); 285 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
@@ -306,7 +350,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
306 ic->i_send_ring.w_nr * 350 ic->i_send_ring.w_nr *
307 sizeof(struct rds_header), 351 sizeof(struct rds_header),
308 &ic->i_send_hdrs_dma, GFP_KERNEL); 352 &ic->i_send_hdrs_dma, GFP_KERNEL);
309 if (ic->i_send_hdrs == NULL) { 353 if (!ic->i_send_hdrs) {
310 ret = -ENOMEM; 354 ret = -ENOMEM;
311 rdsdebug("ib_dma_alloc_coherent send failed\n"); 355 rdsdebug("ib_dma_alloc_coherent send failed\n");
312 goto out; 356 goto out;
@@ -316,7 +360,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
316 ic->i_recv_ring.w_nr * 360 ic->i_recv_ring.w_nr *
317 sizeof(struct rds_header), 361 sizeof(struct rds_header),
318 &ic->i_recv_hdrs_dma, GFP_KERNEL); 362 &ic->i_recv_hdrs_dma, GFP_KERNEL);
319 if (ic->i_recv_hdrs == NULL) { 363 if (!ic->i_recv_hdrs) {
320 ret = -ENOMEM; 364 ret = -ENOMEM;
321 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 365 rdsdebug("ib_dma_alloc_coherent recv failed\n");
322 goto out; 366 goto out;
@@ -324,22 +368,24 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
324 368
325 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 369 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
326 &ic->i_ack_dma, GFP_KERNEL); 370 &ic->i_ack_dma, GFP_KERNEL);
327 if (ic->i_ack == NULL) { 371 if (!ic->i_ack) {
328 ret = -ENOMEM; 372 ret = -ENOMEM;
329 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 373 rdsdebug("ib_dma_alloc_coherent ack failed\n");
330 goto out; 374 goto out;
331 } 375 }
332 376
333 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 377 ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
334 if (ic->i_sends == NULL) { 378 ibdev_to_node(dev));
379 if (!ic->i_sends) {
335 ret = -ENOMEM; 380 ret = -ENOMEM;
336 rdsdebug("send allocation failed\n"); 381 rdsdebug("send allocation failed\n");
337 goto out; 382 goto out;
338 } 383 }
339 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 384 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
340 385
341 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); 386 ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
342 if (ic->i_recvs == NULL) { 387 ibdev_to_node(dev));
388 if (!ic->i_recvs) {
343 ret = -ENOMEM; 389 ret = -ENOMEM;
344 rdsdebug("recv allocation failed\n"); 390 rdsdebug("recv allocation failed\n");
345 goto out; 391 goto out;
@@ -352,6 +398,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
352 ic->i_send_cq, ic->i_recv_cq); 398 ic->i_send_cq, ic->i_recv_cq);
353 399
354out: 400out:
401 rds_ib_dev_put(rds_ibdev);
355 return ret; 402 return ret;
356} 403}
357 404
@@ -409,7 +456,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
409 struct rds_ib_connection *ic = NULL; 456 struct rds_ib_connection *ic = NULL;
410 struct rdma_conn_param conn_param; 457 struct rdma_conn_param conn_param;
411 u32 version; 458 u32 version;
412 int err, destroy = 1; 459 int err = 1, destroy = 1;
413 460
414 /* Check whether the remote protocol version matches ours. */ 461 /* Check whether the remote protocol version matches ours. */
415 version = rds_ib_protocol_compatible(event); 462 version = rds_ib_protocol_compatible(event);
@@ -448,7 +495,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
448 /* Wait and see - our connect may still be succeeding */ 495 /* Wait and see - our connect may still be succeeding */
449 rds_ib_stats_inc(s_ib_connect_raced); 496 rds_ib_stats_inc(s_ib_connect_raced);
450 } 497 }
451 mutex_unlock(&conn->c_cm_lock);
452 goto out; 498 goto out;
453 } 499 }
454 500
@@ -479,20 +525,20 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
479 goto out; 525 goto out;
480 } 526 }
481 527
482 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); 528 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
529 event->param.conn.responder_resources,
530 event->param.conn.initiator_depth);
483 531
484 /* rdma_accept() calls rdma_reject() internally if it fails */ 532 /* rdma_accept() calls rdma_reject() internally if it fails */
485 err = rdma_accept(cm_id, &conn_param); 533 err = rdma_accept(cm_id, &conn_param);
486 mutex_unlock(&conn->c_cm_lock); 534 if (err)
487 if (err) {
488 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); 535 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
489 goto out;
490 }
491
492 return 0;
493 536
494out: 537out:
495 rdma_reject(cm_id, NULL, 0); 538 if (conn)
539 mutex_unlock(&conn->c_cm_lock);
540 if (err)
541 rdma_reject(cm_id, NULL, 0);
496 return destroy; 542 return destroy;
497} 543}
498 544
@@ -516,8 +562,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
516 goto out; 562 goto out;
517 } 563 }
518 564
519 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); 565 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
520 566 UINT_MAX, UINT_MAX);
521 ret = rdma_connect(cm_id, &conn_param); 567 ret = rdma_connect(cm_id, &conn_param);
522 if (ret) 568 if (ret)
523 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); 569 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
@@ -601,9 +647,19 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
601 ic->i_cm_id, err); 647 ic->i_cm_id, err);
602 } 648 }
603 649
650 /*
651 * We want to wait for tx and rx completion to finish
652 * before we tear down the connection, but we have to be
653 * careful not to get stuck waiting on a send ring that
654 * only has unsignaled sends in it. We've shutdown new
655 * sends before getting here so by waiting for signaled
656 * sends to complete we're ensured that there will be no
657 * more tx processing.
658 */
604 wait_event(rds_ib_ring_empty_wait, 659 wait_event(rds_ib_ring_empty_wait,
605 rds_ib_ring_empty(&ic->i_send_ring) && 660 rds_ib_ring_empty(&ic->i_recv_ring) &&
606 rds_ib_ring_empty(&ic->i_recv_ring)); 661 (atomic_read(&ic->i_signaled_sends) == 0));
662 tasklet_kill(&ic->i_recv_tasklet);
607 663
608 if (ic->i_send_hdrs) 664 if (ic->i_send_hdrs)
609 ib_dma_free_coherent(dev, 665 ib_dma_free_coherent(dev,
@@ -654,9 +710,12 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
654 BUG_ON(ic->rds_ibdev); 710 BUG_ON(ic->rds_ibdev);
655 711
656 /* Clear pending transmit */ 712 /* Clear pending transmit */
657 if (ic->i_rm) { 713 if (ic->i_data_op) {
658 rds_message_put(ic->i_rm); 714 struct rds_message *rm;
659 ic->i_rm = NULL; 715
716 rm = container_of(ic->i_data_op, struct rds_message, data);
717 rds_message_put(rm);
718 ic->i_data_op = NULL;
660 } 719 }
661 720
662 /* Clear the ACK state */ 721 /* Clear the ACK state */
@@ -690,12 +749,19 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
690{ 749{
691 struct rds_ib_connection *ic; 750 struct rds_ib_connection *ic;
692 unsigned long flags; 751 unsigned long flags;
752 int ret;
693 753
694 /* XXX too lazy? */ 754 /* XXX too lazy? */
695 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); 755 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
696 if (ic == NULL) 756 if (!ic)
697 return -ENOMEM; 757 return -ENOMEM;
698 758
759 ret = rds_ib_recv_alloc_caches(ic);
760 if (ret) {
761 kfree(ic);
762 return ret;
763 }
764
699 INIT_LIST_HEAD(&ic->ib_node); 765 INIT_LIST_HEAD(&ic->ib_node);
700 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, 766 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
701 (unsigned long) ic); 767 (unsigned long) ic);
@@ -703,6 +769,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
703#ifndef KERNEL_HAS_ATOMIC64 769#ifndef KERNEL_HAS_ATOMIC64
704 spin_lock_init(&ic->i_ack_lock); 770 spin_lock_init(&ic->i_ack_lock);
705#endif 771#endif
772 atomic_set(&ic->i_signaled_sends, 0);
706 773
707 /* 774 /*
708 * rds_ib_conn_shutdown() waits for these to be emptied so they 775 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -744,6 +811,8 @@ void rds_ib_conn_free(void *arg)
744 list_del(&ic->ib_node); 811 list_del(&ic->ib_node);
745 spin_unlock_irq(lock_ptr); 812 spin_unlock_irq(lock_ptr);
746 813
814 rds_ib_recv_free_caches(ic);
815
747 kfree(ic); 816 kfree(ic);
748} 817}
749 818
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index a54cd63f9e35..8f6e221c9f78 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -32,11 +32,16 @@
32 */ 32 */
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/rculist.h>
35 36
36#include "rds.h" 37#include "rds.h"
37#include "rdma.h"
38#include "ib.h" 38#include "ib.h"
39#include "xlist.h"
39 40
41struct workqueue_struct *rds_ib_fmr_wq;
42
43static DEFINE_PER_CPU(unsigned long, clean_list_grace);
44#define CLEAN_LIST_BUSY_BIT 0
40 45
41/* 46/*
42 * This is stored as mr->r_trans_private. 47 * This is stored as mr->r_trans_private.
@@ -45,7 +50,11 @@ struct rds_ib_mr {
45 struct rds_ib_device *device; 50 struct rds_ib_device *device;
46 struct rds_ib_mr_pool *pool; 51 struct rds_ib_mr_pool *pool;
47 struct ib_fmr *fmr; 52 struct ib_fmr *fmr;
48 struct list_head list; 53
54 struct xlist_head xlist;
55
56 /* unmap_list is for freeing */
57 struct list_head unmap_list;
49 unsigned int remap_count; 58 unsigned int remap_count;
50 59
51 struct scatterlist *sg; 60 struct scatterlist *sg;
@@ -59,14 +68,16 @@ struct rds_ib_mr {
59 */ 68 */
60struct rds_ib_mr_pool { 69struct rds_ib_mr_pool {
61 struct mutex flush_lock; /* serialize fmr invalidate */ 70 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */ 71 struct delayed_work flush_worker; /* flush worker */
63 72
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */ 73 atomic_t item_count; /* total # of MRs */
66 atomic_t dirty_count; /* # dirty of MRs */ 74 atomic_t dirty_count; /* # dirty of MRs */
67 struct list_head drop_list; /* MRs that have reached their max_maps limit */ 75
68 struct list_head free_list; /* unused MRs */ 76 struct xlist_head drop_list; /* MRs that have reached their max_maps limit */
69 struct list_head clean_list; /* unused & unamapped MRs */ 77 struct xlist_head free_list; /* unused MRs */
78 struct xlist_head clean_list; /* global unused & unamapped MRs */
79 wait_queue_head_t flush_wait;
80
70 atomic_t free_pinned; /* memory pinned by free MRs */ 81 atomic_t free_pinned; /* memory pinned by free MRs */
71 unsigned long max_items; 82 unsigned long max_items;
72 unsigned long max_items_soft; 83 unsigned long max_items_soft;
@@ -74,7 +85,7 @@ struct rds_ib_mr_pool {
74 struct ib_fmr_attr fmr_attr; 85 struct ib_fmr_attr fmr_attr;
75}; 86};
76 87
77static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all); 88static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
78static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr); 89static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
79static void rds_ib_mr_pool_flush_worker(struct work_struct *work); 90static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
80 91
@@ -83,16 +94,17 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
83 struct rds_ib_device *rds_ibdev; 94 struct rds_ib_device *rds_ibdev;
84 struct rds_ib_ipaddr *i_ipaddr; 95 struct rds_ib_ipaddr *i_ipaddr;
85 96
86 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 97 rcu_read_lock();
87 spin_lock_irq(&rds_ibdev->spinlock); 98 list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
88 list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 99 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
89 if (i_ipaddr->ipaddr == ipaddr) { 100 if (i_ipaddr->ipaddr == ipaddr) {
90 spin_unlock_irq(&rds_ibdev->spinlock); 101 atomic_inc(&rds_ibdev->refcount);
102 rcu_read_unlock();
91 return rds_ibdev; 103 return rds_ibdev;
92 } 104 }
93 } 105 }
94 spin_unlock_irq(&rds_ibdev->spinlock);
95 } 106 }
107 rcu_read_unlock();
96 108
97 return NULL; 109 return NULL;
98} 110}
@@ -108,7 +120,7 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
108 i_ipaddr->ipaddr = ipaddr; 120 i_ipaddr->ipaddr = ipaddr;
109 121
110 spin_lock_irq(&rds_ibdev->spinlock); 122 spin_lock_irq(&rds_ibdev->spinlock);
111 list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list); 123 list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
112 spin_unlock_irq(&rds_ibdev->spinlock); 124 spin_unlock_irq(&rds_ibdev->spinlock);
113 125
114 return 0; 126 return 0;
@@ -116,17 +128,24 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
116 128
117static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 129static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
118{ 130{
119 struct rds_ib_ipaddr *i_ipaddr, *next; 131 struct rds_ib_ipaddr *i_ipaddr;
132 struct rds_ib_ipaddr *to_free = NULL;
133
120 134
121 spin_lock_irq(&rds_ibdev->spinlock); 135 spin_lock_irq(&rds_ibdev->spinlock);
122 list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) { 136 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
123 if (i_ipaddr->ipaddr == ipaddr) { 137 if (i_ipaddr->ipaddr == ipaddr) {
124 list_del(&i_ipaddr->list); 138 list_del_rcu(&i_ipaddr->list);
125 kfree(i_ipaddr); 139 to_free = i_ipaddr;
126 break; 140 break;
127 } 141 }
128 } 142 }
129 spin_unlock_irq(&rds_ibdev->spinlock); 143 spin_unlock_irq(&rds_ibdev->spinlock);
144
145 if (to_free) {
146 synchronize_rcu();
147 kfree(to_free);
148 }
130} 149}
131 150
132int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 151int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
@@ -134,8 +153,10 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
134 struct rds_ib_device *rds_ibdev_old; 153 struct rds_ib_device *rds_ibdev_old;
135 154
136 rds_ibdev_old = rds_ib_get_device(ipaddr); 155 rds_ibdev_old = rds_ib_get_device(ipaddr);
137 if (rds_ibdev_old) 156 if (rds_ibdev_old) {
138 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); 157 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
158 rds_ib_dev_put(rds_ibdev_old);
159 }
139 160
140 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 161 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
141} 162}
@@ -156,6 +177,7 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
156 spin_unlock_irq(&ib_nodev_conns_lock); 177 spin_unlock_irq(&ib_nodev_conns_lock);
157 178
158 ic->rds_ibdev = rds_ibdev; 179 ic->rds_ibdev = rds_ibdev;
180 atomic_inc(&rds_ibdev->refcount);
159} 181}
160 182
161void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 183void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
@@ -175,18 +197,18 @@ void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *
175 spin_unlock(&ib_nodev_conns_lock); 197 spin_unlock(&ib_nodev_conns_lock);
176 198
177 ic->rds_ibdev = NULL; 199 ic->rds_ibdev = NULL;
200 rds_ib_dev_put(rds_ibdev);
178} 201}
179 202
180void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) 203void rds_ib_destroy_nodev_conns(void)
181{ 204{
182 struct rds_ib_connection *ic, *_ic; 205 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 206 LIST_HEAD(tmp_list);
184 207
185 /* avoid calling conn_destroy with irqs off */ 208 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(list_lock); 209 spin_lock_irq(&ib_nodev_conns_lock);
187 list_splice(list, &tmp_list); 210 list_splice(&ib_nodev_conns, &tmp_list);
188 INIT_LIST_HEAD(list); 211 spin_unlock_irq(&ib_nodev_conns_lock);
189 spin_unlock_irq(list_lock);
190 212
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) 213 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
192 rds_conn_destroy(ic->conn); 214 rds_conn_destroy(ic->conn);
@@ -200,12 +222,12 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
200 if (!pool) 222 if (!pool)
201 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
202 224
203 INIT_LIST_HEAD(&pool->free_list); 225 INIT_XLIST_HEAD(&pool->free_list);
204 INIT_LIST_HEAD(&pool->drop_list); 226 INIT_XLIST_HEAD(&pool->drop_list);
205 INIT_LIST_HEAD(&pool->clean_list); 227 INIT_XLIST_HEAD(&pool->clean_list);
206 mutex_init(&pool->flush_lock); 228 mutex_init(&pool->flush_lock);
207 spin_lock_init(&pool->list_lock); 229 init_waitqueue_head(&pool->flush_wait);
208 INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 230 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
209 231
210 pool->fmr_attr.max_pages = fmr_message_size; 232 pool->fmr_attr.max_pages = fmr_message_size;
211 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; 233 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
@@ -233,34 +255,60 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
233 255
234void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 256void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
235{ 257{
236 flush_workqueue(rds_wq); 258 cancel_delayed_work_sync(&pool->flush_worker);
237 rds_ib_flush_mr_pool(pool, 1); 259 rds_ib_flush_mr_pool(pool, 1, NULL);
238 WARN_ON(atomic_read(&pool->item_count)); 260 WARN_ON(atomic_read(&pool->item_count));
239 WARN_ON(atomic_read(&pool->free_pinned)); 261 WARN_ON(atomic_read(&pool->free_pinned));
240 kfree(pool); 262 kfree(pool);
241} 263}
242 264
265static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
266 struct rds_ib_mr **ibmr_ret)
267{
268 struct xlist_head *ibmr_xl;
269 ibmr_xl = xlist_del_head_fast(xl);
270 *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
271}
272
243static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) 273static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
244{ 274{
245 struct rds_ib_mr *ibmr = NULL; 275 struct rds_ib_mr *ibmr = NULL;
246 unsigned long flags; 276 struct xlist_head *ret;
277 unsigned long *flag;
247 278
248 spin_lock_irqsave(&pool->list_lock, flags); 279 preempt_disable();
249 if (!list_empty(&pool->clean_list)) { 280 flag = &__get_cpu_var(clean_list_grace);
250 ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list); 281 set_bit(CLEAN_LIST_BUSY_BIT, flag);
251 list_del_init(&ibmr->list); 282 ret = xlist_del_head(&pool->clean_list);
252 } 283 if (ret)
253 spin_unlock_irqrestore(&pool->list_lock, flags); 284 ibmr = list_entry(ret, struct rds_ib_mr, xlist);
254 285
286 clear_bit(CLEAN_LIST_BUSY_BIT, flag);
287 preempt_enable();
255 return ibmr; 288 return ibmr;
256} 289}
257 290
291static inline void wait_clean_list_grace(void)
292{
293 int cpu;
294 unsigned long *flag;
295
296 for_each_online_cpu(cpu) {
297 flag = &per_cpu(clean_list_grace, cpu);
298 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
299 cpu_relax();
300 }
301}
302
258static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) 303static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
259{ 304{
260 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 305 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
261 struct rds_ib_mr *ibmr = NULL; 306 struct rds_ib_mr *ibmr = NULL;
262 int err = 0, iter = 0; 307 int err = 0, iter = 0;
263 308
309 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
310 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
311
264 while (1) { 312 while (1) {
265 ibmr = rds_ib_reuse_fmr(pool); 313 ibmr = rds_ib_reuse_fmr(pool);
266 if (ibmr) 314 if (ibmr)
@@ -287,19 +335,24 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
287 335
288 /* We do have some empty MRs. Flush them out. */ 336 /* We do have some empty MRs. Flush them out. */
289 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); 337 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
290 rds_ib_flush_mr_pool(pool, 0); 338 rds_ib_flush_mr_pool(pool, 0, &ibmr);
339 if (ibmr)
340 return ibmr;
291 } 341 }
292 342
293 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); 343 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
294 if (!ibmr) { 344 if (!ibmr) {
295 err = -ENOMEM; 345 err = -ENOMEM;
296 goto out_no_cigar; 346 goto out_no_cigar;
297 } 347 }
298 348
349 memset(ibmr, 0, sizeof(*ibmr));
350
299 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, 351 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
300 (IB_ACCESS_LOCAL_WRITE | 352 (IB_ACCESS_LOCAL_WRITE |
301 IB_ACCESS_REMOTE_READ | 353 IB_ACCESS_REMOTE_READ |
302 IB_ACCESS_REMOTE_WRITE), 354 IB_ACCESS_REMOTE_WRITE|
355 IB_ACCESS_REMOTE_ATOMIC),
303 &pool->fmr_attr); 356 &pool->fmr_attr);
304 if (IS_ERR(ibmr->fmr)) { 357 if (IS_ERR(ibmr->fmr)) {
305 err = PTR_ERR(ibmr->fmr); 358 err = PTR_ERR(ibmr->fmr);
@@ -367,7 +420,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
367 if (page_cnt > fmr_message_size) 420 if (page_cnt > fmr_message_size)
368 return -EINVAL; 421 return -EINVAL;
369 422
370 dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC); 423 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
424 rdsibdev_to_node(rds_ibdev));
371 if (!dma_pages) 425 if (!dma_pages)
372 return -ENOMEM; 426 return -ENOMEM;
373 427
@@ -441,7 +495,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
441 495
442 /* FIXME we need a way to tell a r/w MR 496 /* FIXME we need a way to tell a r/w MR
443 * from a r/o MR */ 497 * from a r/o MR */
444 BUG_ON(in_interrupt()); 498 BUG_ON(irqs_disabled());
445 set_page_dirty(page); 499 set_page_dirty(page);
446 put_page(page); 500 put_page(page);
447 } 501 }
@@ -477,33 +531,109 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
477} 531}
478 532
479/* 533/*
534 * given an xlist of mrs, put them all into the list_head for more processing
535 */
536static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list)
537{
538 struct rds_ib_mr *ibmr;
539 struct xlist_head splice;
540 struct xlist_head *cur;
541 struct xlist_head *next;
542
543 splice.next = NULL;
544 xlist_splice(xlist, &splice);
545 cur = splice.next;
546 while (cur) {
547 next = cur->next;
548 ibmr = list_entry(cur, struct rds_ib_mr, xlist);
549 list_add_tail(&ibmr->unmap_list, list);
550 cur = next;
551 }
552}
553
554/*
555 * this takes a list head of mrs and turns it into an xlist of clusters.
556 * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for
557 * reuse.
558 */
559static void list_append_to_xlist(struct rds_ib_mr_pool *pool,
560 struct list_head *list, struct xlist_head *xlist,
561 struct xlist_head **tail_ret)
562{
563 struct rds_ib_mr *ibmr;
564 struct xlist_head *cur_mr = xlist;
565 struct xlist_head *tail_mr = NULL;
566
567 list_for_each_entry(ibmr, list, unmap_list) {
568 tail_mr = &ibmr->xlist;
569 tail_mr->next = NULL;
570 cur_mr->next = tail_mr;
571 cur_mr = tail_mr;
572 }
573 *tail_ret = tail_mr;
574}
575
576/*
480 * Flush our pool of MRs. 577 * Flush our pool of MRs.
481 * At a minimum, all currently unused MRs are unmapped. 578 * At a minimum, all currently unused MRs are unmapped.
482 * If the number of MRs allocated exceeds the limit, we also try 579 * If the number of MRs allocated exceeds the limit, we also try
483 * to free as many MRs as needed to get back to this limit. 580 * to free as many MRs as needed to get back to this limit.
484 */ 581 */
485static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all) 582static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
583 int free_all, struct rds_ib_mr **ibmr_ret)
486{ 584{
487 struct rds_ib_mr *ibmr, *next; 585 struct rds_ib_mr *ibmr, *next;
586 struct xlist_head clean_xlist;
587 struct xlist_head *clean_tail;
488 LIST_HEAD(unmap_list); 588 LIST_HEAD(unmap_list);
489 LIST_HEAD(fmr_list); 589 LIST_HEAD(fmr_list);
490 unsigned long unpinned = 0; 590 unsigned long unpinned = 0;
491 unsigned long flags;
492 unsigned int nfreed = 0, ncleaned = 0, free_goal; 591 unsigned int nfreed = 0, ncleaned = 0, free_goal;
493 int ret = 0; 592 int ret = 0;
494 593
495 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); 594 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
496 595
497 mutex_lock(&pool->flush_lock); 596 if (ibmr_ret) {
597 DEFINE_WAIT(wait);
598 while(!mutex_trylock(&pool->flush_lock)) {
599 ibmr = rds_ib_reuse_fmr(pool);
600 if (ibmr) {
601 *ibmr_ret = ibmr;
602 finish_wait(&pool->flush_wait, &wait);
603 goto out_nolock;
604 }
605
606 prepare_to_wait(&pool->flush_wait, &wait,
607 TASK_UNINTERRUPTIBLE);
608 if (xlist_empty(&pool->clean_list))
609 schedule();
610
611 ibmr = rds_ib_reuse_fmr(pool);
612 if (ibmr) {
613 *ibmr_ret = ibmr;
614 finish_wait(&pool->flush_wait, &wait);
615 goto out_nolock;
616 }
617 }
618 finish_wait(&pool->flush_wait, &wait);
619 } else
620 mutex_lock(&pool->flush_lock);
621
622 if (ibmr_ret) {
623 ibmr = rds_ib_reuse_fmr(pool);
624 if (ibmr) {
625 *ibmr_ret = ibmr;
626 goto out;
627 }
628 }
498 629
499 spin_lock_irqsave(&pool->list_lock, flags);
500 /* Get the list of all MRs to be dropped. Ordering matters - 630 /* Get the list of all MRs to be dropped. Ordering matters -
501 * we want to put drop_list ahead of free_list. */ 631 * we want to put drop_list ahead of free_list.
502 list_splice_init(&pool->free_list, &unmap_list); 632 */
503 list_splice_init(&pool->drop_list, &unmap_list); 633 xlist_append_to_list(&pool->drop_list, &unmap_list);
634 xlist_append_to_list(&pool->free_list, &unmap_list);
504 if (free_all) 635 if (free_all)
505 list_splice_init(&pool->clean_list, &unmap_list); 636 xlist_append_to_list(&pool->clean_list, &unmap_list);
506 spin_unlock_irqrestore(&pool->list_lock, flags);
507 637
508 free_goal = rds_ib_flush_goal(pool, free_all); 638 free_goal = rds_ib_flush_goal(pool, free_all);
509 639
@@ -511,19 +641,20 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
511 goto out; 641 goto out;
512 642
513 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 643 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
514 list_for_each_entry(ibmr, &unmap_list, list) 644 list_for_each_entry(ibmr, &unmap_list, unmap_list)
515 list_add(&ibmr->fmr->list, &fmr_list); 645 list_add(&ibmr->fmr->list, &fmr_list);
646
516 ret = ib_unmap_fmr(&fmr_list); 647 ret = ib_unmap_fmr(&fmr_list);
517 if (ret) 648 if (ret)
518 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret); 649 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
519 650
520 /* Now we can destroy the DMA mapping and unpin any pages */ 651 /* Now we can destroy the DMA mapping and unpin any pages */
521 list_for_each_entry_safe(ibmr, next, &unmap_list, list) { 652 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
522 unpinned += ibmr->sg_len; 653 unpinned += ibmr->sg_len;
523 __rds_ib_teardown_mr(ibmr); 654 __rds_ib_teardown_mr(ibmr);
524 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { 655 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
525 rds_ib_stats_inc(s_ib_rdma_mr_free); 656 rds_ib_stats_inc(s_ib_rdma_mr_free);
526 list_del(&ibmr->list); 657 list_del(&ibmr->unmap_list);
527 ib_dealloc_fmr(ibmr->fmr); 658 ib_dealloc_fmr(ibmr->fmr);
528 kfree(ibmr); 659 kfree(ibmr);
529 nfreed++; 660 nfreed++;
@@ -531,9 +662,27 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
531 ncleaned++; 662 ncleaned++;
532 } 663 }
533 664
534 spin_lock_irqsave(&pool->list_lock, flags); 665 if (!list_empty(&unmap_list)) {
535 list_splice(&unmap_list, &pool->clean_list); 666 /* we have to make sure that none of the things we're about
536 spin_unlock_irqrestore(&pool->list_lock, flags); 667 * to put on the clean list would race with other cpus trying
668 * to pull items off. The xlist would explode if we managed to
669 * remove something from the clean list and then add it back again
670 * while another CPU was spinning on that same item in xlist_del_head.
671 *
672 * This is pretty unlikely, but just in case wait for an xlist grace period
673 * here before adding anything back into the clean list.
674 */
675 wait_clean_list_grace();
676
677 list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail);
678 if (ibmr_ret)
679 refill_local(pool, &clean_xlist, ibmr_ret);
680
681 /* refill_local may have emptied our list */
682 if (!xlist_empty(&clean_xlist))
683 xlist_add(clean_xlist.next, clean_tail, &pool->clean_list);
684
685 }
537 686
538 atomic_sub(unpinned, &pool->free_pinned); 687 atomic_sub(unpinned, &pool->free_pinned);
539 atomic_sub(ncleaned, &pool->dirty_count); 688 atomic_sub(ncleaned, &pool->dirty_count);
@@ -541,14 +690,35 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
541 690
542out: 691out:
543 mutex_unlock(&pool->flush_lock); 692 mutex_unlock(&pool->flush_lock);
693 if (waitqueue_active(&pool->flush_wait))
694 wake_up(&pool->flush_wait);
695out_nolock:
544 return ret; 696 return ret;
545} 697}
546 698
699int rds_ib_fmr_init(void)
700{
701 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
702 if (!rds_ib_fmr_wq)
703 return -ENOMEM;
704 return 0;
705}
706
707/*
708 * By the time this is called all the IB devices should have been torn down and
709 * had their pools freed. As each pool is freed its work struct is waited on,
710 * so the pool flushing work queue should be idle by the time we get here.
711 */
712void rds_ib_fmr_exit(void)
713{
714 destroy_workqueue(rds_ib_fmr_wq);
715}
716
547static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 717static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
548{ 718{
549 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker); 719 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
550 720
551 rds_ib_flush_mr_pool(pool, 0); 721 rds_ib_flush_mr_pool(pool, 0, NULL);
552} 722}
553 723
554void rds_ib_free_mr(void *trans_private, int invalidate) 724void rds_ib_free_mr(void *trans_private, int invalidate)
@@ -556,47 +726,49 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
556 struct rds_ib_mr *ibmr = trans_private; 726 struct rds_ib_mr *ibmr = trans_private;
557 struct rds_ib_device *rds_ibdev = ibmr->device; 727 struct rds_ib_device *rds_ibdev = ibmr->device;
558 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 728 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
559 unsigned long flags;
560 729
561 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 730 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
562 731
563 /* Return it to the pool's free list */ 732 /* Return it to the pool's free list */
564 spin_lock_irqsave(&pool->list_lock, flags);
565 if (ibmr->remap_count >= pool->fmr_attr.max_maps) 733 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
566 list_add(&ibmr->list, &pool->drop_list); 734 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list);
567 else 735 else
568 list_add(&ibmr->list, &pool->free_list); 736 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list);
569 737
570 atomic_add(ibmr->sg_len, &pool->free_pinned); 738 atomic_add(ibmr->sg_len, &pool->free_pinned);
571 atomic_inc(&pool->dirty_count); 739 atomic_inc(&pool->dirty_count);
572 spin_unlock_irqrestore(&pool->list_lock, flags);
573 740
574 /* If we've pinned too many pages, request a flush */ 741 /* If we've pinned too many pages, request a flush */
575 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 742 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
576 atomic_read(&pool->dirty_count) >= pool->max_items / 10) 743 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
577 queue_work(rds_wq, &pool->flush_worker); 744 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
578 745
579 if (invalidate) { 746 if (invalidate) {
580 if (likely(!in_interrupt())) { 747 if (likely(!in_interrupt())) {
581 rds_ib_flush_mr_pool(pool, 0); 748 rds_ib_flush_mr_pool(pool, 0, NULL);
582 } else { 749 } else {
583 /* We get here if the user created a MR marked 750 /* We get here if the user created a MR marked
584 * as use_once and invalidate at the same time. */ 751 * as use_once and invalidate at the same time. */
585 queue_work(rds_wq, &pool->flush_worker); 752 queue_delayed_work(rds_ib_fmr_wq,
753 &pool->flush_worker, 10);
586 } 754 }
587 } 755 }
756
757 rds_ib_dev_put(rds_ibdev);
588} 758}
589 759
590void rds_ib_flush_mrs(void) 760void rds_ib_flush_mrs(void)
591{ 761{
592 struct rds_ib_device *rds_ibdev; 762 struct rds_ib_device *rds_ibdev;
593 763
764 down_read(&rds_ib_devices_lock);
594 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 765 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
595 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 766 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
596 767
597 if (pool) 768 if (pool)
598 rds_ib_flush_mr_pool(pool, 0); 769 rds_ib_flush_mr_pool(pool, 0, NULL);
599 } 770 }
771 up_read(&rds_ib_devices_lock);
600} 772}
601 773
602void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 774void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -628,6 +800,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
628 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); 800 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
629 801
630 ibmr->device = rds_ibdev; 802 ibmr->device = rds_ibdev;
803 rds_ibdev = NULL;
631 804
632 out: 805 out:
633 if (ret) { 806 if (ret) {
@@ -635,5 +808,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
635 rds_ib_free_mr(ibmr, 0); 808 rds_ib_free_mr(ibmr, 0);
636 ibmr = ERR_PTR(ret); 809 ibmr = ERR_PTR(ret);
637 } 810 }
811 if (rds_ibdev)
812 rds_ib_dev_put(rds_ibdev);
638 return ibmr; 813 return ibmr;
639} 814}
815
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index c74e9904a6b2..e29e0ca32f74 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -43,42 +43,6 @@ static struct kmem_cache *rds_ib_incoming_slab;
43static struct kmem_cache *rds_ib_frag_slab; 43static struct kmem_cache *rds_ib_frag_slab;
44static atomic_t rds_ib_allocation = ATOMIC_INIT(0); 44static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
45 45
46static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
47{
48 rdsdebug("frag %p page %p\n", frag, frag->f_page);
49 __free_page(frag->f_page);
50 frag->f_page = NULL;
51}
52
53static void rds_ib_frag_free(struct rds_page_frag *frag)
54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL);
57 kmem_cache_free(rds_ib_frag_slab, frag);
58}
59
60/*
61 * We map a page at a time. Its fragments are posted in order. This
62 * is called in fragment order as the fragments get send completion events.
63 * Only the last frag in the page performs the unmapping.
64 *
65 * It's OK for ring cleanup to call this in whatever order it likes because
66 * DMA is not in flight and so we can unmap while other ring entries still
67 * hold page references in their frags.
68 */
69static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
70 struct rds_ib_recv_work *recv)
71{
72 struct rds_page_frag *frag = recv->r_frag;
73
74 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
75 if (frag->f_mapped)
76 ib_dma_unmap_page(ic->i_cm_id->device,
77 frag->f_mapped,
78 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
79 frag->f_mapped = 0;
80}
81
82void rds_ib_recv_init_ring(struct rds_ib_connection *ic) 46void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
83{ 47{
84 struct rds_ib_recv_work *recv; 48 struct rds_ib_recv_work *recv;
@@ -95,16 +59,161 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
95 recv->r_wr.sg_list = recv->r_sge; 59 recv->r_wr.sg_list = recv->r_sge;
96 recv->r_wr.num_sge = RDS_IB_RECV_SGE; 60 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
97 61
98 sge = rds_ib_data_sge(ic, recv->r_sge); 62 sge = &recv->r_sge[0];
63 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
64 sge->length = sizeof(struct rds_header);
65 sge->lkey = ic->i_mr->lkey;
66
67 sge = &recv->r_sge[1];
99 sge->addr = 0; 68 sge->addr = 0;
100 sge->length = RDS_FRAG_SIZE; 69 sge->length = RDS_FRAG_SIZE;
101 sge->lkey = ic->i_mr->lkey; 70 sge->lkey = ic->i_mr->lkey;
71 }
72}
102 73
103 sge = rds_ib_header_sge(ic, recv->r_sge); 74/*
104 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); 75 * The entire 'from' list, including the from element itself, is put on
105 sge->length = sizeof(struct rds_header); 76 * to the tail of the 'to' list.
106 sge->lkey = ic->i_mr->lkey; 77 */
78static void list_splice_entire_tail(struct list_head *from,
79 struct list_head *to)
80{
81 struct list_head *from_last = from->prev;
82
83 list_splice_tail(from_last, to);
84 list_add_tail(from_last, to);
85}
86
87static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
88{
89 struct list_head *tmp;
90
91 tmp = xchg(&cache->xfer, NULL);
92 if (tmp) {
93 if (cache->ready)
94 list_splice_entire_tail(tmp, cache->ready);
95 else
96 cache->ready = tmp;
97 }
98}
99
100static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
101{
102 struct rds_ib_cache_head *head;
103 int cpu;
104
105 cache->percpu = alloc_percpu(struct rds_ib_cache_head);
106 if (!cache->percpu)
107 return -ENOMEM;
108
109 for_each_possible_cpu(cpu) {
110 head = per_cpu_ptr(cache->percpu, cpu);
111 head->first = NULL;
112 head->count = 0;
113 }
114 cache->xfer = NULL;
115 cache->ready = NULL;
116
117 return 0;
118}
119
120int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
121{
122 int ret;
123
124 ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
125 if (!ret) {
126 ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
127 if (ret)
128 free_percpu(ic->i_cache_incs.percpu);
107 } 129 }
130
131 return ret;
132}
133
134static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
135 struct list_head *caller_list)
136{
137 struct rds_ib_cache_head *head;
138 int cpu;
139
140 for_each_possible_cpu(cpu) {
141 head = per_cpu_ptr(cache->percpu, cpu);
142 if (head->first) {
143 list_splice_entire_tail(head->first, caller_list);
144 head->first = NULL;
145 }
146 }
147
148 if (cache->ready) {
149 list_splice_entire_tail(cache->ready, caller_list);
150 cache->ready = NULL;
151 }
152}
153
154void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
155{
156 struct rds_ib_incoming *inc;
157 struct rds_ib_incoming *inc_tmp;
158 struct rds_page_frag *frag;
159 struct rds_page_frag *frag_tmp;
160 LIST_HEAD(list);
161
162 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
163 rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
164 free_percpu(ic->i_cache_incs.percpu);
165
166 list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
167 list_del(&inc->ii_cache_entry);
168 WARN_ON(!list_empty(&inc->ii_frags));
169 kmem_cache_free(rds_ib_incoming_slab, inc);
170 }
171
172 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
173 rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
174 free_percpu(ic->i_cache_frags.percpu);
175
176 list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
177 list_del(&frag->f_cache_entry);
178 WARN_ON(!list_empty(&frag->f_item));
179 kmem_cache_free(rds_ib_frag_slab, frag);
180 }
181}
182
183/* fwd decl */
184static void rds_ib_recv_cache_put(struct list_head *new_item,
185 struct rds_ib_refill_cache *cache);
186static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
187
188
189/* Recycle frag and attached recv buffer f_sg */
190static void rds_ib_frag_free(struct rds_ib_connection *ic,
191 struct rds_page_frag *frag)
192{
193 rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
194
195 rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
196}
197
198/* Recycle inc after freeing attached frags */
199void rds_ib_inc_free(struct rds_incoming *inc)
200{
201 struct rds_ib_incoming *ibinc;
202 struct rds_page_frag *frag;
203 struct rds_page_frag *pos;
204 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
205
206 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
207
208 /* Free attached frags */
209 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
210 list_del_init(&frag->f_item);
211 rds_ib_frag_free(ic, frag);
212 }
213 BUG_ON(!list_empty(&ibinc->ii_frags));
214
215 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
216 rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
108} 217}
109 218
110static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, 219static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
@@ -115,10 +224,8 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
115 recv->r_ibinc = NULL; 224 recv->r_ibinc = NULL;
116 } 225 }
117 if (recv->r_frag) { 226 if (recv->r_frag) {
118 rds_ib_recv_unmap_page(ic, recv); 227 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
119 if (recv->r_frag->f_page) 228 rds_ib_frag_free(ic, recv->r_frag);
120 rds_ib_frag_drop_page(recv->r_frag);
121 rds_ib_frag_free(recv->r_frag);
122 recv->r_frag = NULL; 229 recv->r_frag = NULL;
123 } 230 }
124} 231}
@@ -129,84 +236,111 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
129 236
130 for (i = 0; i < ic->i_recv_ring.w_nr; i++) 237 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
131 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); 238 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
132
133 if (ic->i_frag.f_page)
134 rds_ib_frag_drop_page(&ic->i_frag);
135} 239}
136 240
137static int rds_ib_recv_refill_one(struct rds_connection *conn, 241static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
138 struct rds_ib_recv_work *recv, 242 gfp_t slab_mask)
139 gfp_t kptr_gfp, gfp_t page_gfp)
140{ 243{
141 struct rds_ib_connection *ic = conn->c_transport_data; 244 struct rds_ib_incoming *ibinc;
142 dma_addr_t dma_addr; 245 struct list_head *cache_item;
143 struct ib_sge *sge; 246 int avail_allocs;
144 int ret = -ENOMEM;
145 247
146 if (recv->r_ibinc == NULL) { 248 cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
147 if (!atomic_add_unless(&rds_ib_allocation, 1, rds_ib_sysctl_max_recv_allocation)) { 249 if (cache_item) {
250 ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
251 } else {
252 avail_allocs = atomic_add_unless(&rds_ib_allocation,
253 1, rds_ib_sysctl_max_recv_allocation);
254 if (!avail_allocs) {
148 rds_ib_stats_inc(s_ib_rx_alloc_limit); 255 rds_ib_stats_inc(s_ib_rx_alloc_limit);
149 goto out; 256 return NULL;
150 } 257 }
151 recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, 258 ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
152 kptr_gfp); 259 if (!ibinc) {
153 if (recv->r_ibinc == NULL) {
154 atomic_dec(&rds_ib_allocation); 260 atomic_dec(&rds_ib_allocation);
155 goto out; 261 return NULL;
156 } 262 }
157 INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
158 rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
159 } 263 }
264 INIT_LIST_HEAD(&ibinc->ii_frags);
265 rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
160 266
161 if (recv->r_frag == NULL) { 267 return ibinc;
162 recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp); 268}
163 if (recv->r_frag == NULL) 269
164 goto out; 270static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 271 gfp_t slab_mask, gfp_t page_mask)
166 recv->r_frag->f_page = NULL; 272{
273 struct rds_page_frag *frag;
274 struct list_head *cache_item;
275 int ret;
276
277 cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
278 if (cache_item) {
279 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
280 } else {
281 frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
282 if (!frag)
283 return NULL;
284
285 sg_init_table(&frag->f_sg, 1);
286 ret = rds_page_remainder_alloc(&frag->f_sg,
287 RDS_FRAG_SIZE, page_mask);
288 if (ret) {
289 kmem_cache_free(rds_ib_frag_slab, frag);
290 return NULL;
291 }
167 } 292 }
168 293
169 if (ic->i_frag.f_page == NULL) { 294 INIT_LIST_HEAD(&frag->f_item);
170 ic->i_frag.f_page = alloc_page(page_gfp); 295
171 if (ic->i_frag.f_page == NULL) 296 return frag;
172 goto out; 297}
173 ic->i_frag.f_offset = 0; 298
299static int rds_ib_recv_refill_one(struct rds_connection *conn,
300 struct rds_ib_recv_work *recv, int prefill)
301{
302 struct rds_ib_connection *ic = conn->c_transport_data;
303 struct ib_sge *sge;
304 int ret = -ENOMEM;
305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT;
307
308 if (prefill) {
309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER;
174 } 311 }
175 312
176 dma_addr = ib_dma_map_page(ic->i_cm_id->device, 313 if (!ic->i_cache_incs.ready)
177 ic->i_frag.f_page, 314 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
178 ic->i_frag.f_offset, 315 if (!ic->i_cache_frags.ready)
179 RDS_FRAG_SIZE, 316 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
180 DMA_FROM_DEVICE);
181 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
182 goto out;
183 317
184 /* 318 /*
185 * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap() 319 * ibinc was taken from recv if recv contained the start of a message.
186 * must be called on this recv. This happens as completions hit 320 * recvs that were continuations will still have this allocated.
187 * in order or on connection shutdown.
188 */ 321 */
189 recv->r_frag->f_page = ic->i_frag.f_page; 322 if (!recv->r_ibinc) {
190 recv->r_frag->f_offset = ic->i_frag.f_offset; 323 recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
191 recv->r_frag->f_mapped = dma_addr; 324 if (!recv->r_ibinc)
325 goto out;
326 }
192 327
193 sge = rds_ib_data_sge(ic, recv->r_sge); 328 WARN_ON(recv->r_frag); /* leak! */
194 sge->addr = dma_addr; 329 recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
195 sge->length = RDS_FRAG_SIZE; 330 if (!recv->r_frag)
331 goto out;
332
333 ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
334 1, DMA_FROM_DEVICE);
335 WARN_ON(ret != 1);
196 336
197 sge = rds_ib_header_sge(ic, recv->r_sge); 337 sge = &recv->r_sge[0];
198 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); 338 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
199 sge->length = sizeof(struct rds_header); 339 sge->length = sizeof(struct rds_header);
200 340
201 get_page(recv->r_frag->f_page); 341 sge = &recv->r_sge[1];
202 342 sge->addr = sg_dma_address(&recv->r_frag->f_sg);
203 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { 343 sge->length = sg_dma_len(&recv->r_frag->f_sg);
204 ic->i_frag.f_offset += RDS_FRAG_SIZE;
205 } else {
206 put_page(ic->i_frag.f_page);
207 ic->i_frag.f_page = NULL;
208 ic->i_frag.f_offset = 0;
209 }
210 344
211 ret = 0; 345 ret = 0;
212out: 346out:
@@ -216,13 +350,11 @@ out:
216/* 350/*
217 * This tries to allocate and post unused work requests after making sure that 351 * This tries to allocate and post unused work requests after making sure that
218 * they have all the allocations they need to queue received fragments into 352 * they have all the allocations they need to queue received fragments into
219 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc 353 * sockets.
220 * pairs don't go unmatched.
221 * 354 *
222 * -1 is returned if posting fails due to temporary resource exhaustion. 355 * -1 is returned if posting fails due to temporary resource exhaustion.
223 */ 356 */
224int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 357void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
225 gfp_t page_gfp, int prefill)
226{ 358{
227 struct rds_ib_connection *ic = conn->c_transport_data; 359 struct rds_ib_connection *ic = conn->c_transport_data;
228 struct rds_ib_recv_work *recv; 360 struct rds_ib_recv_work *recv;
@@ -236,28 +368,25 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
236 if (pos >= ic->i_recv_ring.w_nr) { 368 if (pos >= ic->i_recv_ring.w_nr) {
237 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", 369 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
238 pos); 370 pos);
239 ret = -EINVAL;
240 break; 371 break;
241 } 372 }
242 373
243 recv = &ic->i_recvs[pos]; 374 recv = &ic->i_recvs[pos];
244 ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp); 375 ret = rds_ib_recv_refill_one(conn, recv, prefill);
245 if (ret) { 376 if (ret) {
246 ret = -1;
247 break; 377 break;
248 } 378 }
249 379
250 /* XXX when can this fail? */ 380 /* XXX when can this fail? */
251 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); 381 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
252 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, 382 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
253 recv->r_ibinc, recv->r_frag->f_page, 383 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
254 (long) recv->r_frag->f_mapped, ret); 384 (long) sg_dma_address(&recv->r_frag->f_sg), ret);
255 if (ret) { 385 if (ret) {
256 rds_ib_conn_error(conn, "recv post on " 386 rds_ib_conn_error(conn, "recv post on "
257 "%pI4 returned %d, disconnecting and " 387 "%pI4 returned %d, disconnecting and "
258 "reconnecting\n", &conn->c_faddr, 388 "reconnecting\n", &conn->c_faddr,
259 ret); 389 ret);
260 ret = -1;
261 break; 390 break;
262 } 391 }
263 392
@@ -270,37 +399,73 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
270 399
271 if (ret) 400 if (ret)
272 rds_ib_ring_unalloc(&ic->i_recv_ring, 1); 401 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
273 return ret;
274} 402}
275 403
276void rds_ib_inc_purge(struct rds_incoming *inc) 404/*
405 * We want to recycle several types of recv allocations, like incs and frags.
406 * To use this, the *_free() function passes in the ptr to a list_head within
407 * the recyclee, as well as the cache to put it on.
408 *
409 * First, we put the memory on a percpu list. When this reaches a certain size,
410 * We move it to an intermediate non-percpu list in a lockless manner, with some
411 * xchg/compxchg wizardry.
412 *
413 * N.B. Instead of a list_head as the anchor, we use a single pointer, which can
414 * be NULL and xchg'd. The list is actually empty when the pointer is NULL, and
415 * list_empty() will return true with one element is actually present.
416 */
417static void rds_ib_recv_cache_put(struct list_head *new_item,
418 struct rds_ib_refill_cache *cache)
277{ 419{
278 struct rds_ib_incoming *ibinc; 420 unsigned long flags;
279 struct rds_page_frag *frag; 421 struct rds_ib_cache_head *chp;
280 struct rds_page_frag *pos; 422 struct list_head *old;
281 423
282 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 424 local_irq_save(flags);
283 rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);
284 425
285 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { 426 chp = per_cpu_ptr(cache->percpu, smp_processor_id());
286 list_del_init(&frag->f_item); 427 if (!chp->first)
287 rds_ib_frag_drop_page(frag); 428 INIT_LIST_HEAD(new_item);
288 rds_ib_frag_free(frag); 429 else /* put on front */
289 } 430 list_add_tail(new_item, chp->first);
431 chp->first = new_item;
432 chp->count++;
433
434 if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
435 goto end;
436
437 /*
438 * Return our per-cpu first list to the cache's xfer by atomically
439 * grabbing the current xfer list, appending it to our per-cpu list,
440 * and then atomically returning that entire list back to the
441 * cache's xfer list as long as it's still empty.
442 */
443 do {
444 old = xchg(&cache->xfer, NULL);
445 if (old)
446 list_splice_entire_tail(old, chp->first);
447 old = cmpxchg(&cache->xfer, NULL, chp->first);
448 } while (old);
449
450 chp->first = NULL;
451 chp->count = 0;
452end:
453 local_irq_restore(flags);
290} 454}
291 455
292void rds_ib_inc_free(struct rds_incoming *inc) 456static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
293{ 457{
294 struct rds_ib_incoming *ibinc; 458 struct list_head *head = cache->ready;
295 459
296 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 460 if (head) {
461 if (!list_empty(head)) {
462 cache->ready = head->next;
463 list_del_init(head);
464 } else
465 cache->ready = NULL;
466 }
297 467
298 rds_ib_inc_purge(inc); 468 return head;
299 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
300 BUG_ON(!list_empty(&ibinc->ii_frags));
301 kmem_cache_free(rds_ib_incoming_slab, ibinc);
302 atomic_dec(&rds_ib_allocation);
303 BUG_ON(atomic_read(&rds_ib_allocation) < 0);
304} 469}
305 470
306int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, 471int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
@@ -336,13 +501,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
336 to_copy = min_t(unsigned long, to_copy, len - copied); 501 to_copy = min_t(unsigned long, to_copy, len - copied);
337 502
338 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " 503 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
339 "[%p, %lu] + %lu\n", 504 "[%p, %u] + %lu\n",
340 to_copy, iov->iov_base, iov->iov_len, iov_off, 505 to_copy, iov->iov_base, iov->iov_len, iov_off,
341 frag->f_page, frag->f_offset, frag_off); 506 sg_page(&frag->f_sg), frag->f_sg.offset, frag_off);
342 507
343 /* XXX needs + offset for multiple recvs per page */ 508 /* XXX needs + offset for multiple recvs per page */
344 ret = rds_page_copy_to_user(frag->f_page, 509 ret = rds_page_copy_to_user(sg_page(&frag->f_sg),
345 frag->f_offset + frag_off, 510 frag->f_sg.offset + frag_off,
346 iov->iov_base + iov_off, 511 iov->iov_base + iov_off,
347 to_copy); 512 to_copy);
348 if (ret) { 513 if (ret) {
@@ -557,47 +722,6 @@ u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
557 return rds_ib_get_ack(ic); 722 return rds_ib_get_ack(ic);
558} 723}
559 724
560static struct rds_header *rds_ib_get_header(struct rds_connection *conn,
561 struct rds_ib_recv_work *recv,
562 u32 data_len)
563{
564 struct rds_ib_connection *ic = conn->c_transport_data;
565 void *hdr_buff = &ic->i_recv_hdrs[recv - ic->i_recvs];
566 void *addr;
567 u32 misplaced_hdr_bytes;
568
569 /*
570 * Support header at the front (RDS 3.1+) as well as header-at-end.
571 *
572 * Cases:
573 * 1) header all in header buff (great!)
574 * 2) header all in data page (copy all to header buff)
575 * 3) header split across hdr buf + data page
576 * (move bit in hdr buff to end before copying other bit from data page)
577 */
578 if (conn->c_version > RDS_PROTOCOL_3_0 || data_len == RDS_FRAG_SIZE)
579 return hdr_buff;
580
581 if (data_len <= (RDS_FRAG_SIZE - sizeof(struct rds_header))) {
582 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
583 memcpy(hdr_buff,
584 addr + recv->r_frag->f_offset + data_len,
585 sizeof(struct rds_header));
586 kunmap_atomic(addr, KM_SOFTIRQ0);
587 return hdr_buff;
588 }
589
590 misplaced_hdr_bytes = (sizeof(struct rds_header) - (RDS_FRAG_SIZE - data_len));
591
592 memmove(hdr_buff + misplaced_hdr_bytes, hdr_buff, misplaced_hdr_bytes);
593
594 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
595 memcpy(hdr_buff, addr + recv->r_frag->f_offset + data_len,
596 sizeof(struct rds_header) - misplaced_hdr_bytes);
597 kunmap_atomic(addr, KM_SOFTIRQ0);
598 return hdr_buff;
599}
600
601/* 725/*
602 * It's kind of lame that we're copying from the posted receive pages into 726 * It's kind of lame that we're copying from the posted receive pages into
603 * long-lived bitmaps. We could have posted the bitmaps and rdma written into 727 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
@@ -639,7 +763,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
639 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); 763 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
640 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ 764 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
641 765
642 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); 766 addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0);
643 767
644 src = addr + frag_off; 768 src = addr + frag_off;
645 dst = (void *)map->m_page_addrs[map_page] + map_off; 769 dst = (void *)map->m_page_addrs[map_page] + map_off;
@@ -710,7 +834,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
710 } 834 }
711 data_len -= sizeof(struct rds_header); 835 data_len -= sizeof(struct rds_header);
712 836
713 ihdr = rds_ib_get_header(conn, recv, data_len); 837 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
714 838
715 /* Validate the checksum. */ 839 /* Validate the checksum. */
716 if (!rds_message_verify_checksum(ihdr)) { 840 if (!rds_message_verify_checksum(ihdr)) {
@@ -742,12 +866,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
742 * the inc is freed. We don't go that route, so we have to drop the 866 * the inc is freed. We don't go that route, so we have to drop the
743 * page ref ourselves. We can't just leave the page on the recv 867 * page ref ourselves. We can't just leave the page on the recv
744 * because that confuses the dma mapping of pages and each recv's use 868 * because that confuses the dma mapping of pages and each recv's use
745 * of a partial page. We can leave the frag, though, it will be 869 * of a partial page.
746 * reused.
747 * 870 *
748 * FIXME: Fold this into the code path below. 871 * FIXME: Fold this into the code path below.
749 */ 872 */
750 rds_ib_frag_drop_page(recv->r_frag); 873 rds_ib_frag_free(ic, recv->r_frag);
874 recv->r_frag = NULL;
751 return; 875 return;
752 } 876 }
753 877
@@ -757,7 +881,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
757 * into the inc and save the inc so we can hang upcoming fragments 881 * into the inc and save the inc so we can hang upcoming fragments
758 * off its list. 882 * off its list.
759 */ 883 */
760 if (ibinc == NULL) { 884 if (!ibinc) {
761 ibinc = recv->r_ibinc; 885 ibinc = recv->r_ibinc;
762 recv->r_ibinc = NULL; 886 recv->r_ibinc = NULL;
763 ic->i_ibinc = ibinc; 887 ic->i_ibinc = ibinc;
@@ -842,32 +966,38 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
842 struct rds_ib_recv_work *recv; 966 struct rds_ib_recv_work *recv;
843 967
844 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { 968 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
845 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 969 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
846 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 970 (unsigned long long)wc.wr_id, wc.status,
971 rds_ib_wc_status_str(wc.status), wc.byte_len,
847 be32_to_cpu(wc.ex.imm_data)); 972 be32_to_cpu(wc.ex.imm_data));
848 rds_ib_stats_inc(s_ib_rx_cq_event); 973 rds_ib_stats_inc(s_ib_rx_cq_event);
849 974
850 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; 975 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
851 976
852 rds_ib_recv_unmap_page(ic, recv); 977 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
853 978
854 /* 979 /*
855 * Also process recvs in connecting state because it is possible 980 * Also process recvs in connecting state because it is possible
856 * to get a recv completion _before_ the rdmacm ESTABLISHED 981 * to get a recv completion _before_ the rdmacm ESTABLISHED
857 * event is processed. 982 * event is processed.
858 */ 983 */
859 if (rds_conn_up(conn) || rds_conn_connecting(conn)) { 984 if (wc.status == IB_WC_SUCCESS) {
985 rds_ib_process_recv(conn, recv, wc.byte_len, state);
986 } else {
860 /* We expect errors as the qp is drained during shutdown */ 987 /* We expect errors as the qp is drained during shutdown */
861 if (wc.status == IB_WC_SUCCESS) { 988 if (rds_conn_up(conn) || rds_conn_connecting(conn))
862 rds_ib_process_recv(conn, recv, wc.byte_len, state); 989 rds_ib_conn_error(conn, "recv completion on %pI4 had "
863 } else { 990 "status %u (%s), disconnecting and "
864 rds_ib_conn_error(conn, "recv completion on " 991 "reconnecting\n", &conn->c_faddr,
865 "%pI4 had status %u, disconnecting and " 992 wc.status,
866 "reconnecting\n", &conn->c_faddr, 993 rds_ib_wc_status_str(wc.status));
867 wc.status);
868 }
869 } 994 }
870 995
996 /*
997 * It's very important that we only free this ring entry if we've truly
998 * freed the resources allocated to the entry. The refilling path can
999 * leak if we don't.
1000 */
871 rds_ib_ring_free(&ic->i_recv_ring, 1); 1001 rds_ib_ring_free(&ic->i_recv_ring, 1);
872 } 1002 }
873} 1003}
@@ -897,11 +1027,8 @@ void rds_ib_recv_tasklet_fn(unsigned long data)
897 if (rds_ib_ring_empty(&ic->i_recv_ring)) 1027 if (rds_ib_ring_empty(&ic->i_recv_ring))
898 rds_ib_stats_inc(s_ib_rx_ring_empty); 1028 rds_ib_stats_inc(s_ib_rx_ring_empty);
899 1029
900 /*
901 * If the ring is running low, then schedule the thread to refill.
902 */
903 if (rds_ib_ring_low(&ic->i_recv_ring)) 1030 if (rds_ib_ring_low(&ic->i_recv_ring))
904 queue_delayed_work(rds_wq, &conn->c_recv_w, 0); 1031 rds_ib_recv_refill(conn, 0);
905} 1032}
906 1033
907int rds_ib_recv(struct rds_connection *conn) 1034int rds_ib_recv(struct rds_connection *conn)
@@ -910,25 +1037,13 @@ int rds_ib_recv(struct rds_connection *conn)
910 int ret = 0; 1037 int ret = 0;
911 1038
912 rdsdebug("conn %p\n", conn); 1039 rdsdebug("conn %p\n", conn);
913
914 /*
915 * If we get a temporary posting failure in this context then
916 * we're really low and we want the caller to back off for a bit.
917 */
918 mutex_lock(&ic->i_recv_mutex);
919 if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
920 ret = -ENOMEM;
921 else
922 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
923 mutex_unlock(&ic->i_recv_mutex);
924
925 if (rds_conn_up(conn)) 1040 if (rds_conn_up(conn))
926 rds_ib_attempt_ack(ic); 1041 rds_ib_attempt_ack(ic);
927 1042
928 return ret; 1043 return ret;
929} 1044}
930 1045
931int __init rds_ib_recv_init(void) 1046int rds_ib_recv_init(void)
932{ 1047{
933 struct sysinfo si; 1048 struct sysinfo si;
934 int ret = -ENOMEM; 1049 int ret = -ENOMEM;
@@ -939,14 +1054,14 @@ int __init rds_ib_recv_init(void)
939 1054
940 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", 1055 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
941 sizeof(struct rds_ib_incoming), 1056 sizeof(struct rds_ib_incoming),
942 0, 0, NULL); 1057 0, SLAB_HWCACHE_ALIGN, NULL);
943 if (rds_ib_incoming_slab == NULL) 1058 if (!rds_ib_incoming_slab)
944 goto out; 1059 goto out;
945 1060
946 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", 1061 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
947 sizeof(struct rds_page_frag), 1062 sizeof(struct rds_page_frag),
948 0, 0, NULL); 1063 0, SLAB_HWCACHE_ALIGN, NULL);
949 if (rds_ib_frag_slab == NULL) 1064 if (!rds_ib_frag_slab)
950 kmem_cache_destroy(rds_ib_incoming_slab); 1065 kmem_cache_destroy(rds_ib_incoming_slab);
951 else 1066 else
952 ret = 0; 1067 ret = 0;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 17fa80803ab0..71f373c421bc 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -36,11 +36,49 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "ib.h" 39#include "ib.h"
41 40
42static void rds_ib_send_rdma_complete(struct rds_message *rm, 41static char *rds_ib_wc_status_strings[] = {
43 int wc_status) 42#define RDS_IB_WC_STATUS_STR(foo) \
43 [IB_WC_##foo] = __stringify(IB_WC_##foo)
44 RDS_IB_WC_STATUS_STR(SUCCESS),
45 RDS_IB_WC_STATUS_STR(LOC_LEN_ERR),
46 RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR),
47 RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR),
48 RDS_IB_WC_STATUS_STR(LOC_PROT_ERR),
49 RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR),
50 RDS_IB_WC_STATUS_STR(MW_BIND_ERR),
51 RDS_IB_WC_STATUS_STR(BAD_RESP_ERR),
52 RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR),
53 RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR),
54 RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR),
55 RDS_IB_WC_STATUS_STR(REM_OP_ERR),
56 RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR),
57 RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR),
58 RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR),
59 RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR),
60 RDS_IB_WC_STATUS_STR(REM_ABORT_ERR),
61 RDS_IB_WC_STATUS_STR(INV_EECN_ERR),
62 RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR),
63 RDS_IB_WC_STATUS_STR(FATAL_ERR),
64 RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR),
65 RDS_IB_WC_STATUS_STR(GENERAL_ERR),
66#undef RDS_IB_WC_STATUS_STR
67};
68
69char *rds_ib_wc_status_str(enum ib_wc_status status)
70{
71 return rds_str_array(rds_ib_wc_status_strings,
72 ARRAY_SIZE(rds_ib_wc_status_strings), status);
73}
74
75/*
76 * Convert IB-specific error message to RDS error message and call core
77 * completion handler.
78 */
79static void rds_ib_send_complete(struct rds_message *rm,
80 int wc_status,
81 void (*complete)(struct rds_message *rm, int status))
44{ 82{
45 int notify_status; 83 int notify_status;
46 84
@@ -60,69 +98,125 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm,
60 notify_status = RDS_RDMA_OTHER_ERROR; 98 notify_status = RDS_RDMA_OTHER_ERROR;
61 break; 99 break;
62 } 100 }
63 rds_rdma_send_complete(rm, notify_status); 101 complete(rm, notify_status);
102}
103
104static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
105 struct rm_data_op *op,
106 int wc_status)
107{
108 if (op->op_nents)
109 ib_dma_unmap_sg(ic->i_cm_id->device,
110 op->op_sg, op->op_nents,
111 DMA_TO_DEVICE);
64} 112}
65 113
66static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, 114static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
67 struct rds_rdma_op *op) 115 struct rm_rdma_op *op,
116 int wc_status)
68{ 117{
69 if (op->r_mapped) { 118 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 119 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 120 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 121 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 122 op->op_mapped = 0;
74 } 123 }
124
125 /* If the user asked for a completion notification on this
126 * message, we can implement three different semantics:
127 * 1. Notify when we received the ACK on the RDS message
128 * that was queued with the RDMA. This provides reliable
129 * notification of RDMA status at the expense of a one-way
130 * packet delay.
131 * 2. Notify when the IB stack gives us the completion event for
132 * the RDMA operation.
133 * 3. Notify when the IB stack gives us the completion event for
134 * the accompanying RDS messages.
135 * Here, we implement approach #3. To implement approach #2,
136 * we would need to take an event for the rdma WR. To implement #1,
137 * don't call rds_rdma_send_complete at all, and fall back to the notify
138 * handling in the ACK processing code.
139 *
140 * Note: There's no need to explicitly sync any RDMA buffers using
141 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
142 * operation itself unmapped the RDMA buffers, which takes care
143 * of synching.
144 */
145 rds_ib_send_complete(container_of(op, struct rds_message, rdma),
146 wc_status, rds_rdma_send_complete);
147
148 if (op->op_write)
149 rds_stats_add(s_send_rdma_bytes, op->op_bytes);
150 else
151 rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
75} 152}
76 153
77static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, 154static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
78 struct rds_ib_send_work *send, 155 struct rm_atomic_op *op,
79 int wc_status) 156 int wc_status)
80{ 157{
81 struct rds_message *rm = send->s_rm; 158 /* unmap atomic recvbuf */
82 159 if (op->op_mapped) {
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 160 ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
84 161 DMA_FROM_DEVICE);
85 ib_dma_unmap_sg(ic->i_cm_id->device, 162 op->op_mapped = 0;
86 rm->m_sg, rm->m_nents, 163 }
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_ib_send_rdma_complete(rm, wc_status);
113 164
114 if (rm->m_rdma_op->r_write) 165 rds_ib_send_complete(container_of(op, struct rds_message, atomic),
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 166 wc_status, rds_atomic_send_complete);
116 else 167
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 168 if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
169 rds_ib_stats_inc(s_ib_atomic_cswp);
170 else
171 rds_ib_stats_inc(s_ib_atomic_fadd);
172}
173
174/*
175 * Unmap the resources associated with a struct send_work.
176 *
177 * Returns the rm for no good reason other than it is unobtainable
178 * other than by switching on wr.opcode, currently, and the caller,
179 * the event handler, needs it.
180 */
181static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
182 struct rds_ib_send_work *send,
183 int wc_status)
184{
185 struct rds_message *rm = NULL;
186
187 /* In the error case, wc.opcode sometimes contains garbage */
188 switch (send->s_wr.opcode) {
189 case IB_WR_SEND:
190 if (send->s_op) {
191 rm = container_of(send->s_op, struct rds_message, data);
192 rds_ib_send_unmap_data(ic, send->s_op, wc_status);
193 }
194 break;
195 case IB_WR_RDMA_WRITE:
196 case IB_WR_RDMA_READ:
197 if (send->s_op) {
198 rm = container_of(send->s_op, struct rds_message, rdma);
199 rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
200 }
201 break;
202 case IB_WR_ATOMIC_FETCH_AND_ADD:
203 case IB_WR_ATOMIC_CMP_AND_SWP:
204 if (send->s_op) {
205 rm = container_of(send->s_op, struct rds_message, atomic);
206 rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
207 }
208 break;
209 default:
210 if (printk_ratelimit())
211 printk(KERN_NOTICE
212 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
213 __func__, send->s_wr.opcode);
214 break;
118 } 215 }
119 216
120 /* If anyone waited for this message to get flushed out, wake 217 send->s_wr.opcode = 0xdead;
121 * them up now */
122 rds_message_unmapped(rm);
123 218
124 rds_message_put(rm); 219 return rm;
125 send->s_rm = NULL;
126} 220}
127 221
128void rds_ib_send_init_ring(struct rds_ib_connection *ic) 222void rds_ib_send_init_ring(struct rds_ib_connection *ic)
@@ -133,23 +227,18 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 227 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge; 228 struct ib_sge *sge;
135 229
136 send->s_rm = NULL;
137 send->s_op = NULL; 230 send->s_op = NULL;
138 231
139 send->s_wr.wr_id = i; 232 send->s_wr.wr_id = i;
140 send->s_wr.sg_list = send->s_sge; 233 send->s_wr.sg_list = send->s_sge;
141 send->s_wr.num_sge = 1;
142 send->s_wr.opcode = IB_WR_SEND;
143 send->s_wr.send_flags = 0;
144 send->s_wr.ex.imm_data = 0; 234 send->s_wr.ex.imm_data = 0;
145 235
146 sge = rds_ib_data_sge(ic, send->s_sge); 236 sge = &send->s_sge[0];
147 sge->lkey = ic->i_mr->lkey;
148
149 sge = rds_ib_header_sge(ic, send->s_sge);
150 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); 237 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
151 sge->length = sizeof(struct rds_header); 238 sge->length = sizeof(struct rds_header);
152 sge->lkey = ic->i_mr->lkey; 239 sge->lkey = ic->i_mr->lkey;
240
241 send->s_sge[1].lkey = ic->i_mr->lkey;
153 } 242 }
154} 243}
155 244
@@ -159,16 +248,24 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
159 u32 i; 248 u32 i;
160 249
161 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 250 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
162 if (send->s_wr.opcode == 0xdead) 251 if (send->s_op && send->s_wr.opcode != 0xdead)
163 continue; 252 rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
164 if (send->s_rm)
165 rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
166 if (send->s_op)
167 rds_ib_send_unmap_rdma(ic, send->s_op);
168 } 253 }
169} 254}
170 255
171/* 256/*
257 * The only fast path caller always has a non-zero nr, so we don't
258 * bother testing nr before performing the atomic sub.
259 */
260static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
261{
262 if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
263 waitqueue_active(&rds_ib_ring_empty_wait))
264 wake_up(&rds_ib_ring_empty_wait);
265 BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
266}
267
268/*
172 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc 269 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
173 * operations performed in the send path. As the sender allocs and potentially 270 * operations performed in the send path. As the sender allocs and potentially
174 * unallocs the next free entry in the ring it doesn't alter which is 271 * unallocs the next free entry in the ring it doesn't alter which is
@@ -178,12 +275,14 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
178{ 275{
179 struct rds_connection *conn = context; 276 struct rds_connection *conn = context;
180 struct rds_ib_connection *ic = conn->c_transport_data; 277 struct rds_ib_connection *ic = conn->c_transport_data;
278 struct rds_message *rm = NULL;
181 struct ib_wc wc; 279 struct ib_wc wc;
182 struct rds_ib_send_work *send; 280 struct rds_ib_send_work *send;
183 u32 completed; 281 u32 completed;
184 u32 oldest; 282 u32 oldest;
185 u32 i = 0; 283 u32 i = 0;
186 int ret; 284 int ret;
285 int nr_sig = 0;
187 286
188 rdsdebug("cq %p conn %p\n", cq, conn); 287 rdsdebug("cq %p conn %p\n", cq, conn);
189 rds_ib_stats_inc(s_ib_tx_cq_call); 288 rds_ib_stats_inc(s_ib_tx_cq_call);
@@ -192,8 +291,9 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
192 rdsdebug("ib_req_notify_cq send failed: %d\n", ret); 291 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
193 292
194 while (ib_poll_cq(cq, 1, &wc) > 0) { 293 while (ib_poll_cq(cq, 1, &wc) > 0) {
195 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 294 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
196 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 295 (unsigned long long)wc.wr_id, wc.status,
296 rds_ib_wc_status_str(wc.status), wc.byte_len,
197 be32_to_cpu(wc.ex.imm_data)); 297 be32_to_cpu(wc.ex.imm_data));
198 rds_ib_stats_inc(s_ib_tx_cq_event); 298 rds_ib_stats_inc(s_ib_tx_cq_event);
199 299
@@ -210,51 +310,30 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
210 310
211 for (i = 0; i < completed; i++) { 311 for (i = 0; i < completed; i++) {
212 send = &ic->i_sends[oldest]; 312 send = &ic->i_sends[oldest];
313 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
314 nr_sig++;
213 315
214 /* In the error case, wc.opcode sometimes contains garbage */ 316 rm = rds_ib_send_unmap_op(ic, send, wc.status);
215 switch (send->s_wr.opcode) {
216 case IB_WR_SEND:
217 if (send->s_rm)
218 rds_ib_send_unmap_rm(ic, send, wc.status);
219 break;
220 case IB_WR_RDMA_WRITE:
221 case IB_WR_RDMA_READ:
222 /* Nothing to be done - the SG list will be unmapped
223 * when the SEND completes. */
224 break;
225 default:
226 if (printk_ratelimit())
227 printk(KERN_NOTICE
228 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
229 __func__, send->s_wr.opcode);
230 break;
231 }
232 317
233 send->s_wr.opcode = 0xdead;
234 send->s_wr.num_sge = 1;
235 if (send->s_queued + HZ/2 < jiffies) 318 if (send->s_queued + HZ/2 < jiffies)
236 rds_ib_stats_inc(s_ib_tx_stalled); 319 rds_ib_stats_inc(s_ib_tx_stalled);
237 320
238 /* If a RDMA operation produced an error, signal this right 321 if (send->s_op) {
239 * away. If we don't, the subsequent SEND that goes with this 322 if (send->s_op == rm->m_final_op) {
240 * RDMA will be canceled with ERR_WFLUSH, and the application 323 /* If anyone waited for this message to get flushed out, wake
241 * never learn that the RDMA failed. */ 324 * them up now */
242 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { 325 rds_message_unmapped(rm);
243 struct rds_message *rm;
244
245 rm = rds_send_get_message(conn, send->s_op);
246 if (rm) {
247 if (rm->m_rdma_op)
248 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
249 rds_ib_send_rdma_complete(rm, wc.status);
250 rds_message_put(rm);
251 } 326 }
327 rds_message_put(rm);
328 send->s_op = NULL;
252 } 329 }
253 330
254 oldest = (oldest + 1) % ic->i_send_ring.w_nr; 331 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
255 } 332 }
256 333
257 rds_ib_ring_free(&ic->i_send_ring, completed); 334 rds_ib_ring_free(&ic->i_send_ring, completed);
335 rds_ib_sub_signaled(ic, nr_sig);
336 nr_sig = 0;
258 337
259 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 338 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
260 test_bit(0, &conn->c_map_queued)) 339 test_bit(0, &conn->c_map_queued))
@@ -262,10 +341,10 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
262 341
263 /* We expect errors as the qp is drained during shutdown */ 342 /* We expect errors as the qp is drained during shutdown */
264 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { 343 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
265 rds_ib_conn_error(conn, 344 rds_ib_conn_error(conn, "send completion on %pI4 had status "
266 "send completion on %pI4 " 345 "%u (%s), disconnecting and reconnecting\n",
267 "had status %u, disconnecting and reconnecting\n", 346 &conn->c_faddr, wc.status,
268 &conn->c_faddr, wc.status); 347 rds_ib_wc_status_str(wc.status));
269 } 348 }
270 } 349 }
271} 350}
@@ -294,7 +373,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
294 * credits (see rds_ib_send_add_credits below). 373 * credits (see rds_ib_send_add_credits below).
295 * 374 *
296 * The RDS send code is essentially single-threaded; rds_send_xmit 375 * The RDS send code is essentially single-threaded; rds_send_xmit
297 * grabs c_send_lock to ensure exclusive access to the send ring. 376 * sets RDS_IN_XMIT to ensure exclusive access to the send ring.
298 * However, the ACK sending code is independent and can race with 377 * However, the ACK sending code is independent and can race with
299 * message SENDs. 378 * message SENDs.
300 * 379 *
@@ -413,40 +492,21 @@ void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
413 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 492 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
414} 493}
415 494
416static inline void 495static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
417rds_ib_xmit_populate_wr(struct rds_ib_connection *ic, 496 struct rds_ib_send_work *send,
418 struct rds_ib_send_work *send, unsigned int pos, 497 bool notify)
419 unsigned long buffer, unsigned int length,
420 int send_flags)
421{ 498{
422 struct ib_sge *sge; 499 /*
423 500 * We want to delay signaling completions just enough to get
424 WARN_ON(pos != send - ic->i_sends); 501 * the batching benefits but not so much that we create dead time
425 502 * on the wire.
426 send->s_wr.send_flags = send_flags; 503 */
427 send->s_wr.opcode = IB_WR_SEND; 504 if (ic->i_unsignaled_wrs-- == 0 || notify) {
428 send->s_wr.num_sge = 2; 505 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
429 send->s_wr.next = NULL; 506 send->s_wr.send_flags |= IB_SEND_SIGNALED;
430 send->s_queued = jiffies; 507 return 1;
431 send->s_op = NULL;
432
433 if (length != 0) {
434 sge = rds_ib_data_sge(ic, send->s_sge);
435 sge->addr = buffer;
436 sge->length = length;
437 sge->lkey = ic->i_mr->lkey;
438
439 sge = rds_ib_header_sge(ic, send->s_sge);
440 } else {
441 /* We're sending a packet with no payload. There is only
442 * one SGE */
443 send->s_wr.num_sge = 1;
444 sge = &send->s_sge[0];
445 } 508 }
446 509 return 0;
447 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
448 sge->length = sizeof(struct rds_header);
449 sge->lkey = ic->i_mr->lkey;
450} 510}
451 511
452/* 512/*
@@ -475,13 +535,14 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
475 u32 pos; 535 u32 pos;
476 u32 i; 536 u32 i;
477 u32 work_alloc; 537 u32 work_alloc;
478 u32 credit_alloc; 538 u32 credit_alloc = 0;
479 u32 posted; 539 u32 posted;
480 u32 adv_credits = 0; 540 u32 adv_credits = 0;
481 int send_flags = 0; 541 int send_flags = 0;
482 int sent; 542 int bytes_sent = 0;
483 int ret; 543 int ret;
484 int flow_controlled = 0; 544 int flow_controlled = 0;
545 int nr_sig = 0;
485 546
486 BUG_ON(off % RDS_FRAG_SIZE); 547 BUG_ON(off % RDS_FRAG_SIZE);
487 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); 548 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
@@ -507,14 +568,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
507 goto out; 568 goto out;
508 } 569 }
509 570
510 credit_alloc = work_alloc;
511 if (ic->i_flowctl) { 571 if (ic->i_flowctl) {
512 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); 572 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
513 adv_credits += posted; 573 adv_credits += posted;
514 if (credit_alloc < work_alloc) { 574 if (credit_alloc < work_alloc) {
515 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); 575 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
516 work_alloc = credit_alloc; 576 work_alloc = credit_alloc;
517 flow_controlled++; 577 flow_controlled = 1;
518 } 578 }
519 if (work_alloc == 0) { 579 if (work_alloc == 0) {
520 set_bit(RDS_LL_SEND_FULL, &conn->c_flags); 580 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
@@ -525,31 +585,25 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
525 } 585 }
526 586
527 /* map the message the first time we see it */ 587 /* map the message the first time we see it */
528 if (ic->i_rm == NULL) { 588 if (!ic->i_data_op) {
529 /* 589 if (rm->data.op_nents) {
530 printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n", 590 rm->data.op_count = ib_dma_map_sg(dev,
531 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 591 rm->data.op_sg,
532 rm->m_inc.i_hdr.h_flags, 592 rm->data.op_nents,
533 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 593 DMA_TO_DEVICE);
534 */ 594 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
535 if (rm->m_nents) { 595 if (rm->data.op_count == 0) {
536 rm->m_count = ib_dma_map_sg(dev,
537 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
538 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
539 if (rm->m_count == 0) {
540 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 596 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
541 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 597 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
542 ret = -ENOMEM; /* XXX ? */ 598 ret = -ENOMEM; /* XXX ? */
543 goto out; 599 goto out;
544 } 600 }
545 } else { 601 } else {
546 rm->m_count = 0; 602 rm->data.op_count = 0;
547 } 603 }
548 604
549 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
550 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
551 rds_message_addref(rm); 605 rds_message_addref(rm);
552 ic->i_rm = rm; 606 ic->i_data_op = &rm->data;
553 607
554 /* Finalize the header */ 608 /* Finalize the header */
555 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) 609 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
@@ -559,10 +613,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
559 613
560 /* If it has a RDMA op, tell the peer we did it. This is 614 /* If it has a RDMA op, tell the peer we did it. This is
561 * used by the peer to release use-once RDMA MRs. */ 615 * used by the peer to release use-once RDMA MRs. */
562 if (rm->m_rdma_op) { 616 if (rm->rdma.op_active) {
563 struct rds_ext_header_rdma ext_hdr; 617 struct rds_ext_header_rdma ext_hdr;
564 618
565 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 619 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
566 rds_message_add_extension(&rm->m_inc.i_hdr, 620 rds_message_add_extension(&rm->m_inc.i_hdr,
567 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 621 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
568 } 622 }
@@ -582,99 +636,77 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
582 /* 636 /*
583 * Update adv_credits since we reset the ACK_REQUIRED bit. 637 * Update adv_credits since we reset the ACK_REQUIRED bit.
584 */ 638 */
585 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); 639 if (ic->i_flowctl) {
586 adv_credits += posted; 640 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
587 BUG_ON(adv_credits > 255); 641 adv_credits += posted;
642 BUG_ON(adv_credits > 255);
643 }
588 } 644 }
589 645
590 send = &ic->i_sends[pos];
591 first = send;
592 prev = NULL;
593 scat = &rm->m_sg[sg];
594 sent = 0;
595 i = 0;
596
597 /* Sometimes you want to put a fence between an RDMA 646 /* Sometimes you want to put a fence between an RDMA
598 * READ and the following SEND. 647 * READ and the following SEND.
599 * We could either do this all the time 648 * We could either do this all the time
600 * or when requested by the user. Right now, we let 649 * or when requested by the user. Right now, we let
601 * the application choose. 650 * the application choose.
602 */ 651 */
603 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 652 if (rm->rdma.op_active && rm->rdma.op_fence)
604 send_flags = IB_SEND_FENCE; 653 send_flags = IB_SEND_FENCE;
605 654
606 /* 655 /* Each frag gets a header. Msgs may be 0 bytes */
607 * We could be copying the header into the unused tail of the page. 656 send = &ic->i_sends[pos];
608 * That would need to be changed in the future when those pages might 657 first = send;
609 * be mapped userspace pages or page cache pages. So instead we always 658 prev = NULL;
610 * use a second sge and our long-lived ring of mapped headers. We send 659 scat = &ic->i_data_op->op_sg[sg];
611 * the header after the data so that the data payload can be aligned on 660 i = 0;
612 * the receiver. 661 do {
613 */ 662 unsigned int len = 0;
614 663
615 /* handle a 0-len message */ 664 /* Set up the header */
616 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { 665 send->s_wr.send_flags = send_flags;
617 rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); 666 send->s_wr.opcode = IB_WR_SEND;
618 goto add_header; 667 send->s_wr.num_sge = 1;
619 } 668 send->s_wr.next = NULL;
669 send->s_queued = jiffies;
670 send->s_op = NULL;
620 671
621 /* if there's data reference it with a chain of work reqs */ 672 send->s_sge[0].addr = ic->i_send_hdrs_dma
622 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 673 + (pos * sizeof(struct rds_header));
623 unsigned int len; 674 send->s_sge[0].length = sizeof(struct rds_header);
624 675
625 send = &ic->i_sends[pos]; 676 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
626 677
627 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); 678 /* Set up the data, if present */
628 rds_ib_xmit_populate_wr(ic, send, pos, 679 if (i < work_alloc
629 ib_sg_dma_address(dev, scat) + off, len, 680 && scat != &rm->data.op_sg[rm->data.op_count]) {
630 send_flags); 681 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
682 send->s_wr.num_sge = 2;
631 683
632 /* 684 send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off;
633 * We want to delay signaling completions just enough to get 685 send->s_sge[1].length = len;
634 * the batching benefits but not so much that we create dead time
635 * on the wire.
636 */
637 if (ic->i_unsignaled_wrs-- == 0) {
638 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
639 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
640 }
641 686
642 ic->i_unsignaled_bytes -= len; 687 bytes_sent += len;
643 if (ic->i_unsignaled_bytes <= 0) { 688 off += len;
644 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; 689 if (off == ib_sg_dma_len(dev, scat)) {
645 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 690 scat++;
691 off = 0;
692 }
646 } 693 }
647 694
695 rds_ib_set_wr_signal_state(ic, send, 0);
696
648 /* 697 /*
649 * Always signal the last one if we're stopping due to flow control. 698 * Always signal the last one if we're stopping due to flow control.
650 */ 699 */
651 if (flow_controlled && i == (work_alloc-1)) 700 if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
652 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 701 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
653 702
703 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
704 nr_sig++;
705
654 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 706 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
655 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 707 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
656 708
657 sent += len; 709 if (ic->i_flowctl && adv_credits) {
658 off += len;
659 if (off == ib_sg_dma_len(dev, scat)) {
660 scat++;
661 off = 0;
662 }
663
664add_header:
665 /* Tack on the header after the data. The header SGE should already
666 * have been set up to point to the right header buffer. */
667 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
668
669 if (0) {
670 struct rds_header *hdr = &ic->i_send_hdrs[pos];
671
672 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
673 be16_to_cpu(hdr->h_dport),
674 hdr->h_flags,
675 be32_to_cpu(hdr->h_len));
676 }
677 if (adv_credits) {
678 struct rds_header *hdr = &ic->i_send_hdrs[pos]; 710 struct rds_header *hdr = &ic->i_send_hdrs[pos];
679 711
680 /* add credit and redo the header checksum */ 712 /* add credit and redo the header checksum */
@@ -689,20 +721,25 @@ add_header:
689 prev = send; 721 prev = send;
690 722
691 pos = (pos + 1) % ic->i_send_ring.w_nr; 723 pos = (pos + 1) % ic->i_send_ring.w_nr;
692 } 724 send = &ic->i_sends[pos];
725 i++;
726
727 } while (i < work_alloc
728 && scat != &rm->data.op_sg[rm->data.op_count]);
693 729
694 /* Account the RDS header in the number of bytes we sent, but just once. 730 /* Account the RDS header in the number of bytes we sent, but just once.
695 * The caller has no concept of fragmentation. */ 731 * The caller has no concept of fragmentation. */
696 if (hdr_off == 0) 732 if (hdr_off == 0)
697 sent += sizeof(struct rds_header); 733 bytes_sent += sizeof(struct rds_header);
698 734
699 /* if we finished the message then send completion owns it */ 735 /* if we finished the message then send completion owns it */
700 if (scat == &rm->m_sg[rm->m_count]) { 736 if (scat == &rm->data.op_sg[rm->data.op_count]) {
701 prev->s_rm = ic->i_rm; 737 prev->s_op = ic->i_data_op;
702 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 738 prev->s_wr.send_flags |= IB_SEND_SOLICITED;
703 ic->i_rm = NULL; 739 ic->i_data_op = NULL;
704 } 740 }
705 741
742 /* Put back wrs & credits we didn't use */
706 if (i < work_alloc) { 743 if (i < work_alloc) {
707 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 744 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
708 work_alloc = i; 745 work_alloc = i;
@@ -710,6 +747,9 @@ add_header:
710 if (ic->i_flowctl && i < credit_alloc) 747 if (ic->i_flowctl && i < credit_alloc)
711 rds_ib_send_add_credits(conn, credit_alloc - i); 748 rds_ib_send_add_credits(conn, credit_alloc - i);
712 749
750 if (nr_sig)
751 atomic_add(nr_sig, &ic->i_signaled_sends);
752
713 /* XXX need to worry about failed_wr and partial sends. */ 753 /* XXX need to worry about failed_wr and partial sends. */
714 failed_wr = &first->s_wr; 754 failed_wr = &first->s_wr;
715 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 755 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
@@ -720,32 +760,127 @@ add_header:
720 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 " 760 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
721 "returned %d\n", &conn->c_faddr, ret); 761 "returned %d\n", &conn->c_faddr, ret);
722 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 762 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
723 if (prev->s_rm) { 763 rds_ib_sub_signaled(ic, nr_sig);
724 ic->i_rm = prev->s_rm; 764 if (prev->s_op) {
725 prev->s_rm = NULL; 765 ic->i_data_op = prev->s_op;
766 prev->s_op = NULL;
726 } 767 }
727 768
728 rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); 769 rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
729 goto out; 770 goto out;
730 } 771 }
731 772
732 ret = sent; 773 ret = bytes_sent;
733out: 774out:
734 BUG_ON(adv_credits); 775 BUG_ON(adv_credits);
735 return ret; 776 return ret;
736} 777}
737 778
738int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 779/*
780 * Issue atomic operation.
781 * A simplified version of the rdma case, we always map 1 SG, and
782 * only 8 bytes, for the return value from the atomic operation.
783 */
784int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
785{
786 struct rds_ib_connection *ic = conn->c_transport_data;
787 struct rds_ib_send_work *send = NULL;
788 struct ib_send_wr *failed_wr;
789 struct rds_ib_device *rds_ibdev;
790 u32 pos;
791 u32 work_alloc;
792 int ret;
793 int nr_sig = 0;
794
795 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
796
797 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
798 if (work_alloc != 1) {
799 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
800 rds_ib_stats_inc(s_ib_tx_ring_full);
801 ret = -ENOMEM;
802 goto out;
803 }
804
805 /* address of send request in ring */
806 send = &ic->i_sends[pos];
807 send->s_queued = jiffies;
808
809 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
810 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
811 send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
812 send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
813 send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
814 send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
815 } else { /* FADD */
816 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
817 send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
818 send->s_wr.wr.atomic.swap = 0;
819 send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
820 send->s_wr.wr.atomic.swap_mask = 0;
821 }
822 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
823 send->s_wr.num_sge = 1;
824 send->s_wr.next = NULL;
825 send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
826 send->s_wr.wr.atomic.rkey = op->op_rkey;
827 send->s_op = op;
828 rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
829
830 /* map 8 byte retval buffer to the device */
831 ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
832 rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
833 if (ret != 1) {
834 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
835 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
836 ret = -ENOMEM; /* XXX ? */
837 goto out;
838 }
839
840 /* Convert our struct scatterlist to struct ib_sge */
841 send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
842 send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
843 send->s_sge[0].lkey = ic->i_mr->lkey;
844
845 rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
846 send->s_sge[0].addr, send->s_sge[0].length);
847
848 if (nr_sig)
849 atomic_add(nr_sig, &ic->i_signaled_sends);
850
851 failed_wr = &send->s_wr;
852 ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
853 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
854 send, &send->s_wr, ret, failed_wr);
855 BUG_ON(failed_wr != &send->s_wr);
856 if (ret) {
857 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
858 "returned %d\n", &conn->c_faddr, ret);
859 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
860 rds_ib_sub_signaled(ic, nr_sig);
861 goto out;
862 }
863
864 if (unlikely(failed_wr != &send->s_wr)) {
865 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
866 BUG_ON(failed_wr != &send->s_wr);
867 }
868
869out:
870 return ret;
871}
872
873int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
739{ 874{
740 struct rds_ib_connection *ic = conn->c_transport_data; 875 struct rds_ib_connection *ic = conn->c_transport_data;
741 struct rds_ib_send_work *send = NULL; 876 struct rds_ib_send_work *send = NULL;
742 struct rds_ib_send_work *first; 877 struct rds_ib_send_work *first;
743 struct rds_ib_send_work *prev; 878 struct rds_ib_send_work *prev;
744 struct ib_send_wr *failed_wr; 879 struct ib_send_wr *failed_wr;
745 struct rds_ib_device *rds_ibdev;
746 struct scatterlist *scat; 880 struct scatterlist *scat;
747 unsigned long len; 881 unsigned long len;
748 u64 remote_addr = op->r_remote_addr; 882 u64 remote_addr = op->op_remote_addr;
883 u32 max_sge = ic->rds_ibdev->max_sge;
749 u32 pos; 884 u32 pos;
750 u32 work_alloc; 885 u32 work_alloc;
751 u32 i; 886 u32 i;
@@ -753,29 +888,28 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
753 int sent; 888 int sent;
754 int ret; 889 int ret;
755 int num_sge; 890 int num_sge;
756 891 int nr_sig = 0;
757 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 892
758 893 /* map the op the first time we see it */
759 /* map the message the first time we see it */ 894 if (!op->op_mapped) {
760 if (!op->r_mapped) { 895 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
761 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 896 op->op_sg, op->op_nents, (op->op_write) ?
762 op->r_sg, op->r_nents, (op->r_write) ? 897 DMA_TO_DEVICE : DMA_FROM_DEVICE);
763 DMA_TO_DEVICE : DMA_FROM_DEVICE); 898 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
764 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 899 if (op->op_count == 0) {
765 if (op->r_count == 0) {
766 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 900 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
767 ret = -ENOMEM; /* XXX ? */ 901 ret = -ENOMEM; /* XXX ? */
768 goto out; 902 goto out;
769 } 903 }
770 904
771 op->r_mapped = 1; 905 op->op_mapped = 1;
772 } 906 }
773 907
774 /* 908 /*
775 * Instead of knowing how to return a partial rdma read/write we insist that there 909 * Instead of knowing how to return a partial rdma read/write we insist that there
776 * be enough work requests to send the entire message. 910 * be enough work requests to send the entire message.
777 */ 911 */
778 i = ceil(op->r_count, rds_ibdev->max_sge); 912 i = ceil(op->op_count, max_sge);
779 913
780 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); 914 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
781 if (work_alloc != i) { 915 if (work_alloc != i) {
@@ -788,30 +922,24 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
788 send = &ic->i_sends[pos]; 922 send = &ic->i_sends[pos];
789 first = send; 923 first = send;
790 prev = NULL; 924 prev = NULL;
791 scat = &op->r_sg[0]; 925 scat = &op->op_sg[0];
792 sent = 0; 926 sent = 0;
793 num_sge = op->r_count; 927 num_sge = op->op_count;
794 928
795 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 929 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
796 send->s_wr.send_flags = 0; 930 send->s_wr.send_flags = 0;
797 send->s_queued = jiffies; 931 send->s_queued = jiffies;
798 /* 932 send->s_op = NULL;
799 * We want to delay signaling completions just enough to get 933
800 * the batching benefits but not so much that we create dead time on the wire. 934 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
801 */
802 if (ic->i_unsignaled_wrs-- == 0) {
803 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
804 send->s_wr.send_flags = IB_SEND_SIGNALED;
805 }
806 935
807 send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; 936 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
808 send->s_wr.wr.rdma.remote_addr = remote_addr; 937 send->s_wr.wr.rdma.remote_addr = remote_addr;
809 send->s_wr.wr.rdma.rkey = op->r_key; 938 send->s_wr.wr.rdma.rkey = op->op_rkey;
810 send->s_op = op;
811 939
812 if (num_sge > rds_ibdev->max_sge) { 940 if (num_sge > max_sge) {
813 send->s_wr.num_sge = rds_ibdev->max_sge; 941 send->s_wr.num_sge = max_sge;
814 num_sge -= rds_ibdev->max_sge; 942 num_sge -= max_sge;
815 } else { 943 } else {
816 send->s_wr.num_sge = num_sge; 944 send->s_wr.num_sge = num_sge;
817 } 945 }
@@ -821,7 +949,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
821 if (prev) 949 if (prev)
822 prev->s_wr.next = &send->s_wr; 950 prev->s_wr.next = &send->s_wr;
823 951
824 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 952 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
825 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 953 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
826 send->s_sge[j].addr = 954 send->s_sge[j].addr =
827 ib_sg_dma_address(ic->i_cm_id->device, scat); 955 ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -843,15 +971,20 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
843 send = ic->i_sends; 971 send = ic->i_sends;
844 } 972 }
845 973
846 /* if we finished the message then send completion owns it */ 974 /* give a reference to the last op */
847 if (scat == &op->r_sg[op->r_count]) 975 if (scat == &op->op_sg[op->op_count]) {
848 prev->s_wr.send_flags = IB_SEND_SIGNALED; 976 prev->s_op = op;
977 rds_message_addref(container_of(op, struct rds_message, rdma));
978 }
849 979
850 if (i < work_alloc) { 980 if (i < work_alloc) {
851 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 981 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
852 work_alloc = i; 982 work_alloc = i;
853 } 983 }
854 984
985 if (nr_sig)
986 atomic_add(nr_sig, &ic->i_signaled_sends);
987
855 failed_wr = &first->s_wr; 988 failed_wr = &first->s_wr;
856 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 989 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
857 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 990 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
@@ -861,6 +994,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
861 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " 994 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
862 "returned %d\n", &conn->c_faddr, ret); 995 "returned %d\n", &conn->c_faddr, ret);
863 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 996 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
997 rds_ib_sub_signaled(ic, nr_sig);
864 goto out; 998 goto out;
865 } 999 }
866 1000
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index d2c904dd6fbc..2d5965d6e97c 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -67,6 +67,8 @@ static const char *const rds_ib_stat_names[] = {
67 "ib_rdma_mr_pool_flush", 67 "ib_rdma_mr_pool_flush",
68 "ib_rdma_mr_pool_wait", 68 "ib_rdma_mr_pool_wait",
69 "ib_rdma_mr_pool_depleted", 69 "ib_rdma_mr_pool_depleted",
70 "ib_atomic_cswp",
71 "ib_atomic_fadd",
70}; 72};
71 73
72unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, 74unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index 03f01cb4e0fe..fc3da37220fd 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -49,10 +49,6 @@ unsigned long rds_ib_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1; 49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; 50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
51 51
52unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
55
56/* 52/*
57 * This sysctl does nothing. 53 * This sysctl does nothing.
58 * 54 *
@@ -94,15 +90,6 @@ ctl_table rds_ib_sysctl_table[] = {
94 .extra2 = &rds_ib_sysctl_max_unsig_wr_max, 90 .extra2 = &rds_ib_sysctl_max_unsig_wr_max,
95 }, 91 },
96 { 92 {
97 .procname = "max_unsignaled_bytes",
98 .data = &rds_ib_sysctl_max_unsig_bytes,
99 .maxlen = sizeof(unsigned long),
100 .mode = 0644,
101 .proc_handler = proc_doulongvec_minmax,
102 .extra1 = &rds_ib_sysctl_max_unsig_bytes_min,
103 .extra2 = &rds_ib_sysctl_max_unsig_bytes_max,
104 },
105 {
106 .procname = "max_recv_allocation", 93 .procname = "max_recv_allocation",
107 .data = &rds_ib_sysctl_max_recv_allocation, 94 .data = &rds_ib_sysctl_max_recv_allocation,
108 .maxlen = sizeof(unsigned long), 95 .maxlen = sizeof(unsigned long),
@@ -132,10 +119,10 @@ void rds_ib_sysctl_exit(void)
132 unregister_sysctl_table(rds_ib_sysctl_hdr); 119 unregister_sysctl_table(rds_ib_sysctl_hdr);
133} 120}
134 121
135int __init rds_ib_sysctl_init(void) 122int rds_ib_sysctl_init(void)
136{ 123{
137 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table); 124 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table);
138 if (rds_ib_sysctl_hdr == NULL) 125 if (!rds_ib_sysctl_hdr)
139 return -ENOMEM; 126 return -ENOMEM;
140 return 0; 127 return 0;
141} 128}
diff --git a/net/rds/info.c b/net/rds/info.c
index c45c4173a44d..4fdf1b6e84ff 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -76,7 +76,7 @@ void rds_info_register_func(int optname, rds_info_func func)
76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
77 77
78 spin_lock(&rds_info_lock); 78 spin_lock(&rds_info_lock);
79 BUG_ON(rds_info_funcs[offset] != NULL); 79 BUG_ON(rds_info_funcs[offset]);
80 rds_info_funcs[offset] = func; 80 rds_info_funcs[offset] = func;
81 spin_unlock(&rds_info_lock); 81 spin_unlock(&rds_info_lock);
82} 82}
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(rds_info_deregister_func);
102 */ 102 */
103void rds_info_iter_unmap(struct rds_info_iterator *iter) 103void rds_info_iter_unmap(struct rds_info_iterator *iter)
104{ 104{
105 if (iter->addr != NULL) { 105 if (iter->addr) {
106 kunmap_atomic(iter->addr, KM_USER0); 106 kunmap_atomic(iter->addr, KM_USER0);
107 iter->addr = NULL; 107 iter->addr = NULL;
108 } 108 }
@@ -117,7 +117,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data,
117 unsigned long this; 117 unsigned long this;
118 118
119 while (bytes) { 119 while (bytes) {
120 if (iter->addr == NULL) 120 if (!iter->addr)
121 iter->addr = kmap_atomic(*iter->pages, KM_USER0); 121 iter->addr = kmap_atomic(*iter->pages, KM_USER0);
122 122
123 this = min(bytes, PAGE_SIZE - iter->offset); 123 this = min(bytes, PAGE_SIZE - iter->offset);
@@ -188,7 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
188 >> PAGE_SHIFT; 188 >> PAGE_SHIFT;
189 189
190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); 190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
191 if (pages == NULL) { 191 if (!pages) {
192 ret = -ENOMEM; 192 ret = -ENOMEM;
193 goto out; 193 goto out;
194 } 194 }
@@ -206,7 +206,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
206 206
207call_func: 207call_func:
208 func = rds_info_funcs[optname - RDS_INFO_FIRST]; 208 func = rds_info_funcs[optname - RDS_INFO_FIRST];
209 if (func == NULL) { 209 if (!func) {
210 ret = -ENOPROTOOPT; 210 ret = -ENOPROTOOPT;
211 goto out; 211 goto out;
212 } 212 }
@@ -234,7 +234,7 @@ call_func:
234 ret = -EFAULT; 234 ret = -EFAULT;
235 235
236out: 236out:
237 for (i = 0; pages != NULL && i < nr_pages; i++) 237 for (i = 0; pages && i < nr_pages; i++)
238 put_page(pages[i]); 238 put_page(pages[i]);
239 kfree(pages); 239 kfree(pages);
240 240
diff --git a/net/rds/iw.c b/net/rds/iw.c
index c8f3d3525cb9..56808cac0fc7 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -264,7 +264,6 @@ struct rds_transport rds_iw_transport = {
264 .laddr_check = rds_iw_laddr_check, 264 .laddr_check = rds_iw_laddr_check,
265 .xmit_complete = rds_iw_xmit_complete, 265 .xmit_complete = rds_iw_xmit_complete,
266 .xmit = rds_iw_xmit, 266 .xmit = rds_iw_xmit,
267 .xmit_cong_map = NULL,
268 .xmit_rdma = rds_iw_xmit_rdma, 267 .xmit_rdma = rds_iw_xmit_rdma,
269 .recv = rds_iw_recv, 268 .recv = rds_iw_recv,
270 .conn_alloc = rds_iw_conn_alloc, 269 .conn_alloc = rds_iw_conn_alloc,
@@ -272,7 +271,6 @@ struct rds_transport rds_iw_transport = {
272 .conn_connect = rds_iw_conn_connect, 271 .conn_connect = rds_iw_conn_connect,
273 .conn_shutdown = rds_iw_conn_shutdown, 272 .conn_shutdown = rds_iw_conn_shutdown,
274 .inc_copy_to_user = rds_iw_inc_copy_to_user, 273 .inc_copy_to_user = rds_iw_inc_copy_to_user,
275 .inc_purge = rds_iw_inc_purge,
276 .inc_free = rds_iw_inc_free, 274 .inc_free = rds_iw_inc_free,
277 .cm_initiate_connect = rds_iw_cm_initiate_connect, 275 .cm_initiate_connect = rds_iw_cm_initiate_connect,
278 .cm_handle_connect = rds_iw_cm_handle_connect, 276 .cm_handle_connect = rds_iw_cm_handle_connect,
@@ -289,7 +287,7 @@ struct rds_transport rds_iw_transport = {
289 .t_prefer_loopback = 1, 287 .t_prefer_loopback = 1,
290}; 288};
291 289
292int __init rds_iw_init(void) 290int rds_iw_init(void)
293{ 291{
294 int ret; 292 int ret;
295 293
diff --git a/net/rds/iw.h b/net/rds/iw.h
index eef2f0c28476..543e665fafe3 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -70,7 +70,7 @@ struct rds_iw_send_work {
70 struct rds_message *s_rm; 70 struct rds_message *s_rm;
71 71
72 /* We should really put these into a union: */ 72 /* We should really put these into a union: */
73 struct rds_rdma_op *s_op; 73 struct rm_rdma_op *s_op;
74 struct rds_iw_mapping *s_mapping; 74 struct rds_iw_mapping *s_mapping;
75 struct ib_mr *s_mr; 75 struct ib_mr *s_mr;
76 struct ib_fast_reg_page_list *s_page_list; 76 struct ib_fast_reg_page_list *s_page_list;
@@ -284,7 +284,7 @@ void rds_iw_conn_free(void *arg);
284int rds_iw_conn_connect(struct rds_connection *conn); 284int rds_iw_conn_connect(struct rds_connection *conn);
285void rds_iw_conn_shutdown(struct rds_connection *conn); 285void rds_iw_conn_shutdown(struct rds_connection *conn);
286void rds_iw_state_change(struct sock *sk); 286void rds_iw_state_change(struct sock *sk);
287int __init rds_iw_listen_init(void); 287int rds_iw_listen_init(void);
288void rds_iw_listen_stop(void); 288void rds_iw_listen_stop(void);
289void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...); 289void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
290int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, 290int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -321,12 +321,11 @@ void rds_iw_flush_mrs(void);
321void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); 321void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
322 322
323/* ib_recv.c */ 323/* ib_recv.c */
324int __init rds_iw_recv_init(void); 324int rds_iw_recv_init(void);
325void rds_iw_recv_exit(void); 325void rds_iw_recv_exit(void);
326int rds_iw_recv(struct rds_connection *conn); 326int rds_iw_recv(struct rds_connection *conn);
327int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 327int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
328 gfp_t page_gfp, int prefill); 328 gfp_t page_gfp, int prefill);
329void rds_iw_inc_purge(struct rds_incoming *inc);
330void rds_iw_inc_free(struct rds_incoming *inc); 329void rds_iw_inc_free(struct rds_incoming *inc);
331int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 330int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
332 size_t size); 331 size_t size);
@@ -358,7 +357,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
358void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context); 357void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
359void rds_iw_send_init_ring(struct rds_iw_connection *ic); 358void rds_iw_send_init_ring(struct rds_iw_connection *ic);
360void rds_iw_send_clear_ring(struct rds_iw_connection *ic); 359void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
361int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 360int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
362void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); 361void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
363void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); 362void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
364int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, 363int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
@@ -371,7 +370,7 @@ unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
371 unsigned int avail); 370 unsigned int avail);
372 371
373/* ib_sysctl.c */ 372/* ib_sysctl.c */
374int __init rds_iw_sysctl_init(void); 373int rds_iw_sysctl_init(void);
375void rds_iw_sysctl_exit(void); 374void rds_iw_sysctl_exit(void);
376extern unsigned long rds_iw_sysctl_max_send_wr; 375extern unsigned long rds_iw_sysctl_max_send_wr;
377extern unsigned long rds_iw_sysctl_max_recv_wr; 376extern unsigned long rds_iw_sysctl_max_recv_wr;
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index b5dd6ac39be8..712cf2d1f28e 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -257,7 +257,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
257 * the rds_iwdev at all. 257 * the rds_iwdev at all.
258 */ 258 */
259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client); 259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
260 if (rds_iwdev == NULL) { 260 if (!rds_iwdev) {
261 if (printk_ratelimit()) 261 if (printk_ratelimit())
262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", 262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
263 dev->name); 263 dev->name);
@@ -292,7 +292,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
292 ic->i_send_ring.w_nr * 292 ic->i_send_ring.w_nr *
293 sizeof(struct rds_header), 293 sizeof(struct rds_header),
294 &ic->i_send_hdrs_dma, GFP_KERNEL); 294 &ic->i_send_hdrs_dma, GFP_KERNEL);
295 if (ic->i_send_hdrs == NULL) { 295 if (!ic->i_send_hdrs) {
296 ret = -ENOMEM; 296 ret = -ENOMEM;
297 rdsdebug("ib_dma_alloc_coherent send failed\n"); 297 rdsdebug("ib_dma_alloc_coherent send failed\n");
298 goto out; 298 goto out;
@@ -302,7 +302,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
302 ic->i_recv_ring.w_nr * 302 ic->i_recv_ring.w_nr *
303 sizeof(struct rds_header), 303 sizeof(struct rds_header),
304 &ic->i_recv_hdrs_dma, GFP_KERNEL); 304 &ic->i_recv_hdrs_dma, GFP_KERNEL);
305 if (ic->i_recv_hdrs == NULL) { 305 if (!ic->i_recv_hdrs) {
306 ret = -ENOMEM; 306 ret = -ENOMEM;
307 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 307 rdsdebug("ib_dma_alloc_coherent recv failed\n");
308 goto out; 308 goto out;
@@ -310,14 +310,14 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
310 310
311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
312 &ic->i_ack_dma, GFP_KERNEL); 312 &ic->i_ack_dma, GFP_KERNEL);
313 if (ic->i_ack == NULL) { 313 if (!ic->i_ack) {
314 ret = -ENOMEM; 314 ret = -ENOMEM;
315 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 315 rdsdebug("ib_dma_alloc_coherent ack failed\n");
316 goto out; 316 goto out;
317 } 317 }
318 318
319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); 319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
320 if (ic->i_sends == NULL) { 320 if (!ic->i_sends) {
321 ret = -ENOMEM; 321 ret = -ENOMEM;
322 rdsdebug("send allocation failed\n"); 322 rdsdebug("send allocation failed\n");
323 goto out; 323 goto out;
@@ -325,7 +325,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
325 rds_iw_send_init_ring(ic); 325 rds_iw_send_init_ring(ic);
326 326
327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); 327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
328 if (ic->i_recvs == NULL) { 328 if (!ic->i_recvs) {
329 ret = -ENOMEM; 329 ret = -ENOMEM;
330 rdsdebug("recv allocation failed\n"); 330 rdsdebug("recv allocation failed\n");
331 goto out; 331 goto out;
@@ -696,7 +696,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
696 696
697 /* XXX too lazy? */ 697 /* XXX too lazy? */
698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); 698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
699 if (ic == NULL) 699 if (!ic)
700 return -ENOMEM; 700 return -ENOMEM;
701 701
702 INIT_LIST_HEAD(&ic->iw_node); 702 INIT_LIST_HEAD(&ic->iw_node);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 13dc1862d862..4e152e2daa3d 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -34,7 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38#include "iw.h" 37#include "iw.h"
39 38
40 39
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 3d479067d54d..5e57347f49ff 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -53,7 +53,7 @@ static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
53static void rds_iw_frag_free(struct rds_page_frag *frag) 53static void rds_iw_frag_free(struct rds_page_frag *frag)
54{ 54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page); 55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL); 56 BUG_ON(frag->f_page);
57 kmem_cache_free(rds_iw_frag_slab, frag); 57 kmem_cache_free(rds_iw_frag_slab, frag);
58} 58}
59 59
@@ -143,14 +143,14 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
143 struct ib_sge *sge; 143 struct ib_sge *sge;
144 int ret = -ENOMEM; 144 int ret = -ENOMEM;
145 145
146 if (recv->r_iwinc == NULL) { 146 if (!recv->r_iwinc) {
147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) { 147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) {
148 rds_iw_stats_inc(s_iw_rx_alloc_limit); 148 rds_iw_stats_inc(s_iw_rx_alloc_limit);
149 goto out; 149 goto out;
150 } 150 }
151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, 151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
152 kptr_gfp); 152 kptr_gfp);
153 if (recv->r_iwinc == NULL) { 153 if (!recv->r_iwinc) {
154 atomic_dec(&rds_iw_allocation); 154 atomic_dec(&rds_iw_allocation);
155 goto out; 155 goto out;
156 } 156 }
@@ -158,17 +158,17 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); 158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
159 } 159 }
160 160
161 if (recv->r_frag == NULL) { 161 if (!recv->r_frag) {
162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp); 162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
163 if (recv->r_frag == NULL) 163 if (!recv->r_frag)
164 goto out; 164 goto out;
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 165 INIT_LIST_HEAD(&recv->r_frag->f_item);
166 recv->r_frag->f_page = NULL; 166 recv->r_frag->f_page = NULL;
167 } 167 }
168 168
169 if (ic->i_frag.f_page == NULL) { 169 if (!ic->i_frag.f_page) {
170 ic->i_frag.f_page = alloc_page(page_gfp); 170 ic->i_frag.f_page = alloc_page(page_gfp);
171 if (ic->i_frag.f_page == NULL) 171 if (!ic->i_frag.f_page)
172 goto out; 172 goto out;
173 ic->i_frag.f_offset = 0; 173 ic->i_frag.f_offset = 0;
174 } 174 }
@@ -273,7 +273,7 @@ int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
273 return ret; 273 return ret;
274} 274}
275 275
276void rds_iw_inc_purge(struct rds_incoming *inc) 276static void rds_iw_inc_purge(struct rds_incoming *inc)
277{ 277{
278 struct rds_iw_incoming *iwinc; 278 struct rds_iw_incoming *iwinc;
279 struct rds_page_frag *frag; 279 struct rds_page_frag *frag;
@@ -716,7 +716,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
716 * into the inc and save the inc so we can hang upcoming fragments 716 * into the inc and save the inc so we can hang upcoming fragments
717 * off its list. 717 * off its list.
718 */ 718 */
719 if (iwinc == NULL) { 719 if (!iwinc) {
720 iwinc = recv->r_iwinc; 720 iwinc = recv->r_iwinc;
721 recv->r_iwinc = NULL; 721 recv->r_iwinc = NULL;
722 ic->i_iwinc = iwinc; 722 ic->i_iwinc = iwinc;
@@ -887,7 +887,7 @@ int rds_iw_recv(struct rds_connection *conn)
887 return ret; 887 return ret;
888} 888}
889 889
890int __init rds_iw_recv_init(void) 890int rds_iw_recv_init(void)
891{ 891{
892 struct sysinfo si; 892 struct sysinfo si;
893 int ret = -ENOMEM; 893 int ret = -ENOMEM;
@@ -899,13 +899,13 @@ int __init rds_iw_recv_init(void)
899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming", 899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
900 sizeof(struct rds_iw_incoming), 900 sizeof(struct rds_iw_incoming),
901 0, 0, NULL); 901 0, 0, NULL);
902 if (rds_iw_incoming_slab == NULL) 902 if (!rds_iw_incoming_slab)
903 goto out; 903 goto out;
904 904
905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag", 905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
906 sizeof(struct rds_page_frag), 906 sizeof(struct rds_page_frag),
907 0, 0, NULL); 907 0, 0, NULL);
908 if (rds_iw_frag_slab == NULL) 908 if (!rds_iw_frag_slab)
909 kmem_cache_destroy(rds_iw_incoming_slab); 909 kmem_cache_destroy(rds_iw_incoming_slab);
910 else 910 else
911 ret = 0; 911 ret = 0;
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 52182ff7519e..6280ea020d4e 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -36,7 +36,6 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "iw.h" 39#include "iw.h"
41 40
42static void rds_iw_send_rdma_complete(struct rds_message *rm, 41static void rds_iw_send_rdma_complete(struct rds_message *rm,
@@ -64,13 +63,13 @@ static void rds_iw_send_rdma_complete(struct rds_message *rm,
64} 63}
65 64
66static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic, 65static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
67 struct rds_rdma_op *op) 66 struct rm_rdma_op *op)
68{ 67{
69 if (op->r_mapped) { 68 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 69 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 70 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 71 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 72 op->op_mapped = 0;
74 } 73 }
75} 74}
76 75
@@ -83,11 +82,11 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 82 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84 83
85 ib_dma_unmap_sg(ic->i_cm_id->device, 84 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->m_sg, rm->m_nents, 85 rm->data.op_sg, rm->data.op_nents,
87 DMA_TO_DEVICE); 86 DMA_TO_DEVICE);
88 87
89 if (rm->m_rdma_op != NULL) { 88 if (rm->rdma.op_active) {
90 rds_iw_send_unmap_rdma(ic, rm->m_rdma_op); 89 rds_iw_send_unmap_rdma(ic, &rm->rdma);
91 90
92 /* If the user asked for a completion notification on this 91 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics: 92 * message, we can implement three different semantics:
@@ -111,10 +110,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
111 */ 110 */
112 rds_iw_send_rdma_complete(rm, wc_status); 111 rds_iw_send_rdma_complete(rm, wc_status);
113 112
114 if (rm->m_rdma_op->r_write) 113 if (rm->rdma.op_write)
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 114 rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
116 else 115 else
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 116 rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
118 } 117 }
119 118
120 /* If anyone waited for this message to get flushed out, wake 119 /* If anyone waited for this message to get flushed out, wake
@@ -556,25 +555,27 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
556 } 555 }
557 556
558 /* map the message the first time we see it */ 557 /* map the message the first time we see it */
559 if (ic->i_rm == NULL) { 558 if (!ic->i_rm) {
560 /* 559 /*
561 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n", 560 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
562 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 561 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
563 rm->m_inc.i_hdr.h_flags, 562 rm->m_inc.i_hdr.h_flags,
564 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 563 be32_to_cpu(rm->m_inc.i_hdr.h_len));
565 */ 564 */
566 if (rm->m_nents) { 565 if (rm->data.op_nents) {
567 rm->m_count = ib_dma_map_sg(dev, 566 rm->data.op_count = ib_dma_map_sg(dev,
568 rm->m_sg, rm->m_nents, DMA_TO_DEVICE); 567 rm->data.op_sg,
569 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); 568 rm->data.op_nents,
570 if (rm->m_count == 0) { 569 DMA_TO_DEVICE);
570 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
571 if (rm->data.op_count == 0) {
571 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); 572 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
572 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); 573 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
573 ret = -ENOMEM; /* XXX ? */ 574 ret = -ENOMEM; /* XXX ? */
574 goto out; 575 goto out;
575 } 576 }
576 } else { 577 } else {
577 rm->m_count = 0; 578 rm->data.op_count = 0;
578 } 579 }
579 580
580 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; 581 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
@@ -590,10 +591,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
590 591
591 /* If it has a RDMA op, tell the peer we did it. This is 592 /* If it has a RDMA op, tell the peer we did it. This is
592 * used by the peer to release use-once RDMA MRs. */ 593 * used by the peer to release use-once RDMA MRs. */
593 if (rm->m_rdma_op) { 594 if (rm->rdma.op_active) {
594 struct rds_ext_header_rdma ext_hdr; 595 struct rds_ext_header_rdma ext_hdr;
595 596
596 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 597 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
597 rds_message_add_extension(&rm->m_inc.i_hdr, 598 rds_message_add_extension(&rm->m_inc.i_hdr,
598 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 599 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
599 } 600 }
@@ -621,7 +622,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
621 send = &ic->i_sends[pos]; 622 send = &ic->i_sends[pos];
622 first = send; 623 first = send;
623 prev = NULL; 624 prev = NULL;
624 scat = &rm->m_sg[sg]; 625 scat = &rm->data.op_sg[sg];
625 sent = 0; 626 sent = 0;
626 i = 0; 627 i = 0;
627 628
@@ -631,7 +632,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
631 * or when requested by the user. Right now, we let 632 * or when requested by the user. Right now, we let
632 * the application choose. 633 * the application choose.
633 */ 634 */
634 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 635 if (rm->rdma.op_active && rm->rdma.op_fence)
635 send_flags = IB_SEND_FENCE; 636 send_flags = IB_SEND_FENCE;
636 637
637 /* 638 /*
@@ -650,7 +651,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
650 } 651 }
651 652
652 /* if there's data reference it with a chain of work reqs */ 653 /* if there's data reference it with a chain of work reqs */
653 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 654 for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
654 unsigned int len; 655 unsigned int len;
655 656
656 send = &ic->i_sends[pos]; 657 send = &ic->i_sends[pos];
@@ -728,7 +729,7 @@ add_header:
728 sent += sizeof(struct rds_header); 729 sent += sizeof(struct rds_header);
729 730
730 /* if we finished the message then send completion owns it */ 731 /* if we finished the message then send completion owns it */
731 if (scat == &rm->m_sg[rm->m_count]) { 732 if (scat == &rm->data.op_sg[rm->data.op_count]) {
732 prev->s_rm = ic->i_rm; 733 prev->s_rm = ic->i_rm;
733 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 734 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
734 ic->i_rm = NULL; 735 ic->i_rm = NULL;
@@ -784,7 +785,7 @@ static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rd
784 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); 785 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
785} 786}
786 787
787int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 788int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
788{ 789{
789 struct rds_iw_connection *ic = conn->c_transport_data; 790 struct rds_iw_connection *ic = conn->c_transport_data;
790 struct rds_iw_send_work *send = NULL; 791 struct rds_iw_send_work *send = NULL;
@@ -794,7 +795,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
794 struct rds_iw_device *rds_iwdev; 795 struct rds_iw_device *rds_iwdev;
795 struct scatterlist *scat; 796 struct scatterlist *scat;
796 unsigned long len; 797 unsigned long len;
797 u64 remote_addr = op->r_remote_addr; 798 u64 remote_addr = op->op_remote_addr;
798 u32 pos, fr_pos; 799 u32 pos, fr_pos;
799 u32 work_alloc; 800 u32 work_alloc;
800 u32 i; 801 u32 i;
@@ -806,21 +807,21 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
806 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); 807 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
807 808
808 /* map the message the first time we see it */ 809 /* map the message the first time we see it */
809 if (!op->r_mapped) { 810 if (!op->op_mapped) {
810 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 811 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
811 op->r_sg, op->r_nents, (op->r_write) ? 812 op->op_sg, op->op_nents, (op->op_write) ?
812 DMA_TO_DEVICE : DMA_FROM_DEVICE); 813 DMA_TO_DEVICE : DMA_FROM_DEVICE);
813 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 814 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
814 if (op->r_count == 0) { 815 if (op->op_count == 0) {
815 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); 816 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
816 ret = -ENOMEM; /* XXX ? */ 817 ret = -ENOMEM; /* XXX ? */
817 goto out; 818 goto out;
818 } 819 }
819 820
820 op->r_mapped = 1; 821 op->op_mapped = 1;
821 } 822 }
822 823
823 if (!op->r_write) { 824 if (!op->op_write) {
824 /* Alloc space on the send queue for the fastreg */ 825 /* Alloc space on the send queue for the fastreg */
825 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos); 826 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
826 if (work_alloc != 1) { 827 if (work_alloc != 1) {
@@ -835,7 +836,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
835 * Instead of knowing how to return a partial rdma read/write we insist that there 836 * Instead of knowing how to return a partial rdma read/write we insist that there
836 * be enough work requests to send the entire message. 837 * be enough work requests to send the entire message.
837 */ 838 */
838 i = ceil(op->r_count, rds_iwdev->max_sge); 839 i = ceil(op->op_count, rds_iwdev->max_sge);
839 840
840 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos); 841 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
841 if (work_alloc != i) { 842 if (work_alloc != i) {
@@ -846,17 +847,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
846 } 847 }
847 848
848 send = &ic->i_sends[pos]; 849 send = &ic->i_sends[pos];
849 if (!op->r_write) { 850 if (!op->op_write) {
850 first = prev = &ic->i_sends[fr_pos]; 851 first = prev = &ic->i_sends[fr_pos];
851 } else { 852 } else {
852 first = send; 853 first = send;
853 prev = NULL; 854 prev = NULL;
854 } 855 }
855 scat = &op->r_sg[0]; 856 scat = &op->op_sg[0];
856 sent = 0; 857 sent = 0;
857 num_sge = op->r_count; 858 num_sge = op->op_count;
858 859
859 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 860 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
860 send->s_wr.send_flags = 0; 861 send->s_wr.send_flags = 0;
861 send->s_queued = jiffies; 862 send->s_queued = jiffies;
862 863
@@ -873,13 +874,13 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
873 * for local access after RDS is finished with it, using 874 * for local access after RDS is finished with it, using
874 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. 875 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
875 */ 876 */
876 if (op->r_write) 877 if (op->op_write)
877 send->s_wr.opcode = IB_WR_RDMA_WRITE; 878 send->s_wr.opcode = IB_WR_RDMA_WRITE;
878 else 879 else
879 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; 880 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
880 881
881 send->s_wr.wr.rdma.remote_addr = remote_addr; 882 send->s_wr.wr.rdma.remote_addr = remote_addr;
882 send->s_wr.wr.rdma.rkey = op->r_key; 883 send->s_wr.wr.rdma.rkey = op->op_rkey;
883 send->s_op = op; 884 send->s_op = op;
884 885
885 if (num_sge > rds_iwdev->max_sge) { 886 if (num_sge > rds_iwdev->max_sge) {
@@ -893,7 +894,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
893 if (prev) 894 if (prev)
894 prev->s_wr.next = &send->s_wr; 895 prev->s_wr.next = &send->s_wr;
895 896
896 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 897 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
897 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 898 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
898 899
899 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) 900 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
@@ -927,7 +928,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
927 } 928 }
928 929
929 /* if we finished the message then send completion owns it */ 930 /* if we finished the message then send completion owns it */
930 if (scat == &op->r_sg[op->r_count]) 931 if (scat == &op->op_sg[op->op_count])
931 first->s_wr.send_flags = IB_SEND_SIGNALED; 932 first->s_wr.send_flags = IB_SEND_SIGNALED;
932 933
933 if (i < work_alloc) { 934 if (i < work_alloc) {
@@ -941,9 +942,9 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
941 * adapters do not allow using the lkey for this at all. To bypass this use a 942 * adapters do not allow using the lkey for this at all. To bypass this use a
942 * fastreg_mr (or possibly a dma_mr) 943 * fastreg_mr (or possibly a dma_mr)
943 */ 944 */
944 if (!op->r_write) { 945 if (!op->op_write) {
945 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], 946 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
946 op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); 947 op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
947 work_alloc++; 948 work_alloc++;
948 } 949 }
949 950
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 1c4428a61a02..23e3a9a26aaf 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c
@@ -122,10 +122,10 @@ void rds_iw_sysctl_exit(void)
122 unregister_sysctl_table(rds_iw_sysctl_hdr); 122 unregister_sysctl_table(rds_iw_sysctl_hdr);
123} 123}
124 124
125int __init rds_iw_sysctl_init(void) 125int rds_iw_sysctl_init(void)
126{ 126{
127 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table); 127 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table);
128 if (rds_iw_sysctl_hdr == NULL) 128 if (!rds_iw_sysctl_hdr)
129 return -ENOMEM; 129 return -ENOMEM;
130 return 0; 130 return 0;
131} 131}
diff --git a/net/rds/loop.c b/net/rds/loop.c
index dd9879379457..c390156b426f 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,17 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
68 }
69
64 BUG_ON(hdr_off || sg || off); 70 BUG_ON(hdr_off || sg || off);
65 71
66 rds_inc_init(&rm->m_inc, conn, conn->c_laddr); 72 rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
67 rds_message_addref(rm); /* for the inc */ 73 /* For the embedded inc. Matching put is in loop_inc_free() */
74 rds_message_addref(rm);
68 75
69 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc, 76 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
70 GFP_KERNEL, KM_USER0); 77 GFP_KERNEL, KM_USER0);
@@ -77,16 +84,14 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
77 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
78} 85}
79 86
80static int rds_loop_xmit_cong_map(struct rds_connection *conn, 87/*
81 struct rds_cong_map *map, 88 * See rds_loop_xmit(). Since our inc is embedded in the rm, we
82 unsigned long offset) 89 * make sure the rm lives at least until the inc is done.
90 */
91static void rds_loop_inc_free(struct rds_incoming *inc)
83{ 92{
84 BUG_ON(offset); 93 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
85 BUG_ON(map != conn->c_lcong); 94 rds_message_put(rm);
86
87 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
88
89 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
90} 95}
91 96
92/* we need to at least give the thread something to succeed */ 97/* we need to at least give the thread something to succeed */
@@ -112,7 +117,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
112 unsigned long flags; 117 unsigned long flags;
113 118
114 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); 119 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
115 if (lc == NULL) 120 if (!lc)
116 return -ENOMEM; 121 return -ENOMEM;
117 122
118 INIT_LIST_HEAD(&lc->loop_node); 123 INIT_LIST_HEAD(&lc->loop_node);
@@ -169,14 +174,12 @@ void rds_loop_exit(void)
169 */ 174 */
170struct rds_transport rds_loop_transport = { 175struct rds_transport rds_loop_transport = {
171 .xmit = rds_loop_xmit, 176 .xmit = rds_loop_xmit,
172 .xmit_cong_map = rds_loop_xmit_cong_map,
173 .recv = rds_loop_recv, 177 .recv = rds_loop_recv,
174 .conn_alloc = rds_loop_conn_alloc, 178 .conn_alloc = rds_loop_conn_alloc,
175 .conn_free = rds_loop_conn_free, 179 .conn_free = rds_loop_conn_free,
176 .conn_connect = rds_loop_conn_connect, 180 .conn_connect = rds_loop_conn_connect,
177 .conn_shutdown = rds_loop_conn_shutdown, 181 .conn_shutdown = rds_loop_conn_shutdown,
178 .inc_copy_to_user = rds_message_inc_copy_to_user, 182 .inc_copy_to_user = rds_message_inc_copy_to_user,
179 .inc_purge = rds_message_inc_purge, 183 .inc_free = rds_loop_inc_free,
180 .inc_free = rds_message_inc_free,
181 .t_name = "loopback", 184 .t_name = "loopback",
182}; 185};
diff --git a/net/rds/message.c b/net/rds/message.c
index 9a1d67e001ba..84f937f11d47 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -34,9 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38
39static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq);
40 37
41static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { 38static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
42[RDS_EXTHDR_NONE] = 0, 39[RDS_EXTHDR_NONE] = 0,
@@ -63,29 +60,31 @@ static void rds_message_purge(struct rds_message *rm)
63 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) 60 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
64 return; 61 return;
65 62
66 for (i = 0; i < rm->m_nents; i++) { 63 for (i = 0; i < rm->data.op_nents; i++) {
67 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i])); 64 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
68 /* XXX will have to put_page for page refs */ 65 /* XXX will have to put_page for page refs */
69 __free_page(sg_page(&rm->m_sg[i])); 66 __free_page(sg_page(&rm->data.op_sg[i]));
70 } 67 }
71 rm->m_nents = 0; 68 rm->data.op_nents = 0;
72 69
73 if (rm->m_rdma_op) 70 if (rm->rdma.op_active)
74 rds_rdma_free_op(rm->m_rdma_op); 71 rds_rdma_free_op(&rm->rdma);
75 if (rm->m_rdma_mr) 72 if (rm->rdma.op_rdma_mr)
76 rds_mr_put(rm->m_rdma_mr); 73 rds_mr_put(rm->rdma.op_rdma_mr);
77}
78 74
79void rds_message_inc_purge(struct rds_incoming *inc) 75 if (rm->atomic.op_active)
80{ 76 rds_atomic_free_op(&rm->atomic);
81 struct rds_message *rm = container_of(inc, struct rds_message, m_inc); 77 if (rm->atomic.op_rdma_mr)
82 rds_message_purge(rm); 78 rds_mr_put(rm->atomic.op_rdma_mr);
83} 79}
84 80
85void rds_message_put(struct rds_message *rm) 81void rds_message_put(struct rds_message *rm)
86{ 82{
87 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); 83 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
88 84 if (atomic_read(&rm->m_refcount) == 0) {
85printk(KERN_CRIT "danger refcount zero on %p\n", rm);
86WARN_ON(1);
87 }
89 if (atomic_dec_and_test(&rm->m_refcount)) { 88 if (atomic_dec_and_test(&rm->m_refcount)) {
90 BUG_ON(!list_empty(&rm->m_sock_item)); 89 BUG_ON(!list_empty(&rm->m_sock_item));
91 BUG_ON(!list_empty(&rm->m_conn_item)); 90 BUG_ON(!list_empty(&rm->m_conn_item));
@@ -96,12 +95,6 @@ void rds_message_put(struct rds_message *rm)
96} 95}
97EXPORT_SYMBOL_GPL(rds_message_put); 96EXPORT_SYMBOL_GPL(rds_message_put);
98 97
99void rds_message_inc_free(struct rds_incoming *inc)
100{
101 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
102 rds_message_put(rm);
103}
104
105void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 98void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
106 __be16 dport, u64 seq) 99 __be16 dport, u64 seq)
107{ 100{
@@ -214,41 +207,68 @@ int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 o
214} 207}
215EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension); 208EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);
216 209
217struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp) 210/*
211 * Each rds_message is allocated with extra space for the scatterlist entries
212 * rds ops will need. This is to minimize memory allocation count. Then, each rds op
213 * can grab SGs when initializing its part of the rds_message.
214 */
215struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
218{ 216{
219 struct rds_message *rm; 217 struct rds_message *rm;
220 218
221 rm = kzalloc(sizeof(struct rds_message) + 219 rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp);
222 (nents * sizeof(struct scatterlist)), gfp);
223 if (!rm) 220 if (!rm)
224 goto out; 221 goto out;
225 222
226 if (nents) 223 rm->m_used_sgs = 0;
227 sg_init_table(rm->m_sg, nents); 224 rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
225
228 atomic_set(&rm->m_refcount, 1); 226 atomic_set(&rm->m_refcount, 1);
229 INIT_LIST_HEAD(&rm->m_sock_item); 227 INIT_LIST_HEAD(&rm->m_sock_item);
230 INIT_LIST_HEAD(&rm->m_conn_item); 228 INIT_LIST_HEAD(&rm->m_conn_item);
231 spin_lock_init(&rm->m_rs_lock); 229 spin_lock_init(&rm->m_rs_lock);
230 init_waitqueue_head(&rm->m_flush_wait);
232 231
233out: 232out:
234 return rm; 233 return rm;
235} 234}
236 235
236/*
237 * RDS ops use this to grab SG entries from the rm's sg pool.
238 */
239struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
240{
241 struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
242 struct scatterlist *sg_ret;
243
244 WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
245 WARN_ON(!nents);
246
247 sg_ret = &sg_first[rm->m_used_sgs];
248 sg_init_table(sg_ret, nents);
249 rm->m_used_sgs += nents;
250
251 return sg_ret;
252}
253
237struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) 254struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
238{ 255{
239 struct rds_message *rm; 256 struct rds_message *rm;
240 unsigned int i; 257 unsigned int i;
258 int num_sgs = ceil(total_len, PAGE_SIZE);
259 int extra_bytes = num_sgs * sizeof(struct scatterlist);
241 260
242 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL); 261 rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
243 if (rm == NULL) 262 if (!rm)
244 return ERR_PTR(-ENOMEM); 263 return ERR_PTR(-ENOMEM);
245 264
246 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); 265 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
247 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 266 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
248 rm->m_nents = ceil(total_len, PAGE_SIZE); 267 rm->data.op_nents = ceil(total_len, PAGE_SIZE);
268 rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
249 269
250 for (i = 0; i < rm->m_nents; ++i) { 270 for (i = 0; i < rm->data.op_nents; ++i) {
251 sg_set_page(&rm->m_sg[i], 271 sg_set_page(&rm->data.op_sg[i],
252 virt_to_page(page_addrs[i]), 272 virt_to_page(page_addrs[i]),
253 PAGE_SIZE, 0); 273 PAGE_SIZE, 0);
254 } 274 }
@@ -256,40 +276,33 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
256 return rm; 276 return rm;
257} 277}
258 278
259struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 279int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
260 size_t total_len) 280 size_t total_len)
261{ 281{
262 unsigned long to_copy; 282 unsigned long to_copy;
263 unsigned long iov_off; 283 unsigned long iov_off;
264 unsigned long sg_off; 284 unsigned long sg_off;
265 struct rds_message *rm;
266 struct iovec *iov; 285 struct iovec *iov;
267 struct scatterlist *sg; 286 struct scatterlist *sg;
268 int ret; 287 int ret = 0;
269
270 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
271 if (rm == NULL) {
272 ret = -ENOMEM;
273 goto out;
274 }
275 288
276 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 289 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
277 290
278 /* 291 /*
279 * now allocate and copy in the data payload. 292 * now allocate and copy in the data payload.
280 */ 293 */
281 sg = rm->m_sg; 294 sg = rm->data.op_sg;
282 iov = first_iov; 295 iov = first_iov;
283 iov_off = 0; 296 iov_off = 0;
284 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ 297 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
285 298
286 while (total_len) { 299 while (total_len) {
287 if (sg_page(sg) == NULL) { 300 if (!sg_page(sg)) {
288 ret = rds_page_remainder_alloc(sg, total_len, 301 ret = rds_page_remainder_alloc(sg, total_len,
289 GFP_HIGHUSER); 302 GFP_HIGHUSER);
290 if (ret) 303 if (ret)
291 goto out; 304 goto out;
292 rm->m_nents++; 305 rm->data.op_nents++;
293 sg_off = 0; 306 sg_off = 0;
294 } 307 }
295 308
@@ -320,14 +333,8 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
320 sg++; 333 sg++;
321 } 334 }
322 335
323 ret = 0;
324out: 336out:
325 if (ret) { 337 return ret;
326 if (rm)
327 rds_message_put(rm);
328 rm = ERR_PTR(ret);
329 }
330 return rm;
331} 338}
332 339
333int rds_message_inc_copy_to_user(struct rds_incoming *inc, 340int rds_message_inc_copy_to_user(struct rds_incoming *inc,
@@ -348,7 +355,7 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
348 355
349 iov = first_iov; 356 iov = first_iov;
350 iov_off = 0; 357 iov_off = 0;
351 sg = rm->m_sg; 358 sg = rm->data.op_sg;
352 vec_off = 0; 359 vec_off = 0;
353 copied = 0; 360 copied = 0;
354 361
@@ -394,15 +401,14 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
394 */ 401 */
395void rds_message_wait(struct rds_message *rm) 402void rds_message_wait(struct rds_message *rm)
396{ 403{
397 wait_event(rds_message_flush_waitq, 404 wait_event_interruptible(rm->m_flush_wait,
398 !test_bit(RDS_MSG_MAPPED, &rm->m_flags)); 405 !test_bit(RDS_MSG_MAPPED, &rm->m_flags));
399} 406}
400 407
401void rds_message_unmapped(struct rds_message *rm) 408void rds_message_unmapped(struct rds_message *rm)
402{ 409{
403 clear_bit(RDS_MSG_MAPPED, &rm->m_flags); 410 clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
404 if (waitqueue_active(&rds_message_flush_waitq)) 411 wake_up_interruptible(&rm->m_flush_wait);
405 wake_up(&rds_message_flush_waitq);
406} 412}
407EXPORT_SYMBOL_GPL(rds_message_unmapped); 413EXPORT_SYMBOL_GPL(rds_message_unmapped);
408 414
diff --git a/net/rds/page.c b/net/rds/page.c
index 595a952d4b17..5e44f5ae7898 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -116,7 +116,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
116 /* jump straight to allocation if we're trying for a huge page */ 116 /* jump straight to allocation if we're trying for a huge page */
117 if (bytes >= PAGE_SIZE) { 117 if (bytes >= PAGE_SIZE) {
118 page = alloc_page(gfp); 118 page = alloc_page(gfp);
119 if (page == NULL) { 119 if (!page) {
120 ret = -ENOMEM; 120 ret = -ENOMEM;
121 } else { 121 } else {
122 sg_set_page(scat, page, PAGE_SIZE, 0); 122 sg_set_page(scat, page, PAGE_SIZE, 0);
@@ -162,7 +162,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
162 rem = &per_cpu(rds_page_remainders, get_cpu()); 162 rem = &per_cpu(rds_page_remainders, get_cpu());
163 local_irq_save(flags); 163 local_irq_save(flags);
164 164
165 if (page == NULL) { 165 if (!page) {
166 ret = -ENOMEM; 166 ret = -ENOMEM;
167 break; 167 break;
168 } 168 }
@@ -186,6 +186,7 @@ out:
186 ret ? 0 : scat->length); 186 ret ? 0 : scat->length);
187 return ret; 187 return ret;
188} 188}
189EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
189 190
190static int rds_page_remainder_cpu_notify(struct notifier_block *self, 191static int rds_page_remainder_cpu_notify(struct notifier_block *self,
191 unsigned long action, void *hcpu) 192 unsigned long action, void *hcpu)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 75fd13bb631b..48064673fc76 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -35,7 +35,7 @@
35#include <linux/rbtree.h> 35#include <linux/rbtree.h>
36#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */ 36#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
37 37
38#include "rdma.h" 38#include "rds.h"
39 39
40/* 40/*
41 * XXX 41 * XXX
@@ -130,14 +130,22 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
130{ 130{
131 struct rds_mr *mr; 131 struct rds_mr *mr;
132 struct rb_node *node; 132 struct rb_node *node;
133 unsigned long flags;
133 134
134 /* Release any MRs associated with this socket */ 135 /* Release any MRs associated with this socket */
136 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
135 while ((node = rb_first(&rs->rs_rdma_keys))) { 137 while ((node = rb_first(&rs->rs_rdma_keys))) {
136 mr = container_of(node, struct rds_mr, r_rb_node); 138 mr = container_of(node, struct rds_mr, r_rb_node);
137 if (mr->r_trans == rs->rs_transport) 139 if (mr->r_trans == rs->rs_transport)
138 mr->r_invalidate = 0; 140 mr->r_invalidate = 0;
141 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
142 RB_CLEAR_NODE(&mr->r_rb_node);
143 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
144 rds_destroy_mr(mr);
139 rds_mr_put(mr); 145 rds_mr_put(mr);
146 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
140 } 147 }
148 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
141 149
142 if (rs->rs_transport && rs->rs_transport->flush_mrs) 150 if (rs->rs_transport && rs->rs_transport->flush_mrs)
143 rs->rs_transport->flush_mrs(); 151 rs->rs_transport->flush_mrs();
@@ -181,7 +189,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
181 goto out; 189 goto out;
182 } 190 }
183 191
184 if (rs->rs_transport->get_mr == NULL) { 192 if (!rs->rs_transport->get_mr) {
185 ret = -EOPNOTSUPP; 193 ret = -EOPNOTSUPP;
186 goto out; 194 goto out;
187 } 195 }
@@ -197,13 +205,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
197 205
198 /* XXX clamp nr_pages to limit the size of this alloc? */ 206 /* XXX clamp nr_pages to limit the size of this alloc? */
199 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 207 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
200 if (pages == NULL) { 208 if (!pages) {
201 ret = -ENOMEM; 209 ret = -ENOMEM;
202 goto out; 210 goto out;
203 } 211 }
204 212
205 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL); 213 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
206 if (mr == NULL) { 214 if (!mr) {
207 ret = -ENOMEM; 215 ret = -ENOMEM;
208 goto out; 216 goto out;
209 } 217 }
@@ -230,13 +238,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
230 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to 238 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
231 * the zero page. 239 * the zero page.
232 */ 240 */
233 ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1); 241 ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
234 if (ret < 0) 242 if (ret < 0)
235 goto out; 243 goto out;
236 244
237 nents = ret; 245 nents = ret;
238 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL); 246 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
239 if (sg == NULL) { 247 if (!sg) {
240 ret = -ENOMEM; 248 ret = -ENOMEM;
241 goto out; 249 goto out;
242 } 250 }
@@ -406,68 +414,127 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
406 414
407 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 415 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
408 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 416 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
409 if (mr && (mr->r_use_once || force)) { 417 if (!mr) {
418 printk(KERN_ERR "rds: trying to unuse MR with unknown r_key %u!\n", r_key);
419 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
420 return;
421 }
422
423 if (mr->r_use_once || force) {
410 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); 424 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
411 RB_CLEAR_NODE(&mr->r_rb_node); 425 RB_CLEAR_NODE(&mr->r_rb_node);
412 zot_me = 1; 426 zot_me = 1;
413 } else if (mr) 427 }
414 atomic_inc(&mr->r_refcount);
415 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); 428 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
416 429
417 /* May have to issue a dma_sync on this memory region. 430 /* May have to issue a dma_sync on this memory region.
418 * Note we could avoid this if the operation was a RDMA READ, 431 * Note we could avoid this if the operation was a RDMA READ,
419 * but at this point we can't tell. */ 432 * but at this point we can't tell. */
420 if (mr != NULL) { 433 if (mr->r_trans->sync_mr)
421 if (mr->r_trans->sync_mr) 434 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
422 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); 435
423 436 /* If the MR was marked as invalidate, this will
424 /* If the MR was marked as invalidate, this will 437 * trigger an async flush. */
425 * trigger an async flush. */ 438 if (zot_me)
426 if (zot_me) 439 rds_destroy_mr(mr);
427 rds_destroy_mr(mr); 440 rds_mr_put(mr);
428 rds_mr_put(mr);
429 }
430} 441}
431 442
432void rds_rdma_free_op(struct rds_rdma_op *ro) 443void rds_rdma_free_op(struct rm_rdma_op *ro)
433{ 444{
434 unsigned int i; 445 unsigned int i;
435 446
436 for (i = 0; i < ro->r_nents; i++) { 447 for (i = 0; i < ro->op_nents; i++) {
437 struct page *page = sg_page(&ro->r_sg[i]); 448 struct page *page = sg_page(&ro->op_sg[i]);
438 449
439 /* Mark page dirty if it was possibly modified, which 450 /* Mark page dirty if it was possibly modified, which
440 * is the case for a RDMA_READ which copies from remote 451 * is the case for a RDMA_READ which copies from remote
441 * to local memory */ 452 * to local memory */
442 if (!ro->r_write) { 453 if (!ro->op_write) {
443 BUG_ON(in_interrupt()); 454 BUG_ON(irqs_disabled());
444 set_page_dirty(page); 455 set_page_dirty(page);
445 } 456 }
446 put_page(page); 457 put_page(page);
447 } 458 }
448 459
449 kfree(ro->r_notifier); 460 kfree(ro->op_notifier);
450 kfree(ro); 461 ro->op_notifier = NULL;
462 ro->op_active = 0;
463}
464
465void rds_atomic_free_op(struct rm_atomic_op *ao)
466{
467 struct page *page = sg_page(ao->op_sg);
468
469 /* Mark page dirty if it was possibly modified, which
470 * is the case for a RDMA_READ which copies from remote
471 * to local memory */
472 set_page_dirty(page);
473 put_page(page);
474
475 kfree(ao->op_notifier);
476 ao->op_notifier = NULL;
477 ao->op_active = 0;
478}
479
480
481/*
482 * Count the number of pages needed to describe an incoming iovec.
483 */
484static int rds_rdma_pages(struct rds_rdma_args *args)
485{
486 struct rds_iovec vec;
487 struct rds_iovec __user *local_vec;
488 unsigned int tot_pages = 0;
489 unsigned int nr_pages;
490 unsigned int i;
491
492 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
493
494 /* figure out the number of pages in the vector */
495 for (i = 0; i < args->nr_local; i++) {
496 if (copy_from_user(&vec, &local_vec[i],
497 sizeof(struct rds_iovec)))
498 return -EFAULT;
499
500 nr_pages = rds_pages_in_vec(&vec);
501 if (nr_pages == 0)
502 return -EINVAL;
503
504 tot_pages += nr_pages;
505 }
506
507 return tot_pages;
508}
509
510int rds_rdma_extra_size(struct rds_rdma_args *args)
511{
512 return rds_rdma_pages(args) * sizeof(struct scatterlist);
451} 513}
452 514
453/* 515/*
454 * args is a pointer to an in-kernel copy in the sendmsg cmsg. 516 * The application asks for a RDMA transfer.
517 * Extract all arguments and set up the rdma_op
455 */ 518 */
456static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, 519int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
457 struct rds_rdma_args *args) 520 struct cmsghdr *cmsg)
458{ 521{
522 struct rds_rdma_args *args;
459 struct rds_iovec vec; 523 struct rds_iovec vec;
460 struct rds_rdma_op *op = NULL; 524 struct rm_rdma_op *op = &rm->rdma;
461 unsigned int nr_pages; 525 unsigned int nr_pages;
462 unsigned int max_pages;
463 unsigned int nr_bytes; 526 unsigned int nr_bytes;
464 struct page **pages = NULL; 527 struct page **pages = NULL;
465 struct rds_iovec __user *local_vec; 528 struct rds_iovec __user *local_vec;
466 struct scatterlist *sg;
467 unsigned int nr; 529 unsigned int nr;
468 unsigned int i, j; 530 unsigned int i, j;
469 int ret; 531 int ret = 0;
532
533 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
534 || rm->rdma.op_active)
535 return -EINVAL;
470 536
537 args = CMSG_DATA(cmsg);
471 538
472 if (rs->rs_bound_addr == 0) { 539 if (rs->rs_bound_addr == 0) {
473 ret = -ENOTCONN; /* XXX not a great errno */ 540 ret = -ENOTCONN; /* XXX not a great errno */
@@ -479,61 +546,38 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
479 goto out; 546 goto out;
480 } 547 }
481 548
482 nr_pages = 0; 549 nr_pages = rds_rdma_pages(args);
483 max_pages = 0; 550 if (nr_pages < 0)
484
485 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
486
487 /* figure out the number of pages in the vector */
488 for (i = 0; i < args->nr_local; i++) {
489 if (copy_from_user(&vec, &local_vec[i],
490 sizeof(struct rds_iovec))) {
491 ret = -EFAULT;
492 goto out;
493 }
494
495 nr = rds_pages_in_vec(&vec);
496 if (nr == 0) {
497 ret = -EINVAL;
498 goto out;
499 }
500
501 max_pages = max(nr, max_pages);
502 nr_pages += nr;
503 }
504
505 pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
506 if (pages == NULL) {
507 ret = -ENOMEM;
508 goto out; 551 goto out;
509 }
510 552
511 op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL); 553 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
512 if (op == NULL) { 554 if (!pages) {
513 ret = -ENOMEM; 555 ret = -ENOMEM;
514 goto out; 556 goto out;
515 } 557 }
516 558
517 op->r_write = !!(args->flags & RDS_RDMA_READWRITE); 559 op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
518 op->r_fence = !!(args->flags & RDS_RDMA_FENCE); 560 op->op_fence = !!(args->flags & RDS_RDMA_FENCE);
519 op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); 561 op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
520 op->r_recverr = rs->rs_recverr; 562 op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
563 op->op_active = 1;
564 op->op_recverr = rs->rs_recverr;
521 WARN_ON(!nr_pages); 565 WARN_ON(!nr_pages);
522 sg_init_table(op->r_sg, nr_pages); 566 op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
523 567
524 if (op->r_notify || op->r_recverr) { 568 if (op->op_notify || op->op_recverr) {
525 /* We allocate an uninitialized notifier here, because 569 /* We allocate an uninitialized notifier here, because
526 * we don't want to do that in the completion handler. We 570 * we don't want to do that in the completion handler. We
527 * would have to use GFP_ATOMIC there, and don't want to deal 571 * would have to use GFP_ATOMIC there, and don't want to deal
528 * with failed allocations. 572 * with failed allocations.
529 */ 573 */
530 op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL); 574 op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
531 if (!op->r_notifier) { 575 if (!op->op_notifier) {
532 ret = -ENOMEM; 576 ret = -ENOMEM;
533 goto out; 577 goto out;
534 } 578 }
535 op->r_notifier->n_user_token = args->user_token; 579 op->op_notifier->n_user_token = args->user_token;
536 op->r_notifier->n_status = RDS_RDMA_SUCCESS; 580 op->op_notifier->n_status = RDS_RDMA_SUCCESS;
537 } 581 }
538 582
539 /* The cookie contains the R_Key of the remote memory region, and 583 /* The cookie contains the R_Key of the remote memory region, and
@@ -543,15 +587,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
543 * destination address (which is really an offset into the MR) 587 * destination address (which is really an offset into the MR)
544 * FIXME: We may want to move this into ib_rdma.c 588 * FIXME: We may want to move this into ib_rdma.c
545 */ 589 */
546 op->r_key = rds_rdma_cookie_key(args->cookie); 590 op->op_rkey = rds_rdma_cookie_key(args->cookie);
547 op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie); 591 op->op_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
548 592
549 nr_bytes = 0; 593 nr_bytes = 0;
550 594
551 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n", 595 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
552 (unsigned long long)args->nr_local, 596 (unsigned long long)args->nr_local,
553 (unsigned long long)args->remote_vec.addr, 597 (unsigned long long)args->remote_vec.addr,
554 op->r_key); 598 op->op_rkey);
599
600 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
555 601
556 for (i = 0; i < args->nr_local; i++) { 602 for (i = 0; i < args->nr_local; i++) {
557 if (copy_from_user(&vec, &local_vec[i], 603 if (copy_from_user(&vec, &local_vec[i],
@@ -569,15 +615,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
569 rs->rs_user_addr = vec.addr; 615 rs->rs_user_addr = vec.addr;
570 rs->rs_user_bytes = vec.bytes; 616 rs->rs_user_bytes = vec.bytes;
571 617
572 /* did the user change the vec under us? */
573 if (nr > max_pages || op->r_nents + nr > nr_pages) {
574 ret = -EINVAL;
575 goto out;
576 }
577 /* If it's a WRITE operation, we want to pin the pages for reading. 618 /* If it's a WRITE operation, we want to pin the pages for reading.
578 * If it's a READ operation, we need to pin the pages for writing. 619 * If it's a READ operation, we need to pin the pages for writing.
579 */ 620 */
580 ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write); 621 ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
581 if (ret < 0) 622 if (ret < 0)
582 goto out; 623 goto out;
583 624
@@ -588,8 +629,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
588 629
589 for (j = 0; j < nr; j++) { 630 for (j = 0; j < nr; j++) {
590 unsigned int offset = vec.addr & ~PAGE_MASK; 631 unsigned int offset = vec.addr & ~PAGE_MASK;
632 struct scatterlist *sg;
591 633
592 sg = &op->r_sg[op->r_nents + j]; 634 sg = &op->op_sg[op->op_nents + j];
593 sg_set_page(sg, pages[j], 635 sg_set_page(sg, pages[j],
594 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), 636 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
595 offset); 637 offset);
@@ -601,10 +643,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
601 vec.bytes -= sg->length; 643 vec.bytes -= sg->length;
602 } 644 }
603 645
604 op->r_nents += nr; 646 op->op_nents += nr;
605 } 647 }
606 648
607
608 if (nr_bytes > args->remote_vec.bytes) { 649 if (nr_bytes > args->remote_vec.bytes) {
609 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n", 650 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
610 nr_bytes, 651 nr_bytes,
@@ -612,38 +653,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
612 ret = -EINVAL; 653 ret = -EINVAL;
613 goto out; 654 goto out;
614 } 655 }
615 op->r_bytes = nr_bytes; 656 op->op_bytes = nr_bytes;
616 657
617 ret = 0; 658 ret = 0;
618out: 659out:
619 kfree(pages); 660 kfree(pages);
620 if (ret) { 661 if (ret)
621 if (op) 662 rds_rdma_free_op(op);
622 rds_rdma_free_op(op);
623 op = ERR_PTR(ret);
624 }
625 return op;
626}
627
628/*
629 * The application asks for a RDMA transfer.
630 * Extract all arguments and set up the rdma_op
631 */
632int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
633 struct cmsghdr *cmsg)
634{
635 struct rds_rdma_op *op;
636
637 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
638 rm->m_rdma_op != NULL)
639 return -EINVAL;
640 663
641 op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
642 if (IS_ERR(op))
643 return PTR_ERR(op);
644 rds_stats_inc(s_send_rdma); 664 rds_stats_inc(s_send_rdma);
645 rm->m_rdma_op = op; 665
646 return 0; 666 return ret;
647} 667}
648 668
649/* 669/*
@@ -673,7 +693,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
673 693
674 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 694 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
675 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 695 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
676 if (mr == NULL) 696 if (!mr)
677 err = -EINVAL; /* invalid r_key */ 697 err = -EINVAL; /* invalid r_key */
678 else 698 else
679 atomic_inc(&mr->r_refcount); 699 atomic_inc(&mr->r_refcount);
@@ -681,7 +701,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
681 701
682 if (mr) { 702 if (mr) {
683 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); 703 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
684 rm->m_rdma_mr = mr; 704 rm->rdma.op_rdma_mr = mr;
685 } 705 }
686 return err; 706 return err;
687} 707}
@@ -699,5 +719,98 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
699 rm->m_rdma_cookie != 0) 719 rm->m_rdma_cookie != 0)
700 return -EINVAL; 720 return -EINVAL;
701 721
702 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr); 722 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
723}
724
725/*
726 * Fill in rds_message for an atomic request.
727 */
728int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
729 struct cmsghdr *cmsg)
730{
731 struct page *page = NULL;
732 struct rds_atomic_args *args;
733 int ret = 0;
734
735 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
736 || rm->atomic.op_active)
737 return -EINVAL;
738
739 args = CMSG_DATA(cmsg);
740
741 /* Nonmasked & masked cmsg ops converted to masked hw ops */
742 switch (cmsg->cmsg_type) {
743 case RDS_CMSG_ATOMIC_FADD:
744 rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
745 rm->atomic.op_m_fadd.add = args->fadd.add;
746 rm->atomic.op_m_fadd.nocarry_mask = 0;
747 break;
748 case RDS_CMSG_MASKED_ATOMIC_FADD:
749 rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
750 rm->atomic.op_m_fadd.add = args->m_fadd.add;
751 rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask;
752 break;
753 case RDS_CMSG_ATOMIC_CSWP:
754 rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
755 rm->atomic.op_m_cswp.compare = args->cswp.compare;
756 rm->atomic.op_m_cswp.swap = args->cswp.swap;
757 rm->atomic.op_m_cswp.compare_mask = ~0;
758 rm->atomic.op_m_cswp.swap_mask = ~0;
759 break;
760 case RDS_CMSG_MASKED_ATOMIC_CSWP:
761 rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
762 rm->atomic.op_m_cswp.compare = args->m_cswp.compare;
763 rm->atomic.op_m_cswp.swap = args->m_cswp.swap;
764 rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask;
765 rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask;
766 break;
767 default:
768 BUG(); /* should never happen */
769 }
770
771 rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
772 rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
773 rm->atomic.op_active = 1;
774 rm->atomic.op_recverr = rs->rs_recverr;
775 rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
776
777 /* verify 8 byte-aligned */
778 if (args->local_addr & 0x7) {
779 ret = -EFAULT;
780 goto err;
781 }
782
783 ret = rds_pin_pages(args->local_addr, 1, &page, 1);
784 if (ret != 1)
785 goto err;
786 ret = 0;
787
788 sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
789
790 if (rm->atomic.op_notify || rm->atomic.op_recverr) {
791 /* We allocate an uninitialized notifier here, because
792 * we don't want to do that in the completion handler. We
793 * would have to use GFP_ATOMIC there, and don't want to deal
794 * with failed allocations.
795 */
796 rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
797 if (!rm->atomic.op_notifier) {
798 ret = -ENOMEM;
799 goto err;
800 }
801
802 rm->atomic.op_notifier->n_user_token = args->user_token;
803 rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
804 }
805
806 rm->atomic.op_rkey = rds_rdma_cookie_key(args->cookie);
807 rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
808
809 return ret;
810err:
811 if (page)
812 put_page(page);
813 kfree(rm->atomic.op_notifier);
814
815 return ret;
703} 816}
diff --git a/net/rds/rdma.h b/net/rds/rdma.h
deleted file mode 100644
index 909c39835a5d..000000000000
--- a/net/rds/rdma.h
+++ /dev/null
@@ -1,85 +0,0 @@
1#ifndef _RDS_RDMA_H
2#define _RDS_RDMA_H
3
4#include <linux/rbtree.h>
5#include <linux/spinlock.h>
6#include <linux/scatterlist.h>
7
8#include "rds.h"
9
10struct rds_mr {
11 struct rb_node r_rb_node;
12 atomic_t r_refcount;
13 u32 r_key;
14
15 /* A copy of the creation flags */
16 unsigned int r_use_once:1;
17 unsigned int r_invalidate:1;
18 unsigned int r_write:1;
19
20 /* This is for RDS_MR_DEAD.
21 * It would be nice & consistent to make this part of the above
22 * bit field here, but we need to use test_and_set_bit.
23 */
24 unsigned long r_state;
25 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
26 struct rds_transport *r_trans;
27 void *r_trans_private;
28};
29
30/* Flags for mr->r_state */
31#define RDS_MR_DEAD 0
32
33struct rds_rdma_op {
34 u32 r_key;
35 u64 r_remote_addr;
36 unsigned int r_write:1;
37 unsigned int r_fence:1;
38 unsigned int r_notify:1;
39 unsigned int r_recverr:1;
40 unsigned int r_mapped:1;
41 struct rds_notifier *r_notifier;
42 unsigned int r_bytes;
43 unsigned int r_nents;
44 unsigned int r_count;
45 struct scatterlist r_sg[0];
46};
47
48static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
49{
50 return r_key | (((u64) offset) << 32);
51}
52
53static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
54{
55 return cookie;
56}
57
58static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
59{
60 return cookie >> 32;
61}
62
63int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
64int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
65int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
66void rds_rdma_drop_keys(struct rds_sock *rs);
67int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
68 struct cmsghdr *cmsg);
69int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
70 struct cmsghdr *cmsg);
71int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
72 struct cmsghdr *cmsg);
73int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
74 struct cmsghdr *cmsg);
75void rds_rdma_free_op(struct rds_rdma_op *ro);
76void rds_rdma_send_complete(struct rds_message *rm, int);
77
78extern void __rds_put_mr_final(struct rds_mr *mr);
79static inline void rds_mr_put(struct rds_mr *mr)
80{
81 if (atomic_dec_and_test(&mr->r_refcount))
82 __rds_put_mr_final(mr);
83}
84
85#endif
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index e599ba2f950d..e6ed10aee190 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -36,6 +36,34 @@
36 36
37static struct rdma_cm_id *rds_rdma_listen_id; 37static struct rdma_cm_id *rds_rdma_listen_id;
38 38
39static char *rds_cm_event_strings[] = {
40#define RDS_CM_EVENT_STRING(foo) \
41 [RDMA_CM_EVENT_##foo] = __stringify(RDMA_CM_EVENT_##foo)
42 RDS_CM_EVENT_STRING(ADDR_RESOLVED),
43 RDS_CM_EVENT_STRING(ADDR_ERROR),
44 RDS_CM_EVENT_STRING(ROUTE_RESOLVED),
45 RDS_CM_EVENT_STRING(ROUTE_ERROR),
46 RDS_CM_EVENT_STRING(CONNECT_REQUEST),
47 RDS_CM_EVENT_STRING(CONNECT_RESPONSE),
48 RDS_CM_EVENT_STRING(CONNECT_ERROR),
49 RDS_CM_EVENT_STRING(UNREACHABLE),
50 RDS_CM_EVENT_STRING(REJECTED),
51 RDS_CM_EVENT_STRING(ESTABLISHED),
52 RDS_CM_EVENT_STRING(DISCONNECTED),
53 RDS_CM_EVENT_STRING(DEVICE_REMOVAL),
54 RDS_CM_EVENT_STRING(MULTICAST_JOIN),
55 RDS_CM_EVENT_STRING(MULTICAST_ERROR),
56 RDS_CM_EVENT_STRING(ADDR_CHANGE),
57 RDS_CM_EVENT_STRING(TIMEWAIT_EXIT),
58#undef RDS_CM_EVENT_STRING
59};
60
61static char *rds_cm_event_str(enum rdma_cm_event_type type)
62{
63 return rds_str_array(rds_cm_event_strings,
64 ARRAY_SIZE(rds_cm_event_strings), type);
65};
66
39int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, 67int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
40 struct rdma_cm_event *event) 68 struct rdma_cm_event *event)
41{ 69{
@@ -44,8 +72,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
44 struct rds_transport *trans; 72 struct rds_transport *trans;
45 int ret = 0; 73 int ret = 0;
46 74
47 rdsdebug("conn %p id %p handling event %u\n", conn, cm_id, 75 rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
48 event->event); 76 event->event, rds_cm_event_str(event->event));
49 77
50 if (cm_id->device->node_type == RDMA_NODE_RNIC) 78 if (cm_id->device->node_type == RDMA_NODE_RNIC)
51 trans = &rds_iw_transport; 79 trans = &rds_iw_transport;
@@ -109,7 +137,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
109 137
110 default: 138 default:
111 /* things like device disconnect? */ 139 /* things like device disconnect? */
112 printk(KERN_ERR "RDS: unknown event %u!\n", event->event); 140 printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
141 event->event, rds_cm_event_str(event->event));
113 break; 142 break;
114 } 143 }
115 144
@@ -117,12 +146,13 @@ out:
117 if (conn) 146 if (conn)
118 mutex_unlock(&conn->c_cm_lock); 147 mutex_unlock(&conn->c_cm_lock);
119 148
120 rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret); 149 rdsdebug("id %p event %u (%s) handling ret %d\n", cm_id, event->event,
150 rds_cm_event_str(event->event), ret);
121 151
122 return ret; 152 return ret;
123} 153}
124 154
125static int __init rds_rdma_listen_init(void) 155static int rds_rdma_listen_init(void)
126{ 156{
127 struct sockaddr_in sin; 157 struct sockaddr_in sin;
128 struct rdma_cm_id *cm_id; 158 struct rdma_cm_id *cm_id;
@@ -177,7 +207,7 @@ static void rds_rdma_listen_stop(void)
177 } 207 }
178} 208}
179 209
180int __init rds_rdma_init(void) 210int rds_rdma_init(void)
181{ 211{
182 int ret; 212 int ret;
183 213
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c224b5bb3ba9..8103dcf8b976 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -80,6 +80,7 @@ enum {
80/* Bits for c_flags */ 80/* Bits for c_flags */
81#define RDS_LL_SEND_FULL 0 81#define RDS_LL_SEND_FULL 0
82#define RDS_RECONNECT_PENDING 1 82#define RDS_RECONNECT_PENDING 1
83#define RDS_IN_XMIT 2
83 84
84struct rds_connection { 85struct rds_connection {
85 struct hlist_node c_hash_node; 86 struct hlist_node c_hash_node;
@@ -91,12 +92,13 @@ struct rds_connection {
91 struct rds_cong_map *c_lcong; 92 struct rds_cong_map *c_lcong;
92 struct rds_cong_map *c_fcong; 93 struct rds_cong_map *c_fcong;
93 94
94 struct mutex c_send_lock; /* protect send ring */
95 struct rds_message *c_xmit_rm; 95 struct rds_message *c_xmit_rm;
96 unsigned long c_xmit_sg; 96 unsigned long c_xmit_sg;
97 unsigned int c_xmit_hdr_off; 97 unsigned int c_xmit_hdr_off;
98 unsigned int c_xmit_data_off; 98 unsigned int c_xmit_data_off;
99 unsigned int c_xmit_atomic_sent;
99 unsigned int c_xmit_rdma_sent; 100 unsigned int c_xmit_rdma_sent;
101 unsigned int c_xmit_data_sent;
100 102
101 spinlock_t c_lock; /* protect msg queues */ 103 spinlock_t c_lock; /* protect msg queues */
102 u64 c_next_tx_seq; 104 u64 c_next_tx_seq;
@@ -116,11 +118,10 @@ struct rds_connection {
116 struct delayed_work c_conn_w; 118 struct delayed_work c_conn_w;
117 struct work_struct c_down_w; 119 struct work_struct c_down_w;
118 struct mutex c_cm_lock; /* protect conn state & cm */ 120 struct mutex c_cm_lock; /* protect conn state & cm */
121 wait_queue_head_t c_waitq;
119 122
120 struct list_head c_map_item; 123 struct list_head c_map_item;
121 unsigned long c_map_queued; 124 unsigned long c_map_queued;
122 unsigned long c_map_offset;
123 unsigned long c_map_bytes;
124 125
125 unsigned int c_unacked_packets; 126 unsigned int c_unacked_packets;
126 unsigned int c_unacked_bytes; 127 unsigned int c_unacked_bytes;
@@ -206,6 +207,48 @@ struct rds_incoming {
206 rds_rdma_cookie_t i_rdma_cookie; 207 rds_rdma_cookie_t i_rdma_cookie;
207}; 208};
208 209
210struct rds_mr {
211 struct rb_node r_rb_node;
212 atomic_t r_refcount;
213 u32 r_key;
214
215 /* A copy of the creation flags */
216 unsigned int r_use_once:1;
217 unsigned int r_invalidate:1;
218 unsigned int r_write:1;
219
220 /* This is for RDS_MR_DEAD.
221 * It would be nice & consistent to make this part of the above
222 * bit field here, but we need to use test_and_set_bit.
223 */
224 unsigned long r_state;
225 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
226 struct rds_transport *r_trans;
227 void *r_trans_private;
228};
229
230/* Flags for mr->r_state */
231#define RDS_MR_DEAD 0
232
233static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
234{
235 return r_key | (((u64) offset) << 32);
236}
237
238static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
239{
240 return cookie;
241}
242
243static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
244{
245 return cookie >> 32;
246}
247
248/* atomic operation types */
249#define RDS_ATOMIC_TYPE_CSWP 0
250#define RDS_ATOMIC_TYPE_FADD 1
251
209/* 252/*
210 * m_sock_item and m_conn_item are on lists that are serialized under 253 * m_sock_item and m_conn_item are on lists that are serialized under
211 * conn->c_lock. m_sock_item has additional meaning in that once it is empty 254 * conn->c_lock. m_sock_item has additional meaning in that once it is empty
@@ -258,13 +301,71 @@ struct rds_message {
258 * -> rs->rs_lock 301 * -> rs->rs_lock
259 */ 302 */
260 spinlock_t m_rs_lock; 303 spinlock_t m_rs_lock;
304 wait_queue_head_t m_flush_wait;
305
261 struct rds_sock *m_rs; 306 struct rds_sock *m_rs;
262 struct rds_rdma_op *m_rdma_op; 307
308 /* cookie to send to remote, in rds header */
263 rds_rdma_cookie_t m_rdma_cookie; 309 rds_rdma_cookie_t m_rdma_cookie;
264 struct rds_mr *m_rdma_mr; 310
265 unsigned int m_nents; 311 unsigned int m_used_sgs;
266 unsigned int m_count; 312 unsigned int m_total_sgs;
267 struct scatterlist m_sg[0]; 313
314 void *m_final_op;
315
316 struct {
317 struct rm_atomic_op {
318 int op_type;
319 union {
320 struct {
321 uint64_t compare;
322 uint64_t swap;
323 uint64_t compare_mask;
324 uint64_t swap_mask;
325 } op_m_cswp;
326 struct {
327 uint64_t add;
328 uint64_t nocarry_mask;
329 } op_m_fadd;
330 };
331
332 u32 op_rkey;
333 u64 op_remote_addr;
334 unsigned int op_notify:1;
335 unsigned int op_recverr:1;
336 unsigned int op_mapped:1;
337 unsigned int op_silent:1;
338 unsigned int op_active:1;
339 struct scatterlist *op_sg;
340 struct rds_notifier *op_notifier;
341
342 struct rds_mr *op_rdma_mr;
343 } atomic;
344 struct rm_rdma_op {
345 u32 op_rkey;
346 u64 op_remote_addr;
347 unsigned int op_write:1;
348 unsigned int op_fence:1;
349 unsigned int op_notify:1;
350 unsigned int op_recverr:1;
351 unsigned int op_mapped:1;
352 unsigned int op_silent:1;
353 unsigned int op_active:1;
354 unsigned int op_bytes;
355 unsigned int op_nents;
356 unsigned int op_count;
357 struct scatterlist *op_sg;
358 struct rds_notifier *op_notifier;
359
360 struct rds_mr *op_rdma_mr;
361 } rdma;
362 struct rm_data_op {
363 unsigned int op_active:1;
364 unsigned int op_nents;
365 unsigned int op_count;
366 struct scatterlist *op_sg;
367 } data;
368 };
268}; 369};
269 370
270/* 371/*
@@ -305,10 +406,6 @@ struct rds_notifier {
305 * transport is responsible for other serialization, including 406 * transport is responsible for other serialization, including
306 * rds_recv_incoming(). This is called in process context but 407 * rds_recv_incoming(). This is called in process context but
307 * should try hard not to block. 408 * should try hard not to block.
308 *
309 * @xmit_cong_map: This asks the transport to send the local bitmap down the
310 * given connection. XXX get a better story about the bitmap
311 * flag and header.
312 */ 409 */
313 410
314#define RDS_TRANS_IB 0 411#define RDS_TRANS_IB 0
@@ -332,13 +429,11 @@ struct rds_transport {
332 void (*xmit_complete)(struct rds_connection *conn); 429 void (*xmit_complete)(struct rds_connection *conn);
333 int (*xmit)(struct rds_connection *conn, struct rds_message *rm, 430 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
334 unsigned int hdr_off, unsigned int sg, unsigned int off); 431 unsigned int hdr_off, unsigned int sg, unsigned int off);
335 int (*xmit_cong_map)(struct rds_connection *conn, 432 int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
336 struct rds_cong_map *map, unsigned long offset); 433 int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
337 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
338 int (*recv)(struct rds_connection *conn); 434 int (*recv)(struct rds_connection *conn);
339 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, 435 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
340 size_t size); 436 size_t size);
341 void (*inc_purge)(struct rds_incoming *inc);
342 void (*inc_free)(struct rds_incoming *inc); 437 void (*inc_free)(struct rds_incoming *inc);
343 438
344 int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 439 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
@@ -367,17 +462,11 @@ struct rds_sock {
367 * bound_addr used for both incoming and outgoing, no INADDR_ANY 462 * bound_addr used for both incoming and outgoing, no INADDR_ANY
368 * support. 463 * support.
369 */ 464 */
370 struct rb_node rs_bound_node; 465 struct hlist_node rs_bound_node;
371 __be32 rs_bound_addr; 466 __be32 rs_bound_addr;
372 __be32 rs_conn_addr; 467 __be32 rs_conn_addr;
373 __be16 rs_bound_port; 468 __be16 rs_bound_port;
374 __be16 rs_conn_port; 469 __be16 rs_conn_port;
375
376 /*
377 * This is only used to communicate the transport between bind and
378 * initiating connections. All other trans use is referenced through
379 * the connection.
380 */
381 struct rds_transport *rs_transport; 470 struct rds_transport *rs_transport;
382 471
383 /* 472 /*
@@ -466,8 +555,8 @@ struct rds_statistics {
466 uint64_t s_recv_ping; 555 uint64_t s_recv_ping;
467 uint64_t s_send_queue_empty; 556 uint64_t s_send_queue_empty;
468 uint64_t s_send_queue_full; 557 uint64_t s_send_queue_full;
469 uint64_t s_send_sem_contention; 558 uint64_t s_send_lock_contention;
470 uint64_t s_send_sem_queue_raced; 559 uint64_t s_send_lock_queue_raced;
471 uint64_t s_send_immediate_retry; 560 uint64_t s_send_immediate_retry;
472 uint64_t s_send_delayed_retry; 561 uint64_t s_send_delayed_retry;
473 uint64_t s_send_drop_acked; 562 uint64_t s_send_drop_acked;
@@ -487,6 +576,7 @@ struct rds_statistics {
487}; 576};
488 577
489/* af_rds.c */ 578/* af_rds.c */
579char *rds_str_array(char **array, size_t elements, size_t index);
490void rds_sock_addref(struct rds_sock *rs); 580void rds_sock_addref(struct rds_sock *rs);
491void rds_sock_put(struct rds_sock *rs); 581void rds_sock_put(struct rds_sock *rs);
492void rds_wake_sk_sleep(struct rds_sock *rs); 582void rds_wake_sk_sleep(struct rds_sock *rs);
@@ -521,15 +611,17 @@ void rds_cong_exit(void);
521struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); 611struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
522 612
523/* conn.c */ 613/* conn.c */
524int __init rds_conn_init(void); 614int rds_conn_init(void);
525void rds_conn_exit(void); 615void rds_conn_exit(void);
526struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, 616struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
527 struct rds_transport *trans, gfp_t gfp); 617 struct rds_transport *trans, gfp_t gfp);
528struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, 618struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
529 struct rds_transport *trans, gfp_t gfp); 619 struct rds_transport *trans, gfp_t gfp);
620void rds_conn_shutdown(struct rds_connection *conn);
530void rds_conn_destroy(struct rds_connection *conn); 621void rds_conn_destroy(struct rds_connection *conn);
531void rds_conn_reset(struct rds_connection *conn); 622void rds_conn_reset(struct rds_connection *conn);
532void rds_conn_drop(struct rds_connection *conn); 623void rds_conn_drop(struct rds_connection *conn);
624void rds_conn_connect_if_down(struct rds_connection *conn);
533void rds_for_each_conn_info(struct socket *sock, unsigned int len, 625void rds_for_each_conn_info(struct socket *sock, unsigned int len,
534 struct rds_info_iterator *iter, 626 struct rds_info_iterator *iter,
535 struct rds_info_lengths *lens, 627 struct rds_info_lengths *lens,
@@ -566,7 +658,8 @@ rds_conn_connecting(struct rds_connection *conn)
566 658
567/* message.c */ 659/* message.c */
568struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); 660struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
569struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 661struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
662int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
570 size_t total_len); 663 size_t total_len);
571struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); 664struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
572void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 665void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
@@ -580,7 +673,6 @@ int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *vers
580int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); 673int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
581int rds_message_inc_copy_to_user(struct rds_incoming *inc, 674int rds_message_inc_copy_to_user(struct rds_incoming *inc,
582 struct iovec *first_iov, size_t size); 675 struct iovec *first_iov, size_t size);
583void rds_message_inc_purge(struct rds_incoming *inc);
584void rds_message_inc_free(struct rds_incoming *inc); 676void rds_message_inc_free(struct rds_incoming *inc);
585void rds_message_addref(struct rds_message *rm); 677void rds_message_addref(struct rds_message *rm);
586void rds_message_put(struct rds_message *rm); 678void rds_message_put(struct rds_message *rm);
@@ -636,14 +728,39 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
636typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); 728typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
637void rds_send_drop_acked(struct rds_connection *conn, u64 ack, 729void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
638 is_acked_func is_acked); 730 is_acked_func is_acked);
639int rds_send_acked_before(struct rds_connection *conn, u64 seq);
640void rds_send_remove_from_sock(struct list_head *messages, int status); 731void rds_send_remove_from_sock(struct list_head *messages, int status);
641int rds_send_pong(struct rds_connection *conn, __be16 dport); 732int rds_send_pong(struct rds_connection *conn, __be16 dport);
642struct rds_message *rds_send_get_message(struct rds_connection *, 733struct rds_message *rds_send_get_message(struct rds_connection *,
643 struct rds_rdma_op *); 734 struct rm_rdma_op *);
644 735
645/* rdma.c */ 736/* rdma.c */
646void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); 737void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
738int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
739int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
740int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
741void rds_rdma_drop_keys(struct rds_sock *rs);
742int rds_rdma_extra_size(struct rds_rdma_args *args);
743int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
744 struct cmsghdr *cmsg);
745int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
746 struct cmsghdr *cmsg);
747int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
748 struct cmsghdr *cmsg);
749int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
750 struct cmsghdr *cmsg);
751void rds_rdma_free_op(struct rm_rdma_op *ro);
752void rds_atomic_free_op(struct rm_atomic_op *ao);
753void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
754void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
755int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
756 struct cmsghdr *cmsg);
757
758extern void __rds_put_mr_final(struct rds_mr *mr);
759static inline void rds_mr_put(struct rds_mr *mr)
760{
761 if (atomic_dec_and_test(&mr->r_refcount))
762 __rds_put_mr_final(mr);
763}
647 764
648/* stats.c */ 765/* stats.c */
649DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); 766DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
@@ -657,14 +774,14 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
657 put_cpu(); \ 774 put_cpu(); \
658} while (0) 775} while (0)
659#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) 776#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
660int __init rds_stats_init(void); 777int rds_stats_init(void);
661void rds_stats_exit(void); 778void rds_stats_exit(void);
662void rds_stats_info_copy(struct rds_info_iterator *iter, 779void rds_stats_info_copy(struct rds_info_iterator *iter,
663 uint64_t *values, const char *const *names, 780 uint64_t *values, const char *const *names,
664 size_t nr); 781 size_t nr);
665 782
666/* sysctl.c */ 783/* sysctl.c */
667int __init rds_sysctl_init(void); 784int rds_sysctl_init(void);
668void rds_sysctl_exit(void); 785void rds_sysctl_exit(void);
669extern unsigned long rds_sysctl_sndbuf_min; 786extern unsigned long rds_sysctl_sndbuf_min;
670extern unsigned long rds_sysctl_sndbuf_default; 787extern unsigned long rds_sysctl_sndbuf_default;
@@ -678,9 +795,10 @@ extern unsigned long rds_sysctl_trace_flags;
678extern unsigned int rds_sysctl_trace_level; 795extern unsigned int rds_sysctl_trace_level;
679 796
680/* threads.c */ 797/* threads.c */
681int __init rds_threads_init(void); 798int rds_threads_init(void);
682void rds_threads_exit(void); 799void rds_threads_exit(void);
683extern struct workqueue_struct *rds_wq; 800extern struct workqueue_struct *rds_wq;
801void rds_queue_reconnect(struct rds_connection *conn);
684void rds_connect_worker(struct work_struct *); 802void rds_connect_worker(struct work_struct *);
685void rds_shutdown_worker(struct work_struct *); 803void rds_shutdown_worker(struct work_struct *);
686void rds_send_worker(struct work_struct *); 804void rds_send_worker(struct work_struct *);
@@ -691,9 +809,10 @@ void rds_connect_complete(struct rds_connection *conn);
691int rds_trans_register(struct rds_transport *trans); 809int rds_trans_register(struct rds_transport *trans);
692void rds_trans_unregister(struct rds_transport *trans); 810void rds_trans_unregister(struct rds_transport *trans);
693struct rds_transport *rds_trans_get_preferred(__be32 addr); 811struct rds_transport *rds_trans_get_preferred(__be32 addr);
812void rds_trans_put(struct rds_transport *trans);
694unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, 813unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
695 unsigned int avail); 814 unsigned int avail);
696int __init rds_trans_init(void); 815int rds_trans_init(void);
697void rds_trans_exit(void); 816void rds_trans_exit(void);
698 817
699#endif 818#endif
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c93588c2d553..68800f02aa30 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -36,7 +36,6 @@
36#include <linux/in.h> 36#include <linux/in.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40 39
41void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 40void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
42 __be32 saddr) 41 __be32 saddr)
@@ -210,7 +209,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
210 } 209 }
211 210
212 rs = rds_find_bound(daddr, inc->i_hdr.h_dport); 211 rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
213 if (rs == NULL) { 212 if (!rs) {
214 rds_stats_inc(s_recv_drop_no_sock); 213 rds_stats_inc(s_recv_drop_no_sock);
215 goto out; 214 goto out;
216 } 215 }
@@ -251,7 +250,7 @@ static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc)
251{ 250{
252 unsigned long flags; 251 unsigned long flags;
253 252
254 if (*inc == NULL) { 253 if (!*inc) {
255 read_lock_irqsave(&rs->rs_recv_lock, flags); 254 read_lock_irqsave(&rs->rs_recv_lock, flags);
256 if (!list_empty(&rs->rs_recv_queue)) { 255 if (!list_empty(&rs->rs_recv_queue)) {
257 *inc = list_entry(rs->rs_recv_queue.next, 256 *inc = list_entry(rs->rs_recv_queue.next,
@@ -334,10 +333,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
334 333
335 if (msghdr) { 334 if (msghdr) {
336 cmsg.user_token = notifier->n_user_token; 335 cmsg.user_token = notifier->n_user_token;
337 cmsg.status = notifier->n_status; 336 cmsg.status = notifier->n_status;
338 337
339 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, 338 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS,
340 sizeof(cmsg), &cmsg); 339 sizeof(cmsg), &cmsg);
341 if (err) 340 if (err)
342 break; 341 break;
343 } 342 }
diff --git a/net/rds/send.c b/net/rds/send.c
index 9c1c6bcaa6c9..9b951a0ab6b7 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -37,7 +37,6 @@
37#include <linux/list.h> 37#include <linux/list.h>
38 38
39#include "rds.h" 39#include "rds.h"
40#include "rdma.h"
41 40
42/* When transmitting messages in rds_send_xmit, we need to emerge from 41/* When transmitting messages in rds_send_xmit, we need to emerge from
43 * time to time and briefly release the CPU. Otherwise the softlock watchdog 42 * time to time and briefly release the CPU. Otherwise the softlock watchdog
@@ -54,7 +53,8 @@ module_param(send_batch_count, int, 0444);
54MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); 53MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
55 54
56/* 55/*
57 * Reset the send state. Caller must hold c_send_lock when calling here. 56 * Reset the send state. Callers must ensure that this doesn't race with
57 * rds_send_xmit().
58 */ 58 */
59void rds_send_reset(struct rds_connection *conn) 59void rds_send_reset(struct rds_connection *conn)
60{ 60{
@@ -62,18 +62,22 @@ void rds_send_reset(struct rds_connection *conn)
62 unsigned long flags; 62 unsigned long flags;
63 63
64 if (conn->c_xmit_rm) { 64 if (conn->c_xmit_rm) {
65 rm = conn->c_xmit_rm;
66 conn->c_xmit_rm = NULL;
65 /* Tell the user the RDMA op is no longer mapped by the 67 /* Tell the user the RDMA op is no longer mapped by the
66 * transport. This isn't entirely true (it's flushed out 68 * transport. This isn't entirely true (it's flushed out
67 * independently) but as the connection is down, there's 69 * independently) but as the connection is down, there's
68 * no ongoing RDMA to/from that memory */ 70 * no ongoing RDMA to/from that memory */
69 rds_message_unmapped(conn->c_xmit_rm); 71 rds_message_unmapped(rm);
70 rds_message_put(conn->c_xmit_rm); 72 rds_message_put(rm);
71 conn->c_xmit_rm = NULL;
72 } 73 }
74
73 conn->c_xmit_sg = 0; 75 conn->c_xmit_sg = 0;
74 conn->c_xmit_hdr_off = 0; 76 conn->c_xmit_hdr_off = 0;
75 conn->c_xmit_data_off = 0; 77 conn->c_xmit_data_off = 0;
78 conn->c_xmit_atomic_sent = 0;
76 conn->c_xmit_rdma_sent = 0; 79 conn->c_xmit_rdma_sent = 0;
80 conn->c_xmit_data_sent = 0;
77 81
78 conn->c_map_queued = 0; 82 conn->c_map_queued = 0;
79 83
@@ -90,6 +94,25 @@ void rds_send_reset(struct rds_connection *conn)
90 spin_unlock_irqrestore(&conn->c_lock, flags); 94 spin_unlock_irqrestore(&conn->c_lock, flags);
91} 95}
92 96
97static int acquire_in_xmit(struct rds_connection *conn)
98{
99 return test_and_set_bit(RDS_IN_XMIT, &conn->c_flags) == 0;
100}
101
102static void release_in_xmit(struct rds_connection *conn)
103{
104 clear_bit(RDS_IN_XMIT, &conn->c_flags);
105 smp_mb__after_clear_bit();
106 /*
107 * We don't use wait_on_bit()/wake_up_bit() because our waking is in a
108 * hot path and finding waiters is very rare. We don't want to walk
109 * the system-wide hashed waitqueue buckets in the fast path only to
110 * almost never find waiters.
111 */
112 if (waitqueue_active(&conn->c_waitq))
113 wake_up_all(&conn->c_waitq);
114}
115
93/* 116/*
94 * We're making the concious trade-off here to only send one message 117 * We're making the concious trade-off here to only send one message
95 * down the connection at a time. 118 * down the connection at a time.
@@ -109,102 +132,69 @@ int rds_send_xmit(struct rds_connection *conn)
109 struct rds_message *rm; 132 struct rds_message *rm;
110 unsigned long flags; 133 unsigned long flags;
111 unsigned int tmp; 134 unsigned int tmp;
112 unsigned int send_quota = send_batch_count;
113 struct scatterlist *sg; 135 struct scatterlist *sg;
114 int ret = 0; 136 int ret = 0;
115 int was_empty = 0;
116 LIST_HEAD(to_be_dropped); 137 LIST_HEAD(to_be_dropped);
117 138
139restart:
140
118 /* 141 /*
119 * sendmsg calls here after having queued its message on the send 142 * sendmsg calls here after having queued its message on the send
120 * queue. We only have one task feeding the connection at a time. If 143 * queue. We only have one task feeding the connection at a time. If
121 * another thread is already feeding the queue then we back off. This 144 * another thread is already feeding the queue then we back off. This
122 * avoids blocking the caller and trading per-connection data between 145 * avoids blocking the caller and trading per-connection data between
123 * caches per message. 146 * caches per message.
124 *
125 * The sem holder will issue a retry if they notice that someone queued
126 * a message after they stopped walking the send queue but before they
127 * dropped the sem.
128 */ 147 */
129 if (!mutex_trylock(&conn->c_send_lock)) { 148 if (!acquire_in_xmit(conn)) {
130 rds_stats_inc(s_send_sem_contention); 149 rds_stats_inc(s_send_lock_contention);
131 ret = -ENOMEM; 150 ret = -ENOMEM;
132 goto out; 151 goto out;
133 } 152 }
134 153
154 /*
155 * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
156 * we do the opposite to avoid races.
157 */
158 if (!rds_conn_up(conn)) {
159 release_in_xmit(conn);
160 ret = 0;
161 goto out;
162 }
163
135 if (conn->c_trans->xmit_prepare) 164 if (conn->c_trans->xmit_prepare)
136 conn->c_trans->xmit_prepare(conn); 165 conn->c_trans->xmit_prepare(conn);
137 166
138 /* 167 /*
139 * spin trying to push headers and data down the connection until 168 * spin trying to push headers and data down the connection until
140 * the connection doens't make forward progress. 169 * the connection doesn't make forward progress.
141 */ 170 */
142 while (--send_quota) { 171 while (1) {
143 /*
144 * See if need to send a congestion map update if we're
145 * between sending messages. The send_sem protects our sole
146 * use of c_map_offset and _bytes.
147 * Note this is used only by transports that define a special
148 * xmit_cong_map function. For all others, we create allocate
149 * a cong_map message and treat it just like any other send.
150 */
151 if (conn->c_map_bytes) {
152 ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
153 conn->c_map_offset);
154 if (ret <= 0)
155 break;
156 172
157 conn->c_map_offset += ret;
158 conn->c_map_bytes -= ret;
159 if (conn->c_map_bytes)
160 continue;
161 }
162
163 /* If we're done sending the current message, clear the
164 * offset and S/G temporaries.
165 */
166 rm = conn->c_xmit_rm; 173 rm = conn->c_xmit_rm;
167 if (rm != NULL &&
168 conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
169 conn->c_xmit_sg == rm->m_nents) {
170 conn->c_xmit_rm = NULL;
171 conn->c_xmit_sg = 0;
172 conn->c_xmit_hdr_off = 0;
173 conn->c_xmit_data_off = 0;
174 conn->c_xmit_rdma_sent = 0;
175 174
176 /* Release the reference to the previous message. */ 175 /*
177 rds_message_put(rm); 176 * If between sending messages, we can send a pending congestion
178 rm = NULL; 177 * map update.
179 }
180
181 /* If we're asked to send a cong map update, do so.
182 */ 178 */
183 if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) { 179 if (!rm && test_and_clear_bit(0, &conn->c_map_queued)) {
184 if (conn->c_trans->xmit_cong_map != NULL) {
185 conn->c_map_offset = 0;
186 conn->c_map_bytes = sizeof(struct rds_header) +
187 RDS_CONG_MAP_BYTES;
188 continue;
189 }
190
191 rm = rds_cong_update_alloc(conn); 180 rm = rds_cong_update_alloc(conn);
192 if (IS_ERR(rm)) { 181 if (IS_ERR(rm)) {
193 ret = PTR_ERR(rm); 182 ret = PTR_ERR(rm);
194 break; 183 break;
195 } 184 }
185 rm->data.op_active = 1;
196 186
197 conn->c_xmit_rm = rm; 187 conn->c_xmit_rm = rm;
198 } 188 }
199 189
200 /* 190 /*
201 * Grab the next message from the send queue, if there is one. 191 * If not already working on one, grab the next message.
202 * 192 *
203 * c_xmit_rm holds a ref while we're sending this message down 193 * c_xmit_rm holds a ref while we're sending this message down
204 * the connction. We can use this ref while holding the 194 * the connction. We can use this ref while holding the
205 * send_sem.. rds_send_reset() is serialized with it. 195 * send_sem.. rds_send_reset() is serialized with it.
206 */ 196 */
207 if (rm == NULL) { 197 if (!rm) {
208 unsigned int len; 198 unsigned int len;
209 199
210 spin_lock_irqsave(&conn->c_lock, flags); 200 spin_lock_irqsave(&conn->c_lock, flags);
@@ -224,10 +214,8 @@ int rds_send_xmit(struct rds_connection *conn)
224 214
225 spin_unlock_irqrestore(&conn->c_lock, flags); 215 spin_unlock_irqrestore(&conn->c_lock, flags);
226 216
227 if (rm == NULL) { 217 if (!rm)
228 was_empty = 1;
229 break; 218 break;
230 }
231 219
232 /* Unfortunately, the way Infiniband deals with 220 /* Unfortunately, the way Infiniband deals with
233 * RDMA to a bad MR key is by moving the entire 221 * RDMA to a bad MR key is by moving the entire
@@ -236,13 +224,12 @@ int rds_send_xmit(struct rds_connection *conn)
236 * connection. 224 * connection.
237 * Therefore, we never retransmit messages with RDMA ops. 225 * Therefore, we never retransmit messages with RDMA ops.
238 */ 226 */
239 if (rm->m_rdma_op && 227 if (rm->rdma.op_active &&
240 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { 228 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
241 spin_lock_irqsave(&conn->c_lock, flags); 229 spin_lock_irqsave(&conn->c_lock, flags);
242 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 230 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
243 list_move(&rm->m_conn_item, &to_be_dropped); 231 list_move(&rm->m_conn_item, &to_be_dropped);
244 spin_unlock_irqrestore(&conn->c_lock, flags); 232 spin_unlock_irqrestore(&conn->c_lock, flags);
245 rds_message_put(rm);
246 continue; 233 continue;
247 } 234 }
248 235
@@ -263,23 +250,55 @@ int rds_send_xmit(struct rds_connection *conn)
263 conn->c_xmit_rm = rm; 250 conn->c_xmit_rm = rm;
264 } 251 }
265 252
266 /* 253 /* The transport either sends the whole rdma or none of it */
267 * Try and send an rdma message. Let's see if we can 254 if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) {
268 * keep this simple and require that the transport either 255 rm->m_final_op = &rm->rdma;
269 * send the whole rdma or none of it. 256 ret = conn->c_trans->xmit_rdma(conn, &rm->rdma);
270 */
271 if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
272 ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
273 if (ret) 257 if (ret)
274 break; 258 break;
275 conn->c_xmit_rdma_sent = 1; 259 conn->c_xmit_rdma_sent = 1;
260
276 /* The transport owns the mapped memory for now. 261 /* The transport owns the mapped memory for now.
277 * You can't unmap it while it's on the send queue */ 262 * You can't unmap it while it's on the send queue */
278 set_bit(RDS_MSG_MAPPED, &rm->m_flags); 263 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
279 } 264 }
280 265
281 if (conn->c_xmit_hdr_off < sizeof(struct rds_header) || 266 if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
282 conn->c_xmit_sg < rm->m_nents) { 267 rm->m_final_op = &rm->atomic;
268 ret = conn->c_trans->xmit_atomic(conn, &rm->atomic);
269 if (ret)
270 break;
271 conn->c_xmit_atomic_sent = 1;
272
273 /* The transport owns the mapped memory for now.
274 * You can't unmap it while it's on the send queue */
275 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
276 }
277
278 /*
279 * A number of cases require an RDS header to be sent
280 * even if there is no data.
281 * We permit 0-byte sends; rds-ping depends on this.
282 * However, if there are exclusively attached silent ops,
283 * we skip the hdr/data send, to enable silent operation.
284 */
285 if (rm->data.op_nents == 0) {
286 int ops_present;
287 int all_ops_are_silent = 1;
288
289 ops_present = (rm->atomic.op_active || rm->rdma.op_active);
290 if (rm->atomic.op_active && !rm->atomic.op_silent)
291 all_ops_are_silent = 0;
292 if (rm->rdma.op_active && !rm->rdma.op_silent)
293 all_ops_are_silent = 0;
294
295 if (ops_present && all_ops_are_silent
296 && !rm->m_rdma_cookie)
297 rm->data.op_active = 0;
298 }
299
300 if (rm->data.op_active && !conn->c_xmit_data_sent) {
301 rm->m_final_op = &rm->data;
283 ret = conn->c_trans->xmit(conn, rm, 302 ret = conn->c_trans->xmit(conn, rm,
284 conn->c_xmit_hdr_off, 303 conn->c_xmit_hdr_off,
285 conn->c_xmit_sg, 304 conn->c_xmit_sg,
@@ -295,7 +314,7 @@ int rds_send_xmit(struct rds_connection *conn)
295 ret -= tmp; 314 ret -= tmp;
296 } 315 }
297 316
298 sg = &rm->m_sg[conn->c_xmit_sg]; 317 sg = &rm->data.op_sg[conn->c_xmit_sg];
299 while (ret) { 318 while (ret) {
300 tmp = min_t(int, ret, sg->length - 319 tmp = min_t(int, ret, sg->length -
301 conn->c_xmit_data_off); 320 conn->c_xmit_data_off);
@@ -306,49 +325,63 @@ int rds_send_xmit(struct rds_connection *conn)
306 sg++; 325 sg++;
307 conn->c_xmit_sg++; 326 conn->c_xmit_sg++;
308 BUG_ON(ret != 0 && 327 BUG_ON(ret != 0 &&
309 conn->c_xmit_sg == rm->m_nents); 328 conn->c_xmit_sg == rm->data.op_nents);
310 } 329 }
311 } 330 }
331
332 if (conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
333 (conn->c_xmit_sg == rm->data.op_nents))
334 conn->c_xmit_data_sent = 1;
312 } 335 }
313 }
314 336
315 /* Nuke any messages we decided not to retransmit. */ 337 /*
316 if (!list_empty(&to_be_dropped)) 338 * A rm will only take multiple times through this loop
317 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); 339 * if there is a data op. Thus, if the data is sent (or there was
340 * none), then we're done with the rm.
341 */
342 if (!rm->data.op_active || conn->c_xmit_data_sent) {
343 conn->c_xmit_rm = NULL;
344 conn->c_xmit_sg = 0;
345 conn->c_xmit_hdr_off = 0;
346 conn->c_xmit_data_off = 0;
347 conn->c_xmit_rdma_sent = 0;
348 conn->c_xmit_atomic_sent = 0;
349 conn->c_xmit_data_sent = 0;
350
351 rds_message_put(rm);
352 }
353 }
318 354
319 if (conn->c_trans->xmit_complete) 355 if (conn->c_trans->xmit_complete)
320 conn->c_trans->xmit_complete(conn); 356 conn->c_trans->xmit_complete(conn);
321 357
322 /* 358 release_in_xmit(conn);
323 * We might be racing with another sender who queued a message but
324 * backed off on noticing that we held the c_send_lock. If we check
325 * for queued messages after dropping the sem then either we'll
326 * see the queued message or the queuer will get the sem. If we
327 * notice the queued message then we trigger an immediate retry.
328 *
329 * We need to be careful only to do this when we stopped processing
330 * the send queue because it was empty. It's the only way we
331 * stop processing the loop when the transport hasn't taken
332 * responsibility for forward progress.
333 */
334 mutex_unlock(&conn->c_send_lock);
335 359
336 if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) { 360 /* Nuke any messages we decided not to retransmit. */
337 /* We exhausted the send quota, but there's work left to 361 if (!list_empty(&to_be_dropped)) {
338 * do. Return and (re-)schedule the send worker. 362 /* irqs on here, so we can put(), unlike above */
339 */ 363 list_for_each_entry(rm, &to_be_dropped, m_conn_item)
340 ret = -EAGAIN; 364 rds_message_put(rm);
365 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
341 } 366 }
342 367
343 if (ret == 0 && was_empty) { 368 /*
344 /* A simple bit test would be way faster than taking the 369 * Other senders can queue a message after we last test the send queue
345 * spin lock */ 370 * but before we clear RDS_IN_XMIT. In that case they'd back off and
346 spin_lock_irqsave(&conn->c_lock, flags); 371 * not try and send their newly queued message. We need to check the
372 * send queue after having cleared RDS_IN_XMIT so that their message
373 * doesn't get stuck on the send queue.
374 *
375 * If the transport cannot continue (i.e ret != 0), then it must
376 * call us when more room is available, such as from the tx
377 * completion handler.
378 */
379 if (ret == 0) {
380 smp_mb();
347 if (!list_empty(&conn->c_send_queue)) { 381 if (!list_empty(&conn->c_send_queue)) {
348 rds_stats_inc(s_send_sem_queue_raced); 382 rds_stats_inc(s_send_lock_queue_raced);
349 ret = -EAGAIN; 383 goto restart;
350 } 384 }
351 spin_unlock_irqrestore(&conn->c_lock, flags);
352 } 385 }
353out: 386out:
354 return ret; 387 return ret;
@@ -376,52 +409,60 @@ static inline int rds_send_is_acked(struct rds_message *rm, u64 ack,
376} 409}
377 410
378/* 411/*
379 * Returns true if there are no messages on the send and retransmit queues 412 * This is pretty similar to what happens below in the ACK
380 * which have a sequence number greater than or equal to the given sequence 413 * handling code - except that we call here as soon as we get
381 * number. 414 * the IB send completion on the RDMA op and the accompanying
415 * message.
382 */ 416 */
383int rds_send_acked_before(struct rds_connection *conn, u64 seq) 417void rds_rdma_send_complete(struct rds_message *rm, int status)
384{ 418{
385 struct rds_message *rm, *tmp; 419 struct rds_sock *rs = NULL;
386 int ret = 1; 420 struct rm_rdma_op *ro;
421 struct rds_notifier *notifier;
422 unsigned long flags;
387 423
388 spin_lock(&conn->c_lock); 424 spin_lock_irqsave(&rm->m_rs_lock, flags);
389 425
390 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 426 ro = &rm->rdma;
391 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 427 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
392 ret = 0; 428 ro->op_active && ro->op_notify && ro->op_notifier) {
393 break; 429 notifier = ro->op_notifier;
394 } 430 rs = rm->m_rs;
431 sock_hold(rds_rs_to_sk(rs));
395 432
396 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 433 notifier->n_status = status;
397 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 434 spin_lock(&rs->rs_lock);
398 ret = 0; 435 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
399 break; 436 spin_unlock(&rs->rs_lock);
437
438 ro->op_notifier = NULL;
400 } 439 }
401 440
402 spin_unlock(&conn->c_lock); 441 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
403 442
404 return ret; 443 if (rs) {
444 rds_wake_sk_sleep(rs);
445 sock_put(rds_rs_to_sk(rs));
446 }
405} 447}
448EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
406 449
407/* 450/*
408 * This is pretty similar to what happens below in the ACK 451 * Just like above, except looks at atomic op
409 * handling code - except that we call here as soon as we get
410 * the IB send completion on the RDMA op and the accompanying
411 * message.
412 */ 452 */
413void rds_rdma_send_complete(struct rds_message *rm, int status) 453void rds_atomic_send_complete(struct rds_message *rm, int status)
414{ 454{
415 struct rds_sock *rs = NULL; 455 struct rds_sock *rs = NULL;
416 struct rds_rdma_op *ro; 456 struct rm_atomic_op *ao;
417 struct rds_notifier *notifier; 457 struct rds_notifier *notifier;
458 unsigned long flags;
418 459
419 spin_lock(&rm->m_rs_lock); 460 spin_lock_irqsave(&rm->m_rs_lock, flags);
420 461
421 ro = rm->m_rdma_op; 462 ao = &rm->atomic;
422 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && 463 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
423 ro && ro->r_notify && ro->r_notifier) { 464 && ao->op_active && ao->op_notify && ao->op_notifier) {
424 notifier = ro->r_notifier; 465 notifier = ao->op_notifier;
425 rs = rm->m_rs; 466 rs = rm->m_rs;
426 sock_hold(rds_rs_to_sk(rs)); 467 sock_hold(rds_rs_to_sk(rs));
427 468
@@ -430,17 +471,17 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
430 list_add_tail(&notifier->n_list, &rs->rs_notify_queue); 471 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
431 spin_unlock(&rs->rs_lock); 472 spin_unlock(&rs->rs_lock);
432 473
433 ro->r_notifier = NULL; 474 ao->op_notifier = NULL;
434 } 475 }
435 476
436 spin_unlock(&rm->m_rs_lock); 477 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
437 478
438 if (rs) { 479 if (rs) {
439 rds_wake_sk_sleep(rs); 480 rds_wake_sk_sleep(rs);
440 sock_put(rds_rs_to_sk(rs)); 481 sock_put(rds_rs_to_sk(rs));
441 } 482 }
442} 483}
443EXPORT_SYMBOL_GPL(rds_rdma_send_complete); 484EXPORT_SYMBOL_GPL(rds_atomic_send_complete);
444 485
445/* 486/*
446 * This is the same as rds_rdma_send_complete except we 487 * This is the same as rds_rdma_send_complete except we
@@ -448,15 +489,23 @@ EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
448 * socket, socket lock) and can just move the notifier. 489 * socket, socket lock) and can just move the notifier.
449 */ 490 */
450static inline void 491static inline void
451__rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status) 492__rds_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
452{ 493{
453 struct rds_rdma_op *ro; 494 struct rm_rdma_op *ro;
495 struct rm_atomic_op *ao;
496
497 ro = &rm->rdma;
498 if (ro->op_active && ro->op_notify && ro->op_notifier) {
499 ro->op_notifier->n_status = status;
500 list_add_tail(&ro->op_notifier->n_list, &rs->rs_notify_queue);
501 ro->op_notifier = NULL;
502 }
454 503
455 ro = rm->m_rdma_op; 504 ao = &rm->atomic;
456 if (ro && ro->r_notify && ro->r_notifier) { 505 if (ao->op_active && ao->op_notify && ao->op_notifier) {
457 ro->r_notifier->n_status = status; 506 ao->op_notifier->n_status = status;
458 list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue); 507 list_add_tail(&ao->op_notifier->n_list, &rs->rs_notify_queue);
459 ro->r_notifier = NULL; 508 ao->op_notifier = NULL;
460 } 509 }
461 510
462 /* No need to wake the app - caller does this */ 511 /* No need to wake the app - caller does this */
@@ -468,7 +517,7 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
468 * So speed is not an issue here. 517 * So speed is not an issue here.
469 */ 518 */
470struct rds_message *rds_send_get_message(struct rds_connection *conn, 519struct rds_message *rds_send_get_message(struct rds_connection *conn,
471 struct rds_rdma_op *op) 520 struct rm_rdma_op *op)
472{ 521{
473 struct rds_message *rm, *tmp, *found = NULL; 522 struct rds_message *rm, *tmp, *found = NULL;
474 unsigned long flags; 523 unsigned long flags;
@@ -476,7 +525,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
476 spin_lock_irqsave(&conn->c_lock, flags); 525 spin_lock_irqsave(&conn->c_lock, flags);
477 526
478 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 527 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
479 if (rm->m_rdma_op == op) { 528 if (&rm->rdma == op) {
480 atomic_inc(&rm->m_refcount); 529 atomic_inc(&rm->m_refcount);
481 found = rm; 530 found = rm;
482 goto out; 531 goto out;
@@ -484,7 +533,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
484 } 533 }
485 534
486 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 535 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
487 if (rm->m_rdma_op == op) { 536 if (&rm->rdma == op) {
488 atomic_inc(&rm->m_refcount); 537 atomic_inc(&rm->m_refcount);
489 found = rm; 538 found = rm;
490 break; 539 break;
@@ -544,19 +593,20 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
544 spin_lock(&rs->rs_lock); 593 spin_lock(&rs->rs_lock);
545 594
546 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { 595 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
547 struct rds_rdma_op *ro = rm->m_rdma_op; 596 struct rm_rdma_op *ro = &rm->rdma;
548 struct rds_notifier *notifier; 597 struct rds_notifier *notifier;
549 598
550 list_del_init(&rm->m_sock_item); 599 list_del_init(&rm->m_sock_item);
551 rds_send_sndbuf_remove(rs, rm); 600 rds_send_sndbuf_remove(rs, rm);
552 601
553 if (ro && ro->r_notifier && (status || ro->r_notify)) { 602 if (ro->op_active && ro->op_notifier &&
554 notifier = ro->r_notifier; 603 (ro->op_notify || (ro->op_recverr && status))) {
604 notifier = ro->op_notifier;
555 list_add_tail(&notifier->n_list, 605 list_add_tail(&notifier->n_list,
556 &rs->rs_notify_queue); 606 &rs->rs_notify_queue);
557 if (!notifier->n_status) 607 if (!notifier->n_status)
558 notifier->n_status = status; 608 notifier->n_status = status;
559 rm->m_rdma_op->r_notifier = NULL; 609 rm->rdma.op_notifier = NULL;
560 } 610 }
561 was_on_sock = 1; 611 was_on_sock = 1;
562 rm->m_rs = NULL; 612 rm->m_rs = NULL;
@@ -619,9 +669,8 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
619{ 669{
620 struct rds_message *rm, *tmp; 670 struct rds_message *rm, *tmp;
621 struct rds_connection *conn; 671 struct rds_connection *conn;
622 unsigned long flags, flags2; 672 unsigned long flags;
623 LIST_HEAD(list); 673 LIST_HEAD(list);
624 int wake = 0;
625 674
626 /* get all the messages we're dropping under the rs lock */ 675 /* get all the messages we're dropping under the rs lock */
627 spin_lock_irqsave(&rs->rs_lock, flags); 676 spin_lock_irqsave(&rs->rs_lock, flags);
@@ -631,59 +680,54 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
631 dest->sin_port != rm->m_inc.i_hdr.h_dport)) 680 dest->sin_port != rm->m_inc.i_hdr.h_dport))
632 continue; 681 continue;
633 682
634 wake = 1;
635 list_move(&rm->m_sock_item, &list); 683 list_move(&rm->m_sock_item, &list);
636 rds_send_sndbuf_remove(rs, rm); 684 rds_send_sndbuf_remove(rs, rm);
637 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); 685 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
638 } 686 }
639 687
640 /* order flag updates with the rs lock */ 688 /* order flag updates with the rs lock */
641 if (wake) 689 smp_mb__after_clear_bit();
642 smp_mb__after_clear_bit();
643 690
644 spin_unlock_irqrestore(&rs->rs_lock, flags); 691 spin_unlock_irqrestore(&rs->rs_lock, flags);
645 692
646 conn = NULL; 693 if (list_empty(&list))
694 return;
647 695
648 /* now remove the messages from the conn list as needed */ 696 /* Remove the messages from the conn */
649 list_for_each_entry(rm, &list, m_sock_item) { 697 list_for_each_entry(rm, &list, m_sock_item) {
650 /* We do this here rather than in the loop above, so that
651 * we don't have to nest m_rs_lock under rs->rs_lock */
652 spin_lock_irqsave(&rm->m_rs_lock, flags2);
653 /* If this is a RDMA operation, notify the app. */
654 spin_lock(&rs->rs_lock);
655 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
656 spin_unlock(&rs->rs_lock);
657 rm->m_rs = NULL;
658 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
659 698
699 conn = rm->m_inc.i_conn;
700
701 spin_lock_irqsave(&conn->c_lock, flags);
660 /* 702 /*
661 * If we see this flag cleared then we're *sure* that someone 703 * Maybe someone else beat us to removing rm from the conn.
662 * else beat us to removing it from the conn. If we race 704 * If we race with their flag update we'll get the lock and
663 * with their flag update we'll get the lock and then really 705 * then really see that the flag has been cleared.
664 * see that the flag has been cleared.
665 */ 706 */
666 if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 707 if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
708 spin_unlock_irqrestore(&conn->c_lock, flags);
667 continue; 709 continue;
668
669 if (conn != rm->m_inc.i_conn) {
670 if (conn)
671 spin_unlock_irqrestore(&conn->c_lock, flags);
672 conn = rm->m_inc.i_conn;
673 spin_lock_irqsave(&conn->c_lock, flags);
674 } 710 }
711 list_del_init(&rm->m_conn_item);
712 spin_unlock_irqrestore(&conn->c_lock, flags);
675 713
676 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { 714 /*
677 list_del_init(&rm->m_conn_item); 715 * Couldn't grab m_rs_lock in top loop (lock ordering),
678 rds_message_put(rm); 716 * but we can now.
679 } 717 */
680 } 718 spin_lock_irqsave(&rm->m_rs_lock, flags);
681 719
682 if (conn) 720 spin_lock(&rs->rs_lock);
683 spin_unlock_irqrestore(&conn->c_lock, flags); 721 __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
722 spin_unlock(&rs->rs_lock);
684 723
685 if (wake) 724 rm->m_rs = NULL;
686 rds_wake_sk_sleep(rs); 725 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
726
727 rds_message_put(rm);
728 }
729
730 rds_wake_sk_sleep(rs);
687 731
688 while (!list_empty(&list)) { 732 while (!list_empty(&list)) {
689 rm = list_entry(list.next, struct rds_message, m_sock_item); 733 rm = list_entry(list.next, struct rds_message, m_sock_item);
@@ -763,6 +807,63 @@ out:
763 return *queued; 807 return *queued;
764} 808}
765 809
810/*
811 * rds_message is getting to be quite complicated, and we'd like to allocate
812 * it all in one go. This figures out how big it needs to be up front.
813 */
814static int rds_rm_size(struct msghdr *msg, int data_len)
815{
816 struct cmsghdr *cmsg;
817 int size = 0;
818 int cmsg_groups = 0;
819 int retval;
820
821 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
822 if (!CMSG_OK(msg, cmsg))
823 return -EINVAL;
824
825 if (cmsg->cmsg_level != SOL_RDS)
826 continue;
827
828 switch (cmsg->cmsg_type) {
829 case RDS_CMSG_RDMA_ARGS:
830 cmsg_groups |= 1;
831 retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
832 if (retval < 0)
833 return retval;
834 size += retval;
835
836 break;
837
838 case RDS_CMSG_RDMA_DEST:
839 case RDS_CMSG_RDMA_MAP:
840 cmsg_groups |= 2;
841 /* these are valid but do no add any size */
842 break;
843
844 case RDS_CMSG_ATOMIC_CSWP:
845 case RDS_CMSG_ATOMIC_FADD:
846 case RDS_CMSG_MASKED_ATOMIC_CSWP:
847 case RDS_CMSG_MASKED_ATOMIC_FADD:
848 cmsg_groups |= 1;
849 size += sizeof(struct scatterlist);
850 break;
851
852 default:
853 return -EINVAL;
854 }
855
856 }
857
858 size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
859
860 /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
861 if (cmsg_groups == 3)
862 return -EINVAL;
863
864 return size;
865}
866
766static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, 867static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
767 struct msghdr *msg, int *allocated_mr) 868 struct msghdr *msg, int *allocated_mr)
768{ 869{
@@ -777,7 +878,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
777 continue; 878 continue;
778 879
779 /* As a side effect, RDMA_DEST and RDMA_MAP will set 880 /* As a side effect, RDMA_DEST and RDMA_MAP will set
780 * rm->m_rdma_cookie and rm->m_rdma_mr. 881 * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr.
781 */ 882 */
782 switch (cmsg->cmsg_type) { 883 switch (cmsg->cmsg_type) {
783 case RDS_CMSG_RDMA_ARGS: 884 case RDS_CMSG_RDMA_ARGS:
@@ -793,6 +894,12 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
793 if (!ret) 894 if (!ret)
794 *allocated_mr = 1; 895 *allocated_mr = 1;
795 break; 896 break;
897 case RDS_CMSG_ATOMIC_CSWP:
898 case RDS_CMSG_ATOMIC_FADD:
899 case RDS_CMSG_MASKED_ATOMIC_CSWP:
900 case RDS_CMSG_MASKED_ATOMIC_FADD:
901 ret = rds_cmsg_atomic(rs, rm, cmsg);
902 break;
796 903
797 default: 904 default:
798 return -EINVAL; 905 return -EINVAL;
@@ -850,13 +957,26 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
850 goto out; 957 goto out;
851 } 958 }
852 959
853 rm = rds_message_copy_from_user(msg->msg_iov, payload_len); 960 /* size of rm including all sgs */
854 if (IS_ERR(rm)) { 961 ret = rds_rm_size(msg, payload_len);
855 ret = PTR_ERR(rm); 962 if (ret < 0)
856 rm = NULL; 963 goto out;
964
965 rm = rds_message_alloc(ret, GFP_KERNEL);
966 if (!rm) {
967 ret = -ENOMEM;
857 goto out; 968 goto out;
858 } 969 }
859 970
971 /* Attach data to the rm */
972 if (payload_len) {
973 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
974 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
975 if (ret)
976 goto out;
977 }
978 rm->data.op_active = 1;
979
860 rm->m_daddr = daddr; 980 rm->m_daddr = daddr;
861 981
862 /* rds_conn_create has a spinlock that runs with IRQ off. 982 /* rds_conn_create has a spinlock that runs with IRQ off.
@@ -879,22 +999,23 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
879 if (ret) 999 if (ret)
880 goto out; 1000 goto out;
881 1001
882 if ((rm->m_rdma_cookie || rm->m_rdma_op) && 1002 if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
883 conn->c_trans->xmit_rdma == NULL) {
884 if (printk_ratelimit()) 1003 if (printk_ratelimit())
885 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", 1004 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
886 rm->m_rdma_op, conn->c_trans->xmit_rdma); 1005 &rm->rdma, conn->c_trans->xmit_rdma);
887 ret = -EOPNOTSUPP; 1006 ret = -EOPNOTSUPP;
888 goto out; 1007 goto out;
889 } 1008 }
890 1009
891 /* If the connection is down, trigger a connect. We may 1010 if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) {
892 * have scheduled a delayed reconnect however - in this case 1011 if (printk_ratelimit())
893 * we should not interfere. 1012 printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n",
894 */ 1013 &rm->atomic, conn->c_trans->xmit_atomic);
895 if (rds_conn_state(conn) == RDS_CONN_DOWN && 1014 ret = -EOPNOTSUPP;
896 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) 1015 goto out;
897 queue_delayed_work(rds_wq, &conn->c_conn_w, 0); 1016 }
1017
1018 rds_conn_connect_if_down(conn);
898 1019
899 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); 1020 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
900 if (ret) { 1021 if (ret) {
@@ -938,7 +1059,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
938 rds_stats_inc(s_send_queued); 1059 rds_stats_inc(s_send_queued);
939 1060
940 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) 1061 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
941 rds_send_worker(&conn->c_send_w.work); 1062 rds_send_xmit(conn);
942 1063
943 rds_message_put(rm); 1064 rds_message_put(rm);
944 return payload_len; 1065 return payload_len;
@@ -966,20 +1087,15 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
966 int ret = 0; 1087 int ret = 0;
967 1088
968 rm = rds_message_alloc(0, GFP_ATOMIC); 1089 rm = rds_message_alloc(0, GFP_ATOMIC);
969 if (rm == NULL) { 1090 if (!rm) {
970 ret = -ENOMEM; 1091 ret = -ENOMEM;
971 goto out; 1092 goto out;
972 } 1093 }
973 1094
974 rm->m_daddr = conn->c_faddr; 1095 rm->m_daddr = conn->c_faddr;
1096 rm->data.op_active = 1;
975 1097
976 /* If the connection is down, trigger a connect. We may 1098 rds_conn_connect_if_down(conn);
977 * have scheduled a delayed reconnect however - in this case
978 * we should not interfere.
979 */
980 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
981 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
982 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
983 1099
984 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); 1100 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL);
985 if (ret) 1101 if (ret)
@@ -999,7 +1115,9 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
999 rds_stats_inc(s_send_queued); 1115 rds_stats_inc(s_send_queued);
1000 rds_stats_inc(s_send_pong); 1116 rds_stats_inc(s_send_pong);
1001 1117
1002 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 1118 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
1119 rds_send_xmit(conn);
1120
1003 rds_message_put(rm); 1121 rds_message_put(rm);
1004 return 0; 1122 return 0;
1005 1123
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 7598eb07cfb1..10c759ccac0c 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -57,8 +57,8 @@ static const char *const rds_stat_names[] = {
57 "recv_ping", 57 "recv_ping",
58 "send_queue_empty", 58 "send_queue_empty",
59 "send_queue_full", 59 "send_queue_full",
60 "send_sem_contention", 60 "send_lock_contention",
61 "send_sem_queue_raced", 61 "send_lock_queue_raced",
62 "send_immediate_retry", 62 "send_immediate_retry",
63 "send_delayed_retry", 63 "send_delayed_retry",
64 "send_drop_acked", 64 "send_drop_acked",
@@ -143,7 +143,7 @@ void rds_stats_exit(void)
143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info); 143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info);
144} 144}
145 145
146int __init rds_stats_init(void) 146int rds_stats_init(void)
147{ 147{
148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info); 148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info);
149 return 0; 149 return 0;
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index 7829a20325d3..25ad0c77a26c 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -105,13 +105,13 @@ void rds_sysctl_exit(void)
105 unregister_sysctl_table(rds_sysctl_reg_table); 105 unregister_sysctl_table(rds_sysctl_reg_table);
106} 106}
107 107
108int __init rds_sysctl_init(void) 108int rds_sysctl_init(void)
109{ 109{
110 rds_sysctl_reconnect_min = msecs_to_jiffies(1); 110 rds_sysctl_reconnect_min = msecs_to_jiffies(1);
111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; 111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
112 112
113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table); 113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table);
114 if (rds_sysctl_reg_table == NULL) 114 if (!rds_sysctl_reg_table)
115 return -ENOMEM; 115 return -ENOMEM;
116 return 0; 116 return 0;
117} 117}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index babf4577ff7d..eeb08e6ab96b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -200,7 +200,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
200 struct rds_tcp_connection *tc; 200 struct rds_tcp_connection *tc;
201 201
202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); 202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
203 if (tc == NULL) 203 if (!tc)
204 return -ENOMEM; 204 return -ENOMEM;
205 205
206 tc->t_sock = NULL; 206 tc->t_sock = NULL;
@@ -258,7 +258,6 @@ struct rds_transport rds_tcp_transport = {
258 .laddr_check = rds_tcp_laddr_check, 258 .laddr_check = rds_tcp_laddr_check,
259 .xmit_prepare = rds_tcp_xmit_prepare, 259 .xmit_prepare = rds_tcp_xmit_prepare,
260 .xmit_complete = rds_tcp_xmit_complete, 260 .xmit_complete = rds_tcp_xmit_complete,
261 .xmit_cong_map = rds_tcp_xmit_cong_map,
262 .xmit = rds_tcp_xmit, 261 .xmit = rds_tcp_xmit,
263 .recv = rds_tcp_recv, 262 .recv = rds_tcp_recv,
264 .conn_alloc = rds_tcp_conn_alloc, 263 .conn_alloc = rds_tcp_conn_alloc,
@@ -266,7 +265,6 @@ struct rds_transport rds_tcp_transport = {
266 .conn_connect = rds_tcp_conn_connect, 265 .conn_connect = rds_tcp_conn_connect,
267 .conn_shutdown = rds_tcp_conn_shutdown, 266 .conn_shutdown = rds_tcp_conn_shutdown,
268 .inc_copy_to_user = rds_tcp_inc_copy_to_user, 267 .inc_copy_to_user = rds_tcp_inc_copy_to_user,
269 .inc_purge = rds_tcp_inc_purge,
270 .inc_free = rds_tcp_inc_free, 268 .inc_free = rds_tcp_inc_free,
271 .stats_info_copy = rds_tcp_stats_info_copy, 269 .stats_info_copy = rds_tcp_stats_info_copy,
272 .exit = rds_tcp_exit, 270 .exit = rds_tcp_exit,
@@ -276,14 +274,14 @@ struct rds_transport rds_tcp_transport = {
276 .t_prefer_loopback = 1, 274 .t_prefer_loopback = 1,
277}; 275};
278 276
279int __init rds_tcp_init(void) 277int rds_tcp_init(void)
280{ 278{
281 int ret; 279 int ret;
282 280
283 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", 281 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection",
284 sizeof(struct rds_tcp_connection), 282 sizeof(struct rds_tcp_connection),
285 0, 0, NULL); 283 0, 0, NULL);
286 if (rds_tcp_conn_slab == NULL) { 284 if (!rds_tcp_conn_slab) {
287 ret = -ENOMEM; 285 ret = -ENOMEM;
288 goto out; 286 goto out;
289 } 287 }
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 844fa6b9cf5a..f5e6f7bebb50 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -43,7 +43,7 @@ struct rds_tcp_statistics {
43}; 43};
44 44
45/* tcp.c */ 45/* tcp.c */
46int __init rds_tcp_init(void); 46int rds_tcp_init(void);
47void rds_tcp_exit(void); 47void rds_tcp_exit(void);
48void rds_tcp_tune(struct socket *sock); 48void rds_tcp_tune(struct socket *sock);
49void rds_tcp_nonagle(struct socket *sock); 49void rds_tcp_nonagle(struct socket *sock);
@@ -61,16 +61,15 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn);
61void rds_tcp_state_change(struct sock *sk); 61void rds_tcp_state_change(struct sock *sk);
62 62
63/* tcp_listen.c */ 63/* tcp_listen.c */
64int __init rds_tcp_listen_init(void); 64int rds_tcp_listen_init(void);
65void rds_tcp_listen_stop(void); 65void rds_tcp_listen_stop(void);
66void rds_tcp_listen_data_ready(struct sock *sk, int bytes); 66void rds_tcp_listen_data_ready(struct sock *sk, int bytes);
67 67
68/* tcp_recv.c */ 68/* tcp_recv.c */
69int __init rds_tcp_recv_init(void); 69int rds_tcp_recv_init(void);
70void rds_tcp_recv_exit(void); 70void rds_tcp_recv_exit(void);
71void rds_tcp_data_ready(struct sock *sk, int bytes); 71void rds_tcp_data_ready(struct sock *sk, int bytes);
72int rds_tcp_recv(struct rds_connection *conn); 72int rds_tcp_recv(struct rds_connection *conn);
73void rds_tcp_inc_purge(struct rds_incoming *inc);
74void rds_tcp_inc_free(struct rds_incoming *inc); 73void rds_tcp_inc_free(struct rds_incoming *inc);
75int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 74int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
76 size_t size); 75 size_t size);
@@ -81,8 +80,6 @@ void rds_tcp_xmit_complete(struct rds_connection *conn);
81int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 80int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
82 unsigned int hdr_off, unsigned int sg, unsigned int off); 81 unsigned int hdr_off, unsigned int sg, unsigned int off);
83void rds_tcp_write_space(struct sock *sk); 82void rds_tcp_write_space(struct sock *sk);
84int rds_tcp_xmit_cong_map(struct rds_connection *conn,
85 struct rds_cong_map *map, unsigned long offset);
86 83
87/* tcp_stats.c */ 84/* tcp_stats.c */
88DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats); 85DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index c397524c039c..a65ee78db0c5 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -45,7 +45,7 @@ void rds_tcp_state_change(struct sock *sk)
45 45
46 read_lock(&sk->sk_callback_lock); 46 read_lock(&sk->sk_callback_lock);
47 conn = sk->sk_user_data; 47 conn = sk->sk_user_data;
48 if (conn == NULL) { 48 if (!conn) {
49 state_change = sk->sk_state_change; 49 state_change = sk->sk_state_change;
50 goto out; 50 goto out;
51 } 51 }
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 975183fe6950..ae27869dfc21 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -116,7 +116,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
116 116
117 read_lock(&sk->sk_callback_lock); 117 read_lock(&sk->sk_callback_lock);
118 ready = sk->sk_user_data; 118 ready = sk->sk_user_data;
119 if (ready == NULL) { /* check for teardown race */ 119 if (!ready) { /* check for teardown race */
120 ready = sk->sk_data_ready; 120 ready = sk->sk_data_ready;
121 goto out; 121 goto out;
122 } 122 }
@@ -135,7 +135,7 @@ out:
135 ready(sk, bytes); 135 ready(sk, bytes);
136} 136}
137 137
138int __init rds_tcp_listen_init(void) 138int rds_tcp_listen_init(void)
139{ 139{
140 struct sockaddr_in sin; 140 struct sockaddr_in sin;
141 struct socket *sock = NULL; 141 struct socket *sock = NULL;
@@ -178,7 +178,7 @@ void rds_tcp_listen_stop(void)
178 struct socket *sock = rds_tcp_listen_sock; 178 struct socket *sock = rds_tcp_listen_sock;
179 struct sock *sk; 179 struct sock *sk;
180 180
181 if (sock == NULL) 181 if (!sock)
182 return; 182 return;
183 183
184 sk = sock->sk; 184 sk = sock->sk;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 1aba6878fa5d..7017f3af80b6 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -39,7 +39,7 @@
39 39
40static struct kmem_cache *rds_tcp_incoming_slab; 40static struct kmem_cache *rds_tcp_incoming_slab;
41 41
42void rds_tcp_inc_purge(struct rds_incoming *inc) 42static void rds_tcp_inc_purge(struct rds_incoming *inc)
43{ 43{
44 struct rds_tcp_incoming *tinc; 44 struct rds_tcp_incoming *tinc;
45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); 45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc);
@@ -190,10 +190,10 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
190 * processing. 190 * processing.
191 */ 191 */
192 while (left) { 192 while (left) {
193 if (tinc == NULL) { 193 if (!tinc) {
194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab, 194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab,
195 arg->gfp); 195 arg->gfp);
196 if (tinc == NULL) { 196 if (!tinc) {
197 desc->error = -ENOMEM; 197 desc->error = -ENOMEM;
198 goto out; 198 goto out;
199 } 199 }
@@ -229,7 +229,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
229 229
230 if (left && tc->t_tinc_data_rem) { 230 if (left && tc->t_tinc_data_rem) {
231 clone = skb_clone(skb, arg->gfp); 231 clone = skb_clone(skb, arg->gfp);
232 if (clone == NULL) { 232 if (!clone) {
233 desc->error = -ENOMEM; 233 desc->error = -ENOMEM;
234 goto out; 234 goto out;
235 } 235 }
@@ -326,7 +326,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
326 326
327 read_lock(&sk->sk_callback_lock); 327 read_lock(&sk->sk_callback_lock);
328 conn = sk->sk_user_data; 328 conn = sk->sk_user_data;
329 if (conn == NULL) { /* check for teardown race */ 329 if (!conn) { /* check for teardown race */
330 ready = sk->sk_data_ready; 330 ready = sk->sk_data_ready;
331 goto out; 331 goto out;
332 } 332 }
@@ -342,12 +342,12 @@ out:
342 ready(sk, bytes); 342 ready(sk, bytes);
343} 343}
344 344
345int __init rds_tcp_recv_init(void) 345int rds_tcp_recv_init(void)
346{ 346{
347 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", 347 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming",
348 sizeof(struct rds_tcp_incoming), 348 sizeof(struct rds_tcp_incoming),
349 0, 0, NULL); 349 0, 0, NULL);
350 if (rds_tcp_incoming_slab == NULL) 350 if (!rds_tcp_incoming_slab)
351 return -ENOMEM; 351 return -ENOMEM;
352 return 0; 352 return 0;
353} 353}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index a28b895ff0d1..2979fb4a4b9a 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -77,56 +77,6 @@ int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
77} 77}
78 78
79/* the core send_sem serializes this with other xmit and shutdown */ 79/* the core send_sem serializes this with other xmit and shutdown */
80int rds_tcp_xmit_cong_map(struct rds_connection *conn,
81 struct rds_cong_map *map, unsigned long offset)
82{
83 static struct rds_header rds_tcp_map_header = {
84 .h_flags = RDS_FLAG_CONG_BITMAP,
85 };
86 struct rds_tcp_connection *tc = conn->c_transport_data;
87 unsigned long i;
88 int ret;
89 int copied = 0;
90
91 /* Some problem claims cpu_to_be32(constant) isn't a constant. */
92 rds_tcp_map_header.h_len = cpu_to_be32(RDS_CONG_MAP_BYTES);
93
94 if (offset < sizeof(struct rds_header)) {
95 ret = rds_tcp_sendmsg(tc->t_sock,
96 (void *)&rds_tcp_map_header + offset,
97 sizeof(struct rds_header) - offset);
98 if (ret <= 0)
99 return ret;
100 offset += ret;
101 copied = ret;
102 if (offset < sizeof(struct rds_header))
103 return ret;
104 }
105
106 offset -= sizeof(struct rds_header);
107 i = offset / PAGE_SIZE;
108 offset = offset % PAGE_SIZE;
109 BUG_ON(i >= RDS_CONG_MAP_PAGES);
110
111 do {
112 ret = tc->t_sock->ops->sendpage(tc->t_sock,
113 virt_to_page(map->m_page_addrs[i]),
114 offset, PAGE_SIZE - offset,
115 MSG_DONTWAIT);
116 if (ret <= 0)
117 break;
118 copied += ret;
119 offset += ret;
120 if (offset == PAGE_SIZE) {
121 offset = 0;
122 i++;
123 }
124 } while (i < RDS_CONG_MAP_PAGES);
125
126 return copied ? copied : ret;
127}
128
129/* the core send_sem serializes this with other xmit and shutdown */
130int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 80int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
131 unsigned int hdr_off, unsigned int sg, unsigned int off) 81 unsigned int hdr_off, unsigned int sg, unsigned int off)
132{ 82{
@@ -166,21 +116,21 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
166 goto out; 116 goto out;
167 } 117 }
168 118
169 while (sg < rm->m_nents) { 119 while (sg < rm->data.op_nents) {
170 ret = tc->t_sock->ops->sendpage(tc->t_sock, 120 ret = tc->t_sock->ops->sendpage(tc->t_sock,
171 sg_page(&rm->m_sg[sg]), 121 sg_page(&rm->data.op_sg[sg]),
172 rm->m_sg[sg].offset + off, 122 rm->data.op_sg[sg].offset + off,
173 rm->m_sg[sg].length - off, 123 rm->data.op_sg[sg].length - off,
174 MSG_DONTWAIT|MSG_NOSIGNAL); 124 MSG_DONTWAIT|MSG_NOSIGNAL);
175 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]), 125 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
176 rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off, 126 rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
177 ret); 127 ret);
178 if (ret <= 0) 128 if (ret <= 0)
179 break; 129 break;
180 130
181 off += ret; 131 off += ret;
182 done += ret; 132 done += ret;
183 if (off == rm->m_sg[sg].length) { 133 if (off == rm->data.op_sg[sg].length) {
184 off = 0; 134 off = 0;
185 sg++; 135 sg++;
186 } 136 }
@@ -226,7 +176,7 @@ void rds_tcp_write_space(struct sock *sk)
226 176
227 read_lock(&sk->sk_callback_lock); 177 read_lock(&sk->sk_callback_lock);
228 conn = sk->sk_user_data; 178 conn = sk->sk_user_data;
229 if (conn == NULL) { 179 if (!conn) {
230 write_space = sk->sk_write_space; 180 write_space = sk->sk_write_space;
231 goto out; 181 goto out;
232 } 182 }
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 786c20eaaf5e..0fd90f8c5f59 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -61,7 +61,7 @@
61 * 61 *
62 * Transition to state DISCONNECTING/DOWN: 62 * Transition to state DISCONNECTING/DOWN:
63 * - Inside the shutdown worker; synchronizes with xmit path 63 * - Inside the shutdown worker; synchronizes with xmit path
64 * through c_send_lock, and with connection management callbacks 64 * through RDS_IN_XMIT, and with connection management callbacks
65 * via c_cm_lock. 65 * via c_cm_lock.
66 * 66 *
67 * For receive callbacks, we rely on the underlying transport 67 * For receive callbacks, we rely on the underlying transport
@@ -110,7 +110,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
110 * We should *always* start with a random backoff; otherwise a broken connection 110 * We should *always* start with a random backoff; otherwise a broken connection
111 * will always take several iterations to be re-established. 111 * will always take several iterations to be re-established.
112 */ 112 */
113static void rds_queue_reconnect(struct rds_connection *conn) 113void rds_queue_reconnect(struct rds_connection *conn)
114{ 114{
115 unsigned long rand; 115 unsigned long rand;
116 116
@@ -156,58 +156,6 @@ void rds_connect_worker(struct work_struct *work)
156 } 156 }
157} 157}
158 158
159void rds_shutdown_worker(struct work_struct *work)
160{
161 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
162
163 /* shut it down unless it's down already */
164 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
165 /*
166 * Quiesce the connection mgmt handlers before we start tearing
167 * things down. We don't hold the mutex for the entire
168 * duration of the shutdown operation, else we may be
169 * deadlocking with the CM handler. Instead, the CM event
170 * handler is supposed to check for state DISCONNECTING
171 */
172 mutex_lock(&conn->c_cm_lock);
173 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) &&
174 !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
175 rds_conn_error(conn, "shutdown called in state %d\n",
176 atomic_read(&conn->c_state));
177 mutex_unlock(&conn->c_cm_lock);
178 return;
179 }
180 mutex_unlock(&conn->c_cm_lock);
181
182 mutex_lock(&conn->c_send_lock);
183 conn->c_trans->conn_shutdown(conn);
184 rds_conn_reset(conn);
185 mutex_unlock(&conn->c_send_lock);
186
187 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
188 /* This can happen - eg when we're in the middle of tearing
189 * down the connection, and someone unloads the rds module.
190 * Quite reproduceable with loopback connections.
191 * Mostly harmless.
192 */
193 rds_conn_error(conn,
194 "%s: failed to transition to state DOWN, "
195 "current state is %d\n",
196 __func__,
197 atomic_read(&conn->c_state));
198 return;
199 }
200 }
201
202 /* Then reconnect if it's still live.
203 * The passive side of an IB loopback connection is never added
204 * to the conn hash, so we never trigger a reconnect on this
205 * conn - the reconnect is always triggered by the active peer. */
206 cancel_delayed_work(&conn->c_conn_w);
207 if (!hlist_unhashed(&conn->c_hash_node))
208 rds_queue_reconnect(conn);
209}
210
211void rds_send_worker(struct work_struct *work) 159void rds_send_worker(struct work_struct *work)
212{ 160{
213 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work); 161 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
@@ -252,15 +200,22 @@ void rds_recv_worker(struct work_struct *work)
252 } 200 }
253} 201}
254 202
203void rds_shutdown_worker(struct work_struct *work)
204{
205 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
206
207 rds_conn_shutdown(conn);
208}
209
255void rds_threads_exit(void) 210void rds_threads_exit(void)
256{ 211{
257 destroy_workqueue(rds_wq); 212 destroy_workqueue(rds_wq);
258} 213}
259 214
260int __init rds_threads_init(void) 215int rds_threads_init(void)
261{ 216{
262 rds_wq = create_workqueue("krdsd"); 217 rds_wq = create_singlethread_workqueue("krdsd");
263 if (rds_wq == NULL) 218 if (!rds_wq)
264 return -ENOMEM; 219 return -ENOMEM;
265 220
266 return 0; 221 return 0;
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7e1067901353..7f2ac4fec367 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -71,19 +71,28 @@ void rds_trans_unregister(struct rds_transport *trans)
71} 71}
72EXPORT_SYMBOL_GPL(rds_trans_unregister); 72EXPORT_SYMBOL_GPL(rds_trans_unregister);
73 73
74void rds_trans_put(struct rds_transport *trans)
75{
76 if (trans && trans->t_owner)
77 module_put(trans->t_owner);
78}
79
74struct rds_transport *rds_trans_get_preferred(__be32 addr) 80struct rds_transport *rds_trans_get_preferred(__be32 addr)
75{ 81{
76 struct rds_transport *ret = NULL; 82 struct rds_transport *ret = NULL;
77 int i; 83 struct rds_transport *trans;
84 unsigned int i;
78 85
79 if (IN_LOOPBACK(ntohl(addr))) 86 if (IN_LOOPBACK(ntohl(addr)))
80 return &rds_loop_transport; 87 return &rds_loop_transport;
81 88
82 down_read(&rds_trans_sem); 89 down_read(&rds_trans_sem);
83 for (i = 0; i < RDS_TRANS_COUNT; i++) 90 for (i = 0; i < RDS_TRANS_COUNT; i++) {
84 { 91 trans = transports[i];
85 if (transports[i] && (transports[i]->laddr_check(addr) == 0)) { 92
86 ret = transports[i]; 93 if (trans && (trans->laddr_check(addr) == 0) &&
94 (!trans->t_owner || try_module_get(trans->t_owner))) {
95 ret = trans;
87 break; 96 break;
88 } 97 }
89 } 98 }
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 000000000000..e6b5190daddd
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,80 @@
1#ifndef _LINUX_XLIST_H
2#define _LINUX_XLIST_H
3
4#include <linux/stddef.h>
5#include <linux/poison.h>
6#include <linux/prefetch.h>
7#include <asm/system.h>
8
9struct xlist_head {
10 struct xlist_head *next;
11};
12
13static inline void INIT_XLIST_HEAD(struct xlist_head *list)
14{
15 list->next = NULL;
16}
17
18static inline int xlist_empty(struct xlist_head *head)
19{
20 return head->next == NULL;
21}
22
23static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
24 struct xlist_head *head)
25{
26 struct xlist_head *cur;
27 struct xlist_head *check;
28
29 while (1) {
30 cur = head->next;
31 tail->next = cur;
32 check = cmpxchg(&head->next, cur, new);
33 if (check == cur)
34 break;
35 }
36}
37
38static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
39{
40 struct xlist_head *cur;
41 struct xlist_head *check;
42 struct xlist_head *next;
43
44 while (1) {
45 cur = head->next;
46 if (!cur)
47 goto out;
48
49 next = cur->next;
50 check = cmpxchg(&head->next, cur, next);
51 if (check == cur)
52 goto out;
53 }
54out:
55 return cur;
56}
57
58static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
59{
60 struct xlist_head *cur;
61
62 cur = head->next;
63 if (!cur)
64 return NULL;
65
66 head->next = cur->next;
67 return cur;
68}
69
70static inline void xlist_splice(struct xlist_head *list,
71 struct xlist_head *head)
72{
73 struct xlist_head *cur;
74
75 WARN_ON(head->next);
76 cur = xchg(&list->next, NULL);
77 head->next = cur;
78}
79
80#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d1..a36270a994d7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT
518 To compile this code as a module, choose M here: the 518 To compile this code as a module, choose M here: the
519 module will be called act_skbedit. 519 module will be called act_skbedit.
520 520
521config NET_ACT_CSUM
522 tristate "Checksum Updating"
523 depends on NET_CLS_ACT && INET
524 ---help---
525 Say Y here to update some common checksum after some direct
526 packet alterations.
527
528 To compile this code as a module, choose M here: the
529 module will be called act_csum.
530
521config NET_CLS_IND 531config NET_CLS_IND
522 bool "Incoming device classification" 532 bool "Incoming device classification"
523 depends on NET_CLS_U32 || NET_CLS_FW 533 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58f..960f5dba6304 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o 15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o 16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o 17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
18obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 19obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
19obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 20obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
20obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 21obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 000000000000..67dc7ce9b63a
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,595 @@
1/*
2 * Checksum updating actions
3 *
4 * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/spinlock.h>
18
19#include <linux/netlink.h>
20#include <net/netlink.h>
21#include <linux/rtnetlink.h>
22
23#include <linux/skbuff.h>
24
25#include <net/ip.h>
26#include <net/ipv6.h>
27#include <net/icmp.h>
28#include <linux/icmpv6.h>
29#include <linux/igmp.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/ip6_checksum.h>
33
34#include <net/act_api.h>
35
36#include <linux/tc_act/tc_csum.h>
37#include <net/tc_act/tc_csum.h>
38
39#define CSUM_TAB_MASK 15
40static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
41static u32 csum_idx_gen;
42static DEFINE_RWLOCK(csum_lock);
43
44static struct tcf_hashinfo csum_hash_info = {
45 .htab = tcf_csum_ht,
46 .hmask = CSUM_TAB_MASK,
47 .lock = &csum_lock,
48};
49
50static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
51 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
52};
53
54static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
55 struct tc_action *a, int ovr, int bind)
56{
57 struct nlattr *tb[TCA_CSUM_MAX + 1];
58 struct tc_csum *parm;
59 struct tcf_common *pc;
60 struct tcf_csum *p;
61 int ret = 0, err;
62
63 if (nla == NULL)
64 return -EINVAL;
65
66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy);
67 if (err < 0)
68 return err;
69
70 if (tb[TCA_CSUM_PARMS] == NULL)
71 return -EINVAL;
72 parm = nla_data(tb[TCA_CSUM_PARMS]);
73
74 pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info);
75 if (!pc) {
76 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
77 &csum_idx_gen, &csum_hash_info);
78 if (IS_ERR(pc))
79 return PTR_ERR(pc);
80 p = to_tcf_csum(pc);
81 ret = ACT_P_CREATED;
82 } else {
83 p = to_tcf_csum(pc);
84 if (!ovr) {
85 tcf_hash_release(pc, bind, &csum_hash_info);
86 return -EEXIST;
87 }
88 }
89
90 spin_lock_bh(&p->tcf_lock);
91 p->tcf_action = parm->action;
92 p->update_flags = parm->update_flags;
93 spin_unlock_bh(&p->tcf_lock);
94
95 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(pc, &csum_hash_info);
97
98 return ret;
99}
100
101static int tcf_csum_cleanup(struct tc_action *a, int bind)
102{
103 struct tcf_csum *p = a->priv;
104 return tcf_hash_release(&p->common, bind, &csum_hash_info);
105}
106
107/**
108 * tcf_csum_skb_nextlayer - Get next layer pointer
109 * @skb: sk_buff to use
110 * @ihl: previous summed headers length
111 * @ipl: complete packet length
112 * @jhl: next header length
113 *
114 * Check the expected next layer availability in the specified sk_buff.
115 * Return the next layer pointer if pass, NULL otherwise.
116 */
117static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
118 unsigned int ihl, unsigned int ipl,
119 unsigned int jhl)
120{
121 int ntkoff = skb_network_offset(skb);
122 int hl = ihl + jhl;
123
124 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
125 (skb_cloned(skb) &&
126 !skb_clone_writable(skb, hl + ntkoff) &&
127 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
128 return NULL;
129 else
130 return (void *)(skb_network_header(skb) + ihl);
131}
132
133static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
134 unsigned int ihl, unsigned int ipl)
135{
136 struct icmphdr *icmph;
137
138 icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
139 if (icmph == NULL)
140 return 0;
141
142 icmph->checksum = 0;
143 skb->csum = csum_partial(icmph, ipl - ihl, 0);
144 icmph->checksum = csum_fold(skb->csum);
145
146 skb->ip_summed = CHECKSUM_NONE;
147
148 return 1;
149}
150
151static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
152 unsigned int ihl, unsigned int ipl)
153{
154 struct igmphdr *igmph;
155
156 igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
157 if (igmph == NULL)
158 return 0;
159
160 igmph->csum = 0;
161 skb->csum = csum_partial(igmph, ipl - ihl, 0);
162 igmph->csum = csum_fold(skb->csum);
163
164 skb->ip_summed = CHECKSUM_NONE;
165
166 return 1;
167}
168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
170 unsigned int ihl, unsigned int ipl)
171{
172 struct icmp6hdr *icmp6h;
173
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL)
176 return 0;
177
178 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
181 ipl - ihl, IPPROTO_ICMPV6,
182 skb->csum);
183
184 skb->ip_summed = CHECKSUM_NONE;
185
186 return 1;
187}
188
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
190 unsigned int ihl, unsigned int ipl)
191{
192 struct tcphdr *tcph;
193
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL)
196 return 0;
197
198 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl,
201 iph->saddr, iph->daddr, skb->csum);
202
203 skb->ip_summed = CHECKSUM_NONE;
204
205 return 1;
206}
207
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
209 unsigned int ihl, unsigned int ipl)
210{
211 struct tcphdr *tcph;
212
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL)
215 return 0;
216
217 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
220 ipl - ihl, IPPROTO_TCP,
221 skb->csum);
222
223 skb->ip_summed = CHECKSUM_NONE;
224
225 return 1;
226}
227
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
229 unsigned int ihl, unsigned int ipl, int udplite)
230{
231 struct udphdr *udph;
232 u16 ul;
233
234 /*
235 * Support both UDP and UDPLITE checksum algorithms, Don't use
236 * udph->len to get the real length without any protocol check,
237 * UDPLITE uses udph->len for another thing,
238 * Use iph->tot_len, or just ipl.
239 */
240
241 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
242 if (udph == NULL)
243 return 0;
244
245 ul = ntohs(udph->len);
246
247 if (udplite || udph->check) {
248
249 udph->check = 0;
250
251 if (udplite) {
252 if (ul == 0)
253 skb->csum = csum_partial(udph, ipl - ihl, 0);
254 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
255 skb->csum = csum_partial(udph, ul, 0);
256 else
257 goto ignore_obscure_skb;
258 } else {
259 if (ul != ipl - ihl)
260 goto ignore_obscure_skb;
261
262 skb->csum = csum_partial(udph, ul, 0);
263 }
264
265 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
266 ul, iph->protocol,
267 skb->csum);
268
269 if (!udph->check)
270 udph->check = CSUM_MANGLED_0;
271 }
272
273 skb->ip_summed = CHECKSUM_NONE;
274
275ignore_obscure_skb:
276 return 1;
277}
278
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
280 unsigned int ihl, unsigned int ipl, int udplite)
281{
282 struct udphdr *udph;
283 u16 ul;
284
285 /*
286 * Support both UDP and UDPLITE checksum algorithms, Don't use
287 * udph->len to get the real length without any protocol check,
288 * UDPLITE uses udph->len for another thing,
289 * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl.
290 */
291
292 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
293 if (udph == NULL)
294 return 0;
295
296 ul = ntohs(udph->len);
297
298 udph->check = 0;
299
300 if (udplite) {
301 if (ul == 0)
302 skb->csum = csum_partial(udph, ipl - ihl, 0);
303
304 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
305 skb->csum = csum_partial(udph, ul, 0);
306
307 else
308 goto ignore_obscure_skb;
309 } else {
310 if (ul != ipl - ihl)
311 goto ignore_obscure_skb;
312
313 skb->csum = csum_partial(udph, ul, 0);
314 }
315
316 udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
317 udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
318 skb->csum);
319
320 if (!udph->check)
321 udph->check = CSUM_MANGLED_0;
322
323 skb->ip_summed = CHECKSUM_NONE;
324
325ignore_obscure_skb:
326 return 1;
327}
328
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{
331 struct iphdr *iph;
332 int ntkoff;
333
334 ntkoff = skb_network_offset(skb);
335
336 if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
337 goto fail;
338
339 iph = ip_hdr(skb);
340
341 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
342 case IPPROTO_ICMP:
343 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
344 if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
345 ntohs(iph->tot_len)))
346 goto fail;
347 break;
348 case IPPROTO_IGMP:
349 if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
350 if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
351 ntohs(iph->tot_len)))
352 goto fail;
353 break;
354 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
357 ntohs(iph->tot_len)))
358 goto fail;
359 break;
360 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
363 ntohs(iph->tot_len), 0))
364 goto fail;
365 break;
366 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
369 ntohs(iph->tot_len), 1))
370 goto fail;
371 break;
372 }
373
374 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
375 if (skb_cloned(skb) &&
376 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail;
379
380 ip_send_check(iph);
381 }
382
383 return 1;
384
385fail:
386 return 0;
387}
388
389static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
390 unsigned int ixhl, unsigned int *pl)
391{
392 int off, len, optlen;
393 unsigned char *xh = (void *)ip6xh;
394
395 off = sizeof(*ip6xh);
396 len = ixhl - off;
397
398 while (len > 1) {
399 switch (xh[off]) {
400 case IPV6_TLV_PAD0:
401 optlen = 1;
402 break;
403 case IPV6_TLV_JUMBO:
404 optlen = xh[off + 1] + 2;
405 if (optlen != 6 || len < 6 || (off & 3) != 2)
406 /* wrong jumbo option length/alignment */
407 return 0;
408 *pl = ntohl(*(__be32 *)(xh + off + 2));
409 goto done;
410 default:
411 optlen = xh[off + 1] + 2;
412 if (optlen > len)
413 /* ignore obscure options */
414 goto done;
415 break;
416 }
417 off += optlen;
418 len -= optlen;
419 }
420
421done:
422 return 1;
423}
424
425static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
426{
427 struct ipv6hdr *ip6h;
428 struct ipv6_opt_hdr *ip6xh;
429 unsigned int hl, ixhl;
430 unsigned int pl;
431 int ntkoff;
432 u8 nexthdr;
433
434 ntkoff = skb_network_offset(skb);
435
436 hl = sizeof(*ip6h);
437
438 if (!pskb_may_pull(skb, hl + ntkoff))
439 goto fail;
440
441 ip6h = ipv6_hdr(skb);
442
443 pl = ntohs(ip6h->payload_len);
444 nexthdr = ip6h->nexthdr;
445
446 do {
447 switch (nexthdr) {
448 case NEXTHDR_FRAGMENT:
449 goto ignore_skb;
450 case NEXTHDR_ROUTING:
451 case NEXTHDR_HOP:
452 case NEXTHDR_DEST:
453 if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
454 goto fail;
455 ip6xh = (void *)(skb_network_header(skb) + hl);
456 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail;
459 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail;
462 nexthdr = ip6xh->nexthdr;
463 hl += ixhl;
464 break;
465 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h,
468 hl, pl + sizeof(*ip6h)))
469 goto fail;
470 goto done;
471 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h,
474 hl, pl + sizeof(*ip6h)))
475 goto fail;
476 goto done;
477 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
480 pl + sizeof(*ip6h), 0))
481 goto fail;
482 goto done;
483 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
486 pl + sizeof(*ip6h), 1))
487 goto fail;
488 goto done;
489 default:
490 goto ignore_skb;
491 }
492 } while (pskb_may_pull(skb, hl + 1 + ntkoff));
493
494done:
495ignore_skb:
496 return 1;
497
498fail:
499 return 0;
500}
501
502static int tcf_csum(struct sk_buff *skb,
503 struct tc_action *a, struct tcf_result *res)
504{
505 struct tcf_csum *p = a->priv;
506 int action;
507 u32 update_flags;
508
509 spin_lock(&p->tcf_lock);
510 p->tcf_tm.lastuse = jiffies;
511 p->tcf_bstats.bytes += qdisc_pkt_len(skb);
512 p->tcf_bstats.packets++;
513 action = p->tcf_action;
514 update_flags = p->update_flags;
515 spin_unlock(&p->tcf_lock);
516
517 if (unlikely(action == TC_ACT_SHOT))
518 goto drop;
519
520 switch (skb->protocol) {
521 case cpu_to_be16(ETH_P_IP):
522 if (!tcf_csum_ipv4(skb, update_flags))
523 goto drop;
524 break;
525 case cpu_to_be16(ETH_P_IPV6):
526 if (!tcf_csum_ipv6(skb, update_flags))
527 goto drop;
528 break;
529 }
530
531 return action;
532
533drop:
534 spin_lock(&p->tcf_lock);
535 p->tcf_qstats.drops++;
536 spin_unlock(&p->tcf_lock);
537 return TC_ACT_SHOT;
538}
539
540static int tcf_csum_dump(struct sk_buff *skb,
541 struct tc_action *a, int bind, int ref)
542{
543 unsigned char *b = skb_tail_pointer(skb);
544 struct tcf_csum *p = a->priv;
545 struct tc_csum opt = {
546 .update_flags = p->update_flags,
547 .index = p->tcf_index,
548 .action = p->tcf_action,
549 .refcnt = p->tcf_refcnt - ref,
550 .bindcnt = p->tcf_bindcnt - bind,
551 };
552 struct tcf_t t;
553
554 NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
555 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
556 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
557 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
558 NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
559
560 return skb->len;
561
562nla_put_failure:
563 nlmsg_trim(skb, b);
564 return -1;
565}
566
567static struct tc_action_ops act_csum_ops = {
568 .kind = "csum",
569 .hinfo = &csum_hash_info,
570 .type = TCA_ACT_CSUM,
571 .capab = TCA_CAP_NONE,
572 .owner = THIS_MODULE,
573 .act = tcf_csum,
574 .dump = tcf_csum_dump,
575 .cleanup = tcf_csum_cleanup,
576 .lookup = tcf_hash_search,
577 .init = tcf_csum_init,
578 .walk = tcf_generic_walker
579};
580
581MODULE_DESCRIPTION("Checksum updating actions");
582MODULE_LICENSE("GPL");
583
584static int __init csum_init_module(void)
585{
586 return tcf_register_action(&act_csum_ops);
587}
588
589static void __exit csum_cleanup_module(void)
590{
591 tcf_unregister_action(&act_csum_ops);
592}
593
594module_init(csum_init_module);
595module_exit(csum_cleanup_module);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e17096e3913c..5b271a18bc3a 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb)
111 } 111 }
112} 112}
113 113
114static int has_ports(u8 protocol)
115{
116 switch (protocol) {
117 case IPPROTO_TCP:
118 case IPPROTO_UDP:
119 case IPPROTO_UDPLITE:
120 case IPPROTO_SCTP:
121 case IPPROTO_DCCP:
122 case IPPROTO_ESP:
123 return 1;
124 default:
125 return 0;
126 }
127}
128
129static u32 flow_get_proto_src(struct sk_buff *skb) 114static u32 flow_get_proto_src(struct sk_buff *skb)
130{ 115{
131 switch (skb->protocol) { 116 switch (skb->protocol) {
132 case htons(ETH_P_IP): { 117 case htons(ETH_P_IP): {
133 struct iphdr *iph; 118 struct iphdr *iph;
119 int poff;
134 120
135 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
136 break; 122 break;
137 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
138 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 124 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
139 has_ports(iph->protocol) && 125 break;
140 pskb_network_may_pull(skb, iph->ihl * 4 + 2)) 126 poff = proto_ports_offset(iph->protocol);
141 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); 127 if (poff >= 0 &&
128 pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
129 iph = ip_hdr(skb);
130 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
131 poff));
132 }
142 break; 133 break;
143 } 134 }
144 case htons(ETH_P_IPV6): { 135 case htons(ETH_P_IPV6): {
145 struct ipv6hdr *iph; 136 struct ipv6hdr *iph;
137 int poff;
146 138
147 if (!pskb_network_may_pull(skb, sizeof(*iph) + 2)) 139 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 break; 140 break;
149 iph = ipv6_hdr(skb); 141 iph = ipv6_hdr(skb);
150 if (has_ports(iph->nexthdr)) 142 poff = proto_ports_offset(iph->nexthdr);
151 return ntohs(*(__be16 *)&iph[1]); 143 if (poff >= 0 &&
144 pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
145 iph = ipv6_hdr(skb);
146 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
147 poff));
148 }
152 break; 149 break;
153 } 150 }
154 } 151 }
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
161 switch (skb->protocol) { 158 switch (skb->protocol) {
162 case htons(ETH_P_IP): { 159 case htons(ETH_P_IP): {
163 struct iphdr *iph; 160 struct iphdr *iph;
161 int poff;
164 162
165 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
166 break; 164 break;
167 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
168 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 166 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
169 has_ports(iph->protocol) && 167 break;
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 168 poff = proto_ports_offset(iph->protocol);
171 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); 169 if (poff >= 0 &&
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
171 iph = ip_hdr(skb);
172 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
173 2 + poff));
174 }
172 break; 175 break;
173 } 176 }
174 case htons(ETH_P_IPV6): { 177 case htons(ETH_P_IPV6): {
175 struct ipv6hdr *iph; 178 struct ipv6hdr *iph;
179 int poff;
176 180
177 if (!pskb_network_may_pull(skb, sizeof(*iph) + 4)) 181 if (!pskb_network_may_pull(skb, sizeof(*iph)))
178 break; 182 break;
179 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
180 if (has_ports(iph->nexthdr)) 184 poff = proto_ports_offset(iph->nexthdr);
181 return ntohs(*(__be16 *)((void *)&iph[1] + 2)); 185 if (poff >= 0 &&
186 pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
187 iph = ipv6_hdr(skb);
188 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
189 poff + 2));
190 }
182 break; 191 break;
183 } 192 }
184 } 193 }
@@ -297,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
297 return tag & VLAN_VID_MASK; 306 return tag & VLAN_VID_MASK;
298} 307}
299 308
309static u32 flow_get_rxhash(struct sk_buff *skb)
310{
311 return skb_get_rxhash(skb);
312}
313
300static u32 flow_key_get(struct sk_buff *skb, int key) 314static u32 flow_key_get(struct sk_buff *skb, int key)
301{ 315{
302 switch (key) { 316 switch (key) {
@@ -334,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
334 return flow_get_skgid(skb); 348 return flow_get_skgid(skb);
335 case FLOW_KEY_VLAN_TAG: 349 case FLOW_KEY_VLAN_TAG:
336 return flow_get_vlan_tag(skb); 350 return flow_get_vlan_tag(skb);
351 case FLOW_KEY_RXHASH:
352 return flow_get_rxhash(skb);
337 default: 353 default:
338 WARN_ON(1); 354 WARN_ON(1);
339 return 0; 355 return 0;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3bcac8aa333c..34da5e29ea1a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -223,6 +223,11 @@ META_COLLECTOR(int_maclen)
223 dst->value = skb->mac_len; 223 dst->value = skb->mac_len;
224} 224}
225 225
226META_COLLECTOR(int_rxhash)
227{
228 dst->value = skb_get_rxhash(skb);
229}
230
226/************************************************************************** 231/**************************************************************************
227 * Netfilter 232 * Netfilter
228 **************************************************************************/ 233 **************************************************************************/
@@ -541,6 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
541 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), 546 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
542 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), 547 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
543 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), 548 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag),
549 [META_ID(RXHASH)] = META_FUNC(int_rxhash),
544 } 550 }
545}; 551};
546 552
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 408eea7086aa..6fb3d41c0e41 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -360,7 +360,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
361 } 361 }
362 362
363 if (!s || tsize != s->tsize || (!tab && tsize > 0)) 363 if (tsize != s->tsize || (!tab && tsize > 0))
364 return ERR_PTR(-EINVAL); 364 return ERR_PTR(-EINVAL);
365 365
366 spin_lock(&qdisc_stab_lock); 366 spin_lock(&qdisc_stab_lock);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32c..3cf478d012dd 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -123,40 +123,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
123 case htons(ETH_P_IP): 123 case htons(ETH_P_IP):
124 { 124 {
125 const struct iphdr *iph; 125 const struct iphdr *iph;
126 int poff;
126 127
127 if (!pskb_network_may_pull(skb, sizeof(*iph))) 128 if (!pskb_network_may_pull(skb, sizeof(*iph)))
128 goto err; 129 goto err;
129 iph = ip_hdr(skb); 130 iph = ip_hdr(skb);
130 h = (__force u32)iph->daddr; 131 h = (__force u32)iph->daddr;
131 h2 = (__force u32)iph->saddr ^ iph->protocol; 132 h2 = (__force u32)iph->saddr ^ iph->protocol;
132 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 133 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
133 (iph->protocol == IPPROTO_TCP || 134 break;
134 iph->protocol == IPPROTO_UDP || 135 poff = proto_ports_offset(iph->protocol);
135 iph->protocol == IPPROTO_UDPLITE || 136 if (poff >= 0 &&
136 iph->protocol == IPPROTO_SCTP || 137 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
137 iph->protocol == IPPROTO_DCCP || 138 iph = ip_hdr(skb);
138 iph->protocol == IPPROTO_ESP) && 139 h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
139 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 140 }
140 h2 ^= *(((u32*)iph) + iph->ihl);
141 break; 141 break;
142 } 142 }
143 case htons(ETH_P_IPV6): 143 case htons(ETH_P_IPV6):
144 { 144 {
145 struct ipv6hdr *iph; 145 struct ipv6hdr *iph;
146 int poff;
146 147
147 if (!pskb_network_may_pull(skb, sizeof(*iph))) 148 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 goto err; 149 goto err;
149 iph = ipv6_hdr(skb); 150 iph = ipv6_hdr(skb);
150 h = (__force u32)iph->daddr.s6_addr32[3]; 151 h = (__force u32)iph->daddr.s6_addr32[3];
151 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; 152 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
152 if ((iph->nexthdr == IPPROTO_TCP || 153 poff = proto_ports_offset(iph->nexthdr);
153 iph->nexthdr == IPPROTO_UDP || 154 if (poff >= 0 &&
154 iph->nexthdr == IPPROTO_UDPLITE || 155 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
155 iph->nexthdr == IPPROTO_SCTP || 156 iph = ipv6_hdr(skb);
156 iph->nexthdr == IPPROTO_DCCP || 157 h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
157 iph->nexthdr == IPPROTO_ESP) && 158 }
158 pskb_network_may_pull(skb, sizeof(*iph) + 4))
159 h2 ^= *(u32*)&iph[1];
160 break; 159 break;
161 } 160 }
162 default: 161 default:
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0b85e5256434..5f1fb8bd862d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/fcntl.h> 54#include <linux/fcntl.h>
53#include <linux/poll.h> 55#include <linux/poll.h>
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 476caaf100ed..6c8556459a75 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -37,6 +37,8 @@
37 * be incorporated into the next SCTP release. 37 * be incorporated into the next SCTP release.
38 */ 38 */
39 39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
40#include <linux/types.h> 42#include <linux/types.h>
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <linux/net.h> 44#include <linux/net.h>
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index ccb6dc48d15b..397296fb156f 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -43,6 +43,8 @@
43 * be incorporated into the next SCTP release. 43 * be incorporated into the next SCTP release.
44 */ 44 */
45 45
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
47
46#include <net/sctp/sctp.h> 48#include <net/sctp/sctp.h>
47#include <net/sctp/sm.h> 49#include <net/sctp/sm.h>
48#include <linux/interrupt.h> 50#include <linux/interrupt.h>
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 732689140fb8..95e0c8eda1a0 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/module.h> 52#include <linux/module.h>
51#include <linux/errno.h> 53#include <linux/errno.h>
52#include <linux/types.h> 54#include <linux/types.h>
@@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
336 memcpy(saddr, baddr, sizeof(union sctp_addr)); 338 memcpy(saddr, baddr, sizeof(union sctp_addr));
337 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); 339 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr);
338 } else { 340 } else {
339 printk(KERN_ERR "%s: asoc:%p Could not find a valid source " 341 pr_err("%s: asoc:%p Could not find a valid source "
340 "address for the dest:%pI6\n", 342 "address for the dest:%pI6\n",
341 __func__, asoc, &daddr->v6.sin6_addr); 343 __func__, asoc, &daddr->v6.sin6_addr);
342 } 344 }
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index f73ec0ea93ba..8ef8e7d9eb61 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -38,6 +38,8 @@
38 * be incorporated into the next SCTP release. 38 * be incorporated into the next SCTP release.
39 */ 39 */
40 40
41#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
42
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <net/sctp/sctp.h> 44#include <net/sctp/sctp.h>
43 45
@@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void)
134 ent = proc_create("sctp_dbg_objcnt", 0, 136 ent = proc_create("sctp_dbg_objcnt", 0,
135 proc_net_sctp, &sctp_objcnt_ops); 137 proc_net_sctp, &sctp_objcnt_ops);
136 if (!ent) 138 if (!ent)
137 printk(KERN_WARNING 139 pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
138 "sctp_dbg_objcnt: Unable to create /proc entry.\n");
139} 140}
140 141
141/* Cleanup the objcount entry in the proc filesystem. */ 142/* Cleanup the objcount entry in the proc filesystem. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index a646681f5acd..901764b17aee 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -41,6 +41,8 @@
41 * be incorporated into the next SCTP release. 41 * be incorporated into the next SCTP release.
42 */ 42 */
43 43
44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
45
44#include <linux/types.h> 46#include <linux/types.h>
45#include <linux/kernel.h> 47#include <linux/kernel.h>
46#include <linux/wait.h> 48#include <linux/wait.h>
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c04b2eb59186..8c6d379b4bb6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/types.h> 51#include <linux/types.h>
50#include <linux/list.h> /* For struct list_head */ 52#include <linux/list.h> /* For struct list_head */
51#include <linux/socket.h> 53#include <linux/socket.h>
@@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1463 /* Display the end of the 1465 /* Display the end of the
1464 * current range. 1466 * current range.
1465 */ 1467 */
1466 SCTP_DEBUG_PRINTK("-%08x", 1468 SCTP_DEBUG_PRINTK_CONT("-%08x",
1467 dbg_last_ack_tsn); 1469 dbg_last_ack_tsn);
1468 } 1470 }
1469 1471
1470 /* Start a new range. */ 1472 /* Start a new range. */
1471 SCTP_DEBUG_PRINTK(",%08x", tsn); 1473 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1472 dbg_ack_tsn = tsn; 1474 dbg_ack_tsn = tsn;
1473 break; 1475 break;
1474 1476
1475 case 1: /* The last TSN was NOT ACKed. */ 1477 case 1: /* The last TSN was NOT ACKed. */
1476 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1478 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1477 /* Display the end of current range. */ 1479 /* Display the end of current range. */
1478 SCTP_DEBUG_PRINTK("-%08x", 1480 SCTP_DEBUG_PRINTK_CONT("-%08x",
1479 dbg_last_kept_tsn); 1481 dbg_last_kept_tsn);
1480 } 1482 }
1481 1483
1482 SCTP_DEBUG_PRINTK("\n"); 1484 SCTP_DEBUG_PRINTK_CONT("\n");
1483 1485
1484 /* FALL THROUGH... */ 1486 /* FALL THROUGH... */
1485 default: 1487 default:
@@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1526 break; 1528 break;
1527 1529
1528 if (dbg_last_kept_tsn != dbg_kept_tsn) 1530 if (dbg_last_kept_tsn != dbg_kept_tsn)
1529 SCTP_DEBUG_PRINTK("-%08x", 1531 SCTP_DEBUG_PRINTK_CONT("-%08x",
1530 dbg_last_kept_tsn); 1532 dbg_last_kept_tsn);
1531 1533
1532 SCTP_DEBUG_PRINTK(",%08x", tsn); 1534 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1533 dbg_kept_tsn = tsn; 1535 dbg_kept_tsn = tsn;
1534 break; 1536 break;
1535 1537
1536 case 0: 1538 case 0:
1537 if (dbg_last_ack_tsn != dbg_ack_tsn) 1539 if (dbg_last_ack_tsn != dbg_ack_tsn)
1538 SCTP_DEBUG_PRINTK("-%08x", 1540 SCTP_DEBUG_PRINTK_CONT("-%08x",
1539 dbg_last_ack_tsn); 1541 dbg_last_ack_tsn);
1540 SCTP_DEBUG_PRINTK("\n"); 1542 SCTP_DEBUG_PRINTK_CONT("\n");
1541 1543
1542 /* FALL THROUGH... */ 1544 /* FALL THROUGH... */
1543 default: 1545 default:
@@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1556 switch (dbg_prt_state) { 1558 switch (dbg_prt_state) {
1557 case 0: 1559 case 0:
1558 if (dbg_last_ack_tsn != dbg_ack_tsn) { 1560 if (dbg_last_ack_tsn != dbg_ack_tsn) {
1559 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); 1561 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn);
1560 } else { 1562 } else {
1561 SCTP_DEBUG_PRINTK("\n"); 1563 SCTP_DEBUG_PRINTK_CONT("\n");
1562 } 1564 }
1563 break; 1565 break;
1564 1566
1565 case 1: 1567 case 1:
1566 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1568 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1567 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); 1569 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn);
1568 } else { 1570 } else {
1569 SCTP_DEBUG_PRINTK("\n"); 1571 SCTP_DEBUG_PRINTK_CONT("\n");
1570 } 1572 }
1571 } 1573 }
1572#endif /* SCTP_DEBUG */ 1574#endif /* SCTP_DEBUG */
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index db3a42b8b349..2e63e9dc010e 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -22,6 +22,8 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
25#include <linux/kernel.h> 27#include <linux/kernel.h>
26#include <linux/kprobes.h> 28#include <linux/kprobes.h>
27#include <linux/socket.h> 29#include <linux/socket.h>
@@ -192,7 +194,7 @@ static __init int sctpprobe_init(void)
192 if (ret) 194 if (ret)
193 goto remove_proc; 195 goto remove_proc;
194 196
195 pr_info("SCTP probe registered (port=%d)\n", port); 197 pr_info("probe registered (port=%d)\n", port);
196 198
197 return 0; 199 return 0;
198 200
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5027b83f1cc0..f774e657641a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/module.h> 51#include <linux/module.h>
50#include <linux/init.h> 52#include <linux/init.h>
51#include <linux/netdevice.h> 53#include <linux/netdevice.h>
@@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void)
707 &init_net); 709 &init_net);
708 710
709 if (err < 0) { 711 if (err < 0) {
710 printk(KERN_ERR 712 pr_err("Failed to create the SCTP control socket\n");
711 "SCTP: Failed to create the SCTP control socket.\n");
712 return err; 713 return err;
713 } 714 }
714 return 0; 715 return 0;
@@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void)
1206 __get_free_pages(GFP_ATOMIC, order); 1207 __get_free_pages(GFP_ATOMIC, order);
1207 } while (!sctp_assoc_hashtable && --order > 0); 1208 } while (!sctp_assoc_hashtable && --order > 0);
1208 if (!sctp_assoc_hashtable) { 1209 if (!sctp_assoc_hashtable) {
1209 printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); 1210 pr_err("Failed association hash alloc\n");
1210 status = -ENOMEM; 1211 status = -ENOMEM;
1211 goto err_ahash_alloc; 1212 goto err_ahash_alloc;
1212 } 1213 }
@@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void)
1220 sctp_ep_hashtable = (struct sctp_hashbucket *) 1221 sctp_ep_hashtable = (struct sctp_hashbucket *)
1221 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); 1222 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
1222 if (!sctp_ep_hashtable) { 1223 if (!sctp_ep_hashtable) {
1223 printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); 1224 pr_err("Failed endpoint_hash alloc\n");
1224 status = -ENOMEM; 1225 status = -ENOMEM;
1225 goto err_ehash_alloc; 1226 goto err_ehash_alloc;
1226 } 1227 }
@@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void)
1239 __get_free_pages(GFP_ATOMIC, order); 1240 __get_free_pages(GFP_ATOMIC, order);
1240 } while (!sctp_port_hashtable && --order > 0); 1241 } while (!sctp_port_hashtable && --order > 0);
1241 if (!sctp_port_hashtable) { 1242 if (!sctp_port_hashtable) {
1242 printk(KERN_ERR "SCTP: Failed bind hash alloc."); 1243 pr_err("Failed bind hash alloc\n");
1243 status = -ENOMEM; 1244 status = -ENOMEM;
1244 goto err_bhash_alloc; 1245 goto err_bhash_alloc;
1245 } 1246 }
@@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void)
1248 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); 1249 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
1249 } 1250 }
1250 1251
1251 printk(KERN_INFO "SCTP: Hash tables configured " 1252 pr_info("Hash tables configured (established %d bind %d)\n",
1252 "(established %d bind %d)\n",
1253 sctp_assoc_hashsize, sctp_port_hashsize); 1253 sctp_assoc_hashsize, sctp_port_hashsize);
1254 1254
1255 /* Disable ADDIP by default. */ 1255 /* Disable ADDIP by default. */
@@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void)
1290 1290
1291 /* Initialize the control inode/socket for handling OOTB packets. */ 1291 /* Initialize the control inode/socket for handling OOTB packets. */
1292 if ((status = sctp_ctl_sock_init())) { 1292 if ((status = sctp_ctl_sock_init())) {
1293 printk (KERN_ERR 1293 pr_err("Failed to initialize the SCTP control sock\n");
1294 "SCTP: Failed to initialize the SCTP control sock.\n");
1295 goto err_ctl_sock_init; 1294 goto err_ctl_sock_init;
1296 } 1295 }
1297 1296
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 246f92924658..2cc46f0962ca 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index f5e5e27cac5e..b21b218d564f 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/skbuff.h> 52#include <linux/skbuff.h>
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/socket.h> 54#include <linux/socket.h>
@@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
1146 1148
1147 case SCTP_DISPOSITION_VIOLATION: 1149 case SCTP_DISPOSITION_VIOLATION:
1148 if (net_ratelimit()) 1150 if (net_ratelimit())
1149 printk(KERN_ERR "sctp protocol violation state %d " 1151 pr_err("protocol violation state %d chunkid %d\n",
1150 "chunkid %d\n", state, subtype.chunk); 1152 state, subtype.chunk);
1151 break; 1153 break;
1152 1154
1153 case SCTP_DISPOSITION_NOT_IMPL: 1155 case SCTP_DISPOSITION_NOT_IMPL:
1154 printk(KERN_WARNING "sctp unimplemented feature in state %d, " 1156 pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n",
1155 "event_type %d, event_id %d\n", 1157 state, event_type, subtype.chunk);
1156 state, event_type, subtype.chunk);
1157 break; 1158 break;
1158 1159
1159 case SCTP_DISPOSITION_BUG: 1160 case SCTP_DISPOSITION_BUG:
1160 printk(KERN_ERR "sctp bug in state %d, " 1161 pr_err("bug in state %d, event_type %d, event_id %d\n",
1161 "event_type %d, event_id %d\n",
1162 state, event_type, subtype.chunk); 1162 state, event_type, subtype.chunk);
1163 BUG(); 1163 BUG();
1164 break; 1164 break;
1165 1165
1166 default: 1166 default:
1167 printk(KERN_ERR "sctp impossible disposition %d " 1167 pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
1168 "in state %d, event_type %d, event_id %d\n",
1169 status, state, event_type, subtype.chunk); 1168 status, state, event_type, subtype.chunk);
1170 BUG(); 1169 BUG();
1171 break; 1170 break;
@@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1679 sctp_cmd_send_asconf(asoc); 1678 sctp_cmd_send_asconf(asoc);
1680 break; 1679 break;
1681 default: 1680 default:
1682 printk(KERN_WARNING "Impossible command: %u, %p\n", 1681 pr_warn("Impossible command: %u, %p\n",
1683 cmd->verb, cmd->obj.ptr); 1682 cmd->verb, cmd->obj.ptr);
1684 break; 1683 break;
1685 } 1684 }
1686 1685
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d344dc481ccc..4b4eb7c96bbd 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
@@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1138 if (unlikely(!link)) { 1140 if (unlikely(!link)) {
1139 if (from_addr.sa.sa_family == AF_INET6) { 1141 if (from_addr.sa.sa_family == AF_INET6) {
1140 if (net_ratelimit()) 1142 if (net_ratelimit())
1141 printk(KERN_WARNING 1143 pr_warn("%s association %p could not find address %pI6\n",
1142 "%s association %p could not find address %pI6\n", 1144 __func__,
1143 __func__, 1145 asoc,
1144 asoc, 1146 &from_addr.v6.sin6_addr);
1145 &from_addr.v6.sin6_addr);
1146 } else { 1147 } else {
1147 if (net_ratelimit()) 1148 if (net_ratelimit())
1148 printk(KERN_WARNING 1149 pr_warn("%s association %p could not find address %pI4\n",
1149 "%s association %p could not find address %pI4\n", 1150 __func__,
1150 __func__, 1151 asoc,
1151 asoc, 1152 &from_addr.v4.sin_addr.s_addr);
1152 &from_addr.v4.sin_addr.s_addr);
1153 } 1153 }
1154 return SCTP_DISPOSITION_DISCARD; 1154 return SCTP_DISPOSITION_DISCARD;
1155 } 1155 }
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 6d9b3aafcc5d..546d4387fb3c 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/skbuff.h> 51#include <linux/skbuff.h>
50#include <net/sctp/sctp.h> 52#include <net/sctp/sctp.h>
51#include <net/sctp/sm.h> 53#include <net/sctp/sm.h>
@@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = {
66 .name = "sctp_sf_bug" 68 .name = "sctp_sf_bug"
67}; 69};
68 70
69#define DO_LOOKUP(_max, _type, _table) \ 71#define DO_LOOKUP(_max, _type, _table) \
70 if ((event_subtype._type > (_max))) { \ 72({ \
71 printk(KERN_WARNING \ 73 const sctp_sm_table_entry_t *rtn; \
72 "sctp table %p possible attack:" \ 74 \
73 " event %d exceeds max %d\n", \ 75 if ((event_subtype._type > (_max))) { \
74 _table, event_subtype._type, _max); \ 76 pr_warn("table %p possible attack: event %d exceeds max %d\n", \
75 return &bug; \ 77 _table, event_subtype._type, _max); \
76 } \ 78 rtn = &bug; \
77 return &_table[event_subtype._type][(int)state]; 79 } else \
80 rtn = &_table[event_subtype._type][(int)state]; \
81 \
82 rtn; \
83})
78 84
79const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, 85const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
80 sctp_state_t state, 86 sctp_state_t state,
@@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
83 switch (event_type) { 89 switch (event_type) {
84 case SCTP_EVENT_T_CHUNK: 90 case SCTP_EVENT_T_CHUNK:
85 return sctp_chunk_event_lookup(event_subtype.chunk, state); 91 return sctp_chunk_event_lookup(event_subtype.chunk, state);
86 break;
87 case SCTP_EVENT_T_TIMEOUT: 92 case SCTP_EVENT_T_TIMEOUT:
88 DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, 93 return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
89 timeout_event_table); 94 timeout_event_table);
90 break;
91
92 case SCTP_EVENT_T_OTHER: 95 case SCTP_EVENT_T_OTHER:
93 DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); 96 return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other,
94 break; 97 other_event_table);
95
96 case SCTP_EVENT_T_PRIMITIVE: 98 case SCTP_EVENT_T_PRIMITIVE:
97 DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, 99 return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive,
98 primitive_event_table); 100 primitive_event_table);
99 break;
100
101 default: 101 default:
102 /* Yikes! We got an illegal event type. */ 102 /* Yikes! We got an illegal event type. */
103 return &bug; 103 return &bug;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ca44917872d2..6a691d84aef4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -57,6 +57,8 @@
57 * be incorporated into the next SCTP release. 57 * be incorporated into the next SCTP release.
58 */ 58 */
59 59
60#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
61
60#include <linux/types.h> 62#include <linux/types.h>
61#include <linux/kernel.h> 63#include <linux/kernel.h>
62#include <linux/wait.h> 64#include <linux/wait.h>
@@ -2458,9 +2460,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
2458 if (params.sack_delay == 0 && params.sack_freq == 0) 2460 if (params.sack_delay == 0 && params.sack_freq == 0)
2459 return 0; 2461 return 0;
2460 } else if (optlen == sizeof(struct sctp_assoc_value)) { 2462 } else if (optlen == sizeof(struct sctp_assoc_value)) {
2461 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 2463 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
2462 "in delayed_ack socket option deprecated\n"); 2464 pr_warn("Use struct sctp_sack_info instead\n");
2463 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
2464 if (copy_from_user(&params, optval, optlen)) 2465 if (copy_from_user(&params, optval, optlen))
2465 return -EFAULT; 2466 return -EFAULT;
2466 2467
@@ -2868,10 +2869,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
2868 int val; 2869 int val;
2869 2870
2870 if (optlen == sizeof(int)) { 2871 if (optlen == sizeof(int)) {
2871 printk(KERN_WARNING 2872 pr_warn("Use of int in maxseg socket option deprecated\n");
2872 "SCTP: Use of int in maxseg socket option deprecated\n"); 2873 pr_warn("Use struct sctp_assoc_value instead\n");
2873 printk(KERN_WARNING
2874 "SCTP: Use struct sctp_assoc_value instead\n");
2875 if (copy_from_user(&val, optval, optlen)) 2874 if (copy_from_user(&val, optval, optlen))
2876 return -EFAULT; 2875 return -EFAULT;
2877 params.assoc_id = 0; 2876 params.assoc_id = 0;
@@ -3121,10 +3120,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
3121 int assoc_id = 0; 3120 int assoc_id = 0;
3122 3121
3123 if (optlen == sizeof(int)) { 3122 if (optlen == sizeof(int)) {
3124 printk(KERN_WARNING 3123 pr_warn("Use of int in max_burst socket option deprecated\n");
3125 "SCTP: Use of int in max_burst socket option deprecated\n"); 3124 pr_warn("Use struct sctp_assoc_value instead\n");
3126 printk(KERN_WARNING
3127 "SCTP: Use struct sctp_assoc_value instead\n");
3128 if (copy_from_user(&val, optval, optlen)) 3125 if (copy_from_user(&val, optval, optlen))
3129 return -EFAULT; 3126 return -EFAULT;
3130 } else if (optlen == sizeof(struct sctp_assoc_value)) { 3127 } else if (optlen == sizeof(struct sctp_assoc_value)) {
@@ -3595,7 +3592,40 @@ out:
3595/* The SCTP ioctl handler. */ 3592/* The SCTP ioctl handler. */
3596SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) 3593SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
3597{ 3594{
3598 return -ENOIOCTLCMD; 3595 int rc = -ENOTCONN;
3596
3597 sctp_lock_sock(sk);
3598
3599 /*
3600 * SEQPACKET-style sockets in LISTENING state are valid, for
3601 * SCTP, so only discard TCP-style sockets in LISTENING state.
3602 */
3603 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
3604 goto out;
3605
3606 switch (cmd) {
3607 case SIOCINQ: {
3608 struct sk_buff *skb;
3609 unsigned int amount = 0;
3610
3611 skb = skb_peek(&sk->sk_receive_queue);
3612 if (skb != NULL) {
3613 /*
3614 * We will only return the amount of this packet since
3615 * that is all that will be read.
3616 */
3617 amount = skb->len;
3618 }
3619 rc = put_user(amount, (int __user *)arg);
3620 }
3621 break;
3622 default:
3623 rc = -ENOIOCTLCMD;
3624 break;
3625 }
3626out:
3627 sctp_release_sock(sk);
3628 return rc;
3599} 3629}
3600 3630
3601/* This is the function which gets called during socket creation to 3631/* This is the function which gets called during socket creation to
@@ -4281,9 +4311,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len,
4281 if (copy_from_user(&params, optval, len)) 4311 if (copy_from_user(&params, optval, len))
4282 return -EFAULT; 4312 return -EFAULT;
4283 } else if (len == sizeof(struct sctp_assoc_value)) { 4313 } else if (len == sizeof(struct sctp_assoc_value)) {
4284 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 4314 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
4285 "in delayed_ack socket option deprecated\n"); 4315 pr_warn("Use struct sctp_sack_info instead\n");
4286 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
4287 if (copy_from_user(&params, optval, len)) 4316 if (copy_from_user(&params, optval, len))
4288 return -EFAULT; 4317 return -EFAULT;
4289 } else 4318 } else
@@ -4929,10 +4958,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
4929 struct sctp_association *asoc; 4958 struct sctp_association *asoc;
4930 4959
4931 if (len == sizeof(int)) { 4960 if (len == sizeof(int)) {
4932 printk(KERN_WARNING 4961 pr_warn("Use of int in maxseg socket option deprecated\n");
4933 "SCTP: Use of int in maxseg socket option deprecated\n"); 4962 pr_warn("Use struct sctp_assoc_value instead\n");
4934 printk(KERN_WARNING
4935 "SCTP: Use struct sctp_assoc_value instead\n");
4936 params.assoc_id = 0; 4963 params.assoc_id = 0;
4937 } else if (len >= sizeof(struct sctp_assoc_value)) { 4964 } else if (len >= sizeof(struct sctp_assoc_value)) {
4938 len = sizeof(struct sctp_assoc_value); 4965 len = sizeof(struct sctp_assoc_value);
@@ -5023,10 +5050,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
5023 struct sctp_association *asoc; 5050 struct sctp_association *asoc;
5024 5051
5025 if (len == sizeof(int)) { 5052 if (len == sizeof(int)) {
5026 printk(KERN_WARNING 5053 pr_warn("Use of int in max_burst socket option deprecated\n");
5027 "SCTP: Use of int in max_burst socket option deprecated\n"); 5054 pr_warn("Use struct sctp_assoc_value instead\n");
5028 printk(KERN_WARNING
5029 "SCTP: Use struct sctp_assoc_value instead\n");
5030 params.assoc_id = 0; 5055 params.assoc_id = 0;
5031 } else if (len >= sizeof(struct sctp_assoc_value)) { 5056 } else if (len >= sizeof(struct sctp_assoc_value)) {
5032 len = sizeof(struct sctp_assoc_value); 5057 len = sizeof(struct sctp_assoc_value);
@@ -5586,8 +5611,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
5586 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); 5611 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
5587 if (IS_ERR(tfm)) { 5612 if (IS_ERR(tfm)) {
5588 if (net_ratelimit()) { 5613 if (net_ratelimit()) {
5589 printk(KERN_INFO 5614 pr_info("failed to load transform for %s: %ld\n",
5590 "SCTP: failed to load transform for %s: %ld\n",
5591 sctp_hmac_alg, PTR_ERR(tfm)); 5615 sctp_hmac_alg, PTR_ERR(tfm));
5592 } 5616 }
5593 return -ENOSYS; 5617 return -ENOSYS;
@@ -5716,13 +5740,12 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
5716 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 5740 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
5717 mask |= POLLERR; 5741 mask |= POLLERR;
5718 if (sk->sk_shutdown & RCV_SHUTDOWN) 5742 if (sk->sk_shutdown & RCV_SHUTDOWN)
5719 mask |= POLLRDHUP; 5743 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
5720 if (sk->sk_shutdown == SHUTDOWN_MASK) 5744 if (sk->sk_shutdown == SHUTDOWN_MASK)
5721 mask |= POLLHUP; 5745 mask |= POLLHUP;
5722 5746
5723 /* Is it readable? Reconsider this code with TCP-style support. */ 5747 /* Is it readable? Reconsider this code with TCP-style support. */
5724 if (!skb_queue_empty(&sk->sk_receive_queue) || 5748 if (!skb_queue_empty(&sk->sk_receive_queue))
5725 (sk->sk_shutdown & RCV_SHUTDOWN))
5726 mask |= POLLIN | POLLRDNORM; 5749 mask |= POLLIN | POLLRDNORM;
5727 5750
5728 /* The association is either gone or not ready. */ 5751 /* The association is either gone or not ready. */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 132046cb82fc..d3ae493d234a 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/slab.h> 53#include <linux/slab.h>
52#include <linux/types.h> 54#include <linux/types.h>
53#include <linux/random.h> 55#include <linux/random.h>
@@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
244 struct dst_entry *dst; 246 struct dst_entry *dst;
245 247
246 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { 248 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
247 printk(KERN_WARNING "%s: Reported pmtu %d too low, " 249 pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
248 "using default minimum of %d\n", 250 __func__, pmtu,
249 __func__, pmtu, 251 SCTP_DEFAULT_MINSEGMENT);
250 SCTP_DEFAULT_MINSEGMENT);
251 /* Use default minimum segment size and disable 252 /* Use default minimum segment size and disable
252 * pmtu discovery on this transport. 253 * pmtu discovery on this transport.
253 */ 254 */
diff --git a/net/socket.c b/net/socket.c
index 2270b941bcc7..717a5f1c8792 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -535,14 +535,13 @@ void sock_release(struct socket *sock)
535} 535}
536EXPORT_SYMBOL(sock_release); 536EXPORT_SYMBOL(sock_release);
537 537
538int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 538int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
539 union skb_shared_tx *shtx)
540{ 539{
541 shtx->flags = 0; 540 *tx_flags = 0;
542 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 541 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
543 shtx->hardware = 1; 542 *tx_flags |= SKBTX_HW_TSTAMP;
544 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 543 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
545 shtx->software = 1; 544 *tx_flags |= SKBTX_SW_TSTAMP;
546 return 0; 545 return 0;
547} 546}
548EXPORT_SYMBOL(sock_tx_timestamp); 547EXPORT_SYMBOL(sock_tx_timestamp);
@@ -1919,7 +1918,8 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1919 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1918 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1920 * checking falls down on this. 1919 * checking falls down on this.
1921 */ 1920 */
1922 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1921 if (copy_from_user(ctl_buf,
1922 (void __user __force *)msg_sys.msg_control,
1923 ctl_len)) 1923 ctl_len))
1924 goto out_freectl; 1924 goto out_freectl;
1925 msg_sys.msg_control = ctl_buf; 1925 msg_sys.msg_control = ctl_buf;
@@ -3054,14 +3054,19 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
3054 char *optval, int *optlen) 3054 char *optval, int *optlen)
3055{ 3055{
3056 mm_segment_t oldfs = get_fs(); 3056 mm_segment_t oldfs = get_fs();
3057 char __user *uoptval;
3058 int __user *uoptlen;
3057 int err; 3059 int err;
3058 3060
3061 uoptval = (char __user __force *) optval;
3062 uoptlen = (int __user __force *) optlen;
3063
3059 set_fs(KERNEL_DS); 3064 set_fs(KERNEL_DS);
3060 if (level == SOL_SOCKET) 3065 if (level == SOL_SOCKET)
3061 err = sock_getsockopt(sock, level, optname, optval, optlen); 3066 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3062 else 3067 else
3063 err = sock->ops->getsockopt(sock, level, optname, optval, 3068 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3064 optlen); 3069 uoptlen);
3065 set_fs(oldfs); 3070 set_fs(oldfs);
3066 return err; 3071 return err;
3067} 3072}
@@ -3071,13 +3076,16 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
3071 char *optval, unsigned int optlen) 3076 char *optval, unsigned int optlen)
3072{ 3077{
3073 mm_segment_t oldfs = get_fs(); 3078 mm_segment_t oldfs = get_fs();
3079 char __user *uoptval;
3074 int err; 3080 int err;
3075 3081
3082 uoptval = (char __user __force *) optval;
3083
3076 set_fs(KERNEL_DS); 3084 set_fs(KERNEL_DS);
3077 if (level == SOL_SOCKET) 3085 if (level == SOL_SOCKET)
3078 err = sock_setsockopt(sock, level, optname, optval, optlen); 3086 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3079 else 3087 else
3080 err = sock->ops->setsockopt(sock, level, optname, optval, 3088 err = sock->ops->setsockopt(sock, level, optname, uoptval,
3081 optlen); 3089 optlen);
3082 set_fs(oldfs); 3090 set_fs(oldfs);
3083 return err; 3091 return err;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a008c6689305..b11248c2d788 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -143,6 +143,19 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
143} 143}
144 144
145 145
146static void bclink_set_last_sent(void)
147{
148 if (bcl->next_out)
149 bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1);
150 else
151 bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
152}
153
154u32 tipc_bclink_get_last_sent(void)
155{
156 return bcl->fsm_msg_cnt;
157}
158
146/** 159/**
147 * bclink_set_gap - set gap according to contents of current deferred pkt queue 160 * bclink_set_gap - set gap according to contents of current deferred pkt queue
148 * 161 *
@@ -237,8 +250,10 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
237 250
238 /* Try resolving broadcast link congestion, if necessary */ 251 /* Try resolving broadcast link congestion, if necessary */
239 252
240 if (unlikely(bcl->next_out)) 253 if (unlikely(bcl->next_out)) {
241 tipc_link_push_queue(bcl); 254 tipc_link_push_queue(bcl);
255 bclink_set_last_sent();
256 }
242 if (unlikely(released && !list_empty(&bcl->waiting_ports))) 257 if (unlikely(released && !list_empty(&bcl->waiting_ports)))
243 tipc_link_wakeup_ports(bcl, 0); 258 tipc_link_wakeup_ports(bcl, 0);
244 spin_unlock_bh(&bc_lock); 259 spin_unlock_bh(&bc_lock);
@@ -395,7 +410,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
395 if (unlikely(res == -ELINKCONG)) 410 if (unlikely(res == -ELINKCONG))
396 buf_discard(buf); 411 buf_discard(buf);
397 else 412 else
398 bcl->stats.sent_info++; 413 bclink_set_last_sent();
399 414
400 if (bcl->out_queue_size > bcl->stats.max_queue_sz) 415 if (bcl->out_queue_size > bcl->stats.max_queue_sz)
401 bcl->stats.max_queue_sz = bcl->out_queue_size; 416 bcl->stats.max_queue_sz = bcl->out_queue_size;
@@ -529,15 +544,6 @@ receive:
529 tipc_node_unlock(node); 544 tipc_node_unlock(node);
530} 545}
531 546
532u32 tipc_bclink_get_last_sent(void)
533{
534 u32 last_sent = mod(bcl->next_out_no - 1);
535
536 if (bcl->next_out)
537 last_sent = mod(buf_seqno(bcl->next_out) - 1);
538 return last_sent;
539}
540
541u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) 547u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
542{ 548{
543 return (n_ptr->bclink.supported && 549 return (n_ptr->bclink.supported &&
@@ -570,6 +576,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
570 msg = buf_msg(buf); 576 msg = buf_msg(buf);
571 msg_set_non_seq(msg, 1); 577 msg_set_non_seq(msg, 1);
572 msg_set_mc_netid(msg, tipc_net_id); 578 msg_set_mc_netid(msg, tipc_net_id);
579 bcl->stats.sent_info++;
573 } 580 }
574 581
575 /* Send buffer over bearers until all targets reached */ 582 /* Send buffer over bearers until all targets reached */
@@ -609,11 +616,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
609 bcbearer->remains = bcbearer->remains_new; 616 bcbearer->remains = bcbearer->remains_new;
610 } 617 }
611 618
612 /* Unable to reach all targets */ 619 /*
620 * Unable to reach all targets (indicate success, since currently
621 * there isn't code in place to properly block & unblock the
622 * pseudo-bearer used by the broadcast link)
623 */
613 624
614 bcbearer->bearer.publ.blocked = 1; 625 return TIPC_OK;
615 bcl->stats.bearer_congs++;
616 return 1;
617} 626}
618 627
619/** 628/**
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 696468117985..466b861dab91 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -169,6 +169,7 @@ void tipc_core_stop(void)
169 tipc_nametbl_stop(); 169 tipc_nametbl_stop();
170 tipc_ref_table_stop(); 170 tipc_ref_table_stop();
171 tipc_socket_stop(); 171 tipc_socket_stop();
172 tipc_log_resize(0);
172} 173}
173 174
174/** 175/**
@@ -203,7 +204,9 @@ static int __init tipc_init(void)
203{ 204{
204 int res; 205 int res;
205 206
206 tipc_log_resize(CONFIG_TIPC_LOG); 207 if (tipc_log_resize(CONFIG_TIPC_LOG) != 0)
208 warn("Unable to create log buffer\n");
209
207 info("Activated (version " TIPC_MOD_VER 210 info("Activated (version " TIPC_MOD_VER
208 " compiled " __DATE__ " " __TIME__ ")\n"); 211 " compiled " __DATE__ " " __TIME__ ")\n");
209 212
@@ -230,7 +233,6 @@ static void __exit tipc_exit(void)
230 tipc_core_stop_net(); 233 tipc_core_stop_net();
231 tipc_core_stop(); 234 tipc_core_stop();
232 info("Deactivated\n"); 235 info("Deactivated\n");
233 tipc_log_resize(0);
234} 236}
235 237
236module_init(tipc_init); 238module_init(tipc_init);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fc1fcf5e6b53..f28d1ae93125 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -203,6 +203,14 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
203 return; 203 return;
204 } 204 }
205 spin_lock_bh(&n_ptr->lock); 205 spin_lock_bh(&n_ptr->lock);
206
207 /* Don't talk to neighbor during cleanup after last session */
208
209 if (n_ptr->cleanup_required) {
210 spin_unlock_bh(&n_ptr->lock);
211 return;
212 }
213
206 link = n_ptr->links[b_ptr->identity]; 214 link = n_ptr->links[b_ptr->identity];
207 if (!link) { 215 if (!link) {
208 dbg("creating link\n"); 216 dbg("creating link\n");
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 6230d16020c4..6e988ba485fd 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -72,17 +72,26 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
72{ 72{
73 struct sk_buff *clone; 73 struct sk_buff *clone;
74 struct net_device *dev; 74 struct net_device *dev;
75 int delta;
75 76
76 clone = skb_clone(buf, GFP_ATOMIC); 77 clone = skb_clone(buf, GFP_ATOMIC);
77 if (clone) { 78 if (!clone)
78 skb_reset_network_header(clone); 79 return 0;
79 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; 80
80 clone->dev = dev; 81 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
81 dev_hard_header(clone, dev, ETH_P_TIPC, 82 delta = dev->hard_header_len - skb_headroom(buf);
82 &dest->dev_addr.eth_addr, 83
83 dev->dev_addr, clone->len); 84 if ((delta > 0) &&
84 dev_queue_xmit(clone); 85 pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
86 kfree_skb(clone);
87 return 0;
85 } 88 }
89
90 skb_reset_network_header(clone);
91 clone->dev = dev;
92 dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr,
93 dev->dev_addr, clone->len);
94 dev_queue_xmit(clone);
86 return 0; 95 return 0;
87} 96}
88 97
@@ -92,15 +101,12 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
92 * Accept only packets explicitly sent to this node, or broadcast packets; 101 * Accept only packets explicitly sent to this node, or broadcast packets;
93 * ignores packets sent using Ethernet multicast, and traffic sent to other 102 * ignores packets sent using Ethernet multicast, and traffic sent to other
94 * nodes (which can happen if interface is running in promiscuous mode). 103 * nodes (which can happen if interface is running in promiscuous mode).
95 * Routine truncates any Ethernet padding/CRC appended to the message,
96 * and ensures message size matches actual length
97 */ 104 */
98 105
99static int recv_msg(struct sk_buff *buf, struct net_device *dev, 106static int recv_msg(struct sk_buff *buf, struct net_device *dev,
100 struct packet_type *pt, struct net_device *orig_dev) 107 struct packet_type *pt, struct net_device *orig_dev)
101{ 108{
102 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; 109 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
103 u32 size;
104 110
105 if (!net_eq(dev_net(dev), &init_net)) { 111 if (!net_eq(dev_net(dev), &init_net)) {
106 kfree_skb(buf); 112 kfree_skb(buf);
@@ -109,13 +115,9 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
109 115
110 if (likely(eb_ptr->bearer)) { 116 if (likely(eb_ptr->bearer)) {
111 if (likely(buf->pkt_type <= PACKET_BROADCAST)) { 117 if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
112 size = msg_size((struct tipc_msg *)buf->data); 118 buf->next = NULL;
113 skb_trim(buf, size); 119 tipc_recv_msg(buf, eb_ptr->bearer);
114 if (likely(buf->len == size)) { 120 return 0;
115 buf->next = NULL;
116 tipc_recv_msg(buf, eb_ptr->bearer);
117 return 0;
118 }
119 } 121 }
120 } 122 }
121 kfree_skb(buf); 123 kfree_skb(buf);
@@ -133,6 +135,16 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
133 struct eth_bearer *eb_ptr = &eth_bearers[0]; 135 struct eth_bearer *eb_ptr = &eth_bearers[0];
134 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; 136 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
135 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; 137 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
138 int pending_dev = 0;
139
140 /* Find unused Ethernet bearer structure */
141
142 while (eb_ptr->dev) {
143 if (!eb_ptr->bearer)
144 pending_dev++;
145 if (++eb_ptr == stop)
146 return pending_dev ? -EAGAIN : -EDQUOT;
147 }
136 148
137 /* Find device with specified name */ 149 /* Find device with specified name */
138 150
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a3616b99529b..a6a3102bb4d6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1802,6 +1802,15 @@ static int link_recv_buf_validate(struct sk_buff *buf)
1802 return pskb_may_pull(buf, hdr_size); 1802 return pskb_may_pull(buf, hdr_size);
1803} 1803}
1804 1804
1805/**
1806 * tipc_recv_msg - process TIPC messages arriving from off-node
1807 * @head: pointer to message buffer chain
1808 * @tb_ptr: pointer to bearer message arrived on
1809 *
1810 * Invoked with no locks held. Bearer pointer must point to a valid bearer
1811 * structure (i.e. cannot be NULL), but bearer can be inactive.
1812 */
1813
1805void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) 1814void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1806{ 1815{
1807 read_lock_bh(&tipc_net_lock); 1816 read_lock_bh(&tipc_net_lock);
@@ -1819,6 +1828,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1819 1828
1820 head = head->next; 1829 head = head->next;
1821 1830
1831 /* Ensure bearer is still enabled */
1832
1833 if (unlikely(!b_ptr->active))
1834 goto cont;
1835
1822 /* Ensure message is well-formed */ 1836 /* Ensure message is well-formed */
1823 1837
1824 if (unlikely(!link_recv_buf_validate(buf))) 1838 if (unlikely(!link_recv_buf_validate(buf)))
@@ -1855,13 +1869,22 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1855 goto cont; 1869 goto cont;
1856 } 1870 }
1857 1871
1858 /* Locate unicast link endpoint that should handle message */ 1872 /* Locate neighboring node that sent message */
1859 1873
1860 n_ptr = tipc_node_find(msg_prevnode(msg)); 1874 n_ptr = tipc_node_find(msg_prevnode(msg));
1861 if (unlikely(!n_ptr)) 1875 if (unlikely(!n_ptr))
1862 goto cont; 1876 goto cont;
1863 tipc_node_lock(n_ptr); 1877 tipc_node_lock(n_ptr);
1864 1878
1879 /* Don't talk to neighbor during cleanup after last session */
1880
1881 if (n_ptr->cleanup_required) {
1882 tipc_node_unlock(n_ptr);
1883 goto cont;
1884 }
1885
1886 /* Locate unicast link endpoint that should handle message */
1887
1865 l_ptr = n_ptr->links[b_ptr->identity]; 1888 l_ptr = n_ptr->links[b_ptr->identity];
1866 if (unlikely(!l_ptr)) { 1889 if (unlikely(!l_ptr)) {
1867 tipc_node_unlock(n_ptr); 1890 tipc_node_unlock(n_ptr);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 8ba79620db3f..c13c2c7c4b57 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -613,8 +613,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
613} 613}
614 614
615/* 615/*
616 * tipc_nametbl_translate(): Translate tipc_name -> tipc_portid. 616 * tipc_nametbl_translate - translate name to port id
617 * Very time-critical.
618 * 617 *
619 * Note: on entry 'destnode' is the search domain used during translation; 618 * Note: on entry 'destnode' is the search domain used during translation;
620 * on exit it passes back the node address of the matching port (if any) 619 * on exit it passes back the node address of the matching port (if any)
@@ -685,7 +684,6 @@ found:
685 } 684 }
686 spin_unlock_bh(&seq->lock); 685 spin_unlock_bh(&seq->lock);
687not_found: 686not_found:
688 *destnode = 0;
689 read_unlock_bh(&tipc_nametbl_lock); 687 read_unlock_bh(&tipc_nametbl_lock);
690 return 0; 688 return 0;
691} 689}
@@ -877,7 +875,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
877 u32 index) 875 u32 index)
878{ 876{
879 char portIdStr[27]; 877 char portIdStr[27];
880 char *scopeStr; 878 const char *scope_str[] = {"", " zone", " cluster", " node"};
881 struct publication *publ = sseq->zone_list; 879 struct publication *publ = sseq->zone_list;
882 880
883 tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper); 881 tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper);
@@ -893,15 +891,8 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
893 tipc_node(publ->node), publ->ref); 891 tipc_node(publ->node), publ->ref);
894 tipc_printf(buf, "%-26s ", portIdStr); 892 tipc_printf(buf, "%-26s ", portIdStr);
895 if (depth > 3) { 893 if (depth > 3) {
896 if (publ->node != tipc_own_addr) 894 tipc_printf(buf, "%-10u %s", publ->key,
897 scopeStr = ""; 895 scope_str[publ->scope]);
898 else if (publ->scope == TIPC_NODE_SCOPE)
899 scopeStr = "node";
900 else if (publ->scope == TIPC_CLUSTER_SCOPE)
901 scopeStr = "cluster";
902 else
903 scopeStr = "zone";
904 tipc_printf(buf, "%-10u %s", publ->key, scopeStr);
905 } 896 }
906 897
907 publ = publ->zone_list_next; 898 publ = publ->zone_list_next;
@@ -951,24 +942,19 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
951 942
952static void nametbl_header(struct print_buf *buf, u32 depth) 943static void nametbl_header(struct print_buf *buf, u32 depth)
953{ 944{
954 tipc_printf(buf, "Type "); 945 const char *header[] = {
955 946 "Type ",
956 if (depth > 1) 947 "Lower Upper ",
957 tipc_printf(buf, "Lower Upper "); 948 "Port Identity ",
958 if (depth > 2) 949 "Publication Scope"
959 tipc_printf(buf, "Port Identity "); 950 };
960 if (depth > 3) 951
961 tipc_printf(buf, "Publication"); 952 int i;
962 953
963 tipc_printf(buf, "\n-----------"); 954 if (depth > 4)
964 955 depth = 4;
965 if (depth > 1) 956 for (i = 0; i < depth; i++)
966 tipc_printf(buf, "--------------------- "); 957 tipc_printf(buf, header[i]);
967 if (depth > 2)
968 tipc_printf(buf, "-------------------------- ");
969 if (depth > 3)
970 tipc_printf(buf, "------------------");
971
972 tipc_printf(buf, "\n"); 958 tipc_printf(buf, "\n");
973} 959}
974 960
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f61b7694138b..7e05af47a196 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -248,6 +248,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
248 248
249 /* Handle message for another node */ 249 /* Handle message for another node */
250 msg_dbg(msg, "NET>SEND>: "); 250 msg_dbg(msg, "NET>SEND>: ");
251 skb_trim(buf, msg_size(msg));
251 tipc_link_send(buf, dnode, msg_link_selector(msg)); 252 tipc_link_send(buf, dnode, msg_link_selector(msg));
252} 253}
253 254
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b634942caba5..b702c7bf580f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -237,8 +237,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
237 237
238int tipc_node_has_active_links(struct tipc_node *n_ptr) 238int tipc_node_has_active_links(struct tipc_node *n_ptr)
239{ 239{
240 return (n_ptr && 240 return n_ptr->active_links[0] != NULL;
241 ((n_ptr->active_links[0]) || (n_ptr->active_links[1])));
242} 241}
243 242
244int tipc_node_has_redundant_links(struct tipc_node *n_ptr) 243int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
@@ -384,6 +383,20 @@ static void node_established_contact(struct tipc_node *n_ptr)
384 tipc_highest_allowed_slave); 383 tipc_highest_allowed_slave);
385} 384}
386 385
386static void node_cleanup_finished(unsigned long node_addr)
387{
388 struct tipc_node *n_ptr;
389
390 read_lock_bh(&tipc_net_lock);
391 n_ptr = tipc_node_find(node_addr);
392 if (n_ptr) {
393 tipc_node_lock(n_ptr);
394 n_ptr->cleanup_required = 0;
395 tipc_node_unlock(n_ptr);
396 }
397 read_unlock_bh(&tipc_net_lock);
398}
399
387static void node_lost_contact(struct tipc_node *n_ptr) 400static void node_lost_contact(struct tipc_node *n_ptr)
388{ 401{
389 struct cluster *c_ptr; 402 struct cluster *c_ptr;
@@ -458,6 +471,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
458 tipc_k_signal((Handler)ns->handle_node_down, 471 tipc_k_signal((Handler)ns->handle_node_down,
459 (unsigned long)ns->usr_handle); 472 (unsigned long)ns->usr_handle);
460 } 473 }
474
475 /* Prevent re-contact with node until all cleanup is done */
476
477 n_ptr->cleanup_required = 1;
478 tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
461} 479}
462 480
463/** 481/**
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 6f990da5d143..45f3db3a595d 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -52,6 +52,7 @@
52 * @active_links: pointers to active links to node 52 * @active_links: pointers to active links to node
53 * @links: pointers to all links to node 53 * @links: pointers to all links to node
54 * @working_links: number of working links to node (both active and standby) 54 * @working_links: number of working links to node (both active and standby)
55 * @cleanup_required: non-zero if cleaning up after a prior loss of contact
55 * @link_cnt: number of links to node 56 * @link_cnt: number of links to node
56 * @permit_changeover: non-zero if node has redundant links to this system 57 * @permit_changeover: non-zero if node has redundant links to this system
57 * @routers: bitmap (used for multicluster communication) 58 * @routers: bitmap (used for multicluster communication)
@@ -78,6 +79,7 @@ struct tipc_node {
78 struct link *links[MAX_BEARERS]; 79 struct link *links[MAX_BEARERS];
79 int link_cnt; 80 int link_cnt;
80 int working_links; 81 int working_links;
82 int cleanup_required;
81 int permit_changeover; 83 int permit_changeover;
82 u32 routers[512/32]; 84 u32 routers[512/32];
83 int last_router; 85 int last_router;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 0737680e9266..d760336f2ca8 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -588,19 +588,10 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
588 if (!p_ptr) { 588 if (!p_ptr) {
589 err = TIPC_ERR_NO_PORT; 589 err = TIPC_ERR_NO_PORT;
590 } else if (p_ptr->publ.connected) { 590 } else if (p_ptr->publ.connected) {
591 if (port_peernode(p_ptr) != msg_orignode(msg)) 591 if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
592 (port_peerport(p_ptr) != msg_origport(msg))) {
592 err = TIPC_ERR_NO_PORT; 593 err = TIPC_ERR_NO_PORT;
593 if (port_peerport(p_ptr) != msg_origport(msg)) 594 } else if (msg_type(msg) == CONN_ACK) {
594 err = TIPC_ERR_NO_PORT;
595 if (!err && msg_routed(msg)) {
596 u32 seqno = msg_transp_seqno(msg);
597 u32 myno = ++p_ptr->last_in_seqno;
598 if (seqno != myno) {
599 err = TIPC_ERR_NO_PORT;
600 abort_buf = port_build_self_abort_msg(p_ptr, err);
601 }
602 }
603 if (msg_type(msg) == CONN_ACK) {
604 int wakeup = tipc_port_congested(p_ptr) && 595 int wakeup = tipc_port_congested(p_ptr) &&
605 p_ptr->publ.congested && 596 p_ptr->publ.congested &&
606 p_ptr->wakeup; 597 p_ptr->wakeup;
@@ -1473,7 +1464,7 @@ int tipc_forward2name(u32 ref,
1473 msg_set_destnode(msg, destnode); 1464 msg_set_destnode(msg, destnode);
1474 msg_set_destport(msg, destport); 1465 msg_set_destport(msg, destport);
1475 1466
1476 if (likely(destport || destnode)) { 1467 if (likely(destport)) {
1477 p_ptr->sent++; 1468 p_ptr->sent++;
1478 if (likely(destnode == tipc_own_addr)) 1469 if (likely(destnode == tipc_own_addr))
1479 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1470 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
@@ -1551,7 +1542,7 @@ int tipc_forward_buf2name(u32 ref,
1551 skb_push(buf, LONG_H_SIZE); 1542 skb_push(buf, LONG_H_SIZE);
1552 skb_copy_to_linear_data(buf, msg, LONG_H_SIZE); 1543 skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
1553 msg_dbg(buf_msg(buf),"PREP:"); 1544 msg_dbg(buf_msg(buf),"PREP:");
1554 if (likely(destport || destnode)) { 1545 if (likely(destport)) {
1555 p_ptr->sent++; 1546 p_ptr->sent++;
1556 if (destnode == tipc_own_addr) 1547 if (destnode == tipc_own_addr)
1557 return tipc_port_recv_msg(buf); 1548 return tipc_port_recv_msg(buf);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 66e889ba48fd..f7ac94de24fe 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -64,6 +64,7 @@ struct tipc_sock {
64 struct sock sk; 64 struct sock sk;
65 struct tipc_port *p; 65 struct tipc_port *p;
66 struct tipc_portid peer_name; 66 struct tipc_portid peer_name;
67 long conn_timeout;
67}; 68};
68 69
69#define tipc_sk(sk) ((struct tipc_sock *)(sk)) 70#define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -240,9 +241,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
240 sock->state = state; 241 sock->state = state;
241 242
242 sock_init_data(sock, sk); 243 sock_init_data(sock, sk);
243 sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
244 sk->sk_backlog_rcv = backlog_rcv; 244 sk->sk_backlog_rcv = backlog_rcv;
245 tipc_sk(sk)->p = tp_ptr; 245 tipc_sk(sk)->p = tp_ptr;
246 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
246 247
247 spin_unlock_bh(tp_ptr->lock); 248 spin_unlock_bh(tp_ptr->lock);
248 249
@@ -429,36 +430,55 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
429 * to handle any preventable race conditions, so TIPC will do the same ... 430 * to handle any preventable race conditions, so TIPC will do the same ...
430 * 431 *
431 * TIPC sets the returned events as follows: 432 * TIPC sets the returned events as follows:
432 * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty 433 *
433 * or if a connection-oriented socket is does not have an active connection 434 * socket state flags set
434 * (i.e. a read operation will not block). 435 * ------------ ---------
435 * b) POLLOUT is set except when a socket's connection has been terminated 436 * unconnected no read flags
436 * (i.e. a write operation will not block). 437 * no write flags
437 * c) POLLHUP is set when a socket's connection has been terminated. 438 *
438 * 439 * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
439 * IMPORTANT: The fact that a read or write operation will not block does NOT 440 * no write flags
440 * imply that the operation will succeed! 441 *
442 * connected POLLIN/POLLRDNORM if data in rx queue
443 * POLLOUT if port is not congested
444 *
445 * disconnecting POLLIN/POLLRDNORM/POLLHUP
446 * no write flags
447 *
448 * listening POLLIN if SYN in rx queue
449 * no write flags
450 *
451 * ready POLLIN/POLLRDNORM if data in rx queue
452 * [connectionless] POLLOUT (since port cannot be congested)
453 *
454 * IMPORTANT: The fact that a read or write operation is indicated does NOT
455 * imply that the operation will succeed, merely that it should be performed
456 * and will not block.
441 */ 457 */
442 458
443static unsigned int poll(struct file *file, struct socket *sock, 459static unsigned int poll(struct file *file, struct socket *sock,
444 poll_table *wait) 460 poll_table *wait)
445{ 461{
446 struct sock *sk = sock->sk; 462 struct sock *sk = sock->sk;
447 u32 mask; 463 u32 mask = 0;
448 464
449 poll_wait(file, sk_sleep(sk), wait); 465 poll_wait(file, sk_sleep(sk), wait);
450 466
451 if (!skb_queue_empty(&sk->sk_receive_queue) || 467 switch ((int)sock->state) {
452 (sock->state == SS_UNCONNECTED) || 468 case SS_READY:
453 (sock->state == SS_DISCONNECTING)) 469 case SS_CONNECTED:
454 mask = (POLLRDNORM | POLLIN); 470 if (!tipc_sk_port(sk)->congested)
455 else 471 mask |= POLLOUT;
456 mask = 0; 472 /* fall thru' */
457 473 case SS_CONNECTING:
458 if (sock->state == SS_DISCONNECTING) 474 case SS_LISTENING:
459 mask |= POLLHUP; 475 if (!skb_queue_empty(&sk->sk_receive_queue))
460 else 476 mask |= (POLLIN | POLLRDNORM);
461 mask |= POLLOUT; 477 break;
478 case SS_DISCONNECTING:
479 mask = (POLLIN | POLLRDNORM | POLLHUP);
480 break;
481 }
462 482
463 return mask; 483 return mask;
464} 484}
@@ -1026,9 +1046,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1026 struct sk_buff *buf; 1046 struct sk_buff *buf;
1027 struct tipc_msg *msg; 1047 struct tipc_msg *msg;
1028 unsigned int sz; 1048 unsigned int sz;
1029 int sz_to_copy; 1049 int sz_to_copy, target, needed;
1030 int sz_copied = 0; 1050 int sz_copied = 0;
1031 int needed;
1032 char __user *crs = m->msg_iov->iov_base; 1051 char __user *crs = m->msg_iov->iov_base;
1033 unsigned char *buf_crs; 1052 unsigned char *buf_crs;
1034 u32 err; 1053 u32 err;
@@ -1050,6 +1069,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1050 goto exit; 1069 goto exit;
1051 } 1070 }
1052 1071
1072 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1073
1053restart: 1074restart:
1054 1075
1055 /* Look for a message in receive queue; wait if necessary */ 1076 /* Look for a message in receive queue; wait if necessary */
@@ -1138,7 +1159,7 @@ restart:
1138 1159
1139 if ((sz_copied < buf_len) && /* didn't get all requested data */ 1160 if ((sz_copied < buf_len) && /* didn't get all requested data */
1140 (!skb_queue_empty(&sk->sk_receive_queue) || 1161 (!skb_queue_empty(&sk->sk_receive_queue) ||
1141 (flags & MSG_WAITALL)) && /* and more is ready or required */ 1162 (sz_copied < target)) && /* and more is ready or required */
1142 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ 1163 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
1143 (!err)) /* and haven't reached a FIN */ 1164 (!err)) /* and haven't reached a FIN */
1144 goto restart; 1165 goto restart;
@@ -1365,6 +1386,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1365 struct msghdr m = {NULL,}; 1386 struct msghdr m = {NULL,};
1366 struct sk_buff *buf; 1387 struct sk_buff *buf;
1367 struct tipc_msg *msg; 1388 struct tipc_msg *msg;
1389 long timeout;
1368 int res; 1390 int res;
1369 1391
1370 lock_sock(sk); 1392 lock_sock(sk);
@@ -1379,7 +1401,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1379 /* For now, TIPC does not support the non-blocking form of connect() */ 1401 /* For now, TIPC does not support the non-blocking form of connect() */
1380 1402
1381 if (flags & O_NONBLOCK) { 1403 if (flags & O_NONBLOCK) {
1382 res = -EWOULDBLOCK; 1404 res = -EOPNOTSUPP;
1383 goto exit; 1405 goto exit;
1384 } 1406 }
1385 1407
@@ -1425,11 +1447,12 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1425 1447
1426 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ 1448 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1427 1449
1450 timeout = tipc_sk(sk)->conn_timeout;
1428 release_sock(sk); 1451 release_sock(sk);
1429 res = wait_event_interruptible_timeout(*sk_sleep(sk), 1452 res = wait_event_interruptible_timeout(*sk_sleep(sk),
1430 (!skb_queue_empty(&sk->sk_receive_queue) || 1453 (!skb_queue_empty(&sk->sk_receive_queue) ||
1431 (sock->state != SS_CONNECTING)), 1454 (sock->state != SS_CONNECTING)),
1432 sk->sk_rcvtimeo); 1455 timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
1433 lock_sock(sk); 1456 lock_sock(sk);
1434 1457
1435 if (res > 0) { 1458 if (res > 0) {
@@ -1692,7 +1715,7 @@ static int setsockopt(struct socket *sock,
1692 res = tipc_set_portunreturnable(tport->ref, value); 1715 res = tipc_set_portunreturnable(tport->ref, value);
1693 break; 1716 break;
1694 case TIPC_CONN_TIMEOUT: 1717 case TIPC_CONN_TIMEOUT:
1695 sk->sk_rcvtimeo = msecs_to_jiffies(value); 1718 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value);
1696 /* no need to set "res", since already 0 at this point */ 1719 /* no need to set "res", since already 0 at this point */
1697 break; 1720 break;
1698 default: 1721 default:
@@ -1747,7 +1770,7 @@ static int getsockopt(struct socket *sock,
1747 res = tipc_portunreturnable(tport->ref, &value); 1770 res = tipc_portunreturnable(tport->ref, &value);
1748 break; 1771 break;
1749 case TIPC_CONN_TIMEOUT: 1772 case TIPC_CONN_TIMEOUT:
1750 value = jiffies_to_msecs(sk->sk_rcvtimeo); 1773 value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
1751 /* no need to set "res", since already 0 at this point */ 1774 /* no need to set "res", since already 0 at this point */
1752 break; 1775 break;
1753 case TIPC_NODE_RECVQ_DEPTH: 1776 case TIPC_NODE_RECVQ_DEPTH:
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0b39b2451ea5..c586da3f4f18 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2033,11 +2033,10 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
2033 if (sk->sk_shutdown == SHUTDOWN_MASK) 2033 if (sk->sk_shutdown == SHUTDOWN_MASK)
2034 mask |= POLLHUP; 2034 mask |= POLLHUP;
2035 if (sk->sk_shutdown & RCV_SHUTDOWN) 2035 if (sk->sk_shutdown & RCV_SHUTDOWN)
2036 mask |= POLLRDHUP; 2036 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2037 2037
2038 /* readable? */ 2038 /* readable? */
2039 if (!skb_queue_empty(&sk->sk_receive_queue) || 2039 if (!skb_queue_empty(&sk->sk_receive_queue))
2040 (sk->sk_shutdown & RCV_SHUTDOWN))
2041 mask |= POLLIN | POLLRDNORM; 2040 mask |= POLLIN | POLLRDNORM;
2042 2041
2043 /* Connection-based need to check for termination and startup */ 2042 /* Connection-based need to check for termination and startup */
diff --git a/net/wireless/core.c b/net/wireless/core.c
index d6d046b9f6f2..d587ad284b3d 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -253,11 +253,16 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
253 WARN_ON(err); 253 WARN_ON(err);
254 wdev->netdev->features |= NETIF_F_NETNS_LOCAL; 254 wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
255 } 255 }
256
257 return err;
256 } 258 }
257 259
258 wiphy_net_set(&rdev->wiphy, net); 260 wiphy_net_set(&rdev->wiphy, net);
259 261
260 return err; 262 err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev));
263 WARN_ON(err);
264
265 return 0;
261} 266}
262 267
263static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) 268static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
@@ -428,7 +433,7 @@ int wiphy_register(struct wiphy *wiphy)
428 433
429 /* sanity check ifmodes */ 434 /* sanity check ifmodes */
430 WARN_ON(!ifmodes); 435 WARN_ON(!ifmodes);
431 ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; 436 ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1;
432 if (WARN_ON(ifmodes != wiphy->interface_modes)) 437 if (WARN_ON(ifmodes != wiphy->interface_modes))
433 wiphy->interface_modes = ifmodes; 438 wiphy->interface_modes = ifmodes;
434 439
@@ -683,8 +688,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
683 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); 688 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work);
684 INIT_LIST_HEAD(&wdev->event_list); 689 INIT_LIST_HEAD(&wdev->event_list);
685 spin_lock_init(&wdev->event_lock); 690 spin_lock_init(&wdev->event_lock);
686 INIT_LIST_HEAD(&wdev->action_registrations); 691 INIT_LIST_HEAD(&wdev->mgmt_registrations);
687 spin_lock_init(&wdev->action_registrations_lock); 692 spin_lock_init(&wdev->mgmt_registrations_lock);
688 693
689 mutex_lock(&rdev->devlist_mtx); 694 mutex_lock(&rdev->devlist_mtx);
690 list_add_rcu(&wdev->list, &rdev->netdev_list); 695 list_add_rcu(&wdev->list, &rdev->netdev_list);
@@ -804,7 +809,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
804 sysfs_remove_link(&dev->dev.kobj, "phy80211"); 809 sysfs_remove_link(&dev->dev.kobj, "phy80211");
805 list_del_rcu(&wdev->list); 810 list_del_rcu(&wdev->list);
806 rdev->devlist_generation++; 811 rdev->devlist_generation++;
807 cfg80211_mlme_purge_actions(wdev); 812 cfg80211_mlme_purge_registrations(wdev);
808#ifdef CONFIG_CFG80211_WEXT 813#ifdef CONFIG_CFG80211_WEXT
809 kfree(wdev->wext.keys); 814 kfree(wdev->wext.keys);
810#endif 815#endif
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 63d57ae399c3..58ab2c791d28 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -331,16 +331,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
331 const u8 *resp_ie, size_t resp_ie_len, 331 const u8 *resp_ie, size_t resp_ie_len,
332 u16 status, bool wextev, 332 u16 status, bool wextev,
333 struct cfg80211_bss *bss); 333 struct cfg80211_bss *bss);
334int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 334int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
335 const u8 *match_data, int match_len); 335 u16 frame_type, const u8 *match_data,
336void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); 336 int match_len);
337void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); 337void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
338int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 338void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
339 struct net_device *dev, 339int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
340 struct ieee80211_channel *chan, 340 struct net_device *dev,
341 enum nl80211_channel_type channel_type, 341 struct ieee80211_channel *chan,
342 bool channel_type_valid, 342 enum nl80211_channel_type channel_type,
343 const u8 *buf, size_t len, u64 *cookie); 343 bool channel_type_valid,
344 const u8 *buf, size_t len, u64 *cookie);
344 345
345/* SME */ 346/* SME */
346int __cfg80211_connect(struct cfg80211_registered_device *rdev, 347int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d1a3fb99fdf2..8515b1e5c578 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -149,7 +149,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; 149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
150 const u8 *bssid = mgmt->bssid; 150 const u8 *bssid = mgmt->bssid;
151 int i; 151 int i;
152 bool found = false; 152 bool found = false, was_current = false;
153 153
154 ASSERT_WDEV_LOCK(wdev); 154 ASSERT_WDEV_LOCK(wdev);
155 155
@@ -159,6 +159,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
159 cfg80211_put_bss(&wdev->current_bss->pub); 159 cfg80211_put_bss(&wdev->current_bss->pub);
160 wdev->current_bss = NULL; 160 wdev->current_bss = NULL;
161 found = true; 161 found = true;
162 was_current = true;
162 } else for (i = 0; i < MAX_AUTH_BSSES; i++) { 163 } else for (i = 0; i < MAX_AUTH_BSSES; i++) {
163 if (wdev->auth_bsses[i] && 164 if (wdev->auth_bsses[i] &&
164 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) { 165 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) {
@@ -183,7 +184,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
183 184
184 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); 185 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL);
185 186
186 if (wdev->sme_state == CFG80211_SME_CONNECTED) { 187 if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) {
187 u16 reason_code; 188 u16 reason_code;
188 bool from_ap; 189 bool from_ap;
189 190
@@ -747,31 +748,51 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
747} 748}
748EXPORT_SYMBOL(cfg80211_new_sta); 749EXPORT_SYMBOL(cfg80211_new_sta);
749 750
750struct cfg80211_action_registration { 751struct cfg80211_mgmt_registration {
751 struct list_head list; 752 struct list_head list;
752 753
753 u32 nlpid; 754 u32 nlpid;
754 755
755 int match_len; 756 int match_len;
756 757
758 __le16 frame_type;
759
757 u8 match[]; 760 u8 match[];
758}; 761};
759 762
760int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 763int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
761 const u8 *match_data, int match_len) 764 u16 frame_type, const u8 *match_data,
765 int match_len)
762{ 766{
763 struct cfg80211_action_registration *reg, *nreg; 767 struct cfg80211_mgmt_registration *reg, *nreg;
764 int err = 0; 768 int err = 0;
769 u16 mgmt_type;
770
771 if (!wdev->wiphy->mgmt_stypes)
772 return -EOPNOTSUPP;
773
774 if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT)
775 return -EINVAL;
776
777 if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE))
778 return -EINVAL;
779
780 mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4;
781 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type)))
782 return -EINVAL;
765 783
766 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); 784 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL);
767 if (!nreg) 785 if (!nreg)
768 return -ENOMEM; 786 return -ENOMEM;
769 787
770 spin_lock_bh(&wdev->action_registrations_lock); 788 spin_lock_bh(&wdev->mgmt_registrations_lock);
771 789
772 list_for_each_entry(reg, &wdev->action_registrations, list) { 790 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
773 int mlen = min(match_len, reg->match_len); 791 int mlen = min(match_len, reg->match_len);
774 792
793 if (frame_type != le16_to_cpu(reg->frame_type))
794 continue;
795
775 if (memcmp(reg->match, match_data, mlen) == 0) { 796 if (memcmp(reg->match, match_data, mlen) == 0) {
776 err = -EALREADY; 797 err = -EALREADY;
777 break; 798 break;
@@ -786,62 +807,75 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid,
786 memcpy(nreg->match, match_data, match_len); 807 memcpy(nreg->match, match_data, match_len);
787 nreg->match_len = match_len; 808 nreg->match_len = match_len;
788 nreg->nlpid = snd_pid; 809 nreg->nlpid = snd_pid;
789 list_add(&nreg->list, &wdev->action_registrations); 810 nreg->frame_type = cpu_to_le16(frame_type);
811 list_add(&nreg->list, &wdev->mgmt_registrations);
790 812
791 out: 813 out:
792 spin_unlock_bh(&wdev->action_registrations_lock); 814 spin_unlock_bh(&wdev->mgmt_registrations_lock);
793 return err; 815 return err;
794} 816}
795 817
796void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) 818void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
797{ 819{
798 struct cfg80211_action_registration *reg, *tmp; 820 struct cfg80211_mgmt_registration *reg, *tmp;
799 821
800 spin_lock_bh(&wdev->action_registrations_lock); 822 spin_lock_bh(&wdev->mgmt_registrations_lock);
801 823
802 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 824 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
803 if (reg->nlpid == nlpid) { 825 if (reg->nlpid == nlpid) {
804 list_del(&reg->list); 826 list_del(&reg->list);
805 kfree(reg); 827 kfree(reg);
806 } 828 }
807 } 829 }
808 830
809 spin_unlock_bh(&wdev->action_registrations_lock); 831 spin_unlock_bh(&wdev->mgmt_registrations_lock);
810} 832}
811 833
812void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) 834void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
813{ 835{
814 struct cfg80211_action_registration *reg, *tmp; 836 struct cfg80211_mgmt_registration *reg, *tmp;
815 837
816 spin_lock_bh(&wdev->action_registrations_lock); 838 spin_lock_bh(&wdev->mgmt_registrations_lock);
817 839
818 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 840 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
819 list_del(&reg->list); 841 list_del(&reg->list);
820 kfree(reg); 842 kfree(reg);
821 } 843 }
822 844
823 spin_unlock_bh(&wdev->action_registrations_lock); 845 spin_unlock_bh(&wdev->mgmt_registrations_lock);
824} 846}
825 847
826int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 848int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
827 struct net_device *dev, 849 struct net_device *dev,
828 struct ieee80211_channel *chan, 850 struct ieee80211_channel *chan,
829 enum nl80211_channel_type channel_type, 851 enum nl80211_channel_type channel_type,
830 bool channel_type_valid, 852 bool channel_type_valid,
831 const u8 *buf, size_t len, u64 *cookie) 853 const u8 *buf, size_t len, u64 *cookie)
832{ 854{
833 struct wireless_dev *wdev = dev->ieee80211_ptr; 855 struct wireless_dev *wdev = dev->ieee80211_ptr;
834 const struct ieee80211_mgmt *mgmt; 856 const struct ieee80211_mgmt *mgmt;
857 u16 stype;
858
859 if (!wdev->wiphy->mgmt_stypes)
860 return -EOPNOTSUPP;
835 861
836 if (rdev->ops->action == NULL) 862 if (!rdev->ops->mgmt_tx)
837 return -EOPNOTSUPP; 863 return -EOPNOTSUPP;
864
838 if (len < 24 + 1) 865 if (len < 24 + 1)
839 return -EINVAL; 866 return -EINVAL;
840 867
841 mgmt = (const struct ieee80211_mgmt *) buf; 868 mgmt = (const struct ieee80211_mgmt *) buf;
842 if (!ieee80211_is_action(mgmt->frame_control)) 869
870 if (!ieee80211_is_mgmt(mgmt->frame_control))
843 return -EINVAL; 871 return -EINVAL;
844 if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { 872
873 stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
874 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4)))
875 return -EINVAL;
876
877 if (ieee80211_is_action(mgmt->frame_control) &&
878 mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) {
845 /* Verify that we are associated with the destination AP */ 879 /* Verify that we are associated with the destination AP */
846 wdev_lock(wdev); 880 wdev_lock(wdev);
847 881
@@ -862,64 +896,75 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
862 return -EINVAL; 896 return -EINVAL;
863 897
864 /* Transmit the Action frame as requested by user space */ 898 /* Transmit the Action frame as requested by user space */
865 return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, 899 return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type,
866 channel_type_valid, buf, len, cookie); 900 channel_type_valid, buf, len, cookie);
867} 901}
868 902
869bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, 903bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
870 size_t len, gfp_t gfp) 904 size_t len, gfp_t gfp)
871{ 905{
872 struct wireless_dev *wdev = dev->ieee80211_ptr; 906 struct wireless_dev *wdev = dev->ieee80211_ptr;
873 struct wiphy *wiphy = wdev->wiphy; 907 struct wiphy *wiphy = wdev->wiphy;
874 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 908 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
875 struct cfg80211_action_registration *reg; 909 struct cfg80211_mgmt_registration *reg;
876 const u8 *action_data; 910 const struct ieee80211_txrx_stypes *stypes =
877 int action_data_len; 911 &wiphy->mgmt_stypes[wdev->iftype];
912 struct ieee80211_mgmt *mgmt = (void *)buf;
913 const u8 *data;
914 int data_len;
878 bool result = false; 915 bool result = false;
916 __le16 ftype = mgmt->frame_control &
917 cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
918 u16 stype;
879 919
880 /* frame length - min size excluding category */ 920 stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
881 action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1);
882 921
883 /* action data starts with category */ 922 if (!(stypes->rx & BIT(stype)))
884 action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; 923 return false;
885 924
886 spin_lock_bh(&wdev->action_registrations_lock); 925 data = buf + ieee80211_hdrlen(mgmt->frame_control);
926 data_len = len - ieee80211_hdrlen(mgmt->frame_control);
927
928 spin_lock_bh(&wdev->mgmt_registrations_lock);
929
930 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
931 if (reg->frame_type != ftype)
932 continue;
887 933
888 list_for_each_entry(reg, &wdev->action_registrations, list) { 934 if (reg->match_len > data_len)
889 if (reg->match_len > action_data_len)
890 continue; 935 continue;
891 936
892 if (memcmp(reg->match, action_data, reg->match_len)) 937 if (memcmp(reg->match, data, reg->match_len))
893 continue; 938 continue;
894 939
895 /* found match! */ 940 /* found match! */
896 941
897 /* Indicate the received Action frame to user space */ 942 /* Indicate the received Action frame to user space */
898 if (nl80211_send_action(rdev, dev, reg->nlpid, freq, 943 if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq,
899 buf, len, gfp)) 944 buf, len, gfp))
900 continue; 945 continue;
901 946
902 result = true; 947 result = true;
903 break; 948 break;
904 } 949 }
905 950
906 spin_unlock_bh(&wdev->action_registrations_lock); 951 spin_unlock_bh(&wdev->mgmt_registrations_lock);
907 952
908 return result; 953 return result;
909} 954}
910EXPORT_SYMBOL(cfg80211_rx_action); 955EXPORT_SYMBOL(cfg80211_rx_mgmt);
911 956
912void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, 957void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie,
913 const u8 *buf, size_t len, bool ack, gfp_t gfp) 958 const u8 *buf, size_t len, bool ack, gfp_t gfp)
914{ 959{
915 struct wireless_dev *wdev = dev->ieee80211_ptr; 960 struct wireless_dev *wdev = dev->ieee80211_ptr;
916 struct wiphy *wiphy = wdev->wiphy; 961 struct wiphy *wiphy = wdev->wiphy;
917 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 962 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
918 963
919 /* Indicate TX status of the Action frame to user space */ 964 /* Indicate TX status of the Action frame to user space */
920 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); 965 nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
921} 966}
922EXPORT_SYMBOL(cfg80211_action_tx_status); 967EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
923 968
924void cfg80211_cqm_rssi_notify(struct net_device *dev, 969void cfg80211_cqm_rssi_notify(struct net_device *dev,
925 enum nl80211_cqm_rssi_threshold_event rssi_event, 970 enum nl80211_cqm_rssi_threshold_event rssi_event,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 37902a54e9c1..85a23de7bff3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -136,6 +136,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
136 .len = sizeof(struct nl80211_sta_flag_update), 136 .len = sizeof(struct nl80211_sta_flag_update),
137 }, 137 },
138 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, 138 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
139 [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
140 [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
139 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, 141 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
140 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, 142 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
141 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, 143 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
@@ -156,6 +158,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
156 158
157 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, 159 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
158 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, 160 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
161 [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
159}; 162};
160 163
161/* policy for the attributes */ 164/* policy for the attributes */
@@ -437,6 +440,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
437 struct ieee80211_rate *rate; 440 struct ieee80211_rate *rate;
438 int i; 441 int i;
439 u16 ifmodes = dev->wiphy.interface_modes; 442 u16 ifmodes = dev->wiphy.interface_modes;
443 const struct ieee80211_txrx_stypes *mgmt_stypes =
444 dev->wiphy.mgmt_stypes;
440 445
441 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); 446 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY);
442 if (!hdr) 447 if (!hdr)
@@ -471,6 +476,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
471 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, 476 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
472 dev->wiphy.max_num_pmkids); 477 dev->wiphy.max_num_pmkids);
473 478
479 if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
480 NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
481
474 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 482 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
475 if (!nl_modes) 483 if (!nl_modes)
476 goto nla_put_failure; 484 goto nla_put_failure;
@@ -587,7 +595,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
587 CMD(flush_pmksa, FLUSH_PMKSA); 595 CMD(flush_pmksa, FLUSH_PMKSA);
588 CMD(remain_on_channel, REMAIN_ON_CHANNEL); 596 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
589 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); 597 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
590 CMD(action, ACTION); 598 CMD(mgmt_tx, FRAME);
591 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { 599 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
592 i++; 600 i++;
593 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); 601 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -608,6 +616,55 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
608 616
609 nla_nest_end(msg, nl_cmds); 617 nla_nest_end(msg, nl_cmds);
610 618
619 if (mgmt_stypes) {
620 u16 stypes;
621 struct nlattr *nl_ftypes, *nl_ifs;
622 enum nl80211_iftype ift;
623
624 nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
625 if (!nl_ifs)
626 goto nla_put_failure;
627
628 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
629 nl_ftypes = nla_nest_start(msg, ift);
630 if (!nl_ftypes)
631 goto nla_put_failure;
632 i = 0;
633 stypes = mgmt_stypes[ift].tx;
634 while (stypes) {
635 if (stypes & 1)
636 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
637 (i << 4) | IEEE80211_FTYPE_MGMT);
638 stypes >>= 1;
639 i++;
640 }
641 nla_nest_end(msg, nl_ftypes);
642 }
643
644 nla_nest_end(msg, nl_ifs);
645
646 nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
647 if (!nl_ifs)
648 goto nla_put_failure;
649
650 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
651 nl_ftypes = nla_nest_start(msg, ift);
652 if (!nl_ftypes)
653 goto nla_put_failure;
654 i = 0;
655 stypes = mgmt_stypes[ift].rx;
656 while (stypes) {
657 if (stypes & 1)
658 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
659 (i << 4) | IEEE80211_FTYPE_MGMT);
660 stypes >>= 1;
661 i++;
662 }
663 nla_nest_end(msg, nl_ftypes);
664 }
665 nla_nest_end(msg, nl_ifs);
666 }
667
611 return genlmsg_end(msg, hdr); 668 return genlmsg_end(msg, hdr);
612 669
613 nla_put_failure: 670 nla_put_failure:
@@ -3572,6 +3629,21 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3572 if (err) 3629 if (err)
3573 goto unlock_rtnl; 3630 goto unlock_rtnl;
3574 3631
3632 if (key.idx >= 0) {
3633 int i;
3634 bool ok = false;
3635 for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) {
3636 if (key.p.cipher == rdev->wiphy.cipher_suites[i]) {
3637 ok = true;
3638 break;
3639 }
3640 }
3641 if (!ok) {
3642 err = -EINVAL;
3643 goto out;
3644 }
3645 }
3646
3575 if (!rdev->ops->auth) { 3647 if (!rdev->ops->auth) {
3576 err = -EOPNOTSUPP; 3648 err = -EOPNOTSUPP;
3577 goto out; 3649 goto out;
@@ -3624,7 +3696,8 @@ unlock_rtnl:
3624 return err; 3696 return err;
3625} 3697}
3626 3698
3627static int nl80211_crypto_settings(struct genl_info *info, 3699static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
3700 struct genl_info *info,
3628 struct cfg80211_crypto_settings *settings, 3701 struct cfg80211_crypto_settings *settings,
3629 int cipher_limit) 3702 int cipher_limit)
3630{ 3703{
@@ -3632,6 +3705,19 @@ static int nl80211_crypto_settings(struct genl_info *info,
3632 3705
3633 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; 3706 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT];
3634 3707
3708 if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
3709 u16 proto;
3710 proto = nla_get_u16(
3711 info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
3712 settings->control_port_ethertype = cpu_to_be16(proto);
3713 if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
3714 proto != ETH_P_PAE)
3715 return -EINVAL;
3716 if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT])
3717 settings->control_port_no_encrypt = true;
3718 } else
3719 settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);
3720
3635 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { 3721 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
3636 void *data; 3722 void *data;
3637 int len, i; 3723 int len, i;
@@ -3759,7 +3845,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3759 if (info->attrs[NL80211_ATTR_PREV_BSSID]) 3845 if (info->attrs[NL80211_ATTR_PREV_BSSID])
3760 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); 3846 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
3761 3847
3762 err = nl80211_crypto_settings(info, &crypto, 1); 3848 err = nl80211_crypto_settings(rdev, info, &crypto, 1);
3763 if (!err) 3849 if (!err)
3764 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, 3850 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
3765 ssid, ssid_len, ie, ie_len, use_mfp, 3851 ssid, ssid_len, ie, ie_len, use_mfp,
@@ -4236,7 +4322,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
4236 4322
4237 connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; 4323 connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];
4238 4324
4239 err = nl80211_crypto_settings(info, &connect.crypto, 4325 err = nl80211_crypto_settings(rdev, info, &connect.crypto,
4240 NL80211_MAX_NR_CIPHER_SUITES); 4326 NL80211_MAX_NR_CIPHER_SUITES);
4241 if (err) 4327 if (err)
4242 return err; 4328 return err;
@@ -4717,17 +4803,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
4717 return err; 4803 return err;
4718} 4804}
4719 4805
4720static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) 4806static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
4721{ 4807{
4722 struct cfg80211_registered_device *rdev; 4808 struct cfg80211_registered_device *rdev;
4723 struct net_device *dev; 4809 struct net_device *dev;
4810 u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION;
4724 int err; 4811 int err;
4725 4812
4726 if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) 4813 if (!info->attrs[NL80211_ATTR_FRAME_MATCH])
4727 return -EINVAL; 4814 return -EINVAL;
4728 4815
4729 if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) 4816 if (info->attrs[NL80211_ATTR_FRAME_TYPE])
4730 return -EINVAL; 4817 frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]);
4731 4818
4732 rtnl_lock(); 4819 rtnl_lock();
4733 4820
@@ -4742,12 +4829,13 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
4742 } 4829 }
4743 4830
4744 /* not much point in registering if we can't reply */ 4831 /* not much point in registering if we can't reply */
4745 if (!rdev->ops->action) { 4832 if (!rdev->ops->mgmt_tx) {
4746 err = -EOPNOTSUPP; 4833 err = -EOPNOTSUPP;
4747 goto out; 4834 goto out;
4748 } 4835 }
4749 4836
4750 err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, 4837 err = cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid,
4838 frame_type,
4751 nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), 4839 nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
4752 nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); 4840 nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]));
4753 out: 4841 out:
@@ -4758,7 +4846,7 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
4758 return err; 4846 return err;
4759} 4847}
4760 4848
4761static int nl80211_action(struct sk_buff *skb, struct genl_info *info) 4849static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
4762{ 4850{
4763 struct cfg80211_registered_device *rdev; 4851 struct cfg80211_registered_device *rdev;
4764 struct net_device *dev; 4852 struct net_device *dev;
@@ -4781,7 +4869,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4781 if (err) 4869 if (err)
4782 goto unlock_rtnl; 4870 goto unlock_rtnl;
4783 4871
4784 if (!rdev->ops->action) { 4872 if (!rdev->ops->mgmt_tx) {
4785 err = -EOPNOTSUPP; 4873 err = -EOPNOTSUPP;
4786 goto out; 4874 goto out;
4787 } 4875 }
@@ -4824,17 +4912,17 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4824 } 4912 }
4825 4913
4826 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 4914 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
4827 NL80211_CMD_ACTION); 4915 NL80211_CMD_FRAME);
4828 4916
4829 if (IS_ERR(hdr)) { 4917 if (IS_ERR(hdr)) {
4830 err = PTR_ERR(hdr); 4918 err = PTR_ERR(hdr);
4831 goto free_msg; 4919 goto free_msg;
4832 } 4920 }
4833 err = cfg80211_mlme_action(rdev, dev, chan, channel_type, 4921 err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type,
4834 channel_type_valid, 4922 channel_type_valid,
4835 nla_data(info->attrs[NL80211_ATTR_FRAME]), 4923 nla_data(info->attrs[NL80211_ATTR_FRAME]),
4836 nla_len(info->attrs[NL80211_ATTR_FRAME]), 4924 nla_len(info->attrs[NL80211_ATTR_FRAME]),
4837 &cookie); 4925 &cookie);
4838 if (err) 4926 if (err)
4839 goto free_msg; 4927 goto free_msg;
4840 4928
@@ -5333,14 +5421,14 @@ static struct genl_ops nl80211_ops[] = {
5333 .flags = GENL_ADMIN_PERM, 5421 .flags = GENL_ADMIN_PERM,
5334 }, 5422 },
5335 { 5423 {
5336 .cmd = NL80211_CMD_REGISTER_ACTION, 5424 .cmd = NL80211_CMD_REGISTER_FRAME,
5337 .doit = nl80211_register_action, 5425 .doit = nl80211_register_mgmt,
5338 .policy = nl80211_policy, 5426 .policy = nl80211_policy,
5339 .flags = GENL_ADMIN_PERM, 5427 .flags = GENL_ADMIN_PERM,
5340 }, 5428 },
5341 { 5429 {
5342 .cmd = NL80211_CMD_ACTION, 5430 .cmd = NL80211_CMD_FRAME,
5343 .doit = nl80211_action, 5431 .doit = nl80211_tx_mgmt,
5344 .policy = nl80211_policy, 5432 .policy = nl80211_policy,
5345 .flags = GENL_ADMIN_PERM, 5433 .flags = GENL_ADMIN_PERM,
5346 }, 5434 },
@@ -6040,9 +6128,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
6040 nl80211_mlme_mcgrp.id, gfp); 6128 nl80211_mlme_mcgrp.id, gfp);
6041} 6129}
6042 6130
6043int nl80211_send_action(struct cfg80211_registered_device *rdev, 6131int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
6044 struct net_device *netdev, u32 nlpid, 6132 struct net_device *netdev, u32 nlpid,
6045 int freq, const u8 *buf, size_t len, gfp_t gfp) 6133 int freq, const u8 *buf, size_t len, gfp_t gfp)
6046{ 6134{
6047 struct sk_buff *msg; 6135 struct sk_buff *msg;
6048 void *hdr; 6136 void *hdr;
@@ -6052,7 +6140,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
6052 if (!msg) 6140 if (!msg)
6053 return -ENOMEM; 6141 return -ENOMEM;
6054 6142
6055 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); 6143 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
6056 if (!hdr) { 6144 if (!hdr) {
6057 nlmsg_free(msg); 6145 nlmsg_free(msg);
6058 return -ENOMEM; 6146 return -ENOMEM;
@@ -6080,10 +6168,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
6080 return -ENOBUFS; 6168 return -ENOBUFS;
6081} 6169}
6082 6170
6083void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, 6171void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
6084 struct net_device *netdev, u64 cookie, 6172 struct net_device *netdev, u64 cookie,
6085 const u8 *buf, size_t len, bool ack, 6173 const u8 *buf, size_t len, bool ack,
6086 gfp_t gfp) 6174 gfp_t gfp)
6087{ 6175{
6088 struct sk_buff *msg; 6176 struct sk_buff *msg;
6089 void *hdr; 6177 void *hdr;
@@ -6092,7 +6180,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
6092 if (!msg) 6180 if (!msg)
6093 return; 6181 return;
6094 6182
6095 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); 6183 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS);
6096 if (!hdr) { 6184 if (!hdr) {
6097 nlmsg_free(msg); 6185 nlmsg_free(msg);
6098 return; 6186 return;
@@ -6179,7 +6267,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
6179 6267
6180 list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) 6268 list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list)
6181 list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) 6269 list_for_each_entry_rcu(wdev, &rdev->netdev_list, list)
6182 cfg80211_mlme_unregister_actions(wdev, notify->pid); 6270 cfg80211_mlme_unregister_socket(wdev, notify->pid);
6183 6271
6184 rcu_read_unlock(); 6272 rcu_read_unlock();
6185 6273
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 2ad7fbc7d9f1..30d2f939150d 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
74 struct net_device *dev, const u8 *mac_addr, 74 struct net_device *dev, const u8 *mac_addr,
75 struct station_info *sinfo, gfp_t gfp); 75 struct station_info *sinfo, gfp_t gfp);
76 76
77int nl80211_send_action(struct cfg80211_registered_device *rdev, 77int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
78 struct net_device *netdev, u32 nlpid, int freq, 78 struct net_device *netdev, u32 nlpid, int freq,
79 const u8 *buf, size_t len, gfp_t gfp); 79 const u8 *buf, size_t len, gfp_t gfp);
80void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, 80void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
81 struct net_device *netdev, u64 cookie, 81 struct net_device *netdev, u64 cookie,
82 const u8 *buf, size_t len, bool ack, 82 const u8 *buf, size_t len, bool ack,
83 gfp_t gfp); 83 gfp_t gfp);
84 84
85void 85void
86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, 86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f180db0de66c..b0d9a08447c9 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -36,6 +36,7 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/random.h> 38#include <linux/random.h>
39#include <linux/ctype.h>
39#include <linux/nl80211.h> 40#include <linux/nl80211.h>
40#include <linux/platform_device.h> 41#include <linux/platform_device.h>
41#include <net/cfg80211.h> 42#include <net/cfg80211.h>
@@ -181,14 +182,6 @@ static bool is_alpha2_set(const char *alpha2)
181 return false; 182 return false;
182} 183}
183 184
184static bool is_alpha_upper(char letter)
185{
186 /* ASCII A - Z */
187 if (letter >= 65 && letter <= 90)
188 return true;
189 return false;
190}
191
192static bool is_unknown_alpha2(const char *alpha2) 185static bool is_unknown_alpha2(const char *alpha2)
193{ 186{
194 if (!alpha2) 187 if (!alpha2)
@@ -220,7 +213,7 @@ static bool is_an_alpha2(const char *alpha2)
220{ 213{
221 if (!alpha2) 214 if (!alpha2)
222 return false; 215 return false;
223 if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1])) 216 if (isalpha(alpha2[0]) && isalpha(alpha2[1]))
224 return true; 217 return true;
225 return false; 218 return false;
226} 219}
@@ -1399,6 +1392,11 @@ static DECLARE_WORK(reg_work, reg_todo);
1399 1392
1400static void queue_regulatory_request(struct regulatory_request *request) 1393static void queue_regulatory_request(struct regulatory_request *request)
1401{ 1394{
1395 if (isalpha(request->alpha2[0]))
1396 request->alpha2[0] = toupper(request->alpha2[0]);
1397 if (isalpha(request->alpha2[1]))
1398 request->alpha2[1] = toupper(request->alpha2[1]);
1399
1402 spin_lock(&reg_requests_lock); 1400 spin_lock(&reg_requests_lock);
1403 list_add_tail(&request->list, &reg_requests_list); 1401 list_add_tail(&request->list, &reg_requests_list);
1404 spin_unlock(&reg_requests_lock); 1402 spin_unlock(&reg_requests_lock);
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9f2cef3e0ca0..74a9e3cce452 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -110,6 +110,13 @@ static int wiphy_resume(struct device *dev)
110 return ret; 110 return ret;
111} 111}
112 112
113static const void *wiphy_namespace(struct device *d)
114{
115 struct wiphy *wiphy = container_of(d, struct wiphy, dev);
116
117 return wiphy_net(wiphy);
118}
119
113struct class ieee80211_class = { 120struct class ieee80211_class = {
114 .name = "ieee80211", 121 .name = "ieee80211",
115 .owner = THIS_MODULE, 122 .owner = THIS_MODULE,
@@ -120,6 +127,8 @@ struct class ieee80211_class = {
120#endif 127#endif
121 .suspend = wiphy_suspend, 128 .suspend = wiphy_suspend,
122 .resume = wiphy_resume, 129 .resume = wiphy_resume,
130 .ns_type = &net_ns_type_operations,
131 .namespace = wiphy_namespace,
123}; 132};
124 133
125int wiphy_sysfs_init(void) 134int wiphy_sysfs_init(void)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 0c8a1e8b7690..bca32eb8f446 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -183,7 +183,14 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
183 return -EINVAL; 183 return -EINVAL;
184 break; 184 break;
185 default: 185 default:
186 return -EINVAL; 186 /*
187 * We don't know anything about this algorithm,
188 * allow using it -- but the driver must check
189 * all parameters! We still check below whether
190 * or not the driver supports this algorithm,
191 * of course.
192 */
193 break;
187 } 194 }
188 195
189 if (params->seq) { 196 if (params->seq) {
@@ -221,7 +228,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) =
221 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 228 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
222EXPORT_SYMBOL(bridge_tunnel_header); 229EXPORT_SYMBOL(bridge_tunnel_header);
223 230
224unsigned int ieee80211_hdrlen(__le16 fc) 231unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc)
225{ 232{
226 unsigned int hdrlen = 24; 233 unsigned int hdrlen = 24;
227 234
@@ -823,7 +830,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
823 /* monitor can't bridge anyway */ 830 /* monitor can't bridge anyway */
824 break; 831 break;
825 case NL80211_IFTYPE_UNSPECIFIED: 832 case NL80211_IFTYPE_UNSPECIFIED:
826 case __NL80211_IFTYPE_AFTER_LAST: 833 case NUM_NL80211_IFTYPES:
827 /* not happening */ 834 /* not happening */
828 break; 835 break;
829 } 836 }
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 8f5116f5af19..dc675a3daa3d 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -611,7 +611,7 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
611#endif 611#endif
612 612
613#ifdef CONFIG_CFG80211_WEXT 613#ifdef CONFIG_CFG80211_WEXT
614 if (dev->ieee80211_ptr && dev->ieee80211_ptr && 614 if (dev->ieee80211_ptr &&
615 dev->ieee80211_ptr->wiphy && 615 dev->ieee80211_ptr->wiphy &&
616 dev->ieee80211_ptr->wiphy->wext && 616 dev->ieee80211_ptr->wiphy->wext &&
617 dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) 617 dev->ieee80211_ptr->wiphy->wext->get_wireless_stats)
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 9818198add8a..6fffe62d7c25 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -197,6 +197,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
197 wdev->wext.connect.ssid_len = len; 197 wdev->wext.connect.ssid_len = len;
198 198
199 wdev->wext.connect.crypto.control_port = false; 199 wdev->wext.connect.crypto.control_port = false;
200 wdev->wext.connect.crypto.control_port_ethertype =
201 cpu_to_be16(ETH_P_PAE);
200 202
201 err = cfg80211_mgd_wext_connect(rdev, wdev); 203 err = cfg80211_mgd_wext_connect(rdev, wdev);
202 out: 204 out: