Diffstat (limited to 'net')
-rw-r--r--  net/802/fc.c  2
-rw-r--r--  net/802/fddi.c  12
-rw-r--r--  net/802/hippi.c  2
-rw-r--r--  net/802/tr.c  2
-rw-r--r--  net/8021q/vlan.c  4
-rw-r--r--  net/8021q/vlan.h  2
-rw-r--r--  net/8021q/vlan_core.c  16
-rw-r--r--  net/8021q/vlan_dev.c  9
-rw-r--r--  net/9p/client.c  4
-rw-r--r--  net/9p/trans_fd.c  2
-rw-r--r--  net/atm/common.c  2
-rw-r--r--  net/atm/lec.c  1
-rw-r--r--  net/ax25/af_ax25.c  2
-rw-r--r--  net/ax25/ax25_route.c  4
-rw-r--r--  net/bluetooth/af_bluetooth.c  5
-rw-r--r--  net/bluetooth/rfcomm/core.c  4
-rw-r--r--  net/bridge/br_if.c  29
-rw-r--r--  net/bridge/br_input.c  2
-rw-r--r--  net/bridge/br_netfilter.c  107
-rw-r--r--  net/caif/caif_dev.c  24
-rw-r--r--  net/caif/caif_socket.c  26
-rw-r--r--  net/caif/cfcnfg.c  49
-rw-r--r--  net/caif/cfctrl.c  59
-rw-r--r--  net/caif/cfdbgl.c  4
-rw-r--r--  net/caif/cfdgml.c  11
-rw-r--r--  net/caif/cffrml.c  14
-rw-r--r--  net/caif/cfmuxl.c  14
-rw-r--r--  net/caif/cfpkt_skbuff.c  48
-rw-r--r--  net/caif/cfrfml.c  12
-rw-r--r--  net/caif/cfserl.c  4
-rw-r--r--  net/caif/cfsrvl.c  17
-rw-r--r--  net/caif/cfutill.c  12
-rw-r--r--  net/caif/cfveil.c  11
-rw-r--r--  net/caif/cfvidl.c  6
-rw-r--r--  net/caif/chnl_net.c  47
-rw-r--r--  net/can/raw.c  4
-rw-r--r--  net/core/datagram.c  5
-rw-r--r--  net/core/dev.c  285
-rw-r--r--  net/core/ethtool.c  88
-rw-r--r--  net/core/flow.c  82
-rw-r--r--  net/core/gen_estimator.c  4
-rw-r--r--  net/core/iovec.c  6
-rw-r--r--  net/core/neighbour.c  6
-rw-r--r--  net/core/net-sysfs.c  5
-rw-r--r--  net/core/pktgen.c  12
-rw-r--r--  net/core/rtnetlink.c  31
-rw-r--r--  net/core/skbuff.c  92
-rw-r--r--  net/core/sock.c  4
-rw-r--r--  net/core/utils.c  15
-rw-r--r--  net/dccp/ccid.h  46
-rw-r--r--  net/dccp/ccids/Kconfig  31
-rw-r--r--  net/dccp/ccids/ccid2.c  287
-rw-r--r--  net/dccp/ccids/ccid2.h  35
-rw-r--r--  net/dccp/ccids/ccid3.c  253
-rw-r--r--  net/dccp/ccids/ccid3.h  51
-rw-r--r--  net/dccp/ccids/lib/loss_interval.c  2
-rw-r--r--  net/dccp/ccids/lib/packet_history.c  39
-rw-r--r--  net/dccp/ccids/lib/packet_history.h  22
-rw-r--r--  net/dccp/ccids/lib/tfrc.h  1
-rw-r--r--  net/dccp/ccids/lib/tfrc_equation.c  14
-rw-r--r--  net/dccp/options.c  25
-rw-r--r--  net/decnet/dn_nsp_out.c  8
-rw-r--r--  net/econet/af_econet.c  6
-rw-r--r--  net/ethernet/eth.c  8
-rw-r--r--  net/ipv4/Kconfig  7
-rw-r--r--  net/ipv4/Makefile  1
-rw-r--r--  net/ipv4/af_inet.c  8
-rw-r--r--  net/ipv4/arp.c  228
-rw-r--r--  net/ipv4/datagram.c  2
-rw-r--r--  net/ipv4/devinet.c  4
-rw-r--r--  net/ipv4/fib_trie.c  55
-rw-r--r--  net/ipv4/gre.c  151
-rw-r--r--  net/ipv4/icmp.c  4
-rw-r--r--  net/ipv4/inet_diag.c  2
-rw-r--r--  net/ipv4/ip_fragment.c  6
-rw-r--r--  net/ipv4/ip_gre.c  83
-rw-r--r--  net/ipv4/ip_options.c  3
-rw-r--r--  net/ipv4/ip_output.c  24
-rw-r--r--  net/ipv4/ipip.c  74
-rw-r--r--  net/ipv4/ipmr.c  2
-rw-r--r--  net/ipv4/netfilter/arp_tables.c  2
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c  31
-rw-r--r--  net/ipv4/protocol.c  31
-rw-r--r--  net/ipv4/raw.c  2
-rw-r--r--  net/ipv4/route.c  13
-rw-r--r--  net/ipv4/tcp.c  11
-rw-r--r--  net/ipv4/tcp_input.c  27
-rw-r--r--  net/ipv4/tcp_ipv4.c  2
-rw-r--r--  net/ipv4/tcp_minisocks.c  2
-rw-r--r--  net/ipv4/tcp_output.c  31
-rw-r--r--  net/ipv4/tcp_timer.c  40
-rw-r--r--  net/ipv4/tcp_westwood.c  2
-rw-r--r--  net/ipv4/tunnel4.c  19
-rw-r--r--  net/ipv4/udp.c  4
-rw-r--r--  net/ipv4/xfrm4_tunnel.c  4
-rw-r--r--  net/ipv6/addrconf.c  5
-rw-r--r--  net/ipv6/addrlabel.c  5
-rw-r--r--  net/ipv6/af_inet6.c  6
-rw-r--r--  net/ipv6/exthdrs_core.c  4
-rw-r--r--  net/ipv6/ip6_output.c  6
-rw-r--r--  net/ipv6/ip6_tunnel.c  67
-rw-r--r--  net/ipv6/ip6mr.c  4
-rw-r--r--  net/ipv6/ndisc.c  26
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c  14
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c  2
-rw-r--r--  net/ipv6/protocol.c  32
-rw-r--r--  net/ipv6/raw.c  12
-rw-r--r--  net/ipv6/reassembly.c  2
-rw-r--r--  net/ipv6/route.c  14
-rw-r--r--  net/ipv6/sit.c  71
-rw-r--r--  net/ipv6/tcp_ipv6.c  2
-rw-r--r--  net/ipv6/tunnel6.c  17
-rw-r--r--  net/ipv6/xfrm6_policy.c  2
-rw-r--r--  net/ipv6/xfrm6_tunnel.c  4
-rw-r--r--  net/irda/af_irda.c  14
-rw-r--r--  net/irda/discovery.c  2
-rw-r--r--  net/irda/ircomm/ircomm_tty.c  4
-rw-r--r--  net/irda/irlan/irlan_eth.c  32
-rw-r--r--  net/irda/irlan/irlan_event.c  2
-rw-r--r--  net/irda/irlmp.c  2
-rw-r--r--  net/irda/irlmp_frame.c  2
-rw-r--r--  net/irda/irnet/irnet_irda.c  22
-rw-r--r--  net/irda/irnet/irnet_ppp.c  8
-rw-r--r--  net/irda/irnet/irnet_ppp.h  3
-rw-r--r--  net/key/af_key.c  4
-rw-r--r--  net/l2tp/l2tp_eth.c  1
-rw-r--r--  net/l2tp/l2tp_ppp.c  2
-rw-r--r--  net/mac80211/aes_ccm.c  6
-rw-r--r--  net/mac80211/aes_cmac.c  6
-rw-r--r--  net/mac80211/agg-rx.c  22
-rw-r--r--  net/mac80211/cfg.c  145
-rw-r--r--  net/mac80211/chan.c  2
-rw-r--r--  net/mac80211/debugfs.c  6
-rw-r--r--  net/mac80211/debugfs_key.c  55
-rw-r--r--  net/mac80211/driver-ops.h  14
-rw-r--r--  net/mac80211/driver-trace.h  42
-rw-r--r--  net/mac80211/ht.c  28
-rw-r--r--  net/mac80211/ibss.c  12
-rw-r--r--  net/mac80211/ieee80211_i.h  68
-rw-r--r--  net/mac80211/iface.c  401
-rw-r--r--  net/mac80211/key.c  113
-rw-r--r--  net/mac80211/key.h  10
-rw-r--r--  net/mac80211/main.c  179
-rw-r--r--  net/mac80211/mlme.c  106
-rw-r--r--  net/mac80211/offchannel.c  26
-rw-r--r--  net/mac80211/pm.c  3
-rw-r--r--  net/mac80211/rate.c  11
-rw-r--r--  net/mac80211/rc80211_pid_debugfs.c  2
-rw-r--r--  net/mac80211/rx.c  522
-rw-r--r--  net/mac80211/scan.c  66
-rw-r--r--  net/mac80211/sta_info.c  25
-rw-r--r--  net/mac80211/sta_info.h  16
-rw-r--r--  net/mac80211/status.c  11
-rw-r--r--  net/mac80211/tx.c  68
-rw-r--r--  net/mac80211/util.c  35
-rw-r--r--  net/mac80211/wep.c  2
-rw-r--r--  net/mac80211/work.c  39
-rw-r--r--  net/mac80211/wpa.c  32
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c  17
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c  13
-rw-r--r--  net/netfilter/ipvs/ip_vs_sched.c  22
-rw-r--r--  net/netfilter/xt_hashlimit.c  15
-rw-r--r--  net/packet/af_packet.c  4
-rw-r--r--  net/phonet/af_phonet.c  17
-rw-r--r--  net/phonet/datagram.c  13
-rw-r--r--  net/phonet/pep.c  35
-rw-r--r--  net/phonet/pn_dev.c  5
-rw-r--r--  net/phonet/socket.c  190
-rw-r--r--  net/rds/af_rds.c  26
-rw-r--r--  net/rds/bind.c  82
-rw-r--r--  net/rds/cong.c  8
-rw-r--r--  net/rds/connection.c  157
-rw-r--r--  net/rds/ib.c  194
-rw-r--r--  net/rds/ib.h  100
-rw-r--r--  net/rds/ib_cm.c  184
-rw-r--r--  net/rds/ib_rdma.c  318
-rw-r--r--  net/rds/ib_recv.c  549
-rw-r--r--  net/rds/ib_send.c  682
-rw-r--r--  net/rds/ib_stats.c  2
-rw-r--r--  net/rds/ib_sysctl.c  17
-rw-r--r--  net/rds/info.c  12
-rw-r--r--  net/rds/iw.c  4
-rw-r--r--  net/rds/iw.h  11
-rw-r--r--  net/rds/iw_cm.c  14
-rw-r--r--  net/rds/iw_rdma.c  5
-rw-r--r--  net/rds/iw_recv.c  24
-rw-r--r--  net/rds/iw_send.c  93
-rw-r--r--  net/rds/iw_sysctl.c  4
-rw-r--r--  net/rds/loop.c  31
-rw-r--r--  net/rds/message.c  118
-rw-r--r--  net/rds/page.c  5
-rw-r--r--  net/rds/rdma.c  339
-rw-r--r--  net/rds/rdma.h  85
-rw-r--r--  net/rds/rdma_transport.c  42
-rw-r--r--  net/rds/rds.h  187
-rw-r--r--  net/rds/recv.c  9
-rw-r--r--  net/rds/send.c  544
-rw-r--r--  net/rds/stats.c  6
-rw-r--r--  net/rds/sysctl.c  4
-rw-r--r--  net/rds/tcp.c  8
-rw-r--r--  net/rds/tcp.h  9
-rw-r--r--  net/rds/tcp_connect.c  2
-rw-r--r--  net/rds/tcp_listen.c  6
-rw-r--r--  net/rds/tcp_recv.c  14
-rw-r--r--  net/rds/tcp_send.c  66
-rw-r--r--  net/rds/threads.c  69
-rw-r--r--  net/rds/transport.c  19
-rw-r--r--  net/rds/xlist.h  80
-rw-r--r--  net/rfkill/input.c  2
-rw-r--r--  net/rose/rose_link.c  4
-rw-r--r--  net/sched/Kconfig  10
-rw-r--r--  net/sched/Makefile  1
-rw-r--r--  net/sched/act_csum.c  595
-rw-r--r--  net/sched/cls_flow.c  74
-rw-r--r--  net/sched/em_meta.c  6
-rw-r--r--  net/sched/sch_api.c  2
-rw-r--r--  net/sched/sch_sfq.c  33
-rw-r--r--  net/sctp/associola.c  2
-rw-r--r--  net/sctp/chunk.c  2
-rw-r--r--  net/sctp/inqueue.c  2
-rw-r--r--  net/sctp/ipv6.c  4
-rw-r--r--  net/sctp/objcnt.c  5
-rw-r--r--  net/sctp/output.c  2
-rw-r--r--  net/sctp/outqueue.c  34
-rw-r--r--  net/sctp/probe.c  4
-rw-r--r--  net/sctp/protocol.c  19
-rw-r--r--  net/sctp/sm_make_chunk.c  2
-rw-r--r--  net/sctp/sm_sideeffect.c  21
-rw-r--r--  net/sctp/sm_statefuns.c  20
-rw-r--r--  net/sctp/sm_statetable.c  42
-rw-r--r--  net/sctp/socket.c  85
-rw-r--r--  net/sctp/transport.c  9
-rw-r--r--  net/socket.c  30
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c  2
-rw-r--r--  net/sunrpc/auth_gss/gss_generic_token.c  44
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seqnum.c  2
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c  2
-rw-r--r--  net/sunrpc/sched.c  2
-rw-r--r--  net/tipc/addr.c  2
-rw-r--r--  net/tipc/bcast.c  41
-rw-r--r--  net/tipc/bearer.c  2
-rw-r--r--  net/tipc/core.c  6
-rw-r--r--  net/tipc/dbg.c  4
-rw-r--r--  net/tipc/discover.c  8
-rw-r--r--  net/tipc/eth_media.c  48
-rw-r--r--  net/tipc/link.c  31
-rw-r--r--  net/tipc/link.h  16
-rw-r--r--  net/tipc/msg.h  6
-rw-r--r--  net/tipc/name_table.c  50
-rw-r--r--  net/tipc/net.c  1
-rw-r--r--  net/tipc/node.c  28
-rw-r--r--  net/tipc/node.h  2
-rw-r--r--  net/tipc/port.c  19
-rw-r--r--  net/tipc/port.h  2
-rw-r--r--  net/tipc/socket.c  83
-rw-r--r--  net/tipc/subscr.c  2
-rw-r--r--  net/unix/af_unix.c  5
-rw-r--r--  net/wireless/core.c  66
-rw-r--r--  net/wireless/core.h  32
-rw-r--r--  net/wireless/mlme.c  152
-rw-r--r--  net/wireless/nl80211.c  208
-rw-r--r--  net/wireless/nl80211.h  14
-rw-r--r--  net/wireless/radiotap.c  3
-rw-r--r--  net/wireless/reg.c  22
-rw-r--r--  net/wireless/sme.c  9
-rw-r--r--  net/wireless/sysfs.c  9
-rw-r--r--  net/wireless/util.c  28
-rw-r--r--  net/wireless/wext-core.c  2
-rw-r--r--  net/wireless/wext-sme.c  2
-rw-r--r--  net/x25/af_x25.c  34
270 files changed, 7290 insertions, 4546 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index 34cf1ee014b..1e49f2d4ea9 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -70,7 +70,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
70 if(daddr) 70 if(daddr)
71 { 71 {
72 memcpy(fch->daddr,daddr,dev->addr_len); 72 memcpy(fch->daddr,daddr,dev->addr_len);
73 return(hdr_len); 73 return hdr_len;
74 } 74 }
75 return -hdr_len; 75 return -hdr_len;
76} 76}
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 3ef0ab0a543..94b3ad08f39 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -82,10 +82,10 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
82 if (daddr != NULL) 82 if (daddr != NULL)
83 { 83 {
84 memcpy(fddi->daddr, daddr, dev->addr_len); 84 memcpy(fddi->daddr, daddr, dev->addr_len);
85 return(hl); 85 return hl;
86 } 86 }
87 87
88 return(-hl); 88 return -hl;
89} 89}
90 90
91 91
@@ -108,7 +108,7 @@ static int fddi_rebuild_header(struct sk_buff *skb)
108 { 108 {
109 printk("%s: Don't know how to resolve type %04X addresses.\n", 109 printk("%s: Don't know how to resolve type %04X addresses.\n",
110 skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype)); 110 skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
111 return(0); 111 return 0;
112 } 112 }
113} 113}
114 114
@@ -162,7 +162,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
162 162
163 /* Assume 802.2 SNAP frames, for now */ 163 /* Assume 802.2 SNAP frames, for now */
164 164
165 return(type); 165 return type;
166} 166}
167 167
168EXPORT_SYMBOL(fddi_type_trans); 168EXPORT_SYMBOL(fddi_type_trans);
@@ -170,9 +170,9 @@ EXPORT_SYMBOL(fddi_type_trans);
170int fddi_change_mtu(struct net_device *dev, int new_mtu) 170int fddi_change_mtu(struct net_device *dev, int new_mtu)
171{ 171{
172 if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) 172 if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
173 return(-EINVAL); 173 return -EINVAL;
174 dev->mtu = new_mtu; 174 dev->mtu = new_mtu;
175 return(0); 175 return 0;
176} 176}
177EXPORT_SYMBOL(fddi_change_mtu); 177EXPORT_SYMBOL(fddi_change_mtu);
178 178
diff --git a/net/802/hippi.c b/net/802/hippi.c
index cd3e8e92952..91aca8780fd 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -152,7 +152,7 @@ int hippi_change_mtu(struct net_device *dev, int new_mtu)
152 if ((new_mtu < 68) || (new_mtu > 65280)) 152 if ((new_mtu < 68) || (new_mtu > 65280))
153 return -EINVAL; 153 return -EINVAL;
154 dev->mtu = new_mtu; 154 dev->mtu = new_mtu;
155 return(0); 155 return 0;
156} 156}
157EXPORT_SYMBOL(hippi_change_mtu); 157EXPORT_SYMBOL(hippi_change_mtu);
158 158
diff --git a/net/802/tr.c b/net/802/tr.c
index 1c6e596074d..5e20cf8a074 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -145,7 +145,7 @@ static int tr_header(struct sk_buff *skb, struct net_device *dev,
145 { 145 {
146 memcpy(trh->daddr,daddr,dev->addr_len); 146 memcpy(trh->daddr,daddr,dev->addr_len);
147 tr_source_route(skb, trh, dev); 147 tr_source_route(skb, trh, dev);
148 return(hdr_len); 148 return hdr_len;
149 } 149 }
150 150
151 return -hdr_len; 151 return -hdr_len;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a2ad1525057..2c6c2bd6e4a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -525,6 +525,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
525 break; 525 break;
526 526
527 case NETDEV_UNREGISTER: 527 case NETDEV_UNREGISTER:
528 /* twiddle thumbs on netns device moves */
529 if (dev->reg_state != NETREG_UNREGISTERING)
530 break;
531
528 /* Delete all VLANs for this dev. */ 532 /* Delete all VLANs for this dev. */
529 grp->killall = 1; 533 grp->killall = 1;
530 534
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 8d9503ad01d..b26ce343072 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -25,6 +25,7 @@ struct vlan_priority_tci_mapping {
25 * @rx_multicast: number of received multicast packets 25 * @rx_multicast: number of received multicast packets
26 * @syncp: synchronization point for 64bit counters 26 * @syncp: synchronization point for 64bit counters
27 * @rx_errors: number of errors 27 * @rx_errors: number of errors
28 * @rx_dropped: number of dropped packets
28 */ 29 */
29struct vlan_rx_stats { 30struct vlan_rx_stats {
30 u64 rx_packets; 31 u64 rx_packets;
@@ -32,6 +33,7 @@ struct vlan_rx_stats {
32 u64 rx_multicast; 33 u64 rx_multicast;
33 struct u64_stats_sync syncp; 34 struct u64_stats_sync syncp;
34 unsigned long rx_errors; 35 unsigned long rx_errors;
36 unsigned long rx_dropped;
35}; 37};
36 38
37/** 39/**
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 01ddb0472f8..0eb486d342d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -27,7 +27,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
27 else if (vlan_id) 27 else if (vlan_id)
28 goto drop; 28 goto drop;
29 29
30 return (polling ? netif_receive_skb(skb) : netif_rx(skb)); 30 return polling ? netif_receive_skb(skb) : netif_rx(skb);
31 31
32drop: 32drop:
33 dev_kfree_skb_any(skb); 33 dev_kfree_skb_any(skb);
@@ -35,12 +35,12 @@ drop:
35} 35}
36EXPORT_SYMBOL(__vlan_hwaccel_rx); 36EXPORT_SYMBOL(__vlan_hwaccel_rx);
37 37
38int vlan_hwaccel_do_receive(struct sk_buff *skb) 38void vlan_hwaccel_do_receive(struct sk_buff *skb)
39{ 39{
40 struct net_device *dev = skb->dev; 40 struct net_device *dev = skb->dev;
41 struct vlan_rx_stats *rx_stats; 41 struct vlan_rx_stats *rx_stats;
42 42
43 skb->dev = vlan_dev_info(dev)->real_dev; 43 skb->dev = vlan_dev_real_dev(dev);
44 netif_nit_deliver(skb); 44 netif_nit_deliver(skb);
45 45
46 skb->dev = dev; 46 skb->dev = dev;
@@ -69,7 +69,6 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
69 break; 69 break;
70 } 70 }
71 u64_stats_update_end(&rx_stats->syncp); 71 u64_stats_update_end(&rx_stats->syncp);
72 return 0;
73} 72}
74 73
75struct net_device *vlan_dev_real_dev(const struct net_device *dev) 74struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -106,9 +105,12 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
106 goto drop; 105 goto drop;
107 106
108 for (p = napi->gro_list; p; p = p->next) { 107 for (p = napi->gro_list; p; p = p->next) {
109 NAPI_GRO_CB(p)->same_flow = 108 unsigned long diffs;
110 p->dev == skb->dev && !compare_ether_header( 109
111 skb_mac_header(p), skb_gro_mac_header(skb)); 110 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
111 diffs |= compare_ether_header(skb_mac_header(p),
112 skb_gro_mac_header(skb));
113 NAPI_GRO_CB(p)->same_flow = !diffs;
112 NAPI_GRO_CB(p)->flush = 0; 114 NAPI_GRO_CB(p)->flush = 0;
113 } 115 }
114 116
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3bccdd12a26..94a1feddeb4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -225,7 +225,10 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
225 } 225 }
226 } 226 }
227 227
228 netif_rx(skb); 228 if (unlikely(netif_rx(skb) == NET_RX_DROP)) {
229 if (rx_stats)
230 rx_stats->rx_dropped++;
231 }
229 rcu_read_unlock(); 232 rcu_read_unlock();
230 return NET_RX_SUCCESS; 233 return NET_RX_SUCCESS;
231 234
@@ -843,13 +846,15 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
843 accum.rx_packets += rxpackets; 846 accum.rx_packets += rxpackets;
844 accum.rx_bytes += rxbytes; 847 accum.rx_bytes += rxbytes;
845 accum.rx_multicast += rxmulticast; 848 accum.rx_multicast += rxmulticast;
846 /* rx_errors is an ulong, not protected by syncp */ 849 /* rx_errors, rx_dropped are ulong, not protected by syncp */
847 accum.rx_errors += p->rx_errors; 850 accum.rx_errors += p->rx_errors;
851 accum.rx_dropped += p->rx_dropped;
848 } 852 }
849 stats->rx_packets = accum.rx_packets; 853 stats->rx_packets = accum.rx_packets;
850 stats->rx_bytes = accum.rx_bytes; 854 stats->rx_bytes = accum.rx_bytes;
851 stats->rx_errors = accum.rx_errors; 855 stats->rx_errors = accum.rx_errors;
852 stats->multicast = accum.rx_multicast; 856 stats->multicast = accum.rx_multicast;
857 stats->rx_dropped = accum.rx_dropped;
853 } 858 }
854 return stats; 859 return stats;
855} 860}
diff --git a/net/9p/client.c b/net/9p/client.c
index dc6f2f26d02..f34b9f51081 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -61,13 +61,13 @@ static const match_table_t tokens = {
61 61
62inline int p9_is_proto_dotl(struct p9_client *clnt) 62inline int p9_is_proto_dotl(struct p9_client *clnt)
63{ 63{
64 return (clnt->proto_version == p9_proto_2000L); 64 return clnt->proto_version == p9_proto_2000L;
65} 65}
66EXPORT_SYMBOL(p9_is_proto_dotl); 66EXPORT_SYMBOL(p9_is_proto_dotl);
67 67
68inline int p9_is_proto_dotu(struct p9_client *clnt) 68inline int p9_is_proto_dotu(struct p9_client *clnt)
69{ 69{
70 return (clnt->proto_version == p9_proto_2000u); 70 return clnt->proto_version == p9_proto_2000u;
71} 71}
72EXPORT_SYMBOL(p9_is_proto_dotu); 72EXPORT_SYMBOL(p9_is_proto_dotu);
73 73
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c85109d809c..078eb162d9b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -222,7 +222,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
222 } 222 }
223} 223}
224 224
225static unsigned int 225static int
226p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) 226p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
227{ 227{
228 int ret, n; 228 int ret, n;
diff --git a/net/atm/common.c b/net/atm/common.c
index 940404a73b3..1b9c52a02cd 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -792,7 +792,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
792 default: 792 default:
793 if (level == SOL_SOCKET) 793 if (level == SOL_SOCKET)
794 return -EINVAL; 794 return -EINVAL;
795 break; 795 break;
796 } 796 }
797 if (!vcc->dev || !vcc->dev->ops->getsockopt) 797 if (!vcc->dev || !vcc->dev->ops->getsockopt)
798 return -EINVAL; 798 return -EINVAL;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d98bde1a0ac..181d70c73d7 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -220,7 +220,6 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
220static int lec_open(struct net_device *dev) 220static int lec_open(struct net_device *dev)
221{ 221{
222 netif_start_queue(dev); 222 netif_start_queue(dev);
223 memset(&dev->stats, 0, sizeof(struct net_device_stats));
224 223
225 return 0; 224 return 0;
226} 225}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index cfdfd7e2a17..26eaebf4aaa 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1103,7 +1103,7 @@ done:
1103out: 1103out:
1104 release_sock(sk); 1104 release_sock(sk);
1105 1105
1106 return 0; 1106 return err;
1107} 1107}
1108 1108
1109/* 1109/*
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 7805945a5fd..a1690845dc6 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -412,7 +412,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
412{ 412{
413 ax25_uid_assoc *user; 413 ax25_uid_assoc *user;
414 ax25_route *ax25_rt; 414 ax25_route *ax25_rt;
415 int err; 415 int err = 0;
416 416
417 if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL) 417 if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
418 return -EHOSTUNREACH; 418 return -EHOSTUNREACH;
@@ -453,7 +453,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
453put: 453put:
454 ax25_put_route(ax25_rt); 454 ax25_put_route(ax25_rt);
455 455
456 return 0; 456 return err;
457} 457}
458 458
459struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, 459struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 421c45bd1b9..ed0f22f5766 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -297,13 +297,12 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
297 mask |= POLLERR; 297 mask |= POLLERR;
298 298
299 if (sk->sk_shutdown & RCV_SHUTDOWN) 299 if (sk->sk_shutdown & RCV_SHUTDOWN)
300 mask |= POLLRDHUP; 300 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
301 301
302 if (sk->sk_shutdown == SHUTDOWN_MASK) 302 if (sk->sk_shutdown == SHUTDOWN_MASK)
303 mask |= POLLHUP; 303 mask |= POLLHUP;
304 304
305 if (!skb_queue_empty(&sk->sk_receive_queue) || 305 if (!skb_queue_empty(&sk->sk_receive_queue))
306 (sk->sk_shutdown & RCV_SHUTDOWN))
307 mask |= POLLIN | POLLRDNORM; 306 mask |= POLLIN | POLLRDNORM;
308 307
309 if (sk->sk_state == BT_CLOSED) 308 if (sk->sk_state == BT_CLOSED)
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7dca91bb8c5..15ea84ba344 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -179,13 +179,13 @@ static unsigned char rfcomm_crc_table[256] = {
179/* FCS on 2 bytes */ 179/* FCS on 2 bytes */
180static inline u8 __fcs(u8 *data) 180static inline u8 __fcs(u8 *data)
181{ 181{
182 return (0xff - __crc(data)); 182 return 0xff - __crc(data);
183} 183}
184 184
185/* FCS on 3 bytes */ 185/* FCS on 3 bytes */
186static inline u8 __fcs2(u8 *data) 186static inline u8 __fcs2(u8 *data)
187{ 187{
188 return (0xff - rfcomm_crc_table[__crc(data) ^ data[2]]); 188 return 0xff - rfcomm_crc_table[__crc(data) ^ data[2]];
189} 189}
190 190
191/* Check FCS */ 191/* Check FCS */
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c03d2c3ff03..89ad25a7620 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -61,30 +61,27 @@ static int port_cost(struct net_device *dev)
61} 61}
62 62
63 63
64/* 64/* Check for port carrier transistions. */
65 * Check for port carrier transistions.
66 * Called from work queue to allow for calling functions that
67 * might sleep (such as speed check), and to debounce.
68 */
69void br_port_carrier_check(struct net_bridge_port *p) 65void br_port_carrier_check(struct net_bridge_port *p)
70{ 66{
71 struct net_device *dev = p->dev; 67 struct net_device *dev = p->dev;
72 struct net_bridge *br = p->br; 68 struct net_bridge *br = p->br;
73 69
74 if (netif_carrier_ok(dev)) 70 if (netif_running(dev) && netif_carrier_ok(dev))
75 p->path_cost = port_cost(dev); 71 p->path_cost = port_cost(dev);
76 72
77 if (netif_running(br->dev)) { 73 if (!netif_running(br->dev))
78 spin_lock_bh(&br->lock); 74 return;
79 if (netif_carrier_ok(dev)) { 75
80 if (p->state == BR_STATE_DISABLED) 76 spin_lock_bh(&br->lock);
81 br_stp_enable_port(p); 77 if (netif_running(dev) && netif_carrier_ok(dev)) {
82 } else { 78 if (p->state == BR_STATE_DISABLED)
83 if (p->state != BR_STATE_DISABLED) 79 br_stp_enable_port(p);
84 br_stp_disable_port(p); 80 } else {
85 } 81 if (p->state != BR_STATE_DISABLED)
86 spin_unlock_bh(&br->lock); 82 br_stp_disable_port(p);
87 } 83 }
84 spin_unlock_bh(&br->lock);
88} 85}
89 86
90static void release_nbp(struct kobject *kobj) 87static void release_nbp(struct kobject *kobj)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 826cd522153..6d04cfdf454 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -141,7 +141,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
141 const unsigned char *dest = eth_hdr(skb)->h_dest; 141 const unsigned char *dest = eth_hdr(skb)->h_dest;
142 int (*rhook)(struct sk_buff *skb); 142 int (*rhook)(struct sk_buff *skb);
143 143
144 if (skb->pkt_type == PACKET_LOOPBACK) 144 if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
145 return skb; 145 return skb;
146 146
147 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 147 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 137f23259a9..77f7b5fda45 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -209,6 +209,72 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb)
209 skb->protocol = htons(ETH_P_PPP_SES); 209 skb->protocol = htons(ETH_P_PPP_SES);
210} 210}
211 211
212/* When handing a packet over to the IP layer
213 * check whether we have a skb that is in the
214 * expected format
215 */
216
217int br_parse_ip_options(struct sk_buff *skb)
218{
219 struct ip_options *opt;
220 struct iphdr *iph;
221 struct net_device *dev = skb->dev;
222 u32 len;
223
224 iph = ip_hdr(skb);
225 opt = &(IPCB(skb)->opt);
226
227 /* Basic sanity checks */
228 if (iph->ihl < 5 || iph->version != 4)
229 goto inhdr_error;
230
231 if (!pskb_may_pull(skb, iph->ihl*4))
232 goto inhdr_error;
233
234 iph = ip_hdr(skb);
235 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
236 goto inhdr_error;
237
238 len = ntohs(iph->tot_len);
239 if (skb->len < len) {
240 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
241 goto drop;
242 } else if (len < (iph->ihl*4))
243 goto inhdr_error;
244
245 if (pskb_trim_rcsum(skb, len)) {
246 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
247 goto drop;
248 }
249
250 /* Zero out the CB buffer if no options present */
251 if (iph->ihl == 5) {
252 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
253 return 0;
254 }
255
256 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
257 if (ip_options_compile(dev_net(dev), opt, skb))
258 goto inhdr_error;
259
260 /* Check correct handling of SRR option */
261 if (unlikely(opt->srr)) {
262 struct in_device *in_dev = __in_dev_get_rcu(dev);
263 if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev))
264 goto drop;
265
266 if (ip_options_rcv_srr(skb))
267 goto drop;
268 }
269
270 return 0;
271
272inhdr_error:
273 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
274drop:
275 return -1;
276}
277
212/* Fill in the header for fragmented IP packets handled by 278/* Fill in the header for fragmented IP packets handled by
213 * the IPv4 connection tracking code. 279 * the IPv4 connection tracking code.
214 */ 280 */
@@ -549,7 +615,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
549{ 615{
550 struct net_bridge_port *p; 616 struct net_bridge_port *p;
551 struct net_bridge *br; 617 struct net_bridge *br;
552 struct iphdr *iph;
553 __u32 len = nf_bridge_encap_header_len(skb); 618 __u32 len = nf_bridge_encap_header_len(skb);
554 619
555 if (unlikely(!pskb_may_pull(skb, len))) 620 if (unlikely(!pskb_may_pull(skb, len)))
@@ -578,28 +643,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
578 643
579 nf_bridge_pull_encap_header_rcsum(skb); 644 nf_bridge_pull_encap_header_rcsum(skb);
580 645
581 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 646 if (br_parse_ip_options(skb))
582 goto inhdr_error; 647 /* Drop invalid packet */
583 648 goto out;
584 iph = ip_hdr(skb);
585 if (iph->ihl < 5 || iph->version != 4)
586 goto inhdr_error;
587
588 if (!pskb_may_pull(skb, 4 * iph->ihl))
589 goto inhdr_error;
590
591 iph = ip_hdr(skb);
592 if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
593 goto inhdr_error;
594
595 len = ntohs(iph->tot_len);
596 if (skb->len < len || len < 4 * iph->ihl)
597 goto inhdr_error;
598
599 pskb_trim_rcsum(skb, len);
600
601 /* BUG: Should really parse the IP options here. */
602 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
603 649
604 nf_bridge_put(skb->nf_bridge); 650 nf_bridge_put(skb->nf_bridge);
605 if (!nf_bridge_alloc(skb)) 651 if (!nf_bridge_alloc(skb))
@@ -614,8 +660,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
614 660
615 return NF_STOLEN; 661 return NF_STOLEN;
616 662
617inhdr_error:
618// IP_INC_STATS_BH(IpInHdrErrors);
619out: 663out:
620 return NF_DROP; 664 return NF_DROP;
621} 665}
@@ -759,14 +803,19 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
759#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) 803#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
760static int br_nf_dev_queue_xmit(struct sk_buff *skb) 804static int br_nf_dev_queue_xmit(struct sk_buff *skb)
761{ 805{
806 int ret;
807
762 if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && 808 if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
763 skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && 809 skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
764 !skb_is_gso(skb)) { 810 !skb_is_gso(skb)) {
765 /* BUG: Should really parse the IP options here. */ 811 if (br_parse_ip_options(skb))
766 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 812 /* Drop invalid packet */
767 return ip_fragment(skb, br_dev_queue_push_xmit); 813 return NF_DROP;
814 ret = ip_fragment(skb, br_dev_queue_push_xmit);
768 } else 815 } else
769 return br_dev_queue_push_xmit(skb); 816 ret = br_dev_queue_push_xmit(skb);
817
818 return ret;
770} 819}
771#else 820#else
772static int br_nf_dev_queue_xmit(struct sk_buff *skb) 821static int br_nf_dev_queue_xmit(struct sk_buff *skb)
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 0b586e9d137..b99369a055d 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -9,6 +9,8 @@
9 * and Sakari Ailus <sakari.ailus@nokia.com> 9 * and Sakari Ailus <sakari.ailus@nokia.com>
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
13
12#include <linux/version.h> 14#include <linux/version.h>
13#include <linux/module.h> 15#include <linux/module.h>
14#include <linux/kernel.h> 16#include <linux/kernel.h>
@@ -171,7 +173,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
171 net = dev_net(dev); 173 net = dev_net(dev);
172 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb); 174 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
173 caifd = caif_get(dev); 175 caifd = caif_get(dev);
174 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) 176 if (!caifd || !caifd->layer.up || !caifd->layer.up->receive)
175 return NET_RX_DROP; 177 return NET_RX_DROP;
176 178
177 if (caifd->layer.up->receive(caifd->layer.up, pkt)) 179 if (caifd->layer.up->receive(caifd->layer.up, pkt))
@@ -214,7 +216,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
214 216
215 switch (what) { 217 switch (what) {
216 case NETDEV_REGISTER: 218 case NETDEV_REGISTER:
217 pr_info("CAIF: %s():register %s\n", __func__, dev->name); 219 netdev_info(dev, "register\n");
218 caifd = caif_device_alloc(dev); 220 caifd = caif_device_alloc(dev);
219 if (caifd == NULL) 221 if (caifd == NULL)
220 break; 222 break;
@@ -225,14 +227,13 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
225 break; 227 break;
226 228
227 case NETDEV_UP: 229 case NETDEV_UP:
228 pr_info("CAIF: %s(): up %s\n", __func__, dev->name); 230 netdev_info(dev, "up\n");
229 caifd = caif_get(dev); 231 caifd = caif_get(dev);
230 if (caifd == NULL) 232 if (caifd == NULL)
231 break; 233 break;
232 caifdev = netdev_priv(dev); 234 caifdev = netdev_priv(dev);
233 if (atomic_read(&caifd->state) == NETDEV_UP) { 235 if (atomic_read(&caifd->state) == NETDEV_UP) {
234 pr_info("CAIF: %s():%s already up\n", 236 netdev_info(dev, "already up\n");
235 __func__, dev->name);
236 break; 237 break;
237 } 238 }
238 atomic_set(&caifd->state, what); 239 atomic_set(&caifd->state, what);
@@ -273,7 +274,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
273 caifd = caif_get(dev); 274 caifd = caif_get(dev);
274 if (caifd == NULL) 275 if (caifd == NULL)
275 break; 276 break;
276 pr_info("CAIF: %s():going down %s\n", __func__, dev->name); 277 netdev_info(dev, "going down\n");
277 278
278 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN || 279 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
279 atomic_read(&caifd->state) == NETDEV_DOWN) 280 atomic_read(&caifd->state) == NETDEV_DOWN)
@@ -295,11 +296,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
295 caifd = caif_get(dev); 296 caifd = caif_get(dev);
296 if (caifd == NULL) 297 if (caifd == NULL)
297 break; 298 break;
298 pr_info("CAIF: %s(): down %s\n", __func__, dev->name); 299 netdev_info(dev, "down\n");
299 if (atomic_read(&caifd->in_use)) 300 if (atomic_read(&caifd->in_use))
300 pr_warning("CAIF: %s(): " 301 netdev_warn(dev,
301 "Unregistering an active CAIF device: %s\n", 302 "Unregistering an active CAIF device\n");
302 __func__, dev->name);
303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); 303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
304 dev_put(dev); 304 dev_put(dev);
305 atomic_set(&caifd->state, what); 305 atomic_set(&caifd->state, what);
@@ -307,7 +307,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
307 307
308 case NETDEV_UNREGISTER: 308 case NETDEV_UNREGISTER:
309 caifd = caif_get(dev); 309 caifd = caif_get(dev);
310 pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name); 310 netdev_info(dev, "unregister\n");
311 atomic_set(&caifd->state, what); 311 atomic_set(&caifd->state, what);
312 caif_device_destroy(dev); 312 caif_device_destroy(dev);
313 break; 313 break;
@@ -391,7 +391,7 @@ static int __init caif_device_init(void)
391 int result; 391 int result;
392 cfg = cfcnfg_create(); 392 cfg = cfcnfg_create();
393 if (!cfg) { 393 if (!cfg) {
394 pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__); 394 pr_warn("can't create cfcnfg\n");
395 goto err_cfcnfg_create_failed; 395 goto err_cfcnfg_create_failed;
396 } 396 }
397 result = register_pernet_device(&caif_net_ops); 397 result = register_pernet_device(&caif_net_ops);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 8ce90478611..4d918f8f4e6 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/fs.h> 9#include <linux/fs.h>
8#include <linux/init.h> 10#include <linux/init.h>
9#include <linux/module.h> 11#include <linux/module.h>
@@ -28,9 +30,6 @@
28MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
29MODULE_ALIAS_NETPROTO(AF_CAIF); 31MODULE_ALIAS_NETPROTO(AF_CAIF);
30 32
31#define CAIF_DEF_SNDBUF (4096*10)
32#define CAIF_DEF_RCVBUF (4096*100)
33
34/* 33/*
35 * CAIF state is re-using the TCP socket states. 34 * CAIF state is re-using the TCP socket states.
36 * caif_states stored in sk_state reflect the state as reported by 35 * caif_states stored in sk_state reflect the state as reported by
@@ -157,9 +156,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
157 156
158 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 157 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
159 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { 158 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
160 trace_printk("CAIF: %s():" 159 pr_debug("sending flow OFF (queue len = %d %d)\n",
161 " sending flow OFF (queue len = %d %d)\n",
162 __func__,
163 atomic_read(&cf_sk->sk.sk_rmem_alloc), 160 atomic_read(&cf_sk->sk.sk_rmem_alloc),
164 sk_rcvbuf_lowwater(cf_sk)); 161 sk_rcvbuf_lowwater(cf_sk));
165 set_rx_flow_off(cf_sk); 162 set_rx_flow_off(cf_sk);
@@ -172,9 +169,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
172 return err; 169 return err;
173 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { 170 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
174 set_rx_flow_off(cf_sk); 171 set_rx_flow_off(cf_sk);
175 trace_printk("CAIF: %s():" 172 pr_debug("sending flow OFF due to rmem_schedule\n");
176 " sending flow OFF due to rmem_schedule\n",
177 __func__);
178 dbfs_atomic_inc(&cnt.num_rx_flow_off); 173 dbfs_atomic_inc(&cnt.num_rx_flow_off);
179 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); 174 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
180 } 175 }
@@ -275,8 +270,7 @@ static void caif_ctrl_cb(struct cflayer *layr,
275 break; 270 break;
276 271
277 default: 272 default:
278 pr_debug("CAIF: %s(): Unexpected flow command %d\n", 273 pr_debug("Unexpected flow command %d\n", flow);
279 __func__, flow);
280 } 274 }
281} 275}
282 276
@@ -536,8 +530,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
536 530
537 /* Slight paranoia, probably not needed. */ 531 /* Slight paranoia, probably not needed. */
538 if (unlikely(loopcnt++ > 1000)) { 532 if (unlikely(loopcnt++ > 1000)) {
539 pr_warning("CAIF: %s(): transmit retries failed," 533 pr_warn("transmit retries failed, error = %d\n", ret);
540 " error = %d\n", __func__, ret);
541 break; 534 break;
542 } 535 }
543 536
@@ -902,8 +895,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
902 cf_sk->maxframe = dev->mtu - (headroom + tailroom); 895 cf_sk->maxframe = dev->mtu - (headroom + tailroom);
903 dev_put(dev); 896 dev_put(dev);
904 if (cf_sk->maxframe < 1) { 897 if (cf_sk->maxframe < 1) {
905 pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n", 898 pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
906 __func__, dev->mtu);
907 err = -ENODEV; 899 err = -ENODEV;
908 goto out; 900 goto out;
909 } 901 }
@@ -1123,10 +1115,6 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
1123 /* Store the protocol */ 1115 /* Store the protocol */
1124 sk->sk_protocol = (unsigned char) protocol; 1116 sk->sk_protocol = (unsigned char) protocol;
1125 1117
1126 /* Sendbuf dictates the amount of outbound packets not yet sent */
1127 sk->sk_sndbuf = CAIF_DEF_SNDBUF;
1128 sk->sk_rcvbuf = CAIF_DEF_RCVBUF;
1129
1130 /* 1118 /*
1131 * Lock in order to try to stop someone from opening the socket 1119 * Lock in order to try to stop someone from opening the socket
1132 * too early. 1120 * too early.
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 1c29189b344..41adafd1891 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/kernel.h> 9#include <linux/kernel.h>
7#include <linux/stddef.h> 10#include <linux/stddef.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -78,7 +81,7 @@ struct cfcnfg *cfcnfg_create(void)
78 /* Initiate this layer */ 81 /* Initiate this layer */
79 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); 82 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
80 if (!this) { 83 if (!this) {
81 pr_warning("CAIF: %s(): Out of memory\n", __func__); 84 pr_warn("Out of memory\n");
82 return NULL; 85 return NULL;
83 } 86 }
84 this->mux = cfmuxl_create(); 87 this->mux = cfmuxl_create();
@@ -106,7 +109,7 @@ struct cfcnfg *cfcnfg_create(void)
106 layer_set_up(this->ctrl, this); 109 layer_set_up(this->ctrl, this);
107 return this; 110 return this;
108out_of_mem: 111out_of_mem:
109 pr_warning("CAIF: %s(): Out of memory\n", __func__); 112 pr_warn("Out of memory\n");
110 kfree(this->mux); 113 kfree(this->mux);
111 kfree(this->ctrl); 114 kfree(this->ctrl);
112 kfree(this); 115 kfree(this);
@@ -194,7 +197,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
194 caif_assert(adap_layer != NULL); 197 caif_assert(adap_layer != NULL);
195 channel_id = adap_layer->id; 198 channel_id = adap_layer->id;
196 if (adap_layer->dn == NULL || channel_id == 0) { 199 if (adap_layer->dn == NULL || channel_id == 0) {
197 pr_err("CAIF: %s():adap_layer->id is 0\n", __func__); 200 pr_err("adap_layer->dn == NULL or adap_layer->id is 0\n");
198 ret = -ENOTCONN; 201 ret = -ENOTCONN;
199 goto end; 202 goto end;
200 } 203 }
@@ -204,9 +207,8 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
204 layer_set_up(servl, NULL); 207 layer_set_up(servl, NULL);
205 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); 208 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
206 if (servl == NULL) { 209 if (servl == NULL) {
207 pr_err("CAIF: %s(): PROTOCOL ERROR " 210 pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)",
208 "- Error removing service_layer Channel_Id(%d)", 211 channel_id);
209 __func__, channel_id);
210 ret = -EINVAL; 212 ret = -EINVAL;
211 goto end; 213 goto end;
212 } 214 }
@@ -216,18 +218,14 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
216 218
217 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); 219 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
218 if (phyinfo == NULL) { 220 if (phyinfo == NULL) {
219 pr_warning("CAIF: %s(): " 221 pr_warn("No interface to send disconnect to\n");
220 "No interface to send disconnect to\n",
221 __func__);
222 ret = -ENODEV; 222 ret = -ENODEV;
223 goto end; 223 goto end;
224 } 224 }
225 if (phyinfo->id != phyid || 225 if (phyinfo->id != phyid ||
226 phyinfo->phy_layer->id != phyid || 226 phyinfo->phy_layer->id != phyid ||
227 phyinfo->frm_layer->id != phyid) { 227 phyinfo->frm_layer->id != phyid) {
228 pr_err("CAIF: %s(): " 228 pr_err("Inconsistency in phy registration\n");
229 "Inconsistency in phy registration\n",
230 __func__);
231 ret = -EINVAL; 229 ret = -EINVAL;
232 goto end; 230 goto end;
233 } 231 }
@@ -276,21 +274,20 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
276{ 274{
277 struct cflayer *frml; 275 struct cflayer *frml;
278 if (adap_layer == NULL) { 276 if (adap_layer == NULL) {
279 pr_err("CAIF: %s(): adap_layer is zero", __func__); 277 pr_err("adap_layer is zero\n");
280 return -EINVAL; 278 return -EINVAL;
281 } 279 }
282 if (adap_layer->receive == NULL) { 280 if (adap_layer->receive == NULL) {
283 pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__); 281 pr_err("adap_layer->receive is NULL\n");
284 return -EINVAL; 282 return -EINVAL;
285 } 283 }
286 if (adap_layer->ctrlcmd == NULL) { 284 if (adap_layer->ctrlcmd == NULL) {
287 pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__); 285 pr_err("adap_layer->ctrlcmd == NULL\n");
288 return -EINVAL; 286 return -EINVAL;
289 } 287 }
290 frml = cnfg->phy_layers[param->phyid].frm_layer; 288 frml = cnfg->phy_layers[param->phyid].frm_layer;
291 if (frml == NULL) { 289 if (frml == NULL) {
292 pr_err("CAIF: %s(): Specified PHY type does not exist!", 290 pr_err("Specified PHY type does not exist!\n");
293 __func__);
294 return -ENODEV; 291 return -ENODEV;
295 } 292 }
296 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id); 293 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
@@ -330,9 +327,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
330 struct net_device *netdev; 327 struct net_device *netdev;
331 328
332 if (adapt_layer == NULL) { 329 if (adapt_layer == NULL) {
333 pr_debug("CAIF: %s(): link setup response " 330 pr_debug("link setup response but no client exist, send linkdown back\n");
334 "but no client exist, send linkdown back\n",
335 __func__);
336 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); 331 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
337 return; 332 return;
338 } 333 }
@@ -374,13 +369,11 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
374 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); 369 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
375 break; 370 break;
376 default: 371 default:
377 pr_err("CAIF: %s(): Protocol error. " 372 pr_err("Protocol error. Link setup response - unknown channel type\n");
378 "Link setup response - unknown channel type\n",
379 __func__);
380 return; 373 return;
381 } 374 }
382 if (!servicel) { 375 if (!servicel) {
383 pr_warning("CAIF: %s(): Out of memory\n", __func__); 376 pr_warn("Out of memory\n");
384 return; 377 return;
385 } 378 }
386 layer_set_dn(servicel, cnfg->mux); 379 layer_set_dn(servicel, cnfg->mux);
@@ -418,7 +411,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
418 } 411 }
419 } 412 }
420 if (*phyid == 0) { 413 if (*phyid == 0) {
421 pr_err("CAIF: %s(): No Available PHY ID\n", __func__); 414 pr_err("No Available PHY ID\n");
422 return; 415 return;
423 } 416 }
424 417
@@ -427,7 +420,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
427 phy_driver = 420 phy_driver =
428 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx); 421 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
429 if (!phy_driver) { 422 if (!phy_driver) {
430 pr_warning("CAIF: %s(): Out of memory\n", __func__); 423 pr_warn("Out of memory\n");
431 return; 424 return;
432 } 425 }
433 426
@@ -436,7 +429,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
436 phy_driver = NULL; 429 phy_driver = NULL;
437 break; 430 break;
438 default: 431 default:
439 pr_err("CAIF: %s(): %d", __func__, phy_type); 432 pr_err("%d\n", phy_type);
440 return; 433 return;
441 break; 434 break;
442 } 435 }
@@ -455,7 +448,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
455 phy_layer->type = phy_type; 448 phy_layer->type = phy_type;
456 frml = cffrml_create(*phyid, fcs); 449 frml = cffrml_create(*phyid, fcs);
457 if (!frml) { 450 if (!frml) {
458 pr_warning("CAIF: %s(): Out of memory\n", __func__); 451 pr_warn("Out of memory\n");
459 return; 452 return;
460 } 453 }
461 cnfg->phy_layers[*phyid].frm_layer = frml; 454 cnfg->phy_layers[*phyid].frm_layer = frml;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 563145fdc4c..08f267a109a 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -36,7 +38,7 @@ struct cflayer *cfctrl_create(void)
36 struct cfctrl *this = 38 struct cfctrl *this =
37 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); 39 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
38 if (!this) { 40 if (!this) {
39 pr_warning("CAIF: %s(): Out of memory\n", __func__); 41 pr_warn("Out of memory\n");
40 return NULL; 42 return NULL;
41 } 43 }
42 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 44 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -132,9 +134,7 @@ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
132 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 134 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
133 if (cfctrl_req_eq(req, p)) { 135 if (cfctrl_req_eq(req, p)) {
134 if (p != first) 136 if (p != first)
135 pr_warning("CAIF: %s(): Requests are not " 137 pr_warn("Requests are not received in order\n");
136 "received in order\n",
137 __func__);
138 138
139 atomic_set(&ctrl->rsp_seq_no, 139 atomic_set(&ctrl->rsp_seq_no,
140 p->sequence_no); 140 p->sequence_no);
@@ -177,7 +177,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
177 int ret; 177 int ret;
178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
179 if (!pkt) { 179 if (!pkt) {
180 pr_warning("CAIF: %s(): Out of memory\n", __func__); 180 pr_warn("Out of memory\n");
181 return; 181 return;
182 } 182 }
183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -189,8 +189,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
189 ret = 189 ret =
190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
191 if (ret < 0) { 191 if (ret < 0) {
192 pr_err("CAIF: %s(): Could not transmit enum message\n", 192 pr_err("Could not transmit enum message\n");
193 __func__);
194 cfpkt_destroy(pkt); 193 cfpkt_destroy(pkt);
195 } 194 }
196} 195}
@@ -208,7 +207,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
208 char utility_name[16]; 207 char utility_name[16];
209 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 208 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
210 if (!pkt) { 209 if (!pkt) {
211 pr_warning("CAIF: %s(): Out of memory\n", __func__); 210 pr_warn("Out of memory\n");
212 return -ENOMEM; 211 return -ENOMEM;
213 } 212 }
214 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); 213 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
@@ -253,13 +252,13 @@ int cfctrl_linkup_request(struct cflayer *layer,
253 param->u.utility.paramlen); 252 param->u.utility.paramlen);
254 break; 253 break;
255 default: 254 default:
256 pr_warning("CAIF: %s():Request setup of bad link type = %d\n", 255 pr_warn("Request setup of bad link type = %d\n",
257 __func__, param->linktype); 256 param->linktype);
258 return -EINVAL; 257 return -EINVAL;
259 } 258 }
260 req = kzalloc(sizeof(*req), GFP_KERNEL); 259 req = kzalloc(sizeof(*req), GFP_KERNEL);
261 if (!req) { 260 if (!req) {
262 pr_warning("CAIF: %s(): Out of memory\n", __func__); 261 pr_warn("Out of memory\n");
263 return -ENOMEM; 262 return -ENOMEM;
264 } 263 }
265 req->client_layer = user_layer; 264 req->client_layer = user_layer;
@@ -276,8 +275,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
276 ret = 275 ret =
277 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 276 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
278 if (ret < 0) { 277 if (ret < 0) {
279 pr_err("CAIF: %s(): Could not transmit linksetup request\n", 278 pr_err("Could not transmit linksetup request\n");
280 __func__);
281 cfpkt_destroy(pkt); 279 cfpkt_destroy(pkt);
282 return -ENODEV; 280 return -ENODEV;
283 } 281 }
@@ -291,7 +289,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
291 struct cfctrl *cfctrl = container_obj(layer); 289 struct cfctrl *cfctrl = container_obj(layer);
292 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 290 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
293 if (!pkt) { 291 if (!pkt) {
294 pr_warning("CAIF: %s(): Out of memory\n", __func__); 292 pr_warn("Out of memory\n");
295 return -ENOMEM; 293 return -ENOMEM;
296 } 294 }
297 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); 295 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
@@ -300,8 +298,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
300 ret = 298 ret =
301 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 299 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
302 if (ret < 0) { 300 if (ret < 0) {
303 pr_err("CAIF: %s(): Could not transmit link-down request\n", 301 pr_err("Could not transmit link-down request\n");
304 __func__);
305 cfpkt_destroy(pkt); 302 cfpkt_destroy(pkt);
306 } 303 }
307 return ret; 304 return ret;
@@ -313,7 +310,7 @@ void cfctrl_sleep_req(struct cflayer *layer)
313 struct cfctrl *cfctrl = container_obj(layer); 310 struct cfctrl *cfctrl = container_obj(layer);
314 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 311 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
315 if (!pkt) { 312 if (!pkt) {
316 pr_warning("CAIF: %s(): Out of memory\n", __func__); 313 pr_warn("Out of memory\n");
317 return; 314 return;
318 } 315 }
319 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP); 316 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
@@ -330,7 +327,7 @@ void cfctrl_wake_req(struct cflayer *layer)
330 struct cfctrl *cfctrl = container_obj(layer); 327 struct cfctrl *cfctrl = container_obj(layer);
331 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 328 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
332 if (!pkt) { 329 if (!pkt) {
333 pr_warning("CAIF: %s(): Out of memory\n", __func__); 330 pr_warn("Out of memory\n");
334 return; 331 return;
335 } 332 }
336 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE); 333 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
@@ -347,7 +344,7 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
347 struct cfctrl *cfctrl = container_obj(layer); 344 struct cfctrl *cfctrl = container_obj(layer);
348 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 345 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
349 if (!pkt) { 346 if (!pkt) {
350 pr_warning("CAIF: %s(): Out of memory\n", __func__); 347 pr_warn("Out of memory\n");
351 return; 348 return;
352 } 349 }
353 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON); 350 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
@@ -364,12 +361,11 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
364 struct cfctrl_request_info *p, *tmp; 361 struct cfctrl_request_info *p, *tmp;
365 struct cfctrl *ctrl = container_obj(layr); 362 struct cfctrl *ctrl = container_obj(layr);
366 spin_lock(&ctrl->info_list_lock); 363 spin_lock(&ctrl->info_list_lock);
367 pr_warning("CAIF: %s(): enter\n", __func__); 364 pr_warn("enter\n");
368 365
369 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 366 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
370 if (p->client_layer == adap_layer) { 367 if (p->client_layer == adap_layer) {
371 pr_warning("CAIF: %s(): cancel req :%d\n", __func__, 368 pr_warn("cancel req :%d\n", p->sequence_no);
372 p->sequence_no);
373 list_del(&p->list); 369 list_del(&p->list);
374 kfree(p); 370 kfree(p);
375 } 371 }
@@ -520,9 +516,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
520 cfpkt_extr_head(pkt, &param, len); 516 cfpkt_extr_head(pkt, &param, len);
521 break; 517 break;
522 default: 518 default:
523 pr_warning("CAIF: %s(): Request setup " 519 pr_warn("Request setup - invalid link type (%d)\n",
524 "- invalid link type (%d)", 520 serv);
525 __func__, serv);
526 goto error; 521 goto error;
527 } 522 }
528 523
@@ -532,9 +527,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
532 527
533 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || 528 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
534 cfpkt_erroneous(pkt)) { 529 cfpkt_erroneous(pkt)) {
535 pr_err("CAIF: %s(): Invalid O/E bit or parse " 530 pr_err("Invalid O/E bit or parse error on CAIF control channel\n");
536 "error on CAIF control channel",
537 __func__);
538 cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 531 cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
539 0, 532 0,
540 req ? req->client_layer 533 req ? req->client_layer
@@ -556,8 +549,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
556 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); 549 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
557 break; 550 break;
558 case CFCTRL_CMD_LINK_ERR: 551 case CFCTRL_CMD_LINK_ERR:
559 pr_err("CAIF: %s(): Frame Error Indication received\n", 552 pr_err("Frame Error Indication received\n");
560 __func__);
561 cfctrl->res.linkerror_ind(); 553 cfctrl->res.linkerror_ind();
562 break; 554 break;
563 case CFCTRL_CMD_ENUM: 555 case CFCTRL_CMD_ENUM:
@@ -576,7 +568,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
576 cfctrl->res.radioset_rsp(); 568 cfctrl->res.radioset_rsp();
577 break; 569 break;
578 default: 570 default:
579 pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__); 571 pr_err("Unrecognized Control Frame\n");
580 goto error; 572 goto error;
581 break; 573 break;
582 } 574 }
@@ -595,8 +587,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
595 case CAIF_CTRLCMD_FLOW_OFF_IND: 587 case CAIF_CTRLCMD_FLOW_OFF_IND:
596 spin_lock(&this->info_list_lock); 588 spin_lock(&this->info_list_lock);
597 if (!list_empty(&this->list)) { 589 if (!list_empty(&this->list)) {
598 pr_debug("CAIF: %s(): Received flow off in " 590 pr_debug("Received flow off in control layer\n");
599 "control layer", __func__);
600 } 591 }
601 spin_unlock(&this->info_list_lock); 592 spin_unlock(&this->info_list_lock);
602 break; 593 break;
@@ -620,7 +611,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
620 if (!ctrl->loop_linkused[linkid]) 611 if (!ctrl->loop_linkused[linkid])
621 goto found; 612 goto found;
622 spin_unlock(&ctrl->loop_linkid_lock); 613 spin_unlock(&ctrl->loop_linkid_lock);
623 pr_err("CAIF: %s(): Out of link-ids\n", __func__); 614 pr_err("Out of link-ids\n");
624 return -EINVAL; 615 return -EINVAL;
625found: 616found:
626 if (!ctrl->loop_linkused[linkid]) 617 if (!ctrl->loop_linkused[linkid])
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 676648cac8d..496fda9ac66 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -17,7 +19,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
17{ 19{
18 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 20 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
19 if (!dbg) { 21 if (!dbg) {
20 pr_warning("CAIF: %s(): Out of memory\n", __func__); 22 pr_warn("Out of memory\n");
21 return NULL; 23 return NULL;
22 } 24 }
23 caif_assert(offsetof(struct cfsrvl, layer) == 0); 25 caif_assert(offsetof(struct cfsrvl, layer) == 0);
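
The cfdbgl.c hunk above shows the pattern repeated across the CAIF files in this patch: define pr_fmt() before the first include, then let every pr_warn()/pr_err() call drop its hand-rolled "CAIF: %s():" prefix. A minimal user-space sketch of how that macro rewrites the format string (the printf-based pr_warn() and the demo() function are stand-ins, not the kernel's printk machinery):

#include <stdio.h>

#define KBUILD_MODNAME "caif"

/*
 * Prefix every message with "caif:<function>(): " by letting the format
 * macro smuggle __func__ in as the first variadic argument.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
#define pr_warn(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

static void demo(void)
{
	pr_warn("Out of memory\n");        /* prints "caif:demo(): Out of memory" */
	pr_warn("cancel req :%d\n", 42);   /* extra arguments follow __func__ */
}

int main(void)
{
	demo();
	return 0;
}
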
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index ed9d53aff28..d3ed264ad6c 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!dgm) { 30 if (!dgm) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -49,14 +51,14 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
49 caif_assert(layr->ctrlcmd != NULL); 51 caif_assert(layr->ctrlcmd != NULL);
50 52
51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 53 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
52 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 54 pr_err("Packet is erroneous!\n");
53 cfpkt_destroy(pkt); 55 cfpkt_destroy(pkt);
54 return -EPROTO; 56 return -EPROTO;
55 } 57 }
56 58
57 if ((cmd & DGM_CMD_BIT) == 0) { 59 if ((cmd & DGM_CMD_BIT) == 0) {
58 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) { 60 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
59 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 61 pr_err("Packet is erroneous!\n");
60 cfpkt_destroy(pkt); 62 cfpkt_destroy(pkt);
61 return -EPROTO; 63 return -EPROTO;
62 } 64 }
@@ -75,8 +77,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
75 return 0; 77 return 0;
76 default: 78 default:
77 cfpkt_destroy(pkt); 79 cfpkt_destroy(pkt);
78 pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n", 80 pr_info("Unknown datagram control %d (0x%x)\n", cmd, cmd);
79 __func__, cmd, cmd);
80 return -EPROTO; 81 return -EPROTO;
81 } 82 }
82} 83}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index e86a4ca3b21..a445043931a 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -6,6 +6,8 @@
6 * License terms: GNU General Public License (GPL) version 2 6 * License terms: GNU General Public License (GPL) version 2
7 */ 7 */
8 8
9#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
10
9#include <linux/stddef.h> 11#include <linux/stddef.h>
10#include <linux/spinlock.h> 12#include <linux/spinlock.h>
11#include <linux/slab.h> 13#include <linux/slab.h>
@@ -32,7 +34,7 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
32{ 34{
33 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); 35 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
34 if (!this) { 36 if (!this) {
35 pr_warning("CAIF: %s(): Out of memory\n", __func__); 37 pr_warn("Out of memory\n");
36 return NULL; 38 return NULL;
37 } 39 }
38 caif_assert(offsetof(struct cffrml, layer) == 0); 40 caif_assert(offsetof(struct cffrml, layer) == 0);
@@ -83,7 +85,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
83 85
84 if (cfpkt_setlen(pkt, len) < 0) { 86 if (cfpkt_setlen(pkt, len) < 0) {
85 ++cffrml_rcv_error; 87 ++cffrml_rcv_error;
86 pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len); 88 pr_err("Framing length error (%d)\n", len);
87 cfpkt_destroy(pkt); 89 cfpkt_destroy(pkt);
88 return -EPROTO; 90 return -EPROTO;
89 } 91 }
@@ -99,14 +101,14 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
99 cfpkt_add_trail(pkt, &tmp, 2); 101 cfpkt_add_trail(pkt, &tmp, 2);
100 ++cffrml_rcv_error; 102 ++cffrml_rcv_error;
101 ++cffrml_rcv_checsum_error; 103 ++cffrml_rcv_checsum_error;
102 pr_info("CAIF: %s(): Frame checksum error " 104 pr_info("Frame checksum error (0x%x != 0x%x)\n",
103 "(0x%x != 0x%x)\n", __func__, hdrchks, pktchks); 105 hdrchks, pktchks);
104 return -EILSEQ; 106 return -EILSEQ;
105 } 107 }
106 } 108 }
107 if (cfpkt_erroneous(pkt)) { 109 if (cfpkt_erroneous(pkt)) {
108 ++cffrml_rcv_error; 110 ++cffrml_rcv_error;
109 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 111 pr_err("Packet is erroneous!\n");
110 cfpkt_destroy(pkt); 112 cfpkt_destroy(pkt);
111 return -EPROTO; 113 return -EPROTO;
112 } 114 }
@@ -132,7 +134,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
132 cfpkt_add_head(pkt, &tmp, 2); 134 cfpkt_add_head(pkt, &tmp, 2);
133 cfpkt_info(pkt)->hdr_len += 2; 135 cfpkt_info(pkt)->hdr_len += 2;
134 if (cfpkt_erroneous(pkt)) { 136 if (cfpkt_erroneous(pkt)) {
135 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 137 pr_err("Packet is erroneous!\n");
136 return -EPROTO; 138 return -EPROTO;
137 } 139 }
138 ret = layr->dn->transmit(layr->dn, pkt); 140 ret = layr->dn->transmit(layr->dn, pkt);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 80c8d332b25..46f34b2e047 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/stddef.h> 9#include <linux/stddef.h>
7#include <linux/spinlock.h> 10#include <linux/spinlock.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -190,7 +193,7 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
190 u8 id; 193 u8 id;
191 struct cflayer *up; 194 struct cflayer *up;
192 if (cfpkt_extr_head(pkt, &id, 1) < 0) { 195 if (cfpkt_extr_head(pkt, &id, 1) < 0) {
193 pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__); 196 pr_err("erroneous Caif Packet\n");
194 cfpkt_destroy(pkt); 197 cfpkt_destroy(pkt);
195 return -EPROTO; 198 return -EPROTO;
196 } 199 }
@@ -199,8 +202,8 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
199 up = get_up(muxl, id); 202 up = get_up(muxl, id);
200 spin_unlock(&muxl->receive_lock); 203 spin_unlock(&muxl->receive_lock);
201 if (up == NULL) { 204 if (up == NULL) {
202 pr_info("CAIF: %s():Received data on unknown link ID = %d " 205 pr_info("Received data on unknown link ID = %d (0x%x) up == NULL",
203 "(0x%x) up == NULL", __func__, id, id); 206 id, id);
204 cfpkt_destroy(pkt); 207 cfpkt_destroy(pkt);
205 /* 208 /*
206 * Don't return ERROR, since modem misbehaves and sends out 209 * Don't return ERROR, since modem misbehaves and sends out
@@ -223,9 +226,8 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
223 struct caif_payload_info *info = cfpkt_info(pkt); 226 struct caif_payload_info *info = cfpkt_info(pkt);
224 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info); 227 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
225 if (dn == NULL) { 228 if (dn == NULL) {
226 pr_warning("CAIF: %s(): Send data on unknown phy " 229 pr_warn("Send data on unknown phy ID = %d (0x%x)\n",
227 "ID = %d (0x%x)\n", 230 info->dev_info->id, info->dev_info->id);
228 __func__, info->dev_info->id, info->dev_info->id);
229 return -ENOTCONN; 231 return -ENOTCONN;
230 } 232 }
231 info->hdr_len += 1; 233 info->hdr_len += 1;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index c49a6695793..d7e865e2ff6 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/string.h> 9#include <linux/string.h>
8#include <linux/skbuff.h> 10#include <linux/skbuff.h>
9#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -12,11 +14,12 @@
12#define PKT_PREFIX 48 14#define PKT_PREFIX 48
13#define PKT_POSTFIX 2 15#define PKT_POSTFIX 2
14#define PKT_LEN_WHEN_EXTENDING 128 16#define PKT_LEN_WHEN_EXTENDING 128
15#define PKT_ERROR(pkt, errmsg) do { \ 17#define PKT_ERROR(pkt, errmsg) \
16 cfpkt_priv(pkt)->erronous = true; \ 18do { \
17 skb_reset_tail_pointer(&pkt->skb); \ 19 cfpkt_priv(pkt)->erronous = true; \
18 pr_warning("CAIF: " errmsg);\ 20 skb_reset_tail_pointer(&pkt->skb); \
19 } while (0) 21 pr_warn(errmsg); \
22} while (0)
20 23
21struct cfpktq { 24struct cfpktq {
22 struct sk_buff_head head; 25 struct sk_buff_head head;
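
The reworked PKT_ERROR() above keeps the usual do { ... } while (0) wrapper around its three statements. A stand-alone illustration of why a multi-statement macro needs that wrapper (struct pkt and MARK_BAD() are hypothetical stand-ins, not CAIF code):

#include <stdio.h>

struct pkt { int bad; };

#define MARK_BAD(p, msg)                        \
do {                                            \
	(p)->bad = 1;                           \
	fprintf(stderr, "%s\n", (msg));         \
} while (0)

int main(void)
{
	struct pkt pkt = { 0 };
	int len = 10, max = 4;

	/*
	 * The wrapper makes the macro expand to a single statement, so it
	 * nests safely under an un-braced if/else; a bare { ... } block
	 * followed by ';' would break the else branch.
	 */
	if (len > max)
		MARK_BAD(&pkt, "too long");
	else
		printf("ok\n");
	return pkt.bad;
}
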
@@ -130,13 +133,13 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
130 return -EPROTO; 133 return -EPROTO;
131 134
132 if (unlikely(len > skb->len)) { 135 if (unlikely(len > skb->len)) {
133 PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n"); 136 PKT_ERROR(pkt, "read beyond end of packet\n");
134 return -EPROTO; 137 return -EPROTO;
135 } 138 }
136 139
137 if (unlikely(len > skb_headlen(skb))) { 140 if (unlikely(len > skb_headlen(skb))) {
138 if (unlikely(skb_linearize(skb) != 0)) { 141 if (unlikely(skb_linearize(skb) != 0)) {
139 PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n"); 142 PKT_ERROR(pkt, "linearize failed\n");
140 return -EPROTO; 143 return -EPROTO;
141 } 144 }
142 } 145 }
@@ -156,11 +159,11 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
156 return -EPROTO; 159 return -EPROTO;
157 160
158 if (unlikely(skb_linearize(skb) != 0)) { 161 if (unlikely(skb_linearize(skb) != 0)) {
159 PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n"); 162 PKT_ERROR(pkt, "linearize failed\n");
160 return -EPROTO; 163 return -EPROTO;
161 } 164 }
162 if (unlikely(skb->data + len > skb_tail_pointer(skb))) { 165 if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
163 PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n"); 166 PKT_ERROR(pkt, "read beyond end of packet\n");
164 return -EPROTO; 167 return -EPROTO;
165 } 168 }
166 from = skb_tail_pointer(skb) - len; 169 from = skb_tail_pointer(skb) - len;
@@ -202,7 +205,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
202 205
203 /* Make sure data is writable */ 206 /* Make sure data is writable */
204 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) { 207 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
205 PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n"); 208 PKT_ERROR(pkt, "cow failed\n");
206 return -EPROTO; 209 return -EPROTO;
207 } 210 }
208 /* 211 /*
@@ -211,8 +214,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
211 * lengths of the top SKB. 214 * lengths of the top SKB.
212 */ 215 */
213 if (lastskb != skb) { 216 if (lastskb != skb) {
214 pr_warning("CAIF: %s(): Packet is non-linear\n", 217 pr_warn("Packet is non-linear\n");
215 __func__);
216 skb->len += len; 218 skb->len += len;
217 skb->data_len += len; 219 skb->data_len += len;
218 } 220 }
@@ -242,14 +244,14 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
242 if (unlikely(is_erronous(pkt))) 244 if (unlikely(is_erronous(pkt)))
243 return -EPROTO; 245 return -EPROTO;
244 if (unlikely(skb_headroom(skb) < len)) { 246 if (unlikely(skb_headroom(skb) < len)) {
245 PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n"); 247 PKT_ERROR(pkt, "no headroom\n");
246 return -EPROTO; 248 return -EPROTO;
247 } 249 }
248 250
249 /* Make sure data is writable */ 251 /* Make sure data is writable */
250 ret = skb_cow_data(skb, 0, &lastskb); 252 ret = skb_cow_data(skb, 0, &lastskb);
251 if (unlikely(ret < 0)) { 253 if (unlikely(ret < 0)) {
252 PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n"); 254 PKT_ERROR(pkt, "cow failed\n");
253 return ret; 255 return ret;
254 } 256 }
255 257
@@ -283,7 +285,7 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt,
283 if (unlikely(is_erronous(pkt))) 285 if (unlikely(is_erronous(pkt)))
284 return -EPROTO; 286 return -EPROTO;
285 if (unlikely(skb_linearize(&pkt->skb) != 0)) { 287 if (unlikely(skb_linearize(&pkt->skb) != 0)) {
286 PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n"); 288 PKT_ERROR(pkt, "linearize failed\n");
287 return -EPROTO; 289 return -EPROTO;
288 } 290 }
289 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); 291 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
@@ -309,7 +311,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
309 311
310 /* Need to expand SKB */ 312 /* Need to expand SKB */
311 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len))) 313 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len)))
312 PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n"); 314 PKT_ERROR(pkt, "skb_pad_trail failed\n");
313 315
314 return cfpkt_getlen(pkt); 316 return cfpkt_getlen(pkt);
315} 317}
@@ -380,8 +382,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
380 return NULL; 382 return NULL;
381 383
382 if (skb->data + pos > skb_tail_pointer(skb)) { 384 if (skb->data + pos > skb_tail_pointer(skb)) {
383 PKT_ERROR(pkt, 385 PKT_ERROR(pkt, "trying to split beyond end of packet\n");
384 "cfpkt_split: trying to split beyond end of packet");
385 return NULL; 386 return NULL;
386 } 387 }
387 388
@@ -455,17 +456,17 @@ int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
455 return -EPROTO; 456 return -EPROTO;
456 /* Make sure SKB is writable */ 457 /* Make sure SKB is writable */
457 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { 458 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
458 PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n"); 459 PKT_ERROR(pkt, "skb_cow_data failed\n");
459 return -EPROTO; 460 return -EPROTO;
460 } 461 }
461 462
462 if (unlikely(skb_linearize(skb) != 0)) { 463 if (unlikely(skb_linearize(skb) != 0)) {
463 PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n"); 464 PKT_ERROR(pkt, "linearize failed\n");
464 return -EPROTO; 465 return -EPROTO;
465 } 466 }
466 467
467 if (unlikely(skb_tailroom(skb) < buflen)) { 468 if (unlikely(skb_tailroom(skb) < buflen)) {
468 PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n"); 469 PKT_ERROR(pkt, "buffer too short - failed\n");
469 return -EPROTO; 470 return -EPROTO;
470 } 471 }
471 472
@@ -483,14 +484,13 @@ int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
483 return -EPROTO; 484 return -EPROTO;
484 485
485 if (unlikely(buflen > skb->len)) { 486 if (unlikely(buflen > skb->len)) {
486 PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large " 487 PKT_ERROR(pkt, "buflen too large - failed\n");
487 "- failed\n");
488 return -EPROTO; 488 return -EPROTO;
489 } 489 }
490 490
491 if (unlikely(buflen > skb_headlen(skb))) { 491 if (unlikely(buflen > skb_headlen(skb))) {
492 if (unlikely(skb_linearize(skb) != 0)) { 492 if (unlikely(skb_linearize(skb) != 0)) {
493 PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n"); 493 PKT_ERROR(pkt, "linearize failed\n");
494 return -EPROTO; 494 return -EPROTO;
495 } 495 }
496 } 496 }
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 9a699242d10..bde8481e8d2 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -48,7 +50,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
48 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); 50 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
49 51
50 if (!this) { 52 if (!this) {
51 pr_warning("CAIF: %s(): Out of memory\n", __func__); 53 pr_warn("Out of memory\n");
52 return NULL; 54 return NULL;
53 } 55 }
54 56
@@ -178,9 +180,7 @@ out:
178 cfpkt_destroy(rfml->incomplete_frm); 180 cfpkt_destroy(rfml->incomplete_frm);
179 rfml->incomplete_frm = NULL; 181 rfml->incomplete_frm = NULL;
180 182
181 pr_info("CAIF: %s(): " 183 pr_info("Connection error %d triggered on RFM link\n", err);
182 "Connection error %d triggered on RFM link\n",
183 __func__, err);
184 184
185 /* Trigger connection error upon failure.*/ 185 /* Trigger connection error upon failure.*/
186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
@@ -280,9 +280,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
280out: 280out:
281 281
282 if (err != 0) { 282 if (err != 0) {
283 pr_info("CAIF: %s(): " 283 pr_info("Connection error %d triggered on RFM link\n", err);
284 "Connection error %d triggered on RFM link\n",
285 __func__, err);
286 /* Trigger connection error upon failure.*/ 284 /* Trigger connection error upon failure.*/
287 285
288 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 286 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index a11fbd68a13..9297f7dea9d 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -34,7 +36,7 @@ struct cflayer *cfserl_create(int type, int instance, bool use_stx)
34{ 36{
35 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); 37 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
36 if (!this) { 38 if (!this) {
37 pr_warning("CAIF: %s(): Out of memory\n", __func__); 39 pr_warn("Out of memory\n");
38 return NULL; 40 return NULL;
39 } 41 }
40 caif_assert(offsetof(struct cfserl, layer) == 0); 42 caif_assert(offsetof(struct cfserl, layer) == 0);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index f40939a9121..ab5e542526b 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/errno.h> 11#include <linux/errno.h>
@@ -79,8 +81,7 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
79 layr->up->ctrlcmd(layr->up, ctrl, phyid); 81 layr->up->ctrlcmd(layr->up, ctrl, phyid);
80 break; 82 break;
81 default: 83 default:
82 pr_warning("CAIF: %s(): " 84 pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl);
83 "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
84 /* We have both modem and phy flow on, send flow on */ 85 /* We have both modem and phy flow on, send flow on */
85 layr->up->ctrlcmd(layr->up, ctrl, phyid); 86 layr->up->ctrlcmd(layr->up, ctrl, phyid);
86 service->phy_flow_on = true; 87 service->phy_flow_on = true;
@@ -107,14 +108,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
107 u8 flow_on = SRVL_FLOW_ON; 108 u8 flow_on = SRVL_FLOW_ON;
108 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 109 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
109 if (!pkt) { 110 if (!pkt) {
110 pr_warning("CAIF: %s(): Out of memory\n", 111 pr_warn("Out of memory\n");
111 __func__);
112 return -ENOMEM; 112 return -ENOMEM;
113 } 113 }
114 114
115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { 115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
116 pr_err("CAIF: %s(): Packet is erroneous!\n", 116 pr_err("Packet is erroneous!\n");
117 __func__);
118 cfpkt_destroy(pkt); 117 cfpkt_destroy(pkt);
119 return -EPROTO; 118 return -EPROTO;
120 } 119 }
@@ -131,14 +130,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
131 u8 flow_off = SRVL_FLOW_OFF; 130 u8 flow_off = SRVL_FLOW_OFF;
132 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 131 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
133 if (!pkt) { 132 if (!pkt) {
134 pr_warning("CAIF: %s(): Out of memory\n", 133 pr_warn("Out of memory\n");
135 __func__);
136 return -ENOMEM; 134 return -ENOMEM;
137 } 135 }
138 136
139 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { 137 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
140 pr_err("CAIF: %s(): Packet is erroneous!\n", 138 pr_err("Packet is erroneous!\n");
141 __func__);
142 cfpkt_destroy(pkt); 139 cfpkt_destroy(pkt);
143 return -EPROTO; 140 return -EPROTO;
144 } 141 }
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 02795aff57a..efad410e4c8 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!util) { 30 if (!util) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
47 caif_assert(layr->up->receive != NULL); 49 caif_assert(layr->up->receive != NULL);
48 caif_assert(layr->up->ctrlcmd != NULL); 50 caif_assert(layr->up->ctrlcmd != NULL);
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -64,16 +66,14 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
64 cfpkt_destroy(pkt); 66 cfpkt_destroy(pkt);
65 return 0; 67 return 0;
66 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */ 68 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */
67 pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n", 69 pr_err("REMOTE SHUTDOWN REQUEST RECEIVED\n");
68 __func__);
69 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0); 70 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
70 service->open = false; 71 service->open = false;
71 cfpkt_destroy(pkt); 72 cfpkt_destroy(pkt);
72 return 0; 73 return 0;
73 default: 74 default:
74 cfpkt_destroy(pkt); 75 cfpkt_destroy(pkt);
75 pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n", 76 pr_warn("Unknown service control %d (0x%x)\n", cmd, cmd);
76 __func__, cmd, cmd);
77 return -EPROTO; 77 return -EPROTO;
78 } 78 }
79} 79}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 77cc09faac9..3b425b189a9 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -25,7 +27,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
25{ 27{
26 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 28 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
27 if (!vei) { 29 if (!vei) {
28 pr_warning("CAIF: %s(): Out of memory\n", __func__); 30 pr_warn("Out of memory\n");
29 return NULL; 31 return NULL;
30 } 32 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0); 33 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
47 49
48 50
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -67,8 +69,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
67 cfpkt_destroy(pkt); 69 cfpkt_destroy(pkt);
68 return 0; 70 return 0;
69 default: /* SET RS232 PIN */ 71 default: /* SET RS232 PIN */
70 pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n", 72 pr_warn("Unknown VEI control packet %d (0x%x)!\n", cmd, cmd);
71 __func__, cmd, cmd);
72 cfpkt_destroy(pkt); 73 cfpkt_destroy(pkt);
73 return -EPROTO; 74 return -EPROTO;
74 } 75 }
@@ -86,7 +87,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
86 caif_assert(layr->dn->transmit != NULL); 87 caif_assert(layr->dn->transmit != NULL);
87 88
88 if (cfpkt_add_head(pkt, &tmp, 1) < 0) { 89 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
89 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 90 pr_err("Packet is erroneous!\n");
90 return -EPROTO; 91 return -EPROTO;
91 } 92 }
92 93
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index ada6ee2d48f..bf6fef2a0ef 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -21,7 +23,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
21{ 23{
22 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 24 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
23 if (!vid) { 25 if (!vid) {
24 pr_warning("CAIF: %s(): Out of memory\n", __func__); 26 pr_warn("Out of memory\n");
25 return NULL; 27 return NULL;
26 } 28 }
27 caif_assert(offsetof(struct cfsrvl, layer) == 0); 29 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -38,7 +40,7 @@ static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
38{ 40{
39 u32 videoheader; 41 u32 videoheader;
40 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) { 42 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
41 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 43 pr_err("Packet is erroneous!\n");
42 cfpkt_destroy(pkt); 44 cfpkt_destroy(pkt);
43 return -EPROTO; 45 return -EPROTO;
44 } 46 }
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 4293e190ec5..84a422c9894 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -5,6 +5,8 @@
5 * License terms: GNU General Public License (GPL) version 2 5 * License terms: GNU General Public License (GPL) version 2
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
9
8#include <linux/version.h> 10#include <linux/version.h>
9#include <linux/fs.h> 11#include <linux/fs.h>
10#include <linux/init.h> 12#include <linux/init.h>
@@ -28,9 +30,6 @@
28#define CONNECT_TIMEOUT (5 * HZ) 30#define CONNECT_TIMEOUT (5 * HZ)
29#define CAIF_NET_DEFAULT_QUEUE_LEN 500 31#define CAIF_NET_DEFAULT_QUEUE_LEN 500
30 32
31#undef pr_debug
32#define pr_debug pr_warning
33
34/*This list is protected by the rtnl lock. */ 33/*This list is protected by the rtnl lock. */
35static LIST_HEAD(chnl_net_list); 34static LIST_HEAD(chnl_net_list);
36 35
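
Removing the local "#define pr_debug pr_warning" override restores the normal pr_debug() behaviour for chnl_net.c: the calls are compiled out unless debugging is switched on (DEBUG at build time, or dynamic debug at run time). A rough user-space approximation of that on/off behaviour (printf stand-in, not the kernel's printk path):

#include <stdio.h>

#ifdef DEBUG
#define pr_debug(fmt, ...) printf("debug: " fmt, ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) do { } while (0)   /* compiled out by default */
#endif

int main(void)
{
	pr_debug("connect timeout\n");   /* silent unless built with -DDEBUG */
	return 0;
}
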
@@ -142,8 +141,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
142 int phyid) 141 int phyid)
143{ 142{
144 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); 143 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
145 pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n", 144 pr_debug("NET flowctrl func called flow: %s\n",
146 __func__,
147 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : 145 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
148 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : 146 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
149 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : 147 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
@@ -196,12 +194,12 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
196 priv = netdev_priv(dev); 194 priv = netdev_priv(dev);
197 195
198 if (skb->len > priv->netdev->mtu) { 196 if (skb->len > priv->netdev->mtu) {
199 pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__); 197 pr_warn("Size of skb exceeded MTU\n");
200 return -ENOSPC; 198 return -ENOSPC;
201 } 199 }
202 200
203 if (!priv->flowenabled) { 201 if (!priv->flowenabled) {
204 pr_debug("CAIF: %s(): dropping packets flow off\n", __func__); 202 pr_debug("dropping packets flow off\n");
205 return NETDEV_TX_BUSY; 203 return NETDEV_TX_BUSY;
206 } 204 }
207 205
@@ -237,7 +235,7 @@ static int chnl_net_open(struct net_device *dev)
237 ASSERT_RTNL(); 235 ASSERT_RTNL();
238 priv = netdev_priv(dev); 236 priv = netdev_priv(dev);
239 if (!priv) { 237 if (!priv) {
240 pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__); 238 pr_debug("chnl_net_open: no priv\n");
241 return -ENODEV; 239 return -ENODEV;
242 } 240 }
243 241
@@ -246,18 +244,17 @@ static int chnl_net_open(struct net_device *dev)
246 result = caif_connect_client(&priv->conn_req, &priv->chnl, 244 result = caif_connect_client(&priv->conn_req, &priv->chnl,
247 &llifindex, &headroom, &tailroom); 245 &llifindex, &headroom, &tailroom);
248 if (result != 0) { 246 if (result != 0) {
249 pr_debug("CAIF: %s(): err: " 247 pr_debug("err: "
250 "Unable to register and open device," 248 "Unable to register and open device,"
251 " Err:%d\n", 249 " Err:%d\n",
252 __func__, 250 result);
253 result);
254 goto error; 251 goto error;
255 } 252 }
256 253
257 lldev = dev_get_by_index(dev_net(dev), llifindex); 254 lldev = dev_get_by_index(dev_net(dev), llifindex);
258 255
259 if (lldev == NULL) { 256 if (lldev == NULL) {
260 pr_debug("CAIF: %s(): no interface?\n", __func__); 257 pr_debug("no interface?\n");
261 result = -ENODEV; 258 result = -ENODEV;
262 goto error; 259 goto error;
263 } 260 }
@@ -279,9 +276,7 @@ static int chnl_net_open(struct net_device *dev)
279 dev_put(lldev); 276 dev_put(lldev);
280 277
281 if (mtu < 100) { 278 if (mtu < 100) {
282 pr_warning("CAIF: %s(): " 279 pr_warn("CAIF Interface MTU too small (%d)\n", mtu);
283 "CAIF Interface MTU too small (%d)\n",
284 __func__, mtu);
285 result = -ENODEV; 280 result = -ENODEV;
286 goto error; 281 goto error;
287 } 282 }
@@ -296,33 +291,32 @@ static int chnl_net_open(struct net_device *dev)
296 rtnl_lock(); 291 rtnl_lock();
297 292
298 if (result == -ERESTARTSYS) { 293 if (result == -ERESTARTSYS) {
299 pr_debug("CAIF: %s(): wait_event_interruptible" 294 pr_debug("wait_event_interruptible woken by a signal\n");
300 " woken by a signal\n", __func__);
301 result = -ERESTARTSYS; 295 result = -ERESTARTSYS;
302 goto error; 296 goto error;
303 } 297 }
304 298
305 if (result == 0) { 299 if (result == 0) {
306 pr_debug("CAIF: %s(): connect timeout\n", __func__); 300 pr_debug("connect timeout\n");
307 caif_disconnect_client(&priv->chnl); 301 caif_disconnect_client(&priv->chnl);
308 priv->state = CAIF_DISCONNECTED; 302 priv->state = CAIF_DISCONNECTED;
309 pr_debug("CAIF: %s(): state disconnected\n", __func__); 303 pr_debug("state disconnected\n");
310 result = -ETIMEDOUT; 304 result = -ETIMEDOUT;
311 goto error; 305 goto error;
312 } 306 }
313 307
314 if (priv->state != CAIF_CONNECTED) { 308 if (priv->state != CAIF_CONNECTED) {
315 pr_debug("CAIF: %s(): connect failed\n", __func__); 309 pr_debug("connect failed\n");
316 result = -ECONNREFUSED; 310 result = -ECONNREFUSED;
317 goto error; 311 goto error;
318 } 312 }
319 pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); 313 pr_debug("CAIF Netdevice connected\n");
320 return 0; 314 return 0;
321 315
322error: 316error:
323 caif_disconnect_client(&priv->chnl); 317 caif_disconnect_client(&priv->chnl);
324 priv->state = CAIF_DISCONNECTED; 318 priv->state = CAIF_DISCONNECTED;
325 pr_debug("CAIF: %s(): state disconnected\n", __func__); 319 pr_debug("state disconnected\n");
326 return result; 320 return result;
327 321
328} 322}
@@ -413,7 +407,7 @@ static void caif_netlink_parms(struct nlattr *data[],
413 struct caif_connect_request *conn_req) 407 struct caif_connect_request *conn_req)
414{ 408{
415 if (!data) { 409 if (!data) {
416 pr_warning("CAIF: %s: no params data found\n", __func__); 410 pr_warn("no params data found\n");
417 return; 411 return;
418 } 412 }
419 if (data[IFLA_CAIF_IPV4_CONNID]) 413 if (data[IFLA_CAIF_IPV4_CONNID])
@@ -442,8 +436,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
442 436
443 ret = register_netdevice(dev); 437 ret = register_netdevice(dev);
444 if (ret) 438 if (ret)
445 pr_warning("CAIF: %s(): device rtml registration failed\n", 439 pr_warn("device rtml registration failed\n");
446 __func__);
447 return ret; 440 return ret;
448} 441}
449 442
diff --git a/net/can/raw.c b/net/can/raw.c
index a10e3338f08..7d77e67e57a 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -647,12 +647,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
647 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 647 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
648 if (err < 0) 648 if (err < 0)
649 goto free_skb; 649 goto free_skb;
650 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 650 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
651 if (err < 0) 651 if (err < 0)
652 goto free_skb; 652 goto free_skb;
653 653
654 /* to be able to check the received tx sock reference in raw_rcv() */ 654 /* to be able to check the received tx sock reference in raw_rcv() */
655 skb_tx(skb)->prevent_sk_orphan = 1; 655 skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
656 656
657 skb->dev = dev; 657 skb->dev = dev;
658 skb->sk = sk; 658 skb->sk = sk;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 251997a9548..4df1b7a6c1b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -746,13 +746,12 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
746 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 746 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
747 mask |= POLLERR; 747 mask |= POLLERR;
748 if (sk->sk_shutdown & RCV_SHUTDOWN) 748 if (sk->sk_shutdown & RCV_SHUTDOWN)
749 mask |= POLLRDHUP; 749 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
750 if (sk->sk_shutdown == SHUTDOWN_MASK) 750 if (sk->sk_shutdown == SHUTDOWN_MASK)
751 mask |= POLLHUP; 751 mask |= POLLHUP;
752 752
753 /* readable? */ 753 /* readable? */
754 if (!skb_queue_empty(&sk->sk_receive_queue) || 754 if (!skb_queue_empty(&sk->sk_receive_queue))
755 (sk->sk_shutdown & RCV_SHUTDOWN))
756 mask |= POLLIN | POLLRDNORM; 755 mask |= POLLIN | POLLRDNORM;
757 756
758 /* Connection-based need to check for termination and startup */ 757 /* Connection-based need to check for termination and startup */
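
The datagram_poll() hunk consolidates the shutdown handling: POLLIN | POLLRDNORM is raised directly in the RCV_SHUTDOWN branch instead of being OR-ed in by the separate "readable?" check, with the same net effect: a reader blocked in poll() wakes up and then reads EOF once the receive side has been shut down, even with an empty queue. A user-space sketch of that guarantee (a UDP socket connected to itself, purely so shutdown() is legal; illustrative only):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	socklen_t alen = sizeof(addr);
	struct pollfd pfd = { .events = POLLIN };
	char buf[64];
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	getsockname(fd, (struct sockaddr *)&addr, &alen);   /* learn our port */
	connect(fd, (struct sockaddr *)&addr, sizeof(addr));

	shutdown(fd, SHUT_RD);          /* sets RCV_SHUTDOWN on the socket */

	pfd.fd = fd;
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
		printf("recv() returned %zd (0 means EOF)\n",
		       recv(fd, buf, sizeof(buf), 0));
	close(fd);
	return 0;
}
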
diff --git a/net/core/dev.c b/net/core/dev.c
index 660dd41aaaa..42b200fdf12 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,7 @@
129#include <linux/random.h> 129#include <linux/random.h>
130#include <trace/events/napi.h> 130#include <trace/events/napi.h>
131#include <linux/pci.h> 131#include <linux/pci.h>
132#include <linux/inetdevice.h>
132 133
133#include "net-sysfs.h" 134#include "net-sysfs.h"
134 135
@@ -371,6 +372,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
371 * --ANK (980803) 372 * --ANK (980803)
372 */ 373 */
373 374
375static inline struct list_head *ptype_head(const struct packet_type *pt)
376{
377 if (pt->type == htons(ETH_P_ALL))
378 return &ptype_all;
379 else
380 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
381}
382
374/** 383/**
375 * dev_add_pack - add packet handler 384 * dev_add_pack - add packet handler
376 * @pt: packet type declaration 385 * @pt: packet type declaration
@@ -386,16 +395,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
386 395
387void dev_add_pack(struct packet_type *pt) 396void dev_add_pack(struct packet_type *pt)
388{ 397{
389 int hash; 398 struct list_head *head = ptype_head(pt);
390 399
391 spin_lock_bh(&ptype_lock); 400 spin_lock(&ptype_lock);
392 if (pt->type == htons(ETH_P_ALL)) 401 list_add_rcu(&pt->list, head);
393 list_add_rcu(&pt->list, &ptype_all); 402 spin_unlock(&ptype_lock);
394 else {
395 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
396 list_add_rcu(&pt->list, &ptype_base[hash]);
397 }
398 spin_unlock_bh(&ptype_lock);
399} 403}
400EXPORT_SYMBOL(dev_add_pack); 404EXPORT_SYMBOL(dev_add_pack);
401 405
@@ -414,15 +418,10 @@ EXPORT_SYMBOL(dev_add_pack);
414 */ 418 */
415void __dev_remove_pack(struct packet_type *pt) 419void __dev_remove_pack(struct packet_type *pt)
416{ 420{
417 struct list_head *head; 421 struct list_head *head = ptype_head(pt);
418 struct packet_type *pt1; 422 struct packet_type *pt1;
419 423
420 spin_lock_bh(&ptype_lock); 424 spin_lock(&ptype_lock);
421
422 if (pt->type == htons(ETH_P_ALL))
423 head = &ptype_all;
424 else
425 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
426 425
427 list_for_each_entry(pt1, head, list) { 426 list_for_each_entry(pt1, head, list) {
428 if (pt == pt1) { 427 if (pt == pt1) {
@@ -433,7 +432,7 @@ void __dev_remove_pack(struct packet_type *pt)
433 432
434 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); 433 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
435out: 434out:
436 spin_unlock_bh(&ptype_lock); 435 spin_unlock(&ptype_lock);
437} 436}
438EXPORT_SYMBOL(__dev_remove_pack); 437EXPORT_SYMBOL(__dev_remove_pack);
439 438
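
The two hunks above factor the ptype_all / ptype_base[] bucket selection into ptype_head(), so dev_add_pack() and __dev_remove_pack() no longer duplicate it, and switch the writers from spin_lock_bh() to spin_lock() (the lists are traversed by readers under RCU, so disabling bottom halves on the write side appears unnecessary). A simplified user-space sketch of the shared bucket-selection helper (struct handler, TYPE_ALL and the singly linked lists are hypothetical stand-ins):

#include <stdint.h>

#define PTYPE_HASH_SIZE 16
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
#define TYPE_ALL 0xffff                 /* stand-in for htons(ETH_P_ALL) */

struct handler { uint16_t type; struct handler *next; };

static struct handler *type_all;                    /* "match everything" list */
static struct handler *type_base[PTYPE_HASH_SIZE];  /* hashed per-protocol lists */

/* One helper picks the list head; add and remove both reuse it. */
static struct handler **handler_head(const struct handler *h)
{
	if (h->type == TYPE_ALL)
		return &type_all;
	return &type_base[h->type & PTYPE_HASH_MASK];
}

static void handler_add(struct handler *h)
{
	struct handler **head = handler_head(h);

	h->next = *head;
	*head = h;
}

int main(void)
{
	struct handler tcp = { .type = 0x0800 };

	handler_add(&tcp);
	return type_base[0x0800 & PTYPE_HASH_MASK] != &tcp;
}
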
@@ -1902,14 +1901,14 @@ static int dev_gso_segment(struct sk_buff *skb)
1902 1901
1903/* 1902/*
1904 * Try to orphan skb early, right before transmission by the device. 1903 * Try to orphan skb early, right before transmission by the device.
1905 * We cannot orphan skb if tx timestamp is requested, since 1904 * We cannot orphan skb if tx timestamp is requested or the sk-reference
1906 * drivers need to call skb_tstamp_tx() to send the timestamp. 1905 * is needed on driver level for other reasons, e.g. see net/can/raw.c
1907 */ 1906 */
1908static inline void skb_orphan_try(struct sk_buff *skb) 1907static inline void skb_orphan_try(struct sk_buff *skb)
1909{ 1908{
1910 struct sock *sk = skb->sk; 1909 struct sock *sk = skb->sk;
1911 1910
1912 if (sk && !skb_tx(skb)->flags) { 1911 if (sk && !skb_shinfo(skb)->tx_flags) {
1913 /* skb_tx_hash() wont be able to get sk. 1912 /* skb_tx_hash() wont be able to get sk.
1914 * We copy sk_hash into skb->rxhash 1913 * We copy sk_hash into skb->rxhash
1915 */ 1914 */
@@ -1930,7 +1929,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
1930 struct net_device *dev) 1929 struct net_device *dev)
1931{ 1930{
1932 return skb_is_nonlinear(skb) && 1931 return skb_is_nonlinear(skb) &&
1933 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || 1932 ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
1934 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || 1933 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
1935 illegal_highdma(dev, skb)))); 1934 illegal_highdma(dev, skb))));
1936} 1935}
@@ -2259,69 +2258,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2259 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2258 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2260} 2259}
2261 2260
2262#ifdef CONFIG_RPS
2263
2264/* One global table that all flow-based protocols share. */
2265struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2266EXPORT_SYMBOL(rps_sock_flow_table);
2267
2268/* 2261/*
2269 * get_rps_cpu is called from netif_receive_skb and returns the target 2262 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2270 * CPU from the RPS map of the receiving queue for a given skb. 2263 * and src/dst port numbers. Returns a non-zero hash number on success
2271 * rcu_read_lock must be held on entry. 2264 * and 0 on failure.
2272 */ 2265 */
2273static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2266__u32 __skb_get_rxhash(struct sk_buff *skb)
2274 struct rps_dev_flow **rflowp)
2275{ 2267{
2268 int nhoff, hash = 0, poff;
2276 struct ipv6hdr *ip6; 2269 struct ipv6hdr *ip6;
2277 struct iphdr *ip; 2270 struct iphdr *ip;
2278 struct netdev_rx_queue *rxqueue;
2279 struct rps_map *map;
2280 struct rps_dev_flow_table *flow_table;
2281 struct rps_sock_flow_table *sock_flow_table;
2282 int cpu = -1;
2283 u8 ip_proto; 2271 u8 ip_proto;
2284 u16 tcpu;
2285 u32 addr1, addr2, ihl; 2272 u32 addr1, addr2, ihl;
2286 union { 2273 union {
2287 u32 v32; 2274 u32 v32;
2288 u16 v16[2]; 2275 u16 v16[2];
2289 } ports; 2276 } ports;
2290 2277
2291 if (skb_rx_queue_recorded(skb)) { 2278 nhoff = skb_network_offset(skb);
2292 u16 index = skb_get_rx_queue(skb);
2293 if (unlikely(index >= dev->num_rx_queues)) {
2294 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2295 "on queue %u, but number of RX queues is %u\n",
2296 dev->name, index, dev->num_rx_queues);
2297 goto done;
2298 }
2299 rxqueue = dev->_rx + index;
2300 } else
2301 rxqueue = dev->_rx;
2302
2303 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2304 goto done;
2305
2306 if (skb->rxhash)
2307 goto got_hash; /* Skip hash computation on packet header */
2308 2279
2309 switch (skb->protocol) { 2280 switch (skb->protocol) {
2310 case __constant_htons(ETH_P_IP): 2281 case __constant_htons(ETH_P_IP):
2311 if (!pskb_may_pull(skb, sizeof(*ip))) 2282 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2312 goto done; 2283 goto done;
2313 2284
2314 ip = (struct iphdr *) skb->data; 2285 ip = (struct iphdr *) (skb->data + nhoff);
2315 ip_proto = ip->protocol; 2286 if (ip->frag_off & htons(IP_MF | IP_OFFSET))
2287 ip_proto = 0;
2288 else
2289 ip_proto = ip->protocol;
2316 addr1 = (__force u32) ip->saddr; 2290 addr1 = (__force u32) ip->saddr;
2317 addr2 = (__force u32) ip->daddr; 2291 addr2 = (__force u32) ip->daddr;
2318 ihl = ip->ihl; 2292 ihl = ip->ihl;
2319 break; 2293 break;
2320 case __constant_htons(ETH_P_IPV6): 2294 case __constant_htons(ETH_P_IPV6):
2321 if (!pskb_may_pull(skb, sizeof(*ip6))) 2295 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2322 goto done; 2296 goto done;
2323 2297
2324 ip6 = (struct ipv6hdr *) skb->data; 2298 ip6 = (struct ipv6hdr *) (skb->data + nhoff);
2325 ip_proto = ip6->nexthdr; 2299 ip_proto = ip6->nexthdr;
2326 addr1 = (__force u32) ip6->saddr.s6_addr32[3]; 2300 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2327 addr2 = (__force u32) ip6->daddr.s6_addr32[3]; 2301 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2330,33 +2304,80 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2330 default: 2304 default:
2331 goto done; 2305 goto done;
2332 } 2306 }
2333 switch (ip_proto) { 2307
2334 case IPPROTO_TCP: 2308 ports.v32 = 0;
2335 case IPPROTO_UDP: 2309 poff = proto_ports_offset(ip_proto);
2336 case IPPROTO_DCCP: 2310 if (poff >= 0) {
2337 case IPPROTO_ESP: 2311 nhoff += ihl * 4 + poff;
2338 case IPPROTO_AH: 2312 if (pskb_may_pull(skb, nhoff + 4)) {
2339 case IPPROTO_SCTP: 2313 ports.v32 = * (__force u32 *) (skb->data + nhoff);
2340 case IPPROTO_UDPLITE:
2341 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2342 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2343 if (ports.v16[1] < ports.v16[0]) 2314 if (ports.v16[1] < ports.v16[0])
2344 swap(ports.v16[0], ports.v16[1]); 2315 swap(ports.v16[0], ports.v16[1]);
2345 break;
2346 } 2316 }
2347 default:
2348 ports.v32 = 0;
2349 break;
2350 } 2317 }
2351 2318
2352 /* get a consistent hash (same value on both flow directions) */ 2319 /* get a consistent hash (same value on both flow directions) */
2353 if (addr2 < addr1) 2320 if (addr2 < addr1)
2354 swap(addr1, addr2); 2321 swap(addr1, addr2);
2355 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2356 if (!skb->rxhash)
2357 skb->rxhash = 1;
2358 2322
2359got_hash: 2323 hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2324 if (!hash)
2325 hash = 1;
2326
2327done:
2328 return hash;
2329}
2330EXPORT_SYMBOL(__skb_get_rxhash);
2331
2332#ifdef CONFIG_RPS
2333
2334/* One global table that all flow-based protocols share. */
2335struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2336EXPORT_SYMBOL(rps_sock_flow_table);
2337
2338/*
2339 * get_rps_cpu is called from netif_receive_skb and returns the target
2340 * CPU from the RPS map of the receiving queue for a given skb.
2341 * rcu_read_lock must be held on entry.
2342 */
2343static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2344 struct rps_dev_flow **rflowp)
2345{
2346 struct netdev_rx_queue *rxqueue;
2347 struct rps_map *map = NULL;
2348 struct rps_dev_flow_table *flow_table;
2349 struct rps_sock_flow_table *sock_flow_table;
2350 int cpu = -1;
2351 u16 tcpu;
2352
2353 if (skb_rx_queue_recorded(skb)) {
2354 u16 index = skb_get_rx_queue(skb);
2355 if (unlikely(index >= dev->num_rx_queues)) {
2356 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2357 "on queue %u, but number of RX queues is %u\n",
2358 dev->name, index, dev->num_rx_queues);
2359 goto done;
2360 }
2361 rxqueue = dev->_rx + index;
2362 } else
2363 rxqueue = dev->_rx;
2364
2365 if (rxqueue->rps_map) {
2366 map = rcu_dereference(rxqueue->rps_map);
2367 if (map && map->len == 1) {
2368 tcpu = map->cpus[0];
2369 if (cpu_online(tcpu))
2370 cpu = tcpu;
2371 goto done;
2372 }
2373 } else if (!rxqueue->rps_flow_table) {
2374 goto done;
2375 }
2376
2377 skb_reset_network_header(skb);
2378 if (!skb_get_rxhash(skb))
2379 goto done;
2380
2360 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2381 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2361 sock_flow_table = rcu_dereference(rps_sock_flow_table); 2382 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2362 if (flow_table && sock_flow_table) { 2383 if (flow_table && sock_flow_table) {
@@ -2396,7 +2417,6 @@ got_hash:
2396 } 2417 }
2397 } 2418 }
2398 2419
2399 map = rcu_dereference(rxqueue->rps_map);
2400 if (map) { 2420 if (map) {
2401 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 2421 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2402 2422
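
The long hunk above lifts the flow-hash computation out of get_rps_cpu() into __skb_get_rxhash(): it reads the IPv4/IPv6 addresses and, via proto_ports_offset(), the transport ports relative to the network header offset, orders each pair so both directions of a flow produce the same value, then mixes them with jhash_3words(), reserving 0 for "no hash computed". A user-space sketch of that symmetric hashing idea (mix() below is a toy stand-in, not jhash):

#include <stdint.h>
#include <stdio.h>

static uint32_t mix(uint32_t a, uint32_t b, uint32_t c)
{
	/* toy mixer standing in for the kernel's jhash_3words() */
	a ^= b * 0x9e3779b1u;
	a ^= c * 0x85ebca6bu;
	a ^= a >> 16;
	return a ? a : 1;       /* 0 is reserved for "no hash computed" */
}

static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
			  uint16_t sport, uint16_t dport)
{
	uint32_t a1 = saddr, a2 = daddr;
	uint16_t p1 = sport, p2 = dport;

	/* order both pairs so A->B and B->A hash identically */
	if (a2 < a1) { uint32_t t = a1; a1 = a2; a2 = t; }
	if (p2 < p1) { uint16_t t = p1; p1 = p2; p2 = t; }
	return mix(a1, a2, ((uint32_t)p1 << 16) | p2);
}

int main(void)
{
	printf("%u\n", flow_hash(0x0a000001, 0x0a000002, 1234, 80));
	printf("%u\n", flow_hash(0x0a000002, 0x0a000001, 80, 1234)); /* same value */
	return 0;
}
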
@@ -2828,8 +2848,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
2828 if (!netdev_tstamp_prequeue) 2848 if (!netdev_tstamp_prequeue)
2829 net_timestamp_check(skb); 2849 net_timestamp_check(skb);
2830 2850
2831 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2851 if (vlan_tx_tag_present(skb))
2832 return NET_RX_SUCCESS; 2852 vlan_hwaccel_do_receive(skb);
2833 2853
2834 /* if we've gotten here through NAPI, check netpoll */ 2854 /* if we've gotten here through NAPI, check netpoll */
2835 if (netpoll_receive_skb(skb)) 2855 if (netpoll_receive_skb(skb))
@@ -3050,7 +3070,7 @@ out:
3050 return netif_receive_skb(skb); 3070 return netif_receive_skb(skb);
3051} 3071}
3052 3072
3053static void napi_gro_flush(struct napi_struct *napi) 3073inline void napi_gro_flush(struct napi_struct *napi)
3054{ 3074{
3055 struct sk_buff *skb, *next; 3075 struct sk_buff *skb, *next;
3056 3076
@@ -3063,6 +3083,7 @@ static void napi_gro_flush(struct napi_struct *napi)
3063 napi->gro_count = 0; 3083 napi->gro_count = 0;
3064 napi->gro_list = NULL; 3084 napi->gro_list = NULL;
3065} 3085}
3086EXPORT_SYMBOL(napi_gro_flush);
3066 3087
3067enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3088enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3068{ 3089{
@@ -3077,7 +3098,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3077 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3098 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3078 goto normal; 3099 goto normal;
3079 3100
3080 if (skb_is_gso(skb) || skb_has_frags(skb)) 3101 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3081 goto normal; 3102 goto normal;
3082 3103
3083 rcu_read_lock(); 3104 rcu_read_lock();
@@ -3156,16 +3177,18 @@ normal:
3156} 3177}
3157EXPORT_SYMBOL(dev_gro_receive); 3178EXPORT_SYMBOL(dev_gro_receive);
3158 3179
3159static gro_result_t 3180static inline gro_result_t
3160__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3181__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3161{ 3182{
3162 struct sk_buff *p; 3183 struct sk_buff *p;
3163 3184
3164 for (p = napi->gro_list; p; p = p->next) { 3185 for (p = napi->gro_list; p; p = p->next) {
3165 NAPI_GRO_CB(p)->same_flow = 3186 unsigned long diffs;
3166 (p->dev == skb->dev) && 3187
3167 !compare_ether_header(skb_mac_header(p), 3188 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3189 diffs |= compare_ether_header(skb_mac_header(p),
3168 skb_gro_mac_header(skb)); 3190 skb_gro_mac_header(skb));
3191 NAPI_GRO_CB(p)->same_flow = !diffs;
3169 NAPI_GRO_CB(p)->flush = 0; 3192 NAPI_GRO_CB(p)->flush = 0;
3170 } 3193 }
3171 3194
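
In __napi_gro_receive() the per-flow comparison switches to the "accumulate differences" style: every mismatch (device pointer, Ethernet header) is OR-ed into a single word and same_flow falls out of one test at the end, instead of a chain of conditions. A small user-space analogue (header_diff() is a rough stand-in for compare_ether_header()):

#include <stdio.h>

/* Rough analogue of compare_ether_header(): fold any mismatching byte
 * of two 14-byte Ethernet headers into the returned word. */
static unsigned long header_diff(const unsigned char *a, const unsigned char *b)
{
	unsigned long d = 0;
	int i;

	for (i = 0; i < 14; i++)
		d |= a[i] ^ b[i];
	return d;
}

int main(void)
{
	unsigned char h1[14] = { 0 }, h2[14] = { 0 };
	const void *dev1 = h1, *dev2 = dev1;    /* same "device" */
	unsigned long diffs;

	diffs  = (unsigned long)dev1 ^ (unsigned long)dev2;
	diffs |= header_diff(h1, h2);
	printf("same_flow = %d\n", !diffs);     /* 1: nothing differed */
	return 0;
}
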
@@ -4941,6 +4964,34 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
4941} 4964}
4942EXPORT_SYMBOL(netif_stacked_transfer_operstate); 4965EXPORT_SYMBOL(netif_stacked_transfer_operstate);
4943 4966
4967static int netif_alloc_rx_queues(struct net_device *dev)
4968{
4969#ifdef CONFIG_RPS
4970 unsigned int i, count = dev->num_rx_queues;
4971
4972 if (count) {
4973 struct netdev_rx_queue *rx;
4974
4975 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
4976 if (!rx) {
4977 pr_err("netdev: Unable to allocate %u rx queues.\n",
4978 count);
4979 return -ENOMEM;
4980 }
4981 dev->_rx = rx;
4982 atomic_set(&rx->count, count);
4983
4984 /*
4985 * Set a pointer to first element in the array which holds the
4986 * reference count.
4987 */
4988 for (i = 0; i < count; i++)
4989 rx[i].first = rx;
4990 }
4991#endif
4992 return 0;
4993}
4994
4944/** 4995/**
4945 * register_netdevice - register a network device 4996 * register_netdevice - register a network device
4946 * @dev: device to register 4997 * @dev: device to register
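
netif_alloc_rx_queues() above moves the RX queue allocation out of alloc_netdev_mq() and into register_netdevice(), keeping the existing convention that element 0 of the array carries the shared count while every element's ->first pointer leads back to it. A simplified sketch of that array-plus-back-pointer pattern (plain calloc() and a hypothetical struct, not the kernel types):

#include <stdlib.h>

struct rx_queue {
	struct rx_queue *first;   /* points at element 0 of the array */
	int count;                /* meaningful only in element 0 */
};

static struct rx_queue *alloc_rx_queues(unsigned int n)
{
	struct rx_queue *rx = calloc(n, sizeof(*rx));
	unsigned int i;

	if (!rx)
		return NULL;
	rx[0].count = n;
	for (i = 0; i < n; i++)
		rx[i].first = rx;     /* any element can find the shared count */
	return rx;
}

int main(void)
{
	struct rx_queue *rx = alloc_rx_queues(4);

	if (!rx)
		return 1;
	/* rx[3].first->count == 4 */
	free(rx);
	return 0;
}
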
@@ -4978,24 +5029,10 @@ int register_netdevice(struct net_device *dev)
4978 5029
4979 dev->iflink = -1; 5030 dev->iflink = -1;
4980 5031
4981#ifdef CONFIG_RPS 5032 ret = netif_alloc_rx_queues(dev);
4982 if (!dev->num_rx_queues) { 5033 if (ret)
4983 /* 5034 goto out;
4984 * Allocate a single RX queue if driver never called
4985 * alloc_netdev_mq
4986 */
4987
4988 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4989 if (!dev->_rx) {
4990 ret = -ENOMEM;
4991 goto out;
4992 }
4993 5035
4994 dev->_rx->first = dev->_rx;
4995 atomic_set(&dev->_rx->count, 1);
4996 dev->num_rx_queues = 1;
4997 }
4998#endif
4999 /* Init, if this function is available */ 5036 /* Init, if this function is available */
5000 if (dev->netdev_ops->ndo_init) { 5037 if (dev->netdev_ops->ndo_init) {
5001 ret = dev->netdev_ops->ndo_init(dev); 5038 ret = dev->netdev_ops->ndo_init(dev);
@@ -5035,6 +5072,12 @@ int register_netdevice(struct net_device *dev)
5035 if (dev->features & NETIF_F_SG) 5072 if (dev->features & NETIF_F_SG)
5036 dev->features |= NETIF_F_GSO; 5073 dev->features |= NETIF_F_GSO;
5037 5074
5075 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5076 * vlan_dev_init() will do the dev->features check, so these features
5077 * are enabled only if supported by underlying device.
5078 */
5079 dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
5080
5038 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5081 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5039 ret = notifier_to_errno(ret); 5082 ret = notifier_to_errno(ret);
5040 if (ret) 5083 if (ret)
@@ -5264,7 +5307,7 @@ void netdev_run_todo(void)
5264 5307
5265 /* paranoia */ 5308 /* paranoia */
5266 BUG_ON(atomic_read(&dev->refcnt)); 5309 BUG_ON(atomic_read(&dev->refcnt));
5267 WARN_ON(dev->ip_ptr); 5310 WARN_ON(rcu_dereference_raw(dev->ip_ptr));
5268 WARN_ON(dev->ip6_ptr); 5311 WARN_ON(dev->ip6_ptr);
5269 WARN_ON(dev->dn_ptr); 5312 WARN_ON(dev->dn_ptr);
5270 5313
@@ -5386,10 +5429,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5386 struct net_device *dev; 5429 struct net_device *dev;
5387 size_t alloc_size; 5430 size_t alloc_size;
5388 struct net_device *p; 5431 struct net_device *p;
5389#ifdef CONFIG_RPS
5390 struct netdev_rx_queue *rx;
5391 int i;
5392#endif
5393 5432
5394 BUG_ON(strlen(name) >= sizeof(dev->name)); 5433 BUG_ON(strlen(name) >= sizeof(dev->name));
5395 5434
@@ -5415,29 +5454,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5415 goto free_p; 5454 goto free_p;
5416 } 5455 }
5417 5456
5418#ifdef CONFIG_RPS
5419 rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5420 if (!rx) {
5421 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5422 "rx queues.\n");
5423 goto free_tx;
5424 }
5425
5426 atomic_set(&rx->count, queue_count);
5427
5428 /*
5429 * Set a pointer to first element in the array which holds the
5430 * reference count.
5431 */
5432 for (i = 0; i < queue_count; i++)
5433 rx[i].first = rx;
5434#endif
5435 5457
5436 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5458 dev = PTR_ALIGN(p, NETDEV_ALIGN);
5437 dev->padded = (char *)dev - (char *)p; 5459 dev->padded = (char *)dev - (char *)p;
5438 5460
5439 if (dev_addr_init(dev)) 5461 if (dev_addr_init(dev))
5440 goto free_rx; 5462 goto free_tx;
5441 5463
5442 dev_mc_init(dev); 5464 dev_mc_init(dev);
5443 dev_uc_init(dev); 5465 dev_uc_init(dev);
@@ -5449,7 +5471,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5449 dev->real_num_tx_queues = queue_count; 5471 dev->real_num_tx_queues = queue_count;
5450 5472
5451#ifdef CONFIG_RPS 5473#ifdef CONFIG_RPS
5452 dev->_rx = rx;
5453 dev->num_rx_queues = queue_count; 5474 dev->num_rx_queues = queue_count;
5454#endif 5475#endif
5455 5476
@@ -5467,11 +5488,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5467 strcpy(dev->name, name); 5488 strcpy(dev->name, name);
5468 return dev; 5489 return dev;
5469 5490
5470free_rx:
5471#ifdef CONFIG_RPS
5472 kfree(rx);
5473free_tx: 5491free_tx:
5474#endif
5475 kfree(tx); 5492 kfree(tx);
5476free_p: 5493free_p:
5477 kfree(p); 5494 kfree(p);
@@ -5658,6 +5675,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5658 5675
5659 /* Notify protocols, that we are about to destroy 5676 /* Notify protocols, that we are about to destroy
5660 this device. They should clean all the things. 5677 this device. They should clean all the things.
5678
5679 Note that dev->reg_state stays at NETREG_REGISTERED.
5680 This is wanted because this way 8021q and macvlan know
5681 the device is just moving and can keep their slaves up.
5661 */ 5682 */
5662 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5683 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5663 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); 5684 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 7a85367b3c2..7d7e572cedc 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -19,6 +19,7 @@
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/vmalloc.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23 24
24/* 25/*
@@ -205,18 +206,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
205 struct ethtool_drvinfo info; 206 struct ethtool_drvinfo info;
206 const struct ethtool_ops *ops = dev->ethtool_ops; 207 const struct ethtool_ops *ops = dev->ethtool_ops;
207 208
208 if (!ops->get_drvinfo)
209 return -EOPNOTSUPP;
210
211 memset(&info, 0, sizeof(info)); 209 memset(&info, 0, sizeof(info));
212 info.cmd = ETHTOOL_GDRVINFO; 210 info.cmd = ETHTOOL_GDRVINFO;
213 ops->get_drvinfo(dev, &info); 211 if (ops && ops->get_drvinfo) {
212 ops->get_drvinfo(dev, &info);
213 } else if (dev->dev.parent && dev->dev.parent->driver) {
214 strlcpy(info.bus_info, dev_name(dev->dev.parent),
215 sizeof(info.bus_info));
216 strlcpy(info.driver, dev->dev.parent->driver->name,
217 sizeof(info.driver));
218 } else {
219 return -EOPNOTSUPP;
220 }
214 221
215 /* 222 /*
216 * this method of obtaining string set info is deprecated; 223 * this method of obtaining string set info is deprecated;
217 * Use ETHTOOL_GSSET_INFO instead. 224 * Use ETHTOOL_GSSET_INFO instead.
218 */ 225 */
219 if (ops->get_sset_count) { 226 if (ops && ops->get_sset_count) {
220 int rc; 227 int rc;
221 228
222 rc = ops->get_sset_count(dev, ETH_SS_TEST); 229 rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -229,9 +236,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
229 if (rc >= 0) 236 if (rc >= 0)
230 info.n_priv_flags = rc; 237 info.n_priv_flags = rc;
231 } 238 }
232 if (ops->get_regs_len) 239 if (ops && ops->get_regs_len)
233 info.regdump_len = ops->get_regs_len(dev); 240 info.regdump_len = ops->get_regs_len(dev);
234 if (ops->get_eeprom_len) 241 if (ops && ops->get_eeprom_len)
235 info.eedump_len = ops->get_eeprom_len(dev); 242 info.eedump_len = ops->get_eeprom_len(dev);
236 243
237 if (copy_to_user(useraddr, &info, sizeof(info))) 244 if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -479,6 +486,38 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
479 list->count++; 486 list->count++;
480} 487}
481 488
489/*
490 * ethtool does not (or did not) set masks for flow parameters that are
491 * not specified, so if both value and mask are 0 then this must be
492 * treated as equivalent to a mask with all bits set. Implement that
493 * here rather than in drivers.
494 */
495static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
496{
497 struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
498 struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
499
500 if (fs->flow_type != TCP_V4_FLOW &&
501 fs->flow_type != UDP_V4_FLOW &&
502 fs->flow_type != SCTP_V4_FLOW)
503 return;
504
505 if (!(entry->ip4src | mask->ip4src))
506 mask->ip4src = htonl(0xffffffff);
507 if (!(entry->ip4dst | mask->ip4dst))
508 mask->ip4dst = htonl(0xffffffff);
509 if (!(entry->psrc | mask->psrc))
510 mask->psrc = htons(0xffff);
511 if (!(entry->pdst | mask->pdst))
512 mask->pdst = htons(0xffff);
513 if (!(entry->tos | mask->tos))
514 mask->tos = 0xff;
515 if (!(fs->vlan_tag | fs->vlan_tag_mask))
516 fs->vlan_tag_mask = 0xffff;
517 if (!(fs->data | fs->data_mask))
518 fs->data_mask = 0xffffffffffffffffULL;
519}
520
482static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, 521static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
483 void __user *useraddr) 522 void __user *useraddr)
484{ 523{
@@ -493,6 +532,8 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
493 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 532 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
494 return -EFAULT; 533 return -EFAULT;
495 534
535 rx_ntuple_fix_masks(&cmd.fs);
536
496 /* 537 /*
497 * Cache filter in dev struct for GET operation only if 538 * Cache filter in dev struct for GET operation only if
498 * the underlying driver doesn't have its own GET operation, and 539 * the underlying driver doesn't have its own GET operation, and
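The mask fix-up above gives drivers one convention to honour: a field whose value and mask are both zero is a wildcard, expressed as an all-ones mask. A hedged stand-alone restatement of what a driver may then assume (field_matches() is a hypothetical helper, assuming the ntuple convention that set mask bits are don't-care bits):

#include <linux/types.h>

/* With rx_ntuple_fix_masks() applied, only the bits clear in the mask need to
 * match; an all-ones mask therefore matches any packet, which is exactly what
 * an unspecified field now turns into. */
static bool field_matches(__be32 pkt_val, __be32 spec_val, __be32 dont_care)
{
	return ((pkt_val ^ spec_val) & ~dont_care) == 0;
}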
@@ -667,19 +708,19 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
667 break; 708 break;
668 case IP_USER_FLOW: 709 case IP_USER_FLOW:
669 sprintf(p, "\tSrc IP addr: 0x%x\n", 710 sprintf(p, "\tSrc IP addr: 0x%x\n",
670 fsc->fs.h_u.raw_ip4_spec.ip4src); 711 fsc->fs.h_u.usr_ip4_spec.ip4src);
671 p += ETH_GSTRING_LEN; 712 p += ETH_GSTRING_LEN;
672 num_strings++; 713 num_strings++;
673 sprintf(p, "\tSrc IP mask: 0x%x\n", 714 sprintf(p, "\tSrc IP mask: 0x%x\n",
674 fsc->fs.m_u.raw_ip4_spec.ip4src); 715 fsc->fs.m_u.usr_ip4_spec.ip4src);
675 p += ETH_GSTRING_LEN; 716 p += ETH_GSTRING_LEN;
676 num_strings++; 717 num_strings++;
677 sprintf(p, "\tDest IP addr: 0x%x\n", 718 sprintf(p, "\tDest IP addr: 0x%x\n",
678 fsc->fs.h_u.raw_ip4_spec.ip4dst); 719 fsc->fs.h_u.usr_ip4_spec.ip4dst);
679 p += ETH_GSTRING_LEN; 720 p += ETH_GSTRING_LEN;
680 num_strings++; 721 num_strings++;
681 sprintf(p, "\tDest IP mask: 0x%x\n", 722 sprintf(p, "\tDest IP mask: 0x%x\n",
682 fsc->fs.m_u.raw_ip4_spec.ip4dst); 723 fsc->fs.m_u.usr_ip4_spec.ip4dst);
683 p += ETH_GSTRING_LEN; 724 p += ETH_GSTRING_LEN;
684 num_strings++; 725 num_strings++;
685 break; 726 break;
@@ -775,7 +816,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
775 if (regs.len > reglen) 816 if (regs.len > reglen)
776 regs.len = reglen; 817 regs.len = reglen;
777 818
778 regbuf = kmalloc(reglen, GFP_USER); 819 regbuf = vmalloc(reglen);
779 if (!regbuf) 820 if (!regbuf)
780 return -ENOMEM; 821 return -ENOMEM;
781 822
@@ -790,7 +831,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
790 ret = 0; 831 ret = 0;
791 832
792 out: 833 out:
793 kfree(regbuf); 834 vfree(regbuf);
794 return ret; 835 return ret;
795} 836}
796 837
@@ -1175,8 +1216,11 @@ static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
1175 return -EFAULT; 1216 return -EFAULT;
1176 1217
1177 if (edata.data) { 1218 if (edata.data) {
1178 if (!dev->ethtool_ops->get_rx_csum || 1219 u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
1179 !dev->ethtool_ops->get_rx_csum(dev)) 1220 dev->ethtool_ops->get_rx_csum(dev) :
1221 ethtool_op_get_rx_csum(dev);
1222
1223 if (!rxcsum)
1180 return -EINVAL; 1224 return -EINVAL;
1181 dev->features |= NETIF_F_GRO; 1225 dev->features |= NETIF_F_GRO;
1182 } else 1226 } else
@@ -1402,14 +1446,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1402 if (!dev || !netif_device_present(dev)) 1446 if (!dev || !netif_device_present(dev))
1403 return -ENODEV; 1447 return -ENODEV;
1404 1448
1405 if (!dev->ethtool_ops)
1406 return -EOPNOTSUPP;
1407
1408 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) 1449 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1409 return -EFAULT; 1450 return -EFAULT;
1410 1451
1452 if (!dev->ethtool_ops) {
1453 /* ETHTOOL_GDRVINFO does not require any driver support.
1454 * It is also unprivileged and does not change anything,
1455 * so we can take a shortcut to it. */
1456 if (ethcmd == ETHTOOL_GDRVINFO)
1457 return ethtool_get_drvinfo(dev, useraddr);
1458 else
1459 return -EOPNOTSUPP;
1460 }
1461
1411 /* Allow some commands to be done by anyone */ 1462 /* Allow some commands to be done by anyone */
1412 switch (ethcmd) { 1463 switch (ethcmd) {
1464 case ETHTOOL_GSET:
1413 case ETHTOOL_GDRVINFO: 1465 case ETHTOOL_GDRVINFO:
1414 case ETHTOOL_GMSGLVL: 1466 case ETHTOOL_GMSGLVL:
1415 case ETHTOOL_GCOALESCE: 1467 case ETHTOOL_GCOALESCE:
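With the shortcut above, ETHTOOL_GDRVINFO now answers even for devices whose driver registers no ethtool_ops, falling back to the parent device's driver name and bus id. A small user-space sketch of the ioctl being served (standard SIOCETHTOOL usage; the program itself is only illustrative):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
	struct ethtool_drvinfo info;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&info, 0, sizeof(info));
	info.cmd = ETHTOOL_GDRVINFO;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, argc > 1 ? argv[1] : "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&info;

	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("driver=%s version=%s bus=%s\n",
		       info.driver, info.version, info.bus_info);
	else
		perror("SIOCETHTOOL");
	close(fd);
	return 0;
}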
diff --git a/net/core/flow.c b/net/core/flow.c
index f67dcbfe54e..127c8a7ffd6 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -53,8 +53,7 @@ struct flow_flush_info {
53 53
54struct flow_cache { 54struct flow_cache {
55 u32 hash_shift; 55 u32 hash_shift;
56 unsigned long order; 56 struct flow_cache_percpu __percpu *percpu;
57 struct flow_cache_percpu *percpu;
58 struct notifier_block hotcpu_notifier; 57 struct notifier_block hotcpu_notifier;
59 int low_watermark; 58 int low_watermark;
60 int high_watermark; 59 int high_watermark;
@@ -64,7 +63,7 @@ struct flow_cache {
64atomic_t flow_cache_genid = ATOMIC_INIT(0); 63atomic_t flow_cache_genid = ATOMIC_INIT(0);
65EXPORT_SYMBOL(flow_cache_genid); 64EXPORT_SYMBOL(flow_cache_genid);
66static struct flow_cache flow_cache_global; 65static struct flow_cache flow_cache_global;
67static struct kmem_cache *flow_cachep; 66static struct kmem_cache *flow_cachep __read_mostly;
68 67
69static DEFINE_SPINLOCK(flow_cache_gc_lock); 68static DEFINE_SPINLOCK(flow_cache_gc_lock);
70static LIST_HEAD(flow_cache_gc_list); 69static LIST_HEAD(flow_cache_gc_list);
@@ -177,15 +176,11 @@ static u32 flow_hash_code(struct flow_cache *fc,
177{ 176{
178 u32 *k = (u32 *) key; 177 u32 *k = (u32 *) key;
179 178
180 return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) 179 return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
181 & (flow_cache_hash_size(fc) - 1)); 180 & (flow_cache_hash_size(fc) - 1);
182} 181}
183 182
184#if (BITS_PER_LONG == 64) 183typedef unsigned long flow_compare_t;
185typedef u64 flow_compare_t;
186#else
187typedef u32 flow_compare_t;
188#endif
189 184
190/* I hear what you're saying, use memcmp. But memcmp cannot make 185/* I hear what you're saying, use memcmp. But memcmp cannot make
191 * important assumptions that we can here, such as alignment and 186 * important assumptions that we can here, such as alignment and
@@ -357,62 +352,73 @@ void flow_cache_flush(void)
357 put_online_cpus(); 352 put_online_cpus();
358} 353}
359 354
360static void __init flow_cache_cpu_prepare(struct flow_cache *fc, 355static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
361 struct flow_cache_percpu *fcp)
362{ 356{
363 fcp->hash_table = (struct hlist_head *) 357 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
364 __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); 358 size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
365 if (!fcp->hash_table)
366 panic("NET: failed to allocate flow cache order %lu\n", fc->order);
367 359
368 fcp->hash_rnd_recalc = 1; 360 if (!fcp->hash_table) {
369 fcp->hash_count = 0; 361 fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
370 tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); 362 if (!fcp->hash_table) {
363 pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
364 return -ENOMEM;
365 }
366 fcp->hash_rnd_recalc = 1;
367 fcp->hash_count = 0;
368 tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
369 }
370 return 0;
371} 371}
372 372
373static int flow_cache_cpu(struct notifier_block *nfb, 373static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
374 unsigned long action, 374 unsigned long action,
375 void *hcpu) 375 void *hcpu)
376{ 376{
377 struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); 377 struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
378 int cpu = (unsigned long) hcpu; 378 int res, cpu = (unsigned long) hcpu;
379 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); 379 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
380 380
381 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) 381 switch (action) {
382 case CPU_UP_PREPARE:
383 case CPU_UP_PREPARE_FROZEN:
384 res = flow_cache_cpu_prepare(fc, cpu);
385 if (res)
386 return notifier_from_errno(res);
387 break;
388 case CPU_DEAD:
389 case CPU_DEAD_FROZEN:
382 __flow_cache_shrink(fc, fcp, 0); 390 __flow_cache_shrink(fc, fcp, 0);
391 break;
392 }
383 return NOTIFY_OK; 393 return NOTIFY_OK;
384} 394}
385 395
386static int flow_cache_init(struct flow_cache *fc) 396static int __init flow_cache_init(struct flow_cache *fc)
387{ 397{
388 unsigned long order;
389 int i; 398 int i;
390 399
391 fc->hash_shift = 10; 400 fc->hash_shift = 10;
392 fc->low_watermark = 2 * flow_cache_hash_size(fc); 401 fc->low_watermark = 2 * flow_cache_hash_size(fc);
393 fc->high_watermark = 4 * flow_cache_hash_size(fc); 402 fc->high_watermark = 4 * flow_cache_hash_size(fc);
394 403
395 for (order = 0;
396 (PAGE_SIZE << order) <
397 (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
398 order++)
399 /* NOTHING */;
400 fc->order = order;
401 fc->percpu = alloc_percpu(struct flow_cache_percpu); 404 fc->percpu = alloc_percpu(struct flow_cache_percpu);
405 if (!fc->percpu)
406 return -ENOMEM;
402 407
403 setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, 408 for_each_online_cpu(i) {
404 (unsigned long) fc); 409 if (flow_cache_cpu_prepare(fc, i))
405 fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; 410 return -ENOMEM;
406 add_timer(&fc->rnd_timer); 411 }
407
408 for_each_possible_cpu(i)
409 flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
410
411 fc->hotcpu_notifier = (struct notifier_block){ 412 fc->hotcpu_notifier = (struct notifier_block){
412 .notifier_call = flow_cache_cpu, 413 .notifier_call = flow_cache_cpu,
413 }; 414 };
414 register_hotcpu_notifier(&fc->hotcpu_notifier); 415 register_hotcpu_notifier(&fc->hotcpu_notifier);
415 416
417 setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
418 (unsigned long) fc);
419 fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
420 add_timer(&fc->rnd_timer);
421
416 return 0; 422 return 0;
417} 423}
418 424
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6743146e4d6..7c2373321b7 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -274,9 +274,9 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
274 while ((e = gen_find_node(bstats, rate_est))) { 274 while ((e = gen_find_node(bstats, rate_est))) {
275 rb_erase(&e->node, &est_root); 275 rb_erase(&e->node, &est_root);
276 276
277 write_lock_bh(&est_lock); 277 write_lock(&est_lock);
278 e->bstats = NULL; 278 e->bstats = NULL;
279 write_unlock_bh(&est_lock); 279 write_unlock(&est_lock);
280 280
281 list_del_rcu(&e->list); 281 list_del_rcu(&e->list);
282 call_rcu(&e->e_rcu, __gen_kill_estimator); 282 call_rcu(&e->e_rcu, __gen_kill_estimator);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 1cd98df412d..f4657c2127b 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -41,7 +41,9 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
41 41
42 if (m->msg_namelen) { 42 if (m->msg_namelen) {
43 if (mode == VERIFY_READ) { 43 if (mode == VERIFY_READ) {
44 err = move_addr_to_kernel(m->msg_name, m->msg_namelen, 44 void __user *namep;
45 namep = (void __user __force *) m->msg_name;
46 err = move_addr_to_kernel(namep, m->msg_namelen,
45 address); 47 address);
46 if (err < 0) 48 if (err < 0)
47 return err; 49 return err;
@@ -52,7 +54,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
52 } 54 }
53 55
54 size = m->msg_iovlen * sizeof(struct iovec); 56 size = m->msg_iovlen * sizeof(struct iovec);
55 if (copy_from_user(iov, m->msg_iov, size)) 57 if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
56 return -EFAULT; 58 return -EFAULT;
57 59
58 m->msg_iov = iov; 60 m->msg_iov = iov;
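The casts added here only affect sparse: struct msghdr stores user-space addresses in plain kernel pointers, so converting them back to __user pointers needs __force to avoid an address-space warning; no code is generated. A hedged stand-alone illustration (copy_name() is a made-up helper):

#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

static int copy_name(void *kbuf, const void *stored_user_ptr, size_t len)
{
	/* the stored pointer really holds a user address */
	const void __user *uptr = (const void __user __force *)stored_user_ptr;

	return copy_from_user(kbuf, uptr, len) ? -EFAULT : 0;
}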
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a4e0a7482c2..96b1a749abb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
122 122
123unsigned long neigh_rand_reach_time(unsigned long base) 123unsigned long neigh_rand_reach_time(unsigned long base)
124{ 124{
125 return (base ? (net_random() % base) + (base >> 1) : 0); 125 return base ? (net_random() % base) + (base >> 1) : 0;
126} 126}
127EXPORT_SYMBOL(neigh_rand_reach_time); 127EXPORT_SYMBOL(neigh_rand_reach_time);
128 128
@@ -766,9 +766,9 @@ next_elt:
766static __inline__ int neigh_max_probes(struct neighbour *n) 766static __inline__ int neigh_max_probes(struct neighbour *n)
767{ 767{
768 struct neigh_parms *p = n->parms; 768 struct neigh_parms *p = n->parms;
769 return (n->nud_state & NUD_PROBE ? 769 return (n->nud_state & NUD_PROBE) ?
770 p->ucast_probes : 770 p->ucast_probes :
771 p->ucast_probes + p->app_probes + p->mcast_probes); 771 p->ucast_probes + p->app_probes + p->mcast_probes;
772} 772}
773 773
774static void neigh_invalidate(struct neighbour *neigh) 774static void neigh_invalidate(struct neighbour *neigh)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index af4dfbadf2a..76485a3f910 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -515,7 +515,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
515 return attribute->store(queue, attribute, buf, count); 515 return attribute->store(queue, attribute, buf, count);
516} 516}
517 517
518static struct sysfs_ops rx_queue_sysfs_ops = { 518static const struct sysfs_ops rx_queue_sysfs_ops = {
519 .show = rx_queue_attr_show, 519 .show = rx_queue_attr_show,
520 .store = rx_queue_attr_store, 520 .store = rx_queue_attr_store,
521}; 521};
@@ -789,12 +789,13 @@ static const void *net_netlink_ns(struct sock *sk)
789 return sock_net(sk); 789 return sock_net(sk);
790} 790}
791 791
792static struct kobj_ns_type_operations net_ns_type_operations = { 792struct kobj_ns_type_operations net_ns_type_operations = {
793 .type = KOBJ_NS_TYPE_NET, 793 .type = KOBJ_NS_TYPE_NET,
794 .current_ns = net_current_ns, 794 .current_ns = net_current_ns,
795 .netlink_ns = net_netlink_ns, 795 .netlink_ns = net_netlink_ns,
796 .initial_ns = net_initial_ns, 796 .initial_ns = net_initial_ns,
797}; 797};
798EXPORT_SYMBOL_GPL(net_ns_type_operations);
798 799
799static void net_kobj_ns_exit(struct net *net) 800static void net_kobj_ns_exit(struct net *net)
800{ 801{
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 10a1ea72010..2c0df0f95b3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -729,16 +729,14 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen,
729 *num = 0; 729 *num = 0;
730 730
731 for (; i < maxlen; i++) { 731 for (; i < maxlen; i++) {
732 int value;
732 char c; 733 char c;
733 *num <<= 4; 734 *num <<= 4;
734 if (get_user(c, &user_buffer[i])) 735 if (get_user(c, &user_buffer[i]))
735 return -EFAULT; 736 return -EFAULT;
736 if ((c >= '0') && (c <= '9')) 737 value = hex_to_bin(c);
737 *num |= c - '0'; 738 if (value >= 0)
738 else if ((c >= 'a') && (c <= 'f')) 739 *num |= value;
739 *num |= c - 'a' + 10;
740 else if ((c >= 'A') && (c <= 'F'))
741 *num |= c - 'A' + 10;
742 else 740 else
743 break; 741 break;
744 } 742 }
@@ -3907,8 +3905,6 @@ static void __exit pg_cleanup(void)
3907{ 3905{
3908 struct pktgen_thread *t; 3906 struct pktgen_thread *t;
3909 struct list_head *q, *n; 3907 struct list_head *q, *n;
3910 wait_queue_head_t queue;
3911 init_waitqueue_head(&queue);
3912 3908
3913 /* Stop all interfaces & threads */ 3909 /* Stop all interfaces & threads */
3914 3910
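hex_to_bin() (from <linux/kernel.h>) returns the numeric value of a hex digit, or a negative result for anything else, which is what lets the three explicit character-range checks in hex32_arg() collapse into one call. A small hedged sketch of the same idea (parse_hex_byte() is illustrative):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

static int parse_hex_byte(const char s[2], u8 *out)
{
	int hi = hex_to_bin(s[0]);
	int lo = hex_to_bin(s[1]);

	if (hi < 0 || lo < 0)		/* not a hex digit */
		return -EINVAL;
	*out = (hi << 4) | lo;
	return 0;
}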
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f78d821bd93..b2a718dfd72 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -612,36 +612,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
612 612
613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) 613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
614{ 614{
615 struct rtnl_link_stats64 a; 615 memcpy(v, b, sizeof(*b));
616
617 a.rx_packets = b->rx_packets;
618 a.tx_packets = b->tx_packets;
619 a.rx_bytes = b->rx_bytes;
620 a.tx_bytes = b->tx_bytes;
621 a.rx_errors = b->rx_errors;
622 a.tx_errors = b->tx_errors;
623 a.rx_dropped = b->rx_dropped;
624 a.tx_dropped = b->tx_dropped;
625
626 a.multicast = b->multicast;
627 a.collisions = b->collisions;
628
629 a.rx_length_errors = b->rx_length_errors;
630 a.rx_over_errors = b->rx_over_errors;
631 a.rx_crc_errors = b->rx_crc_errors;
632 a.rx_frame_errors = b->rx_frame_errors;
633 a.rx_fifo_errors = b->rx_fifo_errors;
634 a.rx_missed_errors = b->rx_missed_errors;
635
636 a.tx_aborted_errors = b->tx_aborted_errors;
637 a.tx_carrier_errors = b->tx_carrier_errors;
638 a.tx_fifo_errors = b->tx_fifo_errors;
639 a.tx_heartbeat_errors = b->tx_heartbeat_errors;
640 a.tx_window_errors = b->tx_window_errors;
641
642 a.rx_compressed = b->rx_compressed;
643 a.tx_compressed = b->tx_compressed;
644 memcpy(v, &a, sizeof(a));
645} 616}
646 617
647/* All VF info */ 618/* All VF info */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c83b421341c..752c1972b3a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -202,8 +202,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
202 skb->data = data; 202 skb->data = data;
203 skb_reset_tail_pointer(skb); 203 skb_reset_tail_pointer(skb);
204 skb->end = skb->tail + size; 204 skb->end = skb->tail + size;
205 kmemcheck_annotate_bitfield(skb, flags1);
206 kmemcheck_annotate_bitfield(skb, flags2);
207#ifdef NET_SKBUFF_DATA_USES_OFFSET 205#ifdef NET_SKBUFF_DATA_USES_OFFSET
208 skb->mac_header = ~0U; 206 skb->mac_header = ~0U;
209#endif 207#endif
@@ -340,7 +338,7 @@ static void skb_release_data(struct sk_buff *skb)
340 put_page(skb_shinfo(skb)->frags[i].page); 338 put_page(skb_shinfo(skb)->frags[i].page);
341 } 339 }
342 340
343 if (skb_has_frags(skb)) 341 if (skb_has_frag_list(skb))
344 skb_drop_fraglist(skb); 342 skb_drop_fraglist(skb);
345 343
346 kfree(skb->head); 344 kfree(skb->head);
@@ -685,16 +683,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
685 683
686struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) 684struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
687{ 685{
688 int headerlen = skb->data - skb->head; 686 int headerlen = skb_headroom(skb);
689 /* 687 unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
690 * Allocate the copy buffer 688 struct sk_buff *n = alloc_skb(size, gfp_mask);
691 */ 689
692 struct sk_buff *n;
693#ifdef NET_SKBUFF_DATA_USES_OFFSET
694 n = alloc_skb(skb->end + skb->data_len, gfp_mask);
695#else
696 n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
697#endif
698 if (!n) 690 if (!n)
699 return NULL; 691 return NULL;
700 692
@@ -726,20 +718,14 @@ EXPORT_SYMBOL(skb_copy);
726 718
727struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) 719struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
728{ 720{
729 /* 721 unsigned int size = skb_end_pointer(skb) - skb->head;
730 * Allocate the copy buffer 722 struct sk_buff *n = alloc_skb(size, gfp_mask);
731 */ 723
732 struct sk_buff *n;
733#ifdef NET_SKBUFF_DATA_USES_OFFSET
734 n = alloc_skb(skb->end, gfp_mask);
735#else
736 n = alloc_skb(skb->end - skb->head, gfp_mask);
737#endif
738 if (!n) 724 if (!n)
739 goto out; 725 goto out;
740 726
741 /* Set the data pointer */ 727 /* Set the data pointer */
742 skb_reserve(n, skb->data - skb->head); 728 skb_reserve(n, skb_headroom(skb));
743 /* Set the tail pointer and length */ 729 /* Set the tail pointer and length */
744 skb_put(n, skb_headlen(skb)); 730 skb_put(n, skb_headlen(skb));
745 /* Copy the bytes */ 731 /* Copy the bytes */
@@ -759,7 +745,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
759 skb_shinfo(n)->nr_frags = i; 745 skb_shinfo(n)->nr_frags = i;
760 } 746 }
761 747
762 if (skb_has_frags(skb)) { 748 if (skb_has_frag_list(skb)) {
763 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 749 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
764 skb_clone_fraglist(n); 750 skb_clone_fraglist(n);
765 } 751 }
@@ -791,12 +777,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
791{ 777{
792 int i; 778 int i;
793 u8 *data; 779 u8 *data;
794#ifdef NET_SKBUFF_DATA_USES_OFFSET 780 int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
795 int size = nhead + skb->end + ntail;
796#else
797 int size = nhead + (skb->end - skb->head) + ntail;
798#endif
799 long off; 781 long off;
782 bool fastpath;
800 783
801 BUG_ON(nhead < 0); 784 BUG_ON(nhead < 0);
802 785
@@ -810,23 +793,36 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
810 goto nodata; 793 goto nodata;
811 794
812 /* Copy only real data... and, alas, header. This should be 795 /* Copy only real data... and, alas, header. This should be
813 * optimized for the cases when header is void. */ 796 * optimized for the cases when header is void.
814#ifdef NET_SKBUFF_DATA_USES_OFFSET 797 */
815 memcpy(data + nhead, skb->head, skb->tail); 798 memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
816#else 799
817 memcpy(data + nhead, skb->head, skb->tail - skb->head); 800 memcpy((struct skb_shared_info *)(data + size),
818#endif 801 skb_shinfo(skb),
819 memcpy(data + size, skb_end_pointer(skb),
820 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); 802 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
821 803
822 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 804 /* Check if we can avoid taking references on fragments if we own
823 get_page(skb_shinfo(skb)->frags[i].page); 805 * the last reference on skb->head. (see skb_release_data())
806 */
807 if (!skb->cloned)
808 fastpath = true;
809 else {
810 int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
824 811
825 if (skb_has_frags(skb)) 812 fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
826 skb_clone_fraglist(skb); 813 }
827 814
828 skb_release_data(skb); 815 if (fastpath) {
816 kfree(skb->head);
817 } else {
818 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
819 get_page(skb_shinfo(skb)->frags[i].page);
829 820
821 if (skb_has_frag_list(skb))
822 skb_clone_fraglist(skb);
823
824 skb_release_data(skb);
825 }
830 off = (data + nhead) - skb->head; 826 off = (data + nhead) - skb->head;
831 827
832 skb->head = data; 828 skb->head = data;
@@ -1099,7 +1095,7 @@ drop_pages:
1099 for (; i < nfrags; i++) 1095 for (; i < nfrags; i++)
1100 put_page(skb_shinfo(skb)->frags[i].page); 1096 put_page(skb_shinfo(skb)->frags[i].page);
1101 1097
1102 if (skb_has_frags(skb)) 1098 if (skb_has_frag_list(skb))
1103 skb_drop_fraglist(skb); 1099 skb_drop_fraglist(skb);
1104 goto done; 1100 goto done;
1105 } 1101 }
@@ -1194,7 +1190,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1194 /* Optimization: no fragments, no reasons to preestimate 1190 /* Optimization: no fragments, no reasons to preestimate
1195 * size of pulled pages. Superb. 1191 * size of pulled pages. Superb.
1196 */ 1192 */
1197 if (!skb_has_frags(skb)) 1193 if (!skb_has_frag_list(skb))
1198 goto pull_pages; 1194 goto pull_pages;
1199 1195
1200 /* Estimate size of pulled pages. */ 1196 /* Estimate size of pulled pages. */
@@ -2323,7 +2319,7 @@ next_skb:
2323 st->frag_data = NULL; 2319 st->frag_data = NULL;
2324 } 2320 }
2325 2321
2326 if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { 2322 if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
2327 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 2323 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2328 st->frag_idx = 0; 2324 st->frag_idx = 0;
2329 goto next_skb; 2325 goto next_skb;
@@ -2893,7 +2889,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2893 return -ENOMEM; 2889 return -ENOMEM;
2894 2890
2895 /* Easy case. Most of packets will go this way. */ 2891 /* Easy case. Most of packets will go this way. */
2896 if (!skb_has_frags(skb)) { 2892 if (!skb_has_frag_list(skb)) {
2897 /* A little of trouble, not enough of space for trailer. 2893 /* A little of trouble, not enough of space for trailer.
2898 * This should not happen, when stack is tuned to generate 2894 * This should not happen, when stack is tuned to generate
2899 * good frames. OK, on miss we reallocate and reserve even more 2895 * good frames. OK, on miss we reallocate and reserve even more
@@ -2928,7 +2924,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2928 2924
2929 if (skb1->next == NULL && tailbits) { 2925 if (skb1->next == NULL && tailbits) {
2930 if (skb_shinfo(skb1)->nr_frags || 2926 if (skb_shinfo(skb1)->nr_frags ||
2931 skb_has_frags(skb1) || 2927 skb_has_frag_list(skb1) ||
2932 skb_tailroom(skb1) < tailbits) 2928 skb_tailroom(skb1) < tailbits)
2933 ntail = tailbits + 128; 2929 ntail = tailbits + 128;
2934 } 2930 }
@@ -2937,7 +2933,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2937 skb_cloned(skb1) || 2933 skb_cloned(skb1) ||
2938 ntail || 2934 ntail ||
2939 skb_shinfo(skb1)->nr_frags || 2935 skb_shinfo(skb1)->nr_frags ||
2940 skb_has_frags(skb1)) { 2936 skb_has_frag_list(skb1)) {
2941 struct sk_buff *skb2; 2937 struct sk_buff *skb2;
2942 2938
2943 /* Fuck, we are miserable poor guys... */ 2939 /* Fuck, we are miserable poor guys... */
@@ -3020,7 +3016,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3020 } else { 3016 } else {
3021 /* 3017 /*
3022 * no hardware time stamps available, 3018 * no hardware time stamps available,
3023 * so keep the skb_shared_tx and only 3019 * so keep the shared tx_flags and only
3024 * store software time stamp 3020 * store software time stamp
3025 */ 3021 */
3026 skb->tstamp = ktime_get_real(); 3022 skb->tstamp = ktime_get_real();
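The new pskb_expand_head() fast path avoids re-taking page and fraglist references when this skb holds the only reference to its data, so the old head can simply be freed after copying. The test is on skb_shinfo(skb)->dataref, which packs two counters. A hedged restatement of that test (skb_head_is_exclusive() is an assumed name, not a kernel helper):

#include <linux/skbuff.h>

/* dataref keeps payload users in the low bits and header-only clones above
 * SKB_DATAREF_SHIFT; a count of 1 (or one of each when skb->nohdr is set)
 * means we are the last user of skb->head. */
static bool skb_head_is_exclusive(const struct sk_buff *skb)
{
	int expected;

	if (!skb->cloned)
		return true;
	expected = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
	return atomic_read(&skb_shinfo(skb)->dataref) == expected;
}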
diff --git a/net/core/sock.c b/net/core/sock.c
index ef30e9d286e..42365deeba2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1557,6 +1557,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1557EXPORT_SYMBOL(sock_alloc_send_skb); 1557EXPORT_SYMBOL(sock_alloc_send_skb);
1558 1558
1559static void __lock_sock(struct sock *sk) 1559static void __lock_sock(struct sock *sk)
1560 __releases(&sk->sk_lock.slock)
1561 __acquires(&sk->sk_lock.slock)
1560{ 1562{
1561 DEFINE_WAIT(wait); 1563 DEFINE_WAIT(wait);
1562 1564
@@ -1573,6 +1575,8 @@ static void __lock_sock(struct sock *sk)
1573} 1575}
1574 1576
1575static void __release_sock(struct sock *sk) 1577static void __release_sock(struct sock *sk)
1578 __releases(&sk->sk_lock.slock)
1579 __acquires(&sk->sk_lock.slock)
1576{ 1580{
1577 struct sk_buff *skb = sk->sk_backlog.head; 1581 struct sk_buff *skb = sk->sk_backlog.head;
1578 1582
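__releases()/__acquires() are sparse-only annotations documenting that __lock_sock() and __release_sock() are entered with sk_lock.slock held, drop it while they work, and re-take it before returning, keeping lock-context checking balanced. A minimal hedged example of the pattern (wait_for_thing() is invented):

#include <linux/compiler.h>
#include <linux/spinlock.h>

static void wait_for_thing(spinlock_t *lock)
	__releases(lock)
	__acquires(lock)
{
	spin_unlock(lock);
	/* ... sleep, or do work that must not run under the lock ... */
	spin_lock(lock);
}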
diff --git a/net/core/utils.c b/net/core/utils.c
index f4185447053..5fea0ab2190 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -75,7 +75,7 @@ __be32 in_aton(const char *str)
75 str++; 75 str++;
76 } 76 }
77 } 77 }
78 return(htonl(l)); 78 return htonl(l);
79} 79}
80EXPORT_SYMBOL(in_aton); 80EXPORT_SYMBOL(in_aton);
81 81
@@ -92,18 +92,19 @@ EXPORT_SYMBOL(in_aton);
92 92
93static inline int xdigit2bin(char c, int delim) 93static inline int xdigit2bin(char c, int delim)
94{ 94{
95 int val;
96
95 if (c == delim || c == '\0') 97 if (c == delim || c == '\0')
96 return IN6PTON_DELIM; 98 return IN6PTON_DELIM;
97 if (c == ':') 99 if (c == ':')
98 return IN6PTON_COLON_MASK; 100 return IN6PTON_COLON_MASK;
99 if (c == '.') 101 if (c == '.')
100 return IN6PTON_DOT; 102 return IN6PTON_DOT;
101 if (c >= '0' && c <= '9') 103
102 return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); 104 val = hex_to_bin(c);
103 if (c >= 'a' && c <= 'f') 105 if (val >= 0)
104 return (IN6PTON_XDIGIT | (c - 'a' + 10)); 106 return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0);
105 if (c >= 'A' && c <= 'F') 107
106 return (IN6PTON_XDIGIT | (c - 'A' + 10));
107 if (delim == -1) 108 if (delim == -1)
108 return IN6PTON_DELIM; 109 return IN6PTON_DELIM;
109 return IN6PTON_UNKNOWN; 110 return IN6PTON_UNKNOWN;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6df6f8ac963..6d16a9070ff 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -62,18 +62,14 @@ struct ccid_operations {
62 void (*ccid_hc_tx_exit)(struct sock *sk); 62 void (*ccid_hc_tx_exit)(struct sock *sk);
63 void (*ccid_hc_rx_packet_recv)(struct sock *sk, 63 void (*ccid_hc_rx_packet_recv)(struct sock *sk,
64 struct sk_buff *skb); 64 struct sk_buff *skb);
65 int (*ccid_hc_rx_parse_options)(struct sock *sk, 65 int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
66 unsigned char option, 66 u8 opt, u8 *val, u8 len);
67 unsigned char len, u16 idx,
68 unsigned char* value);
69 int (*ccid_hc_rx_insert_options)(struct sock *sk, 67 int (*ccid_hc_rx_insert_options)(struct sock *sk,
70 struct sk_buff *skb); 68 struct sk_buff *skb);
71 void (*ccid_hc_tx_packet_recv)(struct sock *sk, 69 void (*ccid_hc_tx_packet_recv)(struct sock *sk,
72 struct sk_buff *skb); 70 struct sk_buff *skb);
73 int (*ccid_hc_tx_parse_options)(struct sock *sk, 71 int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
74 unsigned char option, 72 u8 opt, u8 *val, u8 len);
75 unsigned char len, u16 idx,
76 unsigned char* value);
77 int (*ccid_hc_tx_send_packet)(struct sock *sk, 73 int (*ccid_hc_tx_send_packet)(struct sock *sk,
78 struct sk_buff *skb); 74 struct sk_buff *skb);
79 void (*ccid_hc_tx_packet_sent)(struct sock *sk, 75 void (*ccid_hc_tx_packet_sent)(struct sock *sk,
@@ -168,27 +164,31 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
168 ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb); 164 ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
169} 165}
170 166
167/**
168 * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver
169 * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
170 * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
171 * @val: value of @opt
172 * @len: length of @val in bytes
173 */
171static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, 174static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
172 unsigned char option, 175 u8 pkt, u8 opt, u8 *val, u8 len)
173 unsigned char len, u16 idx,
174 unsigned char* value)
175{ 176{
176 int rc = 0; 177 if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
177 if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL) 178 return 0;
178 rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx, 179 return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
179 value);
180 return rc;
181} 180}
182 181
182/**
183 * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender
184 * Arguments are analogous to ccid_hc_tx_parse_options()
185 */
183static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, 186static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
184 unsigned char option, 187 u8 pkt, u8 opt, u8 *val, u8 len)
185 unsigned char len, u16 idx,
186 unsigned char* value)
187{ 188{
188 int rc = 0; 189 if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
189 if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL) 190 return 0;
190 rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value); 191 return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
191 return rc;
192} 192}
193 193
194static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, 194static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
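The reworked hooks pass the packet type, option type, value pointer and value length separately, so a CCID no longer has to re-derive them from an option index. A hypothetical stub with the new signature (the myccid name, the option number and its handling are invented purely for illustration):

#include <linux/errno.h>
#include <linux/types.h>

struct sock;

static int myccid_hc_tx_parse_options(struct sock *sk, u8 pkt,
				       u8 opt, u8 *val, u8 len)
{
	u32 value;

	if (opt < 128)			/* not a CCID-specific option (RFC 4340, 10.3) */
		return 0;
	if (len != sizeof(value))	/* this invented option expects 4 bytes */
		return -EINVAL;
	value = (val[0] << 24) | (val[1] << 16) | (val[2] << 8) | val[3];
	/* ... update per-connection TX state from value ... */
	return 0;
}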
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44..0581143cb80 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
47 47
48 If in doubt, say N. 48 If in doubt, say N.
49 49
50config IP_DCCP_CCID3_RTO
51 int "Use higher bound for nofeedback timer"
52 default 100
53 depends on IP_DCCP_CCID3 && EXPERIMENTAL
54 ---help---
55 Use higher lower bound for nofeedback timer expiration.
56
57 The TFRC nofeedback timer normally expires after the maximum of 4
58 RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
59 with a small RTT this can mean a high processing load and reduced
60 performance, since then the nofeedback timer is triggered very
61 frequently.
62
63 This option enables to set a higher lower bound for the nofeedback
64 value. Values in units of milliseconds can be set here.
65
66 A value of 0 disables this feature by enforcing the value specified
67 in RFC 3448. The following values have been suggested as bounds for
68 experimental use:
69 * 16-20ms to match the typical multimedia inter-frame interval
70 * 100ms as a reasonable compromise [default]
71 * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
72
73 The default of 100ms is a compromise between a large value for
74 efficient DCCP implementations, and a small value to avoid disrupting
75 the network in times of congestion.
76
77 The purpose of the nofeedback timer is to slow DCCP down when there
78 is serious network congestion: experimenting with larger values should
79 therefore not be performed on WANs.
80
81config IP_DCCP_TFRC_LIB 50config IP_DCCP_TFRC_LIB
82 def_bool y if IP_DCCP_CCID3 51 def_bool y if IP_DCCP_CCID3
83 52
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be..dc18172b1e5 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
25 */ 25 */
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include "../feat.h" 27#include "../feat.h"
28#include "../ccid.h"
29#include "../dccp.h"
30#include "ccid2.h" 28#include "ccid2.h"
31 29
32 30
33#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 31#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
34static int ccid2_debug; 32static int ccid2_debug;
35#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) 33#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
36
37static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
38{
39 int len = 0;
40 int pipe = 0;
41 struct ccid2_seq *seqp = hc->tx_seqh;
42
43 /* there is data in the chain */
44 if (seqp != hc->tx_seqt) {
45 seqp = seqp->ccid2s_prev;
46 len++;
47 if (!seqp->ccid2s_acked)
48 pipe++;
49
50 while (seqp != hc->tx_seqt) {
51 struct ccid2_seq *prev = seqp->ccid2s_prev;
52
53 len++;
54 if (!prev->ccid2s_acked)
55 pipe++;
56
57 /* packets are sent sequentially */
58 BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
59 prev->ccid2s_seq ) >= 0);
60 BUG_ON(time_before(seqp->ccid2s_sent,
61 prev->ccid2s_sent));
62
63 seqp = prev;
64 }
65 }
66
67 BUG_ON(pipe != hc->tx_pipe);
68 ccid2_pr_debug("len of chain=%d\n", len);
69
70 do {
71 seqp = seqp->ccid2s_prev;
72 len++;
73 } while (seqp != hc->tx_seqh);
74
75 ccid2_pr_debug("total len=%d\n", len);
76 BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
77}
78#else 34#else
79#define ccid2_pr_debug(format, a...) 35#define ccid2_pr_debug(format, a...)
80#define ccid2_hc_tx_check_sanity(hc)
81#endif 36#endif
82 37
83static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) 38static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -156,19 +111,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
156 dp->dccps_l_ack_ratio = val; 111 dp->dccps_l_ack_ratio = val;
157} 112}
158 113
159static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
160{
161 ccid2_pr_debug("change SRTT to %ld\n", val);
162 hc->tx_srtt = val;
163}
164
165static void ccid2_start_rto_timer(struct sock *sk);
166
167static void ccid2_hc_tx_rto_expire(unsigned long data) 114static void ccid2_hc_tx_rto_expire(unsigned long data)
168{ 115{
169 struct sock *sk = (struct sock *)data; 116 struct sock *sk = (struct sock *)data;
170 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 117 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
171 long s;
172 118
173 bh_lock_sock(sk); 119 bh_lock_sock(sk);
174 if (sock_owned_by_user(sk)) { 120 if (sock_owned_by_user(sk)) {
@@ -178,23 +124,19 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
178 124
179 ccid2_pr_debug("RTO_EXPIRE\n"); 125 ccid2_pr_debug("RTO_EXPIRE\n");
180 126
181 ccid2_hc_tx_check_sanity(hc);
182
183 /* back-off timer */ 127 /* back-off timer */
184 hc->tx_rto <<= 1; 128 hc->tx_rto <<= 1;
129 if (hc->tx_rto > DCCP_RTO_MAX)
130 hc->tx_rto = DCCP_RTO_MAX;
185 131
186 s = hc->tx_rto / HZ; 132 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
187 if (s > 60)
188 hc->tx_rto = 60 * HZ;
189
190 ccid2_start_rto_timer(sk);
191 133
192 /* adjust pipe, cwnd etc */ 134 /* adjust pipe, cwnd etc */
193 hc->tx_ssthresh = hc->tx_cwnd / 2; 135 hc->tx_ssthresh = hc->tx_cwnd / 2;
194 if (hc->tx_ssthresh < 2) 136 if (hc->tx_ssthresh < 2)
195 hc->tx_ssthresh = 2; 137 hc->tx_ssthresh = 2;
196 hc->tx_cwnd = 1; 138 hc->tx_cwnd = 1;
197 hc->tx_pipe = 0; 139 hc->tx_pipe = 0;
198 140
199 /* clear state about stuff we sent */ 141 /* clear state about stuff we sent */
200 hc->tx_seqt = hc->tx_seqh; 142 hc->tx_seqt = hc->tx_seqh;
@@ -204,22 +146,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
204 hc->tx_rpseq = 0; 146 hc->tx_rpseq = 0;
205 hc->tx_rpdupack = -1; 147 hc->tx_rpdupack = -1;
206 ccid2_change_l_ack_ratio(sk, 1); 148 ccid2_change_l_ack_ratio(sk, 1);
207 ccid2_hc_tx_check_sanity(hc);
208out: 149out:
209 bh_unlock_sock(sk); 150 bh_unlock_sock(sk);
210 sock_put(sk); 151 sock_put(sk);
211} 152}
212 153
213static void ccid2_start_rto_timer(struct sock *sk)
214{
215 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
216
217 ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
218
219 BUG_ON(timer_pending(&hc->tx_rtotimer));
220 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
221}
222
223static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) 154static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
224{ 155{
225 struct dccp_sock *dp = dccp_sk(sk); 156 struct dccp_sock *dp = dccp_sk(sk);
@@ -230,7 +161,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
230 161
231 hc->tx_seqh->ccid2s_seq = dp->dccps_gss; 162 hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
232 hc->tx_seqh->ccid2s_acked = 0; 163 hc->tx_seqh->ccid2s_acked = 0;
233 hc->tx_seqh->ccid2s_sent = jiffies; 164 hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
234 165
235 next = hc->tx_seqh->ccid2s_next; 166 next = hc->tx_seqh->ccid2s_next;
236 /* check if we need to alloc more space */ 167 /* check if we need to alloc more space */
@@ -296,23 +227,20 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
296 } 227 }
297#endif 228#endif
298 229
299 /* setup RTO timer */ 230 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
300 if (!timer_pending(&hc->tx_rtotimer))
301 ccid2_start_rto_timer(sk);
302 231
303#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 232#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
304 do { 233 do {
305 struct ccid2_seq *seqp = hc->tx_seqt; 234 struct ccid2_seq *seqp = hc->tx_seqt;
306 235
307 while (seqp != hc->tx_seqh) { 236 while (seqp != hc->tx_seqh) {
308 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", 237 ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
309 (unsigned long long)seqp->ccid2s_seq, 238 (unsigned long long)seqp->ccid2s_seq,
310 seqp->ccid2s_acked, seqp->ccid2s_sent); 239 seqp->ccid2s_acked, seqp->ccid2s_sent);
311 seqp = seqp->ccid2s_next; 240 seqp = seqp->ccid2s_next;
312 } 241 }
313 } while (0); 242 } while (0);
314 ccid2_pr_debug("=========\n"); 243 ccid2_pr_debug("=========\n");
315 ccid2_hc_tx_check_sanity(hc);
316#endif 244#endif
317} 245}
318 246
@@ -378,17 +306,87 @@ out_invalid_option:
378 return -1; 306 return -1;
379} 307}
380 308
381static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) 309/**
310 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
311 * This code is almost identical with TCP's tcp_rtt_estimator(), since
312 * - it has a higher sampling frequency (recommended by RFC 1323),
313 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
314 * - it is simple (cf. more complex proposals such as Eifel timer or research
315 * which suggests that the gain should be set according to window size),
316 * - in tests it was found to work well with CCID2 [gerrit].
317 */
318static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
382{ 319{
383 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 320 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
321 long m = mrtt ? : 1;
384 322
385 sk_stop_timer(sk, &hc->tx_rtotimer); 323 if (hc->tx_srtt == 0) {
386 ccid2_pr_debug("deleted RTO timer\n"); 324 /* First measurement m */
325 hc->tx_srtt = m << 3;
326 hc->tx_mdev = m << 1;
327
328 hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
329 hc->tx_rttvar = hc->tx_mdev_max;
330
331 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
332 } else {
333 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
334 m -= (hc->tx_srtt >> 3);
335 hc->tx_srtt += m;
336
337 /* Similarly, update scaled mdev with regard to |m| */
338 if (m < 0) {
339 m = -m;
340 m -= (hc->tx_mdev >> 2);
341 /*
342 * This neutralises RTO increase when RTT < SRTT - mdev
343 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
344 * in Linux TCP", USENIX 2002, pp. 49-62).
345 */
346 if (m > 0)
347 m >>= 3;
348 } else {
349 m -= (hc->tx_mdev >> 2);
350 }
351 hc->tx_mdev += m;
352
353 if (hc->tx_mdev > hc->tx_mdev_max) {
354 hc->tx_mdev_max = hc->tx_mdev;
355 if (hc->tx_mdev_max > hc->tx_rttvar)
356 hc->tx_rttvar = hc->tx_mdev_max;
357 }
358
359 /*
360 * Decay RTTVAR at most once per flight, exploiting that
361 * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
362 * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
363 * GAR is a useful bound for FlightSize = pipe.
364 * AWL is probably too low here, as it over-estimates pipe.
365 */
366 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
367 if (hc->tx_mdev_max < hc->tx_rttvar)
368 hc->tx_rttvar -= (hc->tx_rttvar -
369 hc->tx_mdev_max) >> 2;
370 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
371 hc->tx_mdev_max = tcp_rto_min(sk);
372 }
373 }
374
375 /*
376 * Set RTO from SRTT and RTTVAR
377 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
378 * This agrees with RFC 4341, 5:
379 * "Because DCCP does not retransmit data, DCCP does not require
380 * TCP's recommended minimum timeout of one second".
381 */
382 hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
383
384 if (hc->tx_rto > DCCP_RTO_MAX)
385 hc->tx_rto = DCCP_RTO_MAX;
387} 386}
388 387
389static inline void ccid2_new_ack(struct sock *sk, 388static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
390 struct ccid2_seq *seqp, 389 unsigned int *maxincr)
391 unsigned int *maxincr)
392{ 390{
393 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 391 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
394 392
@@ -402,93 +400,27 @@ static inline void ccid2_new_ack(struct sock *sk,
402 hc->tx_cwnd += 1; 400 hc->tx_cwnd += 1;
403 hc->tx_packets_acked = 0; 401 hc->tx_packets_acked = 0;
404 } 402 }
405 403 /*
406 /* update RTO */ 404 * FIXME: RTT is sampled several times per acknowledgment (for each
407 if (hc->tx_srtt == -1 || 405 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
408 time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { 406 * This causes the RTT to be over-estimated, since the older entries
409 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; 407 * in the Ack Vector have earlier sending times.
410 int s; 408 * The cleanest solution is to not use the ccid2s_sent field at all
411 409 * and instead use DCCP timestamps: requires changes in other places.
412 /* first measurement */ 410 */
413 if (hc->tx_srtt == -1) { 411 ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
414 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
415 r, jiffies,
416 (unsigned long long)seqp->ccid2s_seq);
417 ccid2_change_srtt(hc, r);
418 hc->tx_rttvar = r >> 1;
419 } else {
420 /* RTTVAR */
421 long tmp = hc->tx_srtt - r;
422 long srtt;
423
424 if (tmp < 0)
425 tmp *= -1;
426
427 tmp >>= 2;
428 hc->tx_rttvar *= 3;
429 hc->tx_rttvar >>= 2;
430 hc->tx_rttvar += tmp;
431
432 /* SRTT */
433 srtt = hc->tx_srtt;
434 srtt *= 7;
435 srtt >>= 3;
436 tmp = r >> 3;
437 srtt += tmp;
438 ccid2_change_srtt(hc, srtt);
439 }
440 s = hc->tx_rttvar << 2;
441 /* clock granularity is 1 when based on jiffies */
442 if (!s)
443 s = 1;
444 hc->tx_rto = hc->tx_srtt + s;
445
446 /* must be at least a second */
447 s = hc->tx_rto / HZ;
448 /* DCCP doesn't require this [but I like it cuz my code sux] */
449#if 1
450 if (s < 1)
451 hc->tx_rto = HZ;
452#endif
453 /* max 60 seconds */
454 if (s > 60)
455 hc->tx_rto = HZ * 60;
456
457 hc->tx_lastrtt = jiffies;
458
459 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
460 hc->tx_srtt, hc->tx_rttvar,
461 hc->tx_rto, HZ, r);
462 }
463
464 /* we got a new ack, so re-start RTO timer */
465 ccid2_hc_tx_kill_rto_timer(sk);
466 ccid2_start_rto_timer(sk);
467}
468
469static void ccid2_hc_tx_dec_pipe(struct sock *sk)
470{
471 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
472
473 if (hc->tx_pipe == 0)
474 DCCP_BUG("pipe == 0");
475 else
476 hc->tx_pipe--;
477
478 if (hc->tx_pipe == 0)
479 ccid2_hc_tx_kill_rto_timer(sk);
480} 412}
481 413
482static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) 414static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
483{ 415{
484 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 416 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
485 417
486 if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { 418 if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
487 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 419 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
488 return; 420 return;
489 } 421 }
490 422
491 hc->tx_last_cong = jiffies; 423 hc->tx_last_cong = ccid2_time_stamp;
492 424
493 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; 425 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
494 hc->tx_ssthresh = max(hc->tx_cwnd, 2U); 426 hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
@@ -510,7 +442,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
510 int done = 0; 442 int done = 0;
511 unsigned int maxincr = 0; 443 unsigned int maxincr = 0;
512 444
513 ccid2_hc_tx_check_sanity(hc);
514 /* check reverse path congestion */ 445 /* check reverse path congestion */
515 seqno = DCCP_SKB_CB(skb)->dccpd_seq; 446 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
516 447
@@ -620,7 +551,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
620 seqp->ccid2s_acked = 1; 551 seqp->ccid2s_acked = 1;
621 ccid2_pr_debug("Got ack for %llu\n", 552 ccid2_pr_debug("Got ack for %llu\n",
622 (unsigned long long)seqp->ccid2s_seq); 553 (unsigned long long)seqp->ccid2s_seq);
623 ccid2_hc_tx_dec_pipe(sk); 554 hc->tx_pipe--;
624 } 555 }
625 if (seqp == hc->tx_seqt) { 556 if (seqp == hc->tx_seqt) {
626 done = 1; 557 done = 1;
@@ -677,7 +608,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
677 * one ack vector. 608 * one ack vector.
678 */ 609 */
679 ccid2_congestion_event(sk, seqp); 610 ccid2_congestion_event(sk, seqp);
680 ccid2_hc_tx_dec_pipe(sk); 611 hc->tx_pipe--;
681 } 612 }
682 if (seqp == hc->tx_seqt) 613 if (seqp == hc->tx_seqt)
683 break; 614 break;
@@ -695,7 +626,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
695 hc->tx_seqt = hc->tx_seqt->ccid2s_next; 626 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
696 } 627 }
697 628
698 ccid2_hc_tx_check_sanity(hc); 629 /* restart RTO timer if not all outstanding data has been acked */
630 if (hc->tx_pipe == 0)
631 sk_stop_timer(sk, &hc->tx_rtotimer);
632 else
633 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
699} 634}
700 635
701static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 636static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +642,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
707 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ 642 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
708 hc->tx_ssthresh = ~0U; 643 hc->tx_ssthresh = ~0U;
709 644
710 /* 645 /* Use larger initial windows (RFC 4341, section 5). */
711 * RFC 4341, 5: "The cwnd parameter is initialized to at most four 646 hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
712 * packets for new connections, following the rules from [RFC3390]".
713 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
714 */
715 hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
716 647
717 /* Make sure that Ack Ratio is enabled and within bounds. */ 648 /* Make sure that Ack Ratio is enabled and within bounds. */
718 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); 649 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
@@ -723,15 +654,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
723 if (ccid2_hc_tx_alloc_seq(hc)) 654 if (ccid2_hc_tx_alloc_seq(hc))
724 return -ENOMEM; 655 return -ENOMEM;
725 656
726 hc->tx_rto = 3 * HZ; 657 hc->tx_rto = DCCP_TIMEOUT_INIT;
727 ccid2_change_srtt(hc, -1);
728 hc->tx_rttvar = -1;
729 hc->tx_rpdupack = -1; 658 hc->tx_rpdupack = -1;
730 hc->tx_last_cong = jiffies; 659 hc->tx_last_cong = ccid2_time_stamp;
731 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 660 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
732 (unsigned long)sk); 661 (unsigned long)sk);
733
734 ccid2_hc_tx_check_sanity(hc);
735 return 0; 662 return 0;
736} 663}
737 664
@@ -740,7 +667,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
740 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 667 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
741 int i; 668 int i;
742 669
743 ccid2_hc_tx_kill_rto_timer(sk); 670 sk_stop_timer(sk, &hc->tx_rtotimer);
744 671
745 for (i = 0; i < hc->tx_seqbufc; i++) 672 for (i = 0; i < hc->tx_seqbufc; i++)
746 kfree(hc->tx_seqbuf[i]); 673 kfree(hc->tx_seqbuf[i]);
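The new ccid2_rtt_estimator() above is the RFC 2988 computation in TCP's scaled fixed-point form (SRTT kept times 8, RTTVAR times 4). Stripped of the scaling and of the once-per-flight RTTVAR decay, the update it implements is roughly the following (rto_from_sample() is only an unscaled illustration, not the kernel code):

#include <linux/types.h>

/* Unscaled RFC 2988: SRTT <- 7/8*SRTT + 1/8*R, RTTVAR <- 3/4*RTTVAR + 1/4*|SRTT - R|,
 * RTO = SRTT + 4*RTTVAR; the kernel version additionally clamps to DCCP_RTO_MAX. */
static u32 rto_from_sample(u32 *srtt, u32 *rttvar, u32 r)
{
	if (*srtt == 0) {			/* first measurement */
		*srtt   = r;
		*rttvar = r / 2;
	} else {
		u32 err = *srtt > r ? *srtt - r : r - *srtt;

		*rttvar = (3 * *rttvar + err) / 4;
		*srtt   = (7 * *srtt + r) / 8;
	}
	return *srtt + 4 * *rttvar;
}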
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103b..9731c2dc148 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
18#ifndef _DCCP_CCID2_H_ 18#ifndef _DCCP_CCID2_H_
19#define _DCCP_CCID2_H_ 19#define _DCCP_CCID2_H_
20 20
21#include <linux/dccp.h>
22#include <linux/timer.h> 21#include <linux/timer.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include "../ccid.h" 23#include "../ccid.h"
24#include "../dccp.h"
25
26/*
27 * CCID-2 timestamping faces the same issues as TCP timestamping.
28 * Hence we reuse/share as much of the code as possible.
29 */
30#define ccid2_time_stamp tcp_time_stamp
31
25/* NUMDUPACK parameter from RFC 4341, p. 6 */ 32/* NUMDUPACK parameter from RFC 4341, p. 6 */
26#define NUMDUPACK 3 33#define NUMDUPACK 3
27 34
28struct sock;
29
30struct ccid2_seq { 35struct ccid2_seq {
31 u64 ccid2s_seq; 36 u64 ccid2s_seq;
32 unsigned long ccid2s_sent; 37 u32 ccid2s_sent;
33 int ccid2s_acked; 38 int ccid2s_acked;
34 struct ccid2_seq *ccid2s_prev; 39 struct ccid2_seq *ccid2s_prev;
35 struct ccid2_seq *ccid2s_next; 40 struct ccid2_seq *ccid2s_next;
@@ -42,7 +47,12 @@ struct ccid2_seq {
42 * struct ccid2_hc_tx_sock - CCID2 TX half connection 47 * struct ccid2_hc_tx_sock - CCID2 TX half connection
43 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 48 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) 49 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
45 * @tx_lastrtt: time RTT was last measured 50 * @tx_srtt: smoothed RTT estimate, scaled by 2^3
51 * @tx_mdev: smoothed RTT variation, scaled by 2^2
52 * @tx_mdev_max: maximum of @mdev during one flight
53 * @tx_rttvar: moving average/maximum of @mdev_max
54 * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
55 * @tx_rtt_seq: to decay RTTVAR at most once per flight
46 * @tx_rpseq: last consecutive seqno 56 * @tx_rpseq: last consecutive seqno
47 * @tx_rpdupack: dupacks since rpseq 57 * @tx_rpdupack: dupacks since rpseq
48 */ 58 */
@@ -55,14 +65,19 @@ struct ccid2_hc_tx_sock {
55 int tx_seqbufc; 65 int tx_seqbufc;
56 struct ccid2_seq *tx_seqh; 66 struct ccid2_seq *tx_seqh;
57 struct ccid2_seq *tx_seqt; 67 struct ccid2_seq *tx_seqt;
58 long tx_rto; 68
59 long tx_srtt; 69 /* RTT measurement: variables/principles are the same as in TCP */
60 long tx_rttvar; 70 u32 tx_srtt,
61 unsigned long tx_lastrtt; 71 tx_mdev,
72 tx_mdev_max,
73 tx_rttvar,
74 tx_rto;
75 u64 tx_rtt_seq:48;
62 struct timer_list tx_rtotimer; 76 struct timer_list tx_rtotimer;
77
63 u64 tx_rpseq; 78 u64 tx_rpseq;
64 int tx_rpdupack; 79 int tx_rpdupack;
65 unsigned long tx_last_cong; 80 u32 tx_last_cong;
66 u64 tx_high_ack; 81 u64 tx_high_ack;
67}; 82};
68 83
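Because ccid2s_sent and tx_last_cong are now free-running 32-bit ccid2_time_stamp values, ordering tests must tolerate wrap-around, which is why ccid2_congestion_event() switched from time_before() to a signed subtraction. The idiom as a hedged stand-alone helper (ccid2_ts_before() is an assumed name):

#include <linux/types.h>

/* "a is earlier than b": the signed difference is negative as long as the two
 * stamps are less than 2^31 ticks apart. */
static inline bool ccid2_ts_before(u32 a, u32 b)
{
	return (s32)(a - b) < 0;
}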
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 95f75298649..c3f3a25bbd7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -54,7 +54,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
54 [TFRC_SSTATE_NO_SENT] = "NO_SENT", 54 [TFRC_SSTATE_NO_SENT] = "NO_SENT",
55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", 55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
56 [TFRC_SSTATE_FBACK] = "FBACK", 56 [TFRC_SSTATE_FBACK] = "FBACK",
57 [TFRC_SSTATE_TERM] = "TERM",
58 }; 57 };
59 58
60 return ccid3_state_names[state]; 59 return ccid3_state_names[state];
@@ -91,19 +90,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
91 return scaled_div(w_init << 6, hc->tx_rtt); 90 return scaled_div(w_init << 6, hc->tx_rtt);
92} 91}
93 92
94/* 93/**
95 * Recalculate t_ipi and delta (should be called whenever X changes) 94 * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
95 * This respects the granularity of X_inst (64 * bytes/second).
96 */ 96 */
97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) 97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
98{ 98{
99 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
100 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); 99 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
101 100
102 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ 101 ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
103 hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); 102 hc->tx_s, (unsigned)(hc->tx_x >> 6));
104
105 ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi,
106 hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6));
107} 103}
108 104
109static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) 105static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
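With tx_delta gone, ccid3_update_send_interval() reduces to the single division t_ipi = s / X_inst, where X is stored in units of 64 bytes/second (hence the << 6, which cancels against that scaling). A small user-space sketch, with scaled_div32() approximated by a plain 64-bit division; the numbers are only illustrative:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t s = 1460;			/* segment size, bytes */
	uint64_t x = (uint64_t)125000 << 6;	/* 125000 B/s = 1 Mbit/s, scaled by 64 */

	/* t_ipi in microseconds: (s << 6) * 1e6 / X; the << 6 cancels X's scaling */
	uint32_t t_ipi = (uint32_t)(((uint64_t)s << 6) * 1000000 / x);

	printf("t_ipi = %u us\n", (unsigned)t_ipi);	/* ~11680 us for these values */
	return 0;
}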
@@ -211,16 +207,19 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
211 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, 207 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
212 ccid3_tx_state_name(hc->tx_state)); 208 ccid3_tx_state_name(hc->tx_state));
213 209
210 /* Ignore and do not restart after leaving the established state */
211 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
212 goto out;
213
214 /* Reset feedback state to "no feedback received" */
214 if (hc->tx_state == TFRC_SSTATE_FBACK) 215 if (hc->tx_state == TFRC_SSTATE_FBACK)
215 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 216 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
216 else if (hc->tx_state != TFRC_SSTATE_NO_FBACK)
217 goto out;
218 217
219 /* 218 /*
220 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 219 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
220 * RTO is 0 if and only if no feedback has been received yet.
221 */ 221 */
222 if (hc->tx_t_rto == 0 || /* no feedback received yet */ 222 if (hc->tx_t_rto == 0 || hc->tx_p == 0) {
223 hc->tx_p == 0) {
224 223
225 /* halve send rate directly */ 224 /* halve send rate directly */
226 hc->tx_x = max(hc->tx_x / 2, 225 hc->tx_x = max(hc->tx_x / 2,
@@ -256,7 +255,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
256 * Set new timeout for the nofeedback timer. 255 * Set new timeout for the nofeedback timer.
257 * See comments in packet_recv() regarding the value of t_RTO. 256 * See comments in packet_recv() regarding the value of t_RTO.
258 */ 257 */
259 if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ 258 if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */
260 t_nfb = TFRC_INITIAL_TIMEOUT; 259 t_nfb = TFRC_INITIAL_TIMEOUT;
261 else 260 else
262 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); 261 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
@@ -290,8 +289,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
290 if (unlikely(skb->len == 0)) 289 if (unlikely(skb->len == 0))
291 return -EBADMSG; 290 return -EBADMSG;
292 291
293 switch (hc->tx_state) { 292 if (hc->tx_state == TFRC_SSTATE_NO_SENT) {
294 case TFRC_SSTATE_NO_SENT:
295 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + 293 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies +
296 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); 294 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
297 hc->tx_last_win_count = 0; 295 hc->tx_last_win_count = 0;
@@ -326,27 +324,22 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
326 ccid3_update_send_interval(hc); 324 ccid3_update_send_interval(hc);
327 325
328 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 326 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
329 break; 327
330 case TFRC_SSTATE_NO_FBACK: 328 } else {
331 case TFRC_SSTATE_FBACK:
332 delay = ktime_us_delta(hc->tx_t_nom, now); 329 delay = ktime_us_delta(hc->tx_t_nom, now);
333 ccid3_pr_debug("delay=%ld\n", (long)delay); 330 ccid3_pr_debug("delay=%ld\n", (long)delay);
334 /* 331 /*
335 * Scheduling of packet transmissions [RFC 3448, 4.6] 332 * Scheduling of packet transmissions (RFC 5348, 8.3)
336 * 333 *
337 * if (t_now > t_nom - delta) 334 * if (t_now > t_nom - delta)
338 * // send the packet now 335 * // send the packet now
339 * else 336 * else
340 * // send the packet in (t_nom - t_now) milliseconds. 337 * // send the packet in (t_nom - t_now) milliseconds.
341 */ 338 */
342 if (delay - (s64)hc->tx_delta >= 1000) 339 if (delay >= TFRC_T_DELTA)
343 return (u32)delay / 1000L; 340 return (u32)delay / USEC_PER_MSEC;
344 341
345 ccid3_hc_tx_update_win_count(hc, now); 342 ccid3_hc_tx_update_win_count(hc, now);
346 break;
347 case TFRC_SSTATE_TERM:
348 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
349 return -EINVAL;
350 } 343 }
351 344
352 /* prepare to send now (add options etc.) */ 345 /* prepare to send now (add options etc.) */
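The state switch collapses to a single "first packet or not" test, and the per-socket tx_delta is replaced by the constant TFRC_T_DELTA. A sketch of the scheduling rule quoted in the comment (send immediately when within t_delta of the nominal send time, otherwise return the remaining delay in whole milliseconds); the names below follow the comment, not the kernel API:

/* Scheduling rule sketch (cf. RFC 5348, 8.3); all times in microseconds. */
#include <stdint.h>

#define T_DELTA_US	1000	/* assumed t_delta of one millisecond (HZ >= 500 case) */

/* Returns 0 to send immediately, otherwise the delay to wait in milliseconds. */
static uint32_t tx_schedule(int64_t t_nom_us, int64_t t_now_us)
{
	int64_t delay = t_nom_us - t_now_us;

	if (delay >= T_DELTA_US)
		return (uint32_t)(delay / 1000);	/* wait, rounded down to ms */
	return 0;					/* within t_delta: send now */
}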
@@ -372,48 +365,34 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
372static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 365static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
373{ 366{
374 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 367 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
375 struct ccid3_options_received *opt_recv; 368 struct tfrc_tx_hist_entry *acked;
376 ktime_t now; 369 ktime_t now;
377 unsigned long t_nfb; 370 unsigned long t_nfb;
378 u32 pinv, r_sample; 371 u32 r_sample;
379 372
380 /* we are only interested in ACKs */ 373 /* we are only interested in ACKs */
381 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || 374 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
382 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) 375 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
383 return; 376 return;
384 /* ... and only in the established state */
385 if (hc->tx_state != TFRC_SSTATE_FBACK &&
386 hc->tx_state != TFRC_SSTATE_NO_FBACK)
387 return;
388
389 opt_recv = &hc->tx_options_received;
390 now = ktime_get_real();
391
392 /* Estimate RTT from history if ACK number is valid */
393 r_sample = tfrc_tx_hist_rtt(hc->tx_hist,
394 DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
395 if (r_sample == 0) {
396 DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
397 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
398 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
399 return;
400 }
401
402 /* Update receive rate in units of 64 * bytes/second */
403 hc->tx_x_recv = opt_recv->ccid3or_receive_rate;
404 hc->tx_x_recv <<= 6;
405
406 /* Update loss event rate (which is scaled by 1e6) */
407 pinv = opt_recv->ccid3or_loss_event_rate;
408 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
409 hc->tx_p = 0;
410 else /* can not exceed 100% */
411 hc->tx_p = scaled_div(1, pinv);
412 /* 377 /*
413 * Validate new RTT sample and update moving average 378 * Locate the acknowledged packet in the TX history.
379 *
380 * Returning "entry not found" here can for instance happen when
381 * - the host has not sent out anything (e.g. a passive server),
382 * - the Ack is outdated (packet with higher Ack number was received),
383 * - it is a bogus Ack (for a packet not sent on this connection).
414 */ 384 */
415 r_sample = dccp_sample_rtt(sk, r_sample); 385 acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb));
386 if (acked == NULL)
387 return;
388 /* For the sake of RTT sampling, ignore/remove all older entries */
389 tfrc_tx_hist_purge(&acked->next);
390
391 /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
392 now = ktime_get_real();
393 r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
416 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); 394 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9);
395
417 /* 396 /*
418 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 397 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
419 */ 398 */
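packet_recv() now looks the acknowledged packet up in the TX history, purges everything older, and feeds the sample into tfrc_ewma(hc->tx_rtt, r_sample, 9), i.e. roughly 90% old estimate and 10% new sample. A sketch of such a decimal-weighted average, assuming the weight is expressed in tenths as in the TFRC library helpers:

#include <stdint.h>

/* weight is in tenths of the old value: weight = 9 gives 0.9*avg + 0.1*sample */
static uint32_t ewma_tenths(uint32_t avg, uint32_t sample, uint8_t weight)
{
	if (avg == 0)		/* no estimate yet: take the sample as-is */
		return sample;
	return (uint32_t)(((uint64_t)weight * avg +
			   (uint64_t)(10 - weight) * sample) / 10);
}

/* e.g. ewma_tenths(4000, 6000, 9) == 4200 us */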
@@ -461,13 +440,12 @@ done_computing_x:
461 sk->sk_write_space(sk); 440 sk->sk_write_space(sk);
462 441
463 /* 442 /*
464 * Update timeout interval for the nofeedback timer. 443 * Update timeout interval for the nofeedback timer. In order to control
465 * We use a configuration option to increase the lower bound. 444 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
466 * This can help avoid triggering the nofeedback timer too 445 * tunable RTAX_RTO_MIN value as the lower bound.
467 * often ('spinning') on LANs with small RTTs.
468 */ 446 */
469 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * 447 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
470 (USEC_PER_SEC / 1000))); 448 USEC_PER_SEC/HZ * tcp_rto_min(sk));
471 /* 449 /*
472 * Schedule no feedback timer to expire in 450 * Schedule no feedback timer to expire in
473 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) 451 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
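The lower bound on the nofeedback timeout now comes from the per-route RTAX_RTO_MIN value via tcp_rto_min(), which returns jiffies and is therefore converted with USEC_PER_SEC/HZ. A worked sketch of the resulting bound t_RTO = max(4*R, RTO_min), using assumed HZ and rto_min values:

#include <stdint.h>

#define USEC_PER_SEC	1000000UL
#define HZ_ASSUMED	250		/* assumption for this example */

static uint32_t nofeedback_rto(uint32_t rtt_us, uint32_t rto_min_jiffies)
{
	uint32_t rto_min_us = (USEC_PER_SEC / HZ_ASSUMED) * rto_min_jiffies;
	uint32_t rto = 4 * rtt_us;

	return rto > rto_min_us ? rto : rto_min_us;
}

/* nofeedback_rto(800, 50) == 200000: a sub-millisecond RTT is clamped to the
 * assumed 200 ms minimum instead of letting the timer spin.
 */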
@@ -482,66 +460,41 @@ done_computing_x:
482 jiffies + usecs_to_jiffies(t_nfb)); 460 jiffies + usecs_to_jiffies(t_nfb));
483} 461}
484 462
485static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, 463static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
486 unsigned char len, u16 idx, 464 u8 option, u8 *optval, u8 optlen)
487 unsigned char *value)
488{ 465{
489 int rc = 0;
490 const struct dccp_sock *dp = dccp_sk(sk);
491 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 466 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
492 struct ccid3_options_received *opt_recv;
493 __be32 opt_val; 467 __be32 opt_val;
494 468
495 opt_recv = &hc->tx_options_received;
496
497 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
498 opt_recv->ccid3or_seqno = dp->dccps_gsr;
499 opt_recv->ccid3or_loss_event_rate = ~0;
500 opt_recv->ccid3or_loss_intervals_idx = 0;
501 opt_recv->ccid3or_loss_intervals_len = 0;
502 opt_recv->ccid3or_receive_rate = 0;
503 }
504
505 switch (option) { 469 switch (option) {
470 case TFRC_OPT_RECEIVE_RATE:
506 case TFRC_OPT_LOSS_EVENT_RATE: 471 case TFRC_OPT_LOSS_EVENT_RATE:
507 if (unlikely(len != 4)) { 472 /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
508 DCCP_WARN("%s(%p), invalid len %d " 473 if (packet_type == DCCP_PKT_DATA)
509 "for TFRC_OPT_LOSS_EVENT_RATE\n", 474 break;
510 dccp_role(sk), sk, len); 475 if (unlikely(optlen != 4)) {
511 rc = -EINVAL; 476 DCCP_WARN("%s(%p), invalid len %d for %u\n",
512 } else { 477 dccp_role(sk), sk, optlen, option);
513 opt_val = get_unaligned((__be32 *)value); 478 return -EINVAL;
514 opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
515 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
516 dccp_role(sk), sk,
517 opt_recv->ccid3or_loss_event_rate);
518 } 479 }
519 break; 480 opt_val = ntohl(get_unaligned((__be32 *)optval));
520 case TFRC_OPT_LOSS_INTERVALS: 481
521 opt_recv->ccid3or_loss_intervals_idx = idx; 482 if (option == TFRC_OPT_RECEIVE_RATE) {
522 opt_recv->ccid3or_loss_intervals_len = len; 483 /* Receive Rate is kept in units of 64 bytes/second */
523 ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", 484 hc->tx_x_recv = opt_val;
524 dccp_role(sk), sk, 485 hc->tx_x_recv <<= 6;
525 opt_recv->ccid3or_loss_intervals_idx, 486
526 opt_recv->ccid3or_loss_intervals_len);
527 break;
528 case TFRC_OPT_RECEIVE_RATE:
529 if (unlikely(len != 4)) {
530 DCCP_WARN("%s(%p), invalid len %d "
531 "for TFRC_OPT_RECEIVE_RATE\n",
532 dccp_role(sk), sk, len);
533 rc = -EINVAL;
534 } else {
535 opt_val = get_unaligned((__be32 *)value);
536 opt_recv->ccid3or_receive_rate = ntohl(opt_val);
537 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", 487 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
538 dccp_role(sk), sk, 488 dccp_role(sk), sk, opt_val);
539 opt_recv->ccid3or_receive_rate); 489 } else {
490 /* Update the fixpoint Loss Event Rate fraction */
491 hc->tx_p = tfrc_invert_loss_event_rate(opt_val);
492
493 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
494 dccp_role(sk), sk, opt_val);
540 } 495 }
541 break;
542 } 496 }
543 497 return 0;
544 return rc;
545} 498}
546 499
547static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) 500static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
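Both remaining options carry a 4-byte value in network byte order; the parser now rejects any other length, skips the options on Data packets (RFC 4342, 8.3/8.5), and converts the value once. A minimal user-space sketch of that extraction, with a plain memcpy() standing in for get_unaligned():

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Extract a 32-bit TFRC option value from an option body of length optlen.
 * Returns -1 for a malformed length, as the kernel parser does with -EINVAL.
 */
static int parse_u32_option(const uint8_t *optval, uint8_t optlen, uint32_t *out)
{
	uint32_t be;

	if (optlen != 4)
		return -1;
	memcpy(&be, optval, sizeof(be));	/* unaligned-safe copy */
	*out = ntohl(be);
	return 0;
}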
@@ -559,42 +512,36 @@ static void ccid3_hc_tx_exit(struct sock *sk)
559{ 512{
560 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 513 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
561 514
562 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
563 sk_stop_timer(sk, &hc->tx_no_feedback_timer); 515 sk_stop_timer(sk, &hc->tx_no_feedback_timer);
564
565 tfrc_tx_hist_purge(&hc->tx_hist); 516 tfrc_tx_hist_purge(&hc->tx_hist);
566} 517}
567 518
568static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 519static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
569{ 520{
570 struct ccid3_hc_tx_sock *hc; 521 info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
571 522 info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
572 /* Listen socks doesn't have a private CCID block */
573 if (sk->sk_state == DCCP_LISTEN)
574 return;
575
576 hc = ccid3_hc_tx_sk(sk);
577 info->tcpi_rto = hc->tx_t_rto;
578 info->tcpi_rtt = hc->tx_rtt;
579} 523}
580 524
581static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, 525static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
582 u32 __user *optval, int __user *optlen) 526 u32 __user *optval, int __user *optlen)
583{ 527{
584 const struct ccid3_hc_tx_sock *hc; 528 const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
529 struct tfrc_tx_info tfrc;
585 const void *val; 530 const void *val;
586 531
587 /* Listen socks doesn't have a private CCID block */
588 if (sk->sk_state == DCCP_LISTEN)
589 return -EINVAL;
590
591 hc = ccid3_hc_tx_sk(sk);
592 switch (optname) { 532 switch (optname) {
593 case DCCP_SOCKOPT_CCID_TX_INFO: 533 case DCCP_SOCKOPT_CCID_TX_INFO:
594 if (len < sizeof(hc->tx_tfrc)) 534 if (len < sizeof(tfrc))
595 return -EINVAL; 535 return -EINVAL;
596 len = sizeof(hc->tx_tfrc); 536 tfrc.tfrctx_x = hc->tx_x;
597 val = &hc->tx_tfrc; 537 tfrc.tfrctx_x_recv = hc->tx_x_recv;
538 tfrc.tfrctx_x_calc = hc->tx_x_calc;
539 tfrc.tfrctx_rtt = hc->tx_rtt;
540 tfrc.tfrctx_p = hc->tx_p;
541 tfrc.tfrctx_rto = hc->tx_t_rto;
542 tfrc.tfrctx_ipi = hc->tx_t_ipi;
543 len = sizeof(tfrc);
544 val = &tfrc;
598 break; 545 break;
599 default: 546 default:
600 return -ENOPROTOOPT; 547 return -ENOPROTOOPT;
@@ -624,7 +571,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
624 static const char *const ccid3_rx_state_names[] = { 571 static const char *const ccid3_rx_state_names[] = {
625 [TFRC_RSTATE_NO_DATA] = "NO_DATA", 572 [TFRC_RSTATE_NO_DATA] = "NO_DATA",
626 [TFRC_RSTATE_DATA] = "DATA", 573 [TFRC_RSTATE_DATA] = "DATA",
627 [TFRC_RSTATE_TERM] = "TERM",
628 }; 574 };
629 575
630 return ccid3_rx_state_names[state]; 576 return ccid3_rx_state_names[state];
@@ -650,14 +596,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
650{ 596{
651 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 597 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
652 struct dccp_sock *dp = dccp_sk(sk); 598 struct dccp_sock *dp = dccp_sk(sk);
653 ktime_t now; 599 ktime_t now = ktime_get_real();
654 s64 delta = 0; 600 s64 delta = 0;
655 601
656 if (unlikely(hc->rx_state == TFRC_RSTATE_TERM))
657 return;
658
659 now = ktime_get_real();
660
661 switch (fbtype) { 602 switch (fbtype) {
662 case CCID3_FBACK_INITIAL: 603 case CCID3_FBACK_INITIAL:
663 hc->rx_x_recv = 0; 604 hc->rx_x_recv = 0;
@@ -701,14 +642,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
701 642
702static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) 643static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
703{ 644{
704 const struct ccid3_hc_rx_sock *hc; 645 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
705 __be32 x_recv, pinv; 646 __be32 x_recv, pinv;
706 647
707 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) 648 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
708 return 0; 649 return 0;
709 650
710 hc = ccid3_hc_rx_sk(sk);
711
712 if (dccp_packet_without_ack(skb)) 651 if (dccp_packet_without_ack(skb))
713 return 0; 652 return 0;
714 653
@@ -749,10 +688,11 @@ static u32 ccid3_first_li(struct sock *sk)
749 x_recv = scaled_div32(hc->rx_bytes_recv, delta); 688 x_recv = scaled_div32(hc->rx_bytes_recv, delta);
750 if (x_recv == 0) { /* would also trigger divide-by-zero */ 689 if (x_recv == 0) { /* would also trigger divide-by-zero */
751 DCCP_WARN("X_recv==0\n"); 690 DCCP_WARN("X_recv==0\n");
752 if ((x_recv = hc->rx_x_recv) == 0) { 691 if (hc->rx_x_recv == 0) {
753 DCCP_BUG("stored value of X_recv is zero"); 692 DCCP_BUG("stored value of X_recv is zero");
754 return ~0U; 693 return ~0U;
755 } 694 }
695 x_recv = hc->rx_x_recv;
756 } 696 }
757 697
758 fval = scaled_div(hc->rx_s, hc->rx_rtt); 698 fval = scaled_div(hc->rx_s, hc->rx_rtt);
@@ -862,46 +802,31 @@ static void ccid3_hc_rx_exit(struct sock *sk)
862{ 802{
863 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 803 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
864 804
865 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
866
867 tfrc_rx_hist_purge(&hc->rx_hist); 805 tfrc_rx_hist_purge(&hc->rx_hist);
868 tfrc_lh_cleanup(&hc->rx_li_hist); 806 tfrc_lh_cleanup(&hc->rx_li_hist);
869} 807}
870 808
871static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 809static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
872{ 810{
873 const struct ccid3_hc_rx_sock *hc; 811 info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state;
874
875 /* Listen socks doesn't have a private CCID block */
876 if (sk->sk_state == DCCP_LISTEN)
877 return;
878
879 hc = ccid3_hc_rx_sk(sk);
880 info->tcpi_ca_state = hc->rx_state;
881 info->tcpi_options |= TCPI_OPT_TIMESTAMPS; 812 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
882 info->tcpi_rcv_rtt = hc->rx_rtt; 813 info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt;
883} 814}
884 815
885static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, 816static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
886 u32 __user *optval, int __user *optlen) 817 u32 __user *optval, int __user *optlen)
887{ 818{
888 const struct ccid3_hc_rx_sock *hc; 819 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
889 struct tfrc_rx_info rx_info; 820 struct tfrc_rx_info rx_info;
890 const void *val; 821 const void *val;
891 822
892 /* Listen socks doesn't have a private CCID block */
893 if (sk->sk_state == DCCP_LISTEN)
894 return -EINVAL;
895
896 hc = ccid3_hc_rx_sk(sk);
897 switch (optname) { 823 switch (optname) {
898 case DCCP_SOCKOPT_CCID_RX_INFO: 824 case DCCP_SOCKOPT_CCID_RX_INFO:
899 if (len < sizeof(rx_info)) 825 if (len < sizeof(rx_info))
900 return -EINVAL; 826 return -EINVAL;
901 rx_info.tfrcrx_x_recv = hc->rx_x_recv; 827 rx_info.tfrcrx_x_recv = hc->rx_x_recv;
902 rx_info.tfrcrx_rtt = hc->rx_rtt; 828 rx_info.tfrcrx_rtt = hc->rx_rtt;
903 rx_info.tfrcrx_p = hc->rx_pinv == 0 ? ~0U : 829 rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv);
904 scaled_div(1, hc->rx_pinv);
905 len = sizeof(rx_info); 830 len = sizeof(rx_info);
906 val = &rx_info; 831 val = &rx_info;
907 break; 832 break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 03263577665..1a9933c2967 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,35 +42,36 @@
42#include "lib/tfrc.h" 42#include "lib/tfrc.h"
43#include "../ccid.h" 43#include "../ccid.h"
44 44
45/* Two seconds as per RFC 3448 4.2 */ 45/* Two seconds as per RFC 5348, 4.2 */
46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
47 47
48/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
49#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
50
51/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 48/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
52#define TFRC_T_MBI 64 49#define TFRC_T_MBI 64
53 50
51/*
52 * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are
53 * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
54 * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
55 * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
56 * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
57 */
58#if (HZ >= 500)
59# define TFRC_T_DELTA USEC_PER_MSEC
60#else
61# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
62#endif
63
54enum ccid3_options { 64enum ccid3_options {
55 TFRC_OPT_LOSS_EVENT_RATE = 192, 65 TFRC_OPT_LOSS_EVENT_RATE = 192,
56 TFRC_OPT_LOSS_INTERVALS = 193, 66 TFRC_OPT_LOSS_INTERVALS = 193,
57 TFRC_OPT_RECEIVE_RATE = 194, 67 TFRC_OPT_RECEIVE_RATE = 194,
58}; 68};
59 69
60struct ccid3_options_received {
61 u64 ccid3or_seqno:48,
62 ccid3or_loss_intervals_idx:16;
63 u16 ccid3or_loss_intervals_len;
64 u32 ccid3or_loss_event_rate;
65 u32 ccid3or_receive_rate;
66};
67
68/* TFRC sender states */ 70/* TFRC sender states */
69enum ccid3_hc_tx_states { 71enum ccid3_hc_tx_states {
70 TFRC_SSTATE_NO_SENT = 1, 72 TFRC_SSTATE_NO_SENT = 1,
71 TFRC_SSTATE_NO_FBACK, 73 TFRC_SSTATE_NO_FBACK,
72 TFRC_SSTATE_FBACK, 74 TFRC_SSTATE_FBACK,
73 TFRC_SSTATE_TERM,
74}; 75};
75 76
76/** 77/**
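The HZ-dependent choice behind TFRC_T_DELTA is easy to check numerically; a small sketch evaluating both branches for a few assumed clock rates:

#include <stdio.h>

#define USEC_PER_SEC	1000000
#define USEC_PER_MSEC	1000

static unsigned t_delta_for(unsigned hz)
{
	return hz >= 500 ? USEC_PER_MSEC : USEC_PER_SEC / (2 * hz);
}

int main(void)
{
	printf("HZ=1000 -> %u us\n", t_delta_for(1000));	/* 1000 (constant branch) */
	printf("HZ=250  -> %u us\n", t_delta_for(250));		/* 2000 = t_gran / 2 */
	printf("HZ=100  -> %u us\n", t_delta_for(100));		/* 5000 = t_gran / 2 */
	return 0;
}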
@@ -90,19 +91,16 @@ enum ccid3_hc_tx_states {
90 * @tx_no_feedback_timer: Handle to no feedback timer 91 * @tx_no_feedback_timer: Handle to no feedback timer
91 * @tx_t_ld: Time last doubled during slow start 92 * @tx_t_ld: Time last doubled during slow start
92 * @tx_t_nom: Nominal send time of next packet 93 * @tx_t_nom: Nominal send time of next packet
93 * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs
94 * @tx_hist: Packet history 94 * @tx_hist: Packet history
95 * @tx_options_received: Parsed set of retrieved options
96 */ 95 */
97struct ccid3_hc_tx_sock { 96struct ccid3_hc_tx_sock {
98 struct tfrc_tx_info tx_tfrc; 97 u64 tx_x;
99#define tx_x tx_tfrc.tfrctx_x 98 u64 tx_x_recv;
100#define tx_x_recv tx_tfrc.tfrctx_x_recv 99 u32 tx_x_calc;
101#define tx_x_calc tx_tfrc.tfrctx_x_calc 100 u32 tx_rtt;
102#define tx_rtt tx_tfrc.tfrctx_rtt 101 u32 tx_p;
103#define tx_p tx_tfrc.tfrctx_p 102 u32 tx_t_rto;
104#define tx_t_rto tx_tfrc.tfrctx_rto 103 u32 tx_t_ipi;
105#define tx_t_ipi tx_tfrc.tfrctx_ipi
106 u16 tx_s; 104 u16 tx_s;
107 enum ccid3_hc_tx_states tx_state:8; 105 enum ccid3_hc_tx_states tx_state:8;
108 u8 tx_last_win_count; 106 u8 tx_last_win_count;
@@ -110,9 +108,7 @@ struct ccid3_hc_tx_sock {
110 struct timer_list tx_no_feedback_timer; 108 struct timer_list tx_no_feedback_timer;
111 ktime_t tx_t_ld; 109 ktime_t tx_t_ld;
112 ktime_t tx_t_nom; 110 ktime_t tx_t_nom;
113 u32 tx_delta;
114 struct tfrc_tx_hist_entry *tx_hist; 111 struct tfrc_tx_hist_entry *tx_hist;
115 struct ccid3_options_received tx_options_received;
116}; 112};
117 113
118static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) 114static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -126,21 +122,16 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
126enum ccid3_hc_rx_states { 122enum ccid3_hc_rx_states {
127 TFRC_RSTATE_NO_DATA = 1, 123 TFRC_RSTATE_NO_DATA = 1,
128 TFRC_RSTATE_DATA, 124 TFRC_RSTATE_DATA,
129 TFRC_RSTATE_TERM = 127,
130}; 125};
131 126
132/** 127/**
133 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket 128 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
134 * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3)
135 * @rx_rtt: Receiver estimate of rtt (non-standard)
136 * @rx_p: Current loss event rate (RFC 3448 5.4)
137 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) 129 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1)
138 * @rx_state: Receiver state, one of %ccid3_hc_rx_states 130 * @rx_state: Receiver state, one of %ccid3_hc_rx_states
139 * @rx_bytes_recv: Total sum of DCCP payload bytes 131 * @rx_bytes_recv: Total sum of DCCP payload bytes
140 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) 132 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3)
141 * @rx_rtt: Receiver estimate of RTT 133 * @rx_rtt: Receiver estimate of RTT
142 * @rx_tstamp_last_feedback: Time at which last feedback was sent 134 * @rx_tstamp_last_feedback: Time at which last feedback was sent
143 * @rx_tstamp_last_ack: Time at which last feedback was sent
144 * @rx_hist: Packet history (loss detection + RTT sampling) 135 * @rx_hist: Packet history (loss detection + RTT sampling)
145 * @rx_li_hist: Loss Interval database 136 * @rx_li_hist: Loss Interval database
146 * @rx_s: Received packet size in bytes 137 * @rx_s: Received packet size in bytes
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 8fc3cbf7907..497723c4d4b 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
116 cur->li_length = len; 116 cur->li_length = len;
117 tfrc_lh_calc_i_mean(lh); 117 tfrc_lh_calc_i_mean(lh);
118 118
119 return (lh->i_mean < old_i_mean); 119 return lh->i_mean < old_i_mean;
120} 120}
121 121
122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ 122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 3a4f414e94a..de8fe294bf0 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -38,18 +38,6 @@
38#include "packet_history.h" 38#include "packet_history.h"
39#include "../../dccp.h" 39#include "../../dccp.h"
40 40
41/**
42 * tfrc_tx_hist_entry - Simple singly-linked TX history list
43 * @next: next oldest entry (LIFO order)
44 * @seqno: sequence number of this entry
45 * @stamp: send time of packet with sequence number @seqno
46 */
47struct tfrc_tx_hist_entry {
48 struct tfrc_tx_hist_entry *next;
49 u64 seqno;
50 ktime_t stamp;
51};
52
53/* 41/*
54 * Transmitter History Routines 42 * Transmitter History Routines
55 */ 43 */
@@ -71,15 +59,6 @@ void tfrc_tx_packet_history_exit(void)
71 } 59 }
72} 60}
73 61
74static struct tfrc_tx_hist_entry *
75 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
76{
77 while (head != NULL && head->seqno != seqno)
78 head = head->next;
79
80 return head;
81}
82
83int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) 62int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
84{ 63{
85 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); 64 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -107,24 +86,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
107 *headp = NULL; 86 *headp = NULL;
108} 87}
109 88
110u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
111 const ktime_t now)
112{
113 u32 rtt = 0;
114 struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
115
116 if (packet != NULL) {
117 rtt = ktime_us_delta(now, packet->stamp);
118 /*
119 * Garbage-collect older (irrelevant) entries:
120 */
121 tfrc_tx_hist_purge(&packet->next);
122 }
123
124 return rtt;
125}
126
127
128/* 89/*
129 * Receiver History Routines 90 * Receiver History Routines
130 */ 91 */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 7df6c529999..7ee4a9d9d33 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include "tfrc.h" 41#include "tfrc.h"
42 42
43struct tfrc_tx_hist_entry; 43/**
44 * tfrc_tx_hist_entry - Simple singly-linked TX history list
45 * @next: next oldest entry (LIFO order)
46 * @seqno: sequence number of this entry
47 * @stamp: send time of packet with sequence number @seqno
48 */
49struct tfrc_tx_hist_entry {
50 struct tfrc_tx_hist_entry *next;
51 u64 seqno;
52 ktime_t stamp;
53};
54
55static inline struct tfrc_tx_hist_entry *
56 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
57{
58 while (head != NULL && head->seqno != seqno)
59 head = head->next;
60 return head;
61}
44 62
45extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); 63extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
46extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); 64extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
47extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
48 const u64 seqno, const ktime_t now);
49 65
50/* Subtraction a-b modulo-16, respects circular wrap-around */ 66/* Subtraction a-b modulo-16, respects circular wrap-around */
51#define SUB16(a, b) (((a) + 16 - (b)) & 0xF) 67#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
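tfrc_tx_hist_find_entry() moves into the header as an inline, and the caller is now responsible for purging everything older than the acknowledged entry. A self-contained sketch of that newest-first list pattern (add at the head, find by sequence number, drop the tail), using plain malloc/free instead of the kernel's slab cache:

#include <stdint.h>
#include <stdlib.h>

struct hist_entry {
	struct hist_entry *next;	/* next oldest entry (LIFO order) */
	uint64_t seqno;
};

/* Push a new entry at the head (newest first). Returns 0 on success. */
static int hist_add(struct hist_entry **headp, uint64_t seqno)
{
	struct hist_entry *e = malloc(sizeof(*e));

	if (!e)
		return -1;
	e->seqno = seqno;
	e->next = *headp;
	*headp = e;
	return 0;
}

/* Free the whole chain starting at *headp. */
static void hist_purge(struct hist_entry **headp)
{
	struct hist_entry *e = *headp, *next;

	for (; e != NULL; e = next) {
		next = e->next;
		free(e);
	}
	*headp = NULL;
}

static struct hist_entry *hist_find(struct hist_entry *head, uint64_t seqno)
{
	while (head != NULL && head->seqno != seqno)
		head = head->next;
	return head;
}

/* Typical ACK processing: locate the acked packet, then drop older entries. */
static void hist_ack(struct hist_entry **headp, uint64_t ack_seqno)
{
	struct hist_entry *acked = hist_find(*headp, ack_seqno);

	if (acked)
		hist_purge(&acked->next);
}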
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 01bb48e96c2..f8ee3f54977 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -57,6 +57,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
57 57
58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); 58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); 59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
60extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
60 61
61extern int tfrc_tx_packet_history_init(void); 62extern int tfrc_tx_packet_history_init(void);
62extern void tfrc_tx_packet_history_exit(void); 63extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 22ca1cf0eb5..a052a4377e2 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -687,3 +687,17 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
687 index = tfrc_binsearch(fvalue, 0); 687 index = tfrc_binsearch(fvalue, 0);
688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; 688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
689} 689}
690
691/**
692 * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
693 * When @loss_event_rate is large, there is a chance that p is truncated to 0.
694 * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
695 */
696u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
697{
698 if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
699 return 0;
700 if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
701 return 1000000;
702 return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
703}
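tfrc_invert_loss_event_rate() folds the option-value handling into one place: it maps the inverse loss rate onto the 10^6-scaled fraction p used by the throughput equation, clamping tiny results instead of letting them truncate to zero. A user-space mirror with worked values; the TFRC_SMALLEST_P stand-in below is an assumption:

#include <stdint.h>
#include <limits.h>

#define SMALLEST_P	40	/* assumption; stands in for TFRC_SMALLEST_P */

static uint32_t invert_loss_event_rate(uint32_t p_inv)
{
	if (p_inv == UINT_MAX)			/* no loss reported (RFC 4342, 8.5) */
		return 0;
	if (p_inv == 0)				/* map 1/0 onto 100% */
		return 1000000;
	uint32_t p = 1000000 / p_inv;		/* scaled_div(1, p_inv) */
	return p > SMALLEST_P ? p : SMALLEST_P;
}

/* invert_loss_event_rate(100) == 10000 (1% loss); a huge p_inv such as
 * 10000000 is clamped to SMALLEST_P instead of truncating to 0 and forcing
 * the sender back into slow start.
 */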
diff --git a/net/dccp/options.c b/net/dccp/options.c
index bfda087bd90..92718511eac 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -96,18 +96,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
96 } 96 }
97 97
98 /* 98 /*
99 * CCID-Specific Options (from RFC 4340, sec. 10.3):
100 *
101 * Option numbers 128 through 191 are for options sent from the
102 * HC-Sender to the HC-Receiver; option numbers 192 through 255
103 * are for options sent from the HC-Receiver to the HC-Sender.
104 *
105 * CCID-specific options are ignored during connection setup, as 99 * CCID-specific options are ignored during connection setup, as
106 * negotiation may still be in progress (see RFC 4340, 10.3). 100 * negotiation may still be in progress (see RFC 4340, 10.3).
107 * The same applies to Ack Vectors, as these depend on the CCID. 101 * The same applies to Ack Vectors, as these depend on the CCID.
108 *
109 */ 102 */
110 if (dreq != NULL && (opt >= 128 || 103 if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
111 opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) 104 opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
112 goto ignore_option; 105 goto ignore_option;
113 106
@@ -226,23 +219,15 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
226 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", 219 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
227 dccp_role(sk), elapsed_time); 220 dccp_role(sk), elapsed_time);
228 break; 221 break;
229 case 128 ... 191: { 222 case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
230 const u16 idx = value - options;
231
232 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, 223 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
233 opt, len, idx, 224 pkt_type, opt, value, len))
234 value) != 0)
235 goto out_invalid_option; 225 goto out_invalid_option;
236 }
237 break; 226 break;
238 case 192 ... 255: { 227 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
239 const u16 idx = value - options;
240
241 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, 228 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
242 opt, len, idx, 229 pkt_type, opt, value, len))
243 value) != 0)
244 goto out_invalid_option; 230 goto out_invalid_option;
245 }
246 break; 231 break;
247 default: 232 default:
248 DCCP_CRIT("DCCP(%p): option %d(len=%d) not " 233 DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
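The magic 128/192 boundaries become named constants. Per RFC 4340, 10.3, options 128..191 travel from the HC-Sender to the HC-Receiver and are therefore handed to the RX CCID, while 192..255 travel the other way and go to the TX CCID. A dispatch sketch with the ranges spelled out, since the exact values of the kernel's DCCPO_* constants are assumed here:

/* Option-range dispatch sketch (bounds per RFC 4340, 10.3; the kernel's
 * DCCPO_* constants are assumed to carry exactly these values).
 */
enum {
	MIN_RX_CCID_SPECIFIC = 128,	/* sent by HC-Sender, parsed by the RX half  */
	MAX_RX_CCID_SPECIFIC = 191,
	MIN_TX_CCID_SPECIFIC = 192,	/* sent by HC-Receiver, parsed by the TX half */
	MAX_TX_CCID_SPECIFIC = 255,
};

enum option_owner { OWNER_NONE, OWNER_RX_CCID, OWNER_TX_CCID };

static enum option_owner classify_option(unsigned int opt)
{
	if (opt >= MIN_RX_CCID_SPECIFIC && opt <= MAX_RX_CCID_SPECIFIC)
		return OWNER_RX_CCID;
	if (opt >= MIN_TX_CCID_SPECIFIC && opt <= MAX_TX_CCID_SPECIFIC)
		return OWNER_TX_CCID;
	return OWNER_NONE;
}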
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index baeb1eaf011..2ef115277be 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -693,22 +693,22 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
693 aux = scp->accessdata.acc_userl; 693 aux = scp->accessdata.acc_userl;
694 *skb_put(skb, 1) = aux; 694 *skb_put(skb, 1) = aux;
695 if (aux > 0) 695 if (aux > 0)
696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux); 696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
697 697
698 aux = scp->accessdata.acc_passl; 698 aux = scp->accessdata.acc_passl;
699 *skb_put(skb, 1) = aux; 699 *skb_put(skb, 1) = aux;
700 if (aux > 0) 700 if (aux > 0)
701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux); 701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
702 702
703 aux = scp->accessdata.acc_accl; 703 aux = scp->accessdata.acc_accl;
704 *skb_put(skb, 1) = aux; 704 *skb_put(skb, 1) = aux;
705 if (aux > 0) 705 if (aux > 0)
706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); 706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
707 707
708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); 708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
709 *skb_put(skb, 1) = aux; 709 *skb_put(skb, 1) = aux;
710 if (aux > 0) 710 if (aux > 0)
711 memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux); 711 memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux);
712 712
713 scp->persist = dn_nsp_persist(sk); 713 scp->persist = dn_nsp_persist(sk);
714 scp->persist_fxn = dn_nsp_retrans_conninit; 714 scp->persist_fxn = dn_nsp_retrans_conninit;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index dc54bd0d083..f8c1ae4b41f 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -392,7 +392,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
392 dev_queue_xmit(skb); 392 dev_queue_xmit(skb);
393 dev_put(dev); 393 dev_put(dev);
394 mutex_unlock(&econet_mutex); 394 mutex_unlock(&econet_mutex);
395 return(len); 395 return len;
396 396
397 out_free: 397 out_free:
398 kfree_skb(skb); 398 kfree_skb(skb);
@@ -637,7 +637,7 @@ static int econet_create(struct net *net, struct socket *sock, int protocol,
637 eo->num = protocol; 637 eo->num = protocol;
638 638
639 econet_insert_socket(&econet_sklist, sk); 639 econet_insert_socket(&econet_sklist, sk);
640 return(0); 640 return 0;
641out: 641out:
642 return err; 642 return err;
643} 643}
@@ -1009,7 +1009,6 @@ static int __init aun_udp_initialise(void)
1009 struct sockaddr_in sin; 1009 struct sockaddr_in sin;
1010 1010
1011 skb_queue_head_init(&aun_queue); 1011 skb_queue_head_init(&aun_queue);
1012 spin_lock_init(&aun_queue_lock);
1013 setup_timer(&ab_cleanup_timer, ab_cleanup, 0); 1012 setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
1014 ab_cleanup_timer.expires = jiffies + (HZ*2); 1013 ab_cleanup_timer.expires = jiffies + (HZ*2);
1015 add_timer(&ab_cleanup_timer); 1014 add_timer(&ab_cleanup_timer);
@@ -1167,7 +1166,6 @@ static int __init econet_proto_init(void)
1167 goto out; 1166 goto out;
1168 sock_register(&econet_family_ops); 1167 sock_register(&econet_family_ops);
1169#ifdef CONFIG_ECONET_AUNUDP 1168#ifdef CONFIG_ECONET_AUNUDP
1170 spin_lock_init(&aun_queue_lock);
1171 aun_udp_initialise(); 1169 aun_udp_initialise();
1172#endif 1170#endif
1173#ifdef CONFIG_ECONET_NATIVE 1171#ifdef CONFIG_ECONET_NATIVE
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 215c83986a9..f00ef2f1d81 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -367,7 +367,7 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
367EXPORT_SYMBOL(alloc_etherdev_mq); 367EXPORT_SYMBOL(alloc_etherdev_mq);
368 368
369static size_t _format_mac_addr(char *buf, int buflen, 369static size_t _format_mac_addr(char *buf, int buflen,
370 const unsigned char *addr, int len) 370 const unsigned char *addr, int len)
371{ 371{
372 int i; 372 int i;
373 char *cp = buf; 373 char *cp = buf;
@@ -376,7 +376,7 @@ static size_t _format_mac_addr(char *buf, int buflen,
376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]); 376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
377 if (i == len - 1) 377 if (i == len - 1)
378 break; 378 break;
379 cp += strlcpy(cp, ":", buflen - (cp - buf)); 379 cp += scnprintf(cp, buflen - (cp - buf), ":");
380 } 380 }
381 return cp - buf; 381 return cp - buf;
382} 382}
@@ -386,7 +386,7 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
386 size_t l; 386 size_t l;
387 387
388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len); 388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
389 l += strlcpy(buf + l, "\n", PAGE_SIZE - l); 389 l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
390 return ((ssize_t) l); 390 return (ssize_t)l;
391} 391}
392EXPORT_SYMBOL(sysfs_format_mac); 392EXPORT_SYMBOL(sysfs_format_mac);
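The switch to scnprintf() matters for the running offset: scnprintf() returns the number of characters actually stored (never more than the remaining space), whereas strlcpy() returns the length of the source string, which can exceed the buffer and push the offset past its end. A user-space sketch of the safe pattern, wrapping vsnprintf() to emulate scnprintf()'s return convention:

#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>

/* Minimal stand-in for the kernel's scnprintf(): returns the number of
 * characters actually stored (excluding the NUL), never more than size - 1.
 */
static size_t scn_printf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int n;

	if (size == 0)
		return 0;
	va_start(args, fmt);
	n = vsnprintf(buf, size, fmt, args);
	va_end(args);
	if (n < 0)
		return 0;
	return (size_t)n < size ? (size_t)n : size - 1;
}

/* Format a MAC address; the offset stays consistent even if buf is too small. */
static size_t format_mac(char *buf, size_t buflen, const unsigned char *addr, int len)
{
	size_t off = 0;
	int i;

	for (i = 0; i < len; i++)
		off += scn_printf(buf + off, buflen - off,
				  i == len - 1 ? "%02x" : "%02x:", addr[i]);
	return off;
}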
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 571f8950ed0..5462e2d147a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -215,8 +215,15 @@ config NET_IPIP
215 be inserted in and removed from the running kernel whenever you 215 be inserted in and removed from the running kernel whenever you
216 want). Most people won't need this and can say N. 216 want). Most people won't need this and can say N.
217 217
218config NET_IPGRE_DEMUX
219 tristate "IP: GRE demultiplexer"
220 help
 221	  This is a helper module to demultiplex GRE packets based on the GRE version field.
222 Required by ip_gre and pptp modules.
223
218config NET_IPGRE 224config NET_IPGRE
219 tristate "IP: GRE tunnels over IP" 225 tristate "IP: GRE tunnels over IP"
226 depends on NET_IPGRE_DEMUX
220 help 227 help
221 Tunneling means encapsulating data of one protocol type within 228 Tunneling means encapsulating data of one protocol type within
222 another protocol and sending it over a channel that understands the 229 another protocol and sending it over a channel that understands the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 80ff87ce43a..4978d22f9a7 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
21obj-$(CONFIG_IP_MROUTE) += ipmr.o 21obj-$(CONFIG_IP_MROUTE) += ipmr.o
22obj-$(CONFIG_NET_IPIP) += ipip.o 22obj-$(CONFIG_NET_IPIP) += ipip.o
23obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
23obj-$(CONFIG_NET_IPGRE) += ip_gre.o 24obj-$(CONFIG_NET_IPGRE) += ip_gre.o
24obj-$(CONFIG_SYN_COOKIES) += syncookies.o 25obj-$(CONFIG_SYN_COOKIES) += syncookies.o
25obj-$(CONFIG_INET_AH) += ah4.o 26obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6a1100c25a9..f581f77d109 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -227,18 +227,16 @@ EXPORT_SYMBOL(inet_ehash_secret);
227 227
228/* 228/*
229 * inet_ehash_secret must be set exactly once 229 * inet_ehash_secret must be set exactly once
230 * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
231 */ 230 */
232void build_ehash_secret(void) 231void build_ehash_secret(void)
233{ 232{
234 u32 rnd; 233 u32 rnd;
234
235 do { 235 do {
236 get_random_bytes(&rnd, sizeof(rnd)); 236 get_random_bytes(&rnd, sizeof(rnd));
237 } while (rnd == 0); 237 } while (rnd == 0);
238 spin_lock_bh(&inetsw_lock); 238
239 if (!inet_ehash_secret) 239 cmpxchg(&inet_ehash_secret, 0, rnd);
240 inet_ehash_secret = rnd;
241 spin_unlock_bh(&inetsw_lock);
242} 240}
243EXPORT_SYMBOL(build_ehash_secret); 241EXPORT_SYMBOL(build_ehash_secret);
244 242
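build_ehash_secret() drops the (ab)used inetsw_lock in favour of a lock-free compare-and-swap: whichever caller wins the race installs its nonzero random value, and every later call leaves it untouched. A sketch of the idiom in C11 atomics, with rand() standing in for get_random_bytes():

#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>

static _Atomic uint32_t secret;

/* Once-only initialisation: generate a nonzero value, then install it only if
 * the slot is still zero. Concurrent callers all end up seeing the same winner.
 */
static void build_secret(void)
{
	uint32_t rnd, expected = 0;

	do {
		rnd = (uint32_t)rand();
	} while (rnd == 0);

	atomic_compare_exchange_strong(&secret, &expected, rnd);
}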
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96c1955b3e2..4083c186fd3 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -55,7 +55,7 @@
55 * Stuart Cheshire : Metricom and grat arp fixes 55 * Stuart Cheshire : Metricom and grat arp fixes
56 * *** FOR 2.1 clean this up *** 56 * *** FOR 2.1 clean this up ***
57 * Lawrence V. Stefani: (08/12/96) Added FDDI support. 57 * Lawrence V. Stefani: (08/12/96) Added FDDI support.
58 * Alan Cox : Took the AP1000 nasty FDDI hack and 58 * Alan Cox : Took the AP1000 nasty FDDI hack and
59 * folded into the mainstream FDDI code. 59 * folded into the mainstream FDDI code.
60 * Ack spit, Linus how did you allow that 60 * Ack spit, Linus how did you allow that
61 * one in... 61 * one in...
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(clip_tbl_hook);
120#endif 120#endif
121 121
122#include <asm/system.h> 122#include <asm/system.h>
123#include <asm/uaccess.h> 123#include <linux/uaccess.h>
124 124
125#include <linux/netfilter_arp.h> 125#include <linux/netfilter_arp.h>
126 126
@@ -173,32 +173,32 @@ const struct neigh_ops arp_broken_ops = {
173EXPORT_SYMBOL(arp_broken_ops); 173EXPORT_SYMBOL(arp_broken_ops);
174 174
175struct neigh_table arp_tbl = { 175struct neigh_table arp_tbl = {
176 .family = AF_INET, 176 .family = AF_INET,
177 .entry_size = sizeof(struct neighbour) + 4, 177 .entry_size = sizeof(struct neighbour) + 4,
178 .key_len = 4, 178 .key_len = 4,
179 .hash = arp_hash, 179 .hash = arp_hash,
180 .constructor = arp_constructor, 180 .constructor = arp_constructor,
181 .proxy_redo = parp_redo, 181 .proxy_redo = parp_redo,
182 .id = "arp_cache", 182 .id = "arp_cache",
183 .parms = { 183 .parms = {
184 .tbl = &arp_tbl, 184 .tbl = &arp_tbl,
185 .base_reachable_time = 30 * HZ, 185 .base_reachable_time = 30 * HZ,
186 .retrans_time = 1 * HZ, 186 .retrans_time = 1 * HZ,
187 .gc_staletime = 60 * HZ, 187 .gc_staletime = 60 * HZ,
188 .reachable_time = 30 * HZ, 188 .reachable_time = 30 * HZ,
189 .delay_probe_time = 5 * HZ, 189 .delay_probe_time = 5 * HZ,
190 .queue_len = 3, 190 .queue_len = 3,
191 .ucast_probes = 3, 191 .ucast_probes = 3,
192 .mcast_probes = 3, 192 .mcast_probes = 3,
193 .anycast_delay = 1 * HZ, 193 .anycast_delay = 1 * HZ,
194 .proxy_delay = (8 * HZ) / 10, 194 .proxy_delay = (8 * HZ) / 10,
195 .proxy_qlen = 64, 195 .proxy_qlen = 64,
196 .locktime = 1 * HZ, 196 .locktime = 1 * HZ,
197 }, 197 },
198 .gc_interval = 30 * HZ, 198 .gc_interval = 30 * HZ,
199 .gc_thresh1 = 128, 199 .gc_thresh1 = 128,
200 .gc_thresh2 = 512, 200 .gc_thresh2 = 512,
201 .gc_thresh3 = 1024, 201 .gc_thresh3 = 1024,
202}; 202};
203EXPORT_SYMBOL(arp_tbl); 203EXPORT_SYMBOL(arp_tbl);
204 204
@@ -233,7 +233,7 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev)
233 233
234static int arp_constructor(struct neighbour *neigh) 234static int arp_constructor(struct neighbour *neigh)
235{ 235{
236 __be32 addr = *(__be32*)neigh->primary_key; 236 __be32 addr = *(__be32 *)neigh->primary_key;
237 struct net_device *dev = neigh->dev; 237 struct net_device *dev = neigh->dev;
238 struct in_device *in_dev; 238 struct in_device *in_dev;
239 struct neigh_parms *parms; 239 struct neigh_parms *parms;
@@ -296,16 +296,19 @@ static int arp_constructor(struct neighbour *neigh)
296 neigh->ops = &arp_broken_ops; 296 neigh->ops = &arp_broken_ops;
297 neigh->output = neigh->ops->output; 297 neigh->output = neigh->ops->output;
298 return 0; 298 return 0;
299#else
300 break;
299#endif 301#endif
300 ;} 302 }
301#endif 303#endif
302 if (neigh->type == RTN_MULTICAST) { 304 if (neigh->type == RTN_MULTICAST) {
303 neigh->nud_state = NUD_NOARP; 305 neigh->nud_state = NUD_NOARP;
304 arp_mc_map(addr, neigh->ha, dev, 1); 306 arp_mc_map(addr, neigh->ha, dev, 1);
305 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { 307 } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
306 neigh->nud_state = NUD_NOARP; 308 neigh->nud_state = NUD_NOARP;
307 memcpy(neigh->ha, dev->dev_addr, dev->addr_len); 309 memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
308 } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) { 310 } else if (neigh->type == RTN_BROADCAST ||
311 (dev->flags & IFF_POINTOPOINT)) {
309 neigh->nud_state = NUD_NOARP; 312 neigh->nud_state = NUD_NOARP;
310 memcpy(neigh->ha, dev->broadcast, dev->addr_len); 313 memcpy(neigh->ha, dev->broadcast, dev->addr_len);
311 } 314 }
@@ -315,7 +318,7 @@ static int arp_constructor(struct neighbour *neigh)
315 else 318 else
316 neigh->ops = &arp_generic_ops; 319 neigh->ops = &arp_generic_ops;
317 320
318 if (neigh->nud_state&NUD_VALID) 321 if (neigh->nud_state & NUD_VALID)
319 neigh->output = neigh->ops->connected_output; 322 neigh->output = neigh->ops->connected_output;
320 else 323 else
321 neigh->output = neigh->ops->output; 324 neigh->output = neigh->ops->output;
@@ -334,7 +337,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
334 __be32 saddr = 0; 337 __be32 saddr = 0;
335 u8 *dst_ha = NULL; 338 u8 *dst_ha = NULL;
336 struct net_device *dev = neigh->dev; 339 struct net_device *dev = neigh->dev;
337 __be32 target = *(__be32*)neigh->primary_key; 340 __be32 target = *(__be32 *)neigh->primary_key;
338 int probes = atomic_read(&neigh->probes); 341 int probes = atomic_read(&neigh->probes);
339 struct in_device *in_dev; 342 struct in_device *in_dev;
340 343
@@ -347,7 +350,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
347 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 350 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
348 default: 351 default:
349 case 0: /* By default announce any local IP */ 352 case 0: /* By default announce any local IP */
350 if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL) 353 if (skb && inet_addr_type(dev_net(dev),
354 ip_hdr(skb)->saddr) == RTN_LOCAL)
351 saddr = ip_hdr(skb)->saddr; 355 saddr = ip_hdr(skb)->saddr;
352 break; 356 break;
353 case 1: /* Restrict announcements of saddr in same subnet */ 357 case 1: /* Restrict announcements of saddr in same subnet */
@@ -369,16 +373,21 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
369 if (!saddr) 373 if (!saddr)
370 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); 374 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
371 375
372 if ((probes -= neigh->parms->ucast_probes) < 0) { 376 probes -= neigh->parms->ucast_probes;
373 if (!(neigh->nud_state&NUD_VALID)) 377 if (probes < 0) {
374 printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n"); 378 if (!(neigh->nud_state & NUD_VALID))
379 printk(KERN_DEBUG
380 "trying to ucast probe in NUD_INVALID\n");
375 dst_ha = neigh->ha; 381 dst_ha = neigh->ha;
376 read_lock_bh(&neigh->lock); 382 read_lock_bh(&neigh->lock);
377 } else if ((probes -= neigh->parms->app_probes) < 0) { 383 } else {
384 probes -= neigh->parms->app_probes;
385 if (probes < 0) {
378#ifdef CONFIG_ARPD 386#ifdef CONFIG_ARPD
379 neigh_app_ns(neigh); 387 neigh_app_ns(neigh);
380#endif 388#endif
381 return; 389 return;
390 }
382 } 391 }
383 392
384 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, 393 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
@@ -451,7 +460,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
451 * is allowed to use this function, it is scheduled to be removed. --ANK 460 * is allowed to use this function, it is scheduled to be removed. --ANK
452 */ 461 */
453 462
454static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev) 463static int arp_set_predefined(int addr_hint, unsigned char *haddr,
464 __be32 paddr, struct net_device *dev)
455{ 465{
456 switch (addr_hint) { 466 switch (addr_hint) {
457 case RTN_LOCAL: 467 case RTN_LOCAL:
@@ -483,7 +493,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
483 493
484 paddr = skb_rtable(skb)->rt_gateway; 494 paddr = skb_rtable(skb)->rt_gateway;
485 495
486 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) 496 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
497 paddr, dev))
487 return 0; 498 return 0;
488 499
489 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); 500 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -515,13 +526,14 @@ int arp_bind_neighbour(struct dst_entry *dst)
515 return -EINVAL; 526 return -EINVAL;
516 if (n == NULL) { 527 if (n == NULL) {
517 __be32 nexthop = ((struct rtable *)dst)->rt_gateway; 528 __be32 nexthop = ((struct rtable *)dst)->rt_gateway;
518 if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) 529 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
519 nexthop = 0; 530 nexthop = 0;
520 n = __neigh_lookup_errno( 531 n = __neigh_lookup_errno(
521#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 532#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
522 dev->type == ARPHRD_ATM ? clip_tbl_hook : 533 dev->type == ARPHRD_ATM ?
534 clip_tbl_hook :
523#endif 535#endif
524 &arp_tbl, &nexthop, dev); 536 &arp_tbl, &nexthop, dev);
525 if (IS_ERR(n)) 537 if (IS_ERR(n))
526 return PTR_ERR(n); 538 return PTR_ERR(n);
527 dst->neighbour = n; 539 dst->neighbour = n;
@@ -543,8 +555,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
543 555
544 if (!IN_DEV_PROXY_ARP(in_dev)) 556 if (!IN_DEV_PROXY_ARP(in_dev))
545 return 0; 557 return 0;
546 558 imi = IN_DEV_MEDIUM_ID(in_dev);
547 if ((imi = IN_DEV_MEDIUM_ID(in_dev)) == 0) 559 if (imi == 0)
548 return 1; 560 return 1;
549 if (imi == -1) 561 if (imi == -1)
550 return 0; 562 return 0;
@@ -555,7 +567,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
555 if (out_dev) 567 if (out_dev)
556 omi = IN_DEV_MEDIUM_ID(out_dev); 568 omi = IN_DEV_MEDIUM_ID(out_dev);
557 569
558 return (omi != imi && omi != -1); 570 return omi != imi && omi != -1;
559} 571}
560 572
561/* 573/*
@@ -685,7 +697,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
685 arp->ar_pln = 4; 697 arp->ar_pln = 4;
686 arp->ar_op = htons(type); 698 arp->ar_op = htons(type);
687 699
688 arp_ptr=(unsigned char *)(arp+1); 700 arp_ptr = (unsigned char *)(arp + 1);
689 701
690 memcpy(arp_ptr, src_hw, dev->addr_len); 702 memcpy(arp_ptr, src_hw, dev->addr_len);
691 arp_ptr += dev->addr_len; 703 arp_ptr += dev->addr_len;
@@ -735,9 +747,8 @@ void arp_send(int type, int ptype, __be32 dest_ip,
735 747
736 skb = arp_create(type, ptype, dest_ip, dev, src_ip, 748 skb = arp_create(type, ptype, dest_ip, dev, src_ip,
737 dest_hw, src_hw, target_hw); 749 dest_hw, src_hw, target_hw);
738 if (skb == NULL) { 750 if (skb == NULL)
739 return; 751 return;
740 }
741 752
742 arp_xmit(skb); 753 arp_xmit(skb);
743} 754}
@@ -815,7 +826,7 @@ static int arp_process(struct sk_buff *skb)
815/* 826/*
816 * Extract fields 827 * Extract fields
817 */ 828 */
818 arp_ptr= (unsigned char *)(arp+1); 829 arp_ptr = (unsigned char *)(arp + 1);
819 sha = arp_ptr; 830 sha = arp_ptr;
820 arp_ptr += dev->addr_len; 831 arp_ptr += dev->addr_len;
821 memcpy(&sip, arp_ptr, 4); 832 memcpy(&sip, arp_ptr, 4);
@@ -869,16 +880,17 @@ static int arp_process(struct sk_buff *skb)
869 addr_type = rt->rt_type; 880 addr_type = rt->rt_type;
870 881
871 if (addr_type == RTN_LOCAL) { 882 if (addr_type == RTN_LOCAL) {
872 int dont_send = 0; 883 int dont_send;
873 884
874 if (!dont_send) 885 dont_send = arp_ignore(in_dev, sip, tip);
875 dont_send |= arp_ignore(in_dev,sip,tip);
876 if (!dont_send && IN_DEV_ARPFILTER(in_dev)) 886 if (!dont_send && IN_DEV_ARPFILTER(in_dev))
877 dont_send |= arp_filter(sip,tip,dev); 887 dont_send |= arp_filter(sip, tip, dev);
878 if (!dont_send) { 888 if (!dont_send) {
879 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 889 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
880 if (n) { 890 if (n) {
881 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 891 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
892 dev, tip, sha, dev->dev_addr,
893 sha);
882 neigh_release(n); 894 neigh_release(n);
883 } 895 }
884 } 896 }
@@ -887,8 +899,7 @@ static int arp_process(struct sk_buff *skb)
887 if (addr_type == RTN_UNICAST && 899 if (addr_type == RTN_UNICAST &&
888 (arp_fwd_proxy(in_dev, dev, rt) || 900 (arp_fwd_proxy(in_dev, dev, rt) ||
889 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || 901 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
890 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) 902 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
891 {
892 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 903 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
893 if (n) 904 if (n)
894 neigh_release(n); 905 neigh_release(n);
@@ -896,9 +907,12 @@ static int arp_process(struct sk_buff *skb)
896 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || 907 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
897 skb->pkt_type == PACKET_HOST || 908 skb->pkt_type == PACKET_HOST ||
898 in_dev->arp_parms->proxy_delay == 0) { 909 in_dev->arp_parms->proxy_delay == 0) {
899 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 910 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
911 dev, tip, sha, dev->dev_addr,
912 sha);
900 } else { 913 } else {
901 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); 914 pneigh_enqueue(&arp_tbl,
915 in_dev->arp_parms, skb);
902 return 0; 916 return 0;
903 } 917 }
904 goto out; 918 goto out;
@@ -939,7 +953,8 @@ static int arp_process(struct sk_buff *skb)
939 if (arp->ar_op != htons(ARPOP_REPLY) || 953 if (arp->ar_op != htons(ARPOP_REPLY) ||
940 skb->pkt_type != PACKET_HOST) 954 skb->pkt_type != PACKET_HOST)
941 state = NUD_STALE; 955 state = NUD_STALE;
942 neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0); 956 neigh_update(n, sha, state,
957 override ? NEIGH_UPDATE_F_OVERRIDE : 0);
943 neigh_release(n); 958 neigh_release(n);
944 } 959 }
945 960
@@ -975,7 +990,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
975 arp->ar_pln != 4) 990 arp->ar_pln != 4)
976 goto freeskb; 991 goto freeskb;
977 992
978 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 993 skb = skb_share_check(skb, GFP_ATOMIC);
994 if (skb == NULL)
979 goto out_of_mem; 995 goto out_of_mem;
980 996
981 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); 997 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
@@ -1019,7 +1035,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1019 return -EINVAL; 1035 return -EINVAL;
1020 if (!dev && (r->arp_flags & ATF_COM)) { 1036 if (!dev && (r->arp_flags & ATF_COM)) {
1021 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, 1037 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
1022 r->arp_ha.sa_data); 1038 r->arp_ha.sa_data);
1023 if (!dev) 1039 if (!dev)
1024 return -ENODEV; 1040 return -ENODEV;
1025 } 1041 }
@@ -1033,7 +1049,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1033} 1049}
1034 1050
1035static int arp_req_set(struct net *net, struct arpreq *r, 1051static int arp_req_set(struct net *net, struct arpreq *r,
1036 struct net_device * dev) 1052 struct net_device *dev)
1037{ 1053{
1038 __be32 ip; 1054 __be32 ip;
1039 struct neighbour *neigh; 1055 struct neighbour *neigh;
@@ -1046,10 +1062,11 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1046 if (r->arp_flags & ATF_PERM) 1062 if (r->arp_flags & ATF_PERM)
1047 r->arp_flags |= ATF_COM; 1063 r->arp_flags |= ATF_COM;
1048 if (dev == NULL) { 1064 if (dev == NULL) {
1049 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1065 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1050 .tos = RTO_ONLINK } } }; 1066 .tos = RTO_ONLINK } };
1051 struct rtable * rt; 1067 struct rtable *rt;
1052 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1068 err = ip_route_output_key(net, &rt, &fl);
1069 if (err != 0)
1053 return err; 1070 return err;
1054 dev = rt->dst.dev; 1071 dev = rt->dst.dev;
1055 ip_rt_put(rt); 1072 ip_rt_put(rt);
@@ -1083,9 +1100,9 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1083 unsigned state = NUD_STALE; 1100 unsigned state = NUD_STALE;
1084 if (r->arp_flags & ATF_PERM) 1101 if (r->arp_flags & ATF_PERM)
1085 state = NUD_PERMANENT; 1102 state = NUD_PERMANENT;
1086 err = neigh_update(neigh, (r->arp_flags&ATF_COM) ? 1103 err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
1087 r->arp_ha.sa_data : NULL, state, 1104 r->arp_ha.sa_data : NULL, state,
1088 NEIGH_UPDATE_F_OVERRIDE| 1105 NEIGH_UPDATE_F_OVERRIDE |
1089 NEIGH_UPDATE_F_ADMIN); 1106 NEIGH_UPDATE_F_ADMIN);
1090 neigh_release(neigh); 1107 neigh_release(neigh);
1091 } 1108 }
@@ -1094,12 +1111,12 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1094 1111
1095static unsigned arp_state_to_flags(struct neighbour *neigh) 1112static unsigned arp_state_to_flags(struct neighbour *neigh)
1096{ 1113{
1097 unsigned flags = 0;
1098 if (neigh->nud_state&NUD_PERMANENT) 1114 if (neigh->nud_state&NUD_PERMANENT)
1099 flags = ATF_PERM|ATF_COM; 1115 return ATF_PERM | ATF_COM;
1100 else if (neigh->nud_state&NUD_VALID) 1116 else if (neigh->nud_state&NUD_VALID)
1101 flags = ATF_COM; 1117 return ATF_COM;
1102 return flags; 1118 else
1119 return 0;
1103} 1120}
1104 1121
1105/* 1122/*
@@ -1142,7 +1159,7 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
1142} 1159}
1143 1160
1144static int arp_req_delete(struct net *net, struct arpreq *r, 1161static int arp_req_delete(struct net *net, struct arpreq *r,
1145 struct net_device * dev) 1162 struct net_device *dev)
1146{ 1163{
1147 int err; 1164 int err;
1148 __be32 ip; 1165 __be32 ip;
@@ -1153,10 +1170,11 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1153 1170
1154 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; 1171 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1155 if (dev == NULL) { 1172 if (dev == NULL) {
1156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1173 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1157 .tos = RTO_ONLINK } } }; 1174 .tos = RTO_ONLINK } };
1158 struct rtable * rt; 1175 struct rtable *rt;
1159 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1176 err = ip_route_output_key(net, &rt, &fl);
1177 if (err != 0)
1160 return err; 1178 return err;
1161 dev = rt->dst.dev; 1179 dev = rt->dst.dev;
1162 ip_rt_put(rt); 1180 ip_rt_put(rt);
@@ -1166,7 +1184,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1166 err = -ENXIO; 1184 err = -ENXIO;
1167 neigh = neigh_lookup(&arp_tbl, &ip, dev); 1185 neigh = neigh_lookup(&arp_tbl, &ip, dev);
1168 if (neigh) { 1186 if (neigh) {
1169 if (neigh->nud_state&~NUD_NOARP) 1187 if (neigh->nud_state & ~NUD_NOARP)
1170 err = neigh_update(neigh, NULL, NUD_FAILED, 1188 err = neigh_update(neigh, NULL, NUD_FAILED,
1171 NEIGH_UPDATE_F_OVERRIDE| 1189 NEIGH_UPDATE_F_OVERRIDE|
1172 NEIGH_UPDATE_F_ADMIN); 1190 NEIGH_UPDATE_F_ADMIN);
@@ -1186,24 +1204,24 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1186 struct net_device *dev = NULL; 1204 struct net_device *dev = NULL;
1187 1205
1188 switch (cmd) { 1206 switch (cmd) {
1189 case SIOCDARP: 1207 case SIOCDARP:
1190 case SIOCSARP: 1208 case SIOCSARP:
1191 if (!capable(CAP_NET_ADMIN)) 1209 if (!capable(CAP_NET_ADMIN))
1192 return -EPERM; 1210 return -EPERM;
1193 case SIOCGARP: 1211 case SIOCGARP:
1194 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1212 err = copy_from_user(&r, arg, sizeof(struct arpreq));
1195 if (err) 1213 if (err)
1196 return -EFAULT; 1214 return -EFAULT;
1197 break; 1215 break;
1198 default: 1216 default:
1199 return -EINVAL; 1217 return -EINVAL;
1200 } 1218 }
1201 1219
1202 if (r.arp_pa.sa_family != AF_INET) 1220 if (r.arp_pa.sa_family != AF_INET)
1203 return -EPFNOSUPPORT; 1221 return -EPFNOSUPPORT;
1204 1222
1205 if (!(r.arp_flags & ATF_PUBL) && 1223 if (!(r.arp_flags & ATF_PUBL) &&
1206 (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB))) 1224 (r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
1207 return -EINVAL; 1225 return -EINVAL;
1208 if (!(r.arp_flags & ATF_NETMASK)) 1226 if (!(r.arp_flags & ATF_NETMASK))
1209 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = 1227 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
@@ -1211,7 +1229,8 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1211 rtnl_lock(); 1229 rtnl_lock();
1212 if (r.arp_dev[0]) { 1230 if (r.arp_dev[0]) {
1213 err = -ENODEV; 1231 err = -ENODEV;
1214 if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL) 1232 dev = __dev_get_by_name(net, r.arp_dev);
1233 if (dev == NULL)
1215 goto out; 1234 goto out;
1216 1235
1217 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ 1236 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
@@ -1243,7 +1262,8 @@ out:
1243 return err; 1262 return err;
1244} 1263}
1245 1264
1246static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 1265static int arp_netdev_event(struct notifier_block *this, unsigned long event,
1266 void *ptr)
1247{ 1267{
1248 struct net_device *dev = ptr; 1268 struct net_device *dev = ptr;
1249 1269
@@ -1311,12 +1331,13 @@ static char *ax2asc2(ax25_address *a, char *buf)
1311 for (n = 0, s = buf; n < 6; n++) { 1331 for (n = 0, s = buf; n < 6; n++) {
1312 c = (a->ax25_call[n] >> 1) & 0x7F; 1332 c = (a->ax25_call[n] >> 1) & 0x7F;
1313 1333
1314 if (c != ' ') *s++ = c; 1334 if (c != ' ')
1335 *s++ = c;
1315 } 1336 }
1316 1337
1317 *s++ = '-'; 1338 *s++ = '-';
1318 1339 n = (a->ax25_call[6] >> 1) & 0x0F;
1319 if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) { 1340 if (n > 9) {
1320 *s++ = '1'; 1341 *s++ = '1';
1321 n -= 10; 1342 n -= 10;
1322 } 1343 }
@@ -1325,10 +1346,9 @@ static char *ax2asc2(ax25_address *a, char *buf)
1325 *s++ = '\0'; 1346 *s++ = '\0';
1326 1347
1327 if (*buf == '\0' || *buf == '-') 1348 if (*buf == '\0' || *buf == '-')
1328 return "*"; 1349 return "*";
1329 1350
1330 return buf; 1351 return buf;
1331
1332} 1352}
1333#endif /* CONFIG_AX25 */ 1353#endif /* CONFIG_AX25 */
1334 1354
@@ -1408,10 +1428,10 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
1408/* ------------------------------------------------------------------------ */ 1428/* ------------------------------------------------------------------------ */
1409 1429
1410static const struct seq_operations arp_seq_ops = { 1430static const struct seq_operations arp_seq_ops = {
1411 .start = arp_seq_start, 1431 .start = arp_seq_start,
1412 .next = neigh_seq_next, 1432 .next = neigh_seq_next,
1413 .stop = neigh_seq_stop, 1433 .stop = neigh_seq_stop,
1414 .show = arp_seq_show, 1434 .show = arp_seq_show,
1415}; 1435};
1416 1436
1417static int arp_seq_open(struct inode *inode, struct file *file) 1437static int arp_seq_open(struct inode *inode, struct file *file)
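As context for the ioctl path cleaned up above: user space drives it through SIOCSARP/SIOCDARP/SIOCGARP with a struct arpreq whose arp_pa must be AF_INET. A hypothetical lookup of a neighbour's hardware address could look like the sketch below; the function name and error handling are illustrative, only the arpreq fields and the AF_INET requirement come from the kernel code above.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if_arp.h>

static int lookup_arp(const char *ipstr, const char *ifname)
{
	struct arpreq req;
	struct sockaddr_in *sin = (struct sockaddr_in *)&req.arp_pa;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&req, 0, sizeof(req));
	sin->sin_family = AF_INET;	/* arp_ioctl() rejects anything else */
	inet_pton(AF_INET, ipstr, &sin->sin_addr);
	strncpy(req.arp_dev, ifname, sizeof(req.arp_dev) - 1);

	if (ioctl(fd, SIOCGARP, &req) < 0) {
		perror("SIOCGARP");
		close(fd);
		return -1;
	}
	/* req.arp_ha.sa_data now holds the MAC, req.arp_flags the ATF_* bits */
	close(fd);
	return 0;
}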
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 721a8a37b45..174be6caa5c 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,6 +73,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
73 inet->inet_id = jiffies; 73 inet->inet_id = jiffies;
74 74
75 sk_dst_set(sk, &rt->dst); 75 sk_dst_set(sk, &rt->dst);
76 return(0); 76 return 0;
77} 77}
78EXPORT_SYMBOL(ip4_datagram_connect); 78EXPORT_SYMBOL(ip4_datagram_connect);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index da14c49284f..c2ff48fa18c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -209,7 +209,7 @@ static void inetdev_destroy(struct in_device *in_dev)
209 inet_free_ifa(ifa); 209 inet_free_ifa(ifa);
210 } 210 }
211 211
212 dev->ip_ptr = NULL; 212 rcu_assign_pointer(dev->ip_ptr, NULL);
213 213
214 devinet_sysctl_unregister(in_dev); 214 devinet_sysctl_unregister(in_dev);
215 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 215 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
@@ -1059,7 +1059,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1059 switch (event) { 1059 switch (event) {
1060 case NETDEV_REGISTER: 1060 case NETDEV_REGISTER:
1061 printk(KERN_DEBUG "inetdev_event: bug\n"); 1061 printk(KERN_DEBUG "inetdev_event: bug\n");
1062 dev->ip_ptr = NULL; 1062 rcu_assign_pointer(dev->ip_ptr, NULL);
1063 break; 1063 break;
1064 case NETDEV_UP: 1064 case NETDEV_UP:
1065 if (!inetdev_valid_mtu(dev->mtu)) 1065 if (!inetdev_valid_mtu(dev->mtu))
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4a8e370862b..a96e5ec211a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -186,9 +186,7 @@ static inline struct tnode *node_parent_rcu(struct node *node)
186{ 186{
187 struct tnode *ret = node_parent(node); 187 struct tnode *ret = node_parent(node);
188 188
189 return rcu_dereference_check(ret, 189 return rcu_dereference_rtnl(ret);
190 rcu_read_lock_held() ||
191 lockdep_rtnl_is_held());
192} 190}
193 191
194/* Same as rcu_assign_pointer 192/* Same as rcu_assign_pointer
@@ -211,9 +209,7 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
211{ 209{
212 struct node *ret = tnode_get_child(tn, i); 210 struct node *ret = tnode_get_child(tn, i);
213 211
214 return rcu_dereference_check(ret, 212 return rcu_dereference_rtnl(ret);
215 rcu_read_lock_held() ||
216 lockdep_rtnl_is_held());
217} 213}
218 214
219static inline int tnode_child_length(const struct tnode *tn) 215static inline int tnode_child_length(const struct tnode *tn)
@@ -459,8 +455,8 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
459 tn->empty_children = 1<<bits; 455 tn->empty_children = 1<<bits;
460 } 456 }
461 457
462 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), 458 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
463 (unsigned long) (sizeof(struct node) << bits)); 459 sizeof(struct node) << bits);
464 return tn; 460 return tn;
465} 461}
466 462
@@ -609,11 +605,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
609 605
610 /* Keep root node larger */ 606 /* Keep root node larger */
611 607
612 if (!node_parent((struct node*) tn)) { 608 if (!node_parent((struct node *)tn)) {
613 inflate_threshold_use = inflate_threshold_root; 609 inflate_threshold_use = inflate_threshold_root;
614 halve_threshold_use = halve_threshold_root; 610 halve_threshold_use = halve_threshold_root;
615 } 611 } else {
616 else {
617 inflate_threshold_use = inflate_threshold; 612 inflate_threshold_use = inflate_threshold;
618 halve_threshold_use = halve_threshold; 613 halve_threshold_use = halve_threshold;
619 } 614 }
@@ -639,7 +634,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
639 check_tnode(tn); 634 check_tnode(tn);
640 635
641 /* Return if at least one inflate is run */ 636 /* Return if at least one inflate is run */
642 if( max_work != MAX_WORK) 637 if (max_work != MAX_WORK)
643 return (struct node *) tn; 638 return (struct node *) tn;
644 639
645 /* 640 /*
@@ -966,9 +961,7 @@ fib_find_node(struct trie *t, u32 key)
966 struct node *n; 961 struct node *n;
967 962
968 pos = 0; 963 pos = 0;
969 n = rcu_dereference_check(t->trie, 964 n = rcu_dereference_rtnl(t->trie);
970 rcu_read_lock_held() ||
971 lockdep_rtnl_is_held());
972 965
973 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 966 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
974 tn = (struct tnode *) n; 967 tn = (struct tnode *) n;
@@ -1748,16 +1741,14 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1748 1741
1749 /* Node empty, walk back up to parent */ 1742 /* Node empty, walk back up to parent */
1750 c = (struct node *) p; 1743 c = (struct node *) p;
1751 } while ( (p = node_parent_rcu(c)) != NULL); 1744 } while ((p = node_parent_rcu(c)) != NULL);
1752 1745
1753 return NULL; /* Root of trie */ 1746 return NULL; /* Root of trie */
1754} 1747}
1755 1748
1756static struct leaf *trie_firstleaf(struct trie *t) 1749static struct leaf *trie_firstleaf(struct trie *t)
1757{ 1750{
1758 struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, 1751 struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
1759 rcu_read_lock_held() ||
1760 lockdep_rtnl_is_held());
1761 1752
1762 if (!n) 1753 if (!n)
1763 return NULL; 1754 return NULL;
@@ -2043,14 +2034,14 @@ struct fib_trie_iter {
2043 struct seq_net_private p; 2034 struct seq_net_private p;
2044 struct fib_table *tb; 2035 struct fib_table *tb;
2045 struct tnode *tnode; 2036 struct tnode *tnode;
2046 unsigned index; 2037 unsigned int index;
2047 unsigned depth; 2038 unsigned int depth;
2048}; 2039};
2049 2040
2050static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 2041static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2051{ 2042{
2052 struct tnode *tn = iter->tnode; 2043 struct tnode *tn = iter->tnode;
2053 unsigned cindex = iter->index; 2044 unsigned int cindex = iter->index;
2054 struct tnode *p; 2045 struct tnode *p;
2055 2046
2056 /* A single entry routing table */ 2047 /* A single entry routing table */
@@ -2159,7 +2150,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2159 */ 2150 */
2160static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 2151static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2161{ 2152{
2162 unsigned i, max, pointers, bytes, avdepth; 2153 unsigned int i, max, pointers, bytes, avdepth;
2163 2154
2164 if (stat->leaves) 2155 if (stat->leaves)
2165 avdepth = stat->totdepth*100 / stat->leaves; 2156 avdepth = stat->totdepth*100 / stat->leaves;
@@ -2356,7 +2347,8 @@ static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2356 2347
2357static void seq_indent(struct seq_file *seq, int n) 2348static void seq_indent(struct seq_file *seq, int n)
2358{ 2349{
2359 while (n-- > 0) seq_puts(seq, " "); 2350 while (n-- > 0)
2351 seq_puts(seq, " ");
2360} 2352}
2361 2353
2362static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2354static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
@@ -2388,7 +2380,7 @@ static const char *const rtn_type_names[__RTN_MAX] = {
2388 [RTN_XRESOLVE] = "XRESOLVE", 2380 [RTN_XRESOLVE] = "XRESOLVE",
2389}; 2381};
2390 2382
2391static inline const char *rtn_type(char *buf, size_t len, unsigned t) 2383static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2392{ 2384{
2393 if (t < __RTN_MAX && rtn_type_names[t]) 2385 if (t < __RTN_MAX && rtn_type_names[t])
2394 return rtn_type_names[t]; 2386 return rtn_type_names[t];
@@ -2544,13 +2536,12 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
2544 rcu_read_unlock(); 2536 rcu_read_unlock();
2545} 2537}
2546 2538
2547static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2539static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2548{ 2540{
2549 static unsigned type2flags[RTN_MAX + 1] = { 2541 unsigned int flags = 0;
2550 [7] = RTF_REJECT, [8] = RTF_REJECT,
2551 };
2552 unsigned flags = type2flags[type];
2553 2542
2543 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2544 flags = RTF_REJECT;
2554 if (fi && fi->fib_nh->nh_gw) 2545 if (fi && fi->fib_nh->nh_gw)
2555 flags |= RTF_GATEWAY; 2546 flags |= RTF_GATEWAY;
2556 if (mask == htonl(0xFFFFFFFF)) 2547 if (mask == htonl(0xFFFFFFFF))
@@ -2562,7 +2553,7 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2562/* 2553/*
2563 * This outputs /proc/net/route. 2554 * This outputs /proc/net/route.
2564 * The format of the file is not supposed to be changed 2555 * The format of the file is not supposed to be changed
2565 * and needs to be same as fib_hash output to avoid breaking 2556 * and needs to be same as fib_hash output to avoid breaking
2566 * legacy utilities 2557 * legacy utilities
2567 */ 2558 */
2568static int fib_route_seq_show(struct seq_file *seq, void *v) 2559static int fib_route_seq_show(struct seq_file *seq, void *v)
@@ -2587,7 +2578,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2587 2578
2588 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2579 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2589 const struct fib_info *fi = fa->fa_info; 2580 const struct fib_info *fi = fa->fa_info;
2590 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2581 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2591 int len; 2582 int len;
2592 2583
2593 if (fa->fa_type == RTN_BROADCAST 2584 if (fa->fa_type == RTN_BROADCAST
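The fib_trie hunks above replace several open-coded rcu_dereference_check(..., rcu_read_lock_held() || lockdep_rtnl_is_held()) calls with rcu_dereference_rtnl(). Assuming the helper is simply shorthand for that same condition, its definition amounts to the sketch below, which is what allows the trie to be walked either from an RCU read-side critical section (lookup path) or under RTNL (configuration paths):

#define rcu_dereference_rtnl(p) \
	rcu_dereference_check(p, rcu_read_lock_held() || lockdep_rtnl_is_held())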
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
new file mode 100644
index 00000000000..b546736da2e
--- /dev/null
+++ b/net/ipv4/gre.c
@@ -0,0 +1,151 @@
1/*
2 * GRE over IPv4 demultiplexer driver
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/kmod.h>
16#include <linux/skbuff.h>
17#include <linux/in.h>
18#include <linux/netdevice.h>
19#include <linux/version.h>
20#include <linux/spinlock.h>
21#include <net/protocol.h>
22#include <net/gre.h>
23
24
25const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
26static DEFINE_SPINLOCK(gre_proto_lock);
27
28int gre_add_protocol(const struct gre_protocol *proto, u8 version)
29{
30 if (version >= GREPROTO_MAX)
31 goto err_out;
32
33 spin_lock(&gre_proto_lock);
34 if (gre_proto[version])
35 goto err_out_unlock;
36
37 rcu_assign_pointer(gre_proto[version], proto);
38 spin_unlock(&gre_proto_lock);
39 return 0;
40
41err_out_unlock:
42 spin_unlock(&gre_proto_lock);
43err_out:
44 return -1;
45}
46EXPORT_SYMBOL_GPL(gre_add_protocol);
47
48int gre_del_protocol(const struct gre_protocol *proto, u8 version)
49{
50 if (version >= GREPROTO_MAX)
51 goto err_out;
52
53 spin_lock(&gre_proto_lock);
54 if (gre_proto[version] != proto)
55 goto err_out_unlock;
56 rcu_assign_pointer(gre_proto[version], NULL);
57 spin_unlock(&gre_proto_lock);
58 synchronize_rcu();
59 return 0;
60
61err_out_unlock:
62 spin_unlock(&gre_proto_lock);
63err_out:
64 return -1;
65}
66EXPORT_SYMBOL_GPL(gre_del_protocol);
67
68static int gre_rcv(struct sk_buff *skb)
69{
70 const struct gre_protocol *proto;
71 u8 ver;
72 int ret;
73
74 if (!pskb_may_pull(skb, 12))
75 goto drop;
76
77 ver = skb->data[1]&0x7f;
78 if (ver >= GREPROTO_MAX)
79 goto drop;
80
81 rcu_read_lock();
82 proto = rcu_dereference(gre_proto[ver]);
83 if (!proto || !proto->handler)
84 goto drop_unlock;
85 ret = proto->handler(skb);
86 rcu_read_unlock();
87 return ret;
88
89drop_unlock:
90 rcu_read_unlock();
91drop:
92 kfree_skb(skb);
93 return NET_RX_DROP;
94}
95
96static void gre_err(struct sk_buff *skb, u32 info)
97{
98 const struct gre_protocol *proto;
99 u8 ver;
100
101 if (!pskb_may_pull(skb, 12))
102 goto drop;
103
104 ver = skb->data[1]&0x7f;
105 if (ver >= GREPROTO_MAX)
106 goto drop;
107
108 rcu_read_lock();
109 proto = rcu_dereference(gre_proto[ver]);
110 if (!proto || !proto->err_handler)
111 goto drop_unlock;
112 proto->err_handler(skb, info);
113 rcu_read_unlock();
114 return;
115
116drop_unlock:
117 rcu_read_unlock();
118drop:
119 kfree_skb(skb);
120}
121
122static const struct net_protocol net_gre_protocol = {
123 .handler = gre_rcv,
124 .err_handler = gre_err,
125 .netns_ok = 1,
126};
127
128static int __init gre_init(void)
129{
130	pr_info("GRE over IPv4 demultiplexer driver\n");
131
132 if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
133 pr_err("gre: can't add protocol\n");
134 return -EAGAIN;
135 }
136
137 return 0;
138}
139
140static void __exit gre_exit(void)
141{
142 inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
143}
144
145module_init(gre_init);
146module_exit(gre_exit);
147
148MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
149MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
150MODULE_LICENSE("GPL");
151
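The demultiplexer above hands each GRE version slot to at most one handler; ip_gre.c below switches from inet_add_protocol(IPPROTO_GRE) to gre_add_protocol(..., GREPROTO_CISCO). A minimal sketch of a hypothetical consumer module, assuming only the gre_protocol API introduced here (the handler body and the my_* names are illustrative, not part of the patch):

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/gre.h>

static int my_gre_rcv(struct sk_buff *skb)
{
	/* a real handler would decapsulate; the sketch just drops */
	kfree_skb(skb);
	return NET_RX_DROP;
}

static const struct gre_protocol my_gre_proto = {
	.handler = my_gre_rcv,
};

static int __init my_gre_init(void)
{
	/* gre_add_protocol() returns -1 if the version slot is taken */
	return gre_add_protocol(&my_gre_proto, GREPROTO_CISCO) ? -EBUSY : 0;
}

static void __exit my_gre_exit(void)
{
	gre_del_protocol(&my_gre_proto, GREPROTO_CISCO);
}

module_init(my_gre_init);
module_exit(my_gre_exit);
MODULE_LICENSE("GPL");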
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a0d847c7cba..96bc7f9475a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
379 inet->tos = ip_hdr(skb)->tos; 379 inet->tos = ip_hdr(skb)->tos;
380 daddr = ipc.addr = rt->rt_src; 380 daddr = ipc.addr = rt->rt_src;
381 ipc.opt = NULL; 381 ipc.opt = NULL;
382 ipc.shtx.flags = 0; 382 ipc.tx_flags = 0;
383 if (icmp_param->replyopts.optlen) { 383 if (icmp_param->replyopts.optlen) {
384 ipc.opt = &icmp_param->replyopts; 384 ipc.opt = &icmp_param->replyopts;
385 if (ipc.opt->srr) 385 if (ipc.opt->srr)
@@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
538 inet_sk(sk)->tos = tos; 538 inet_sk(sk)->tos = tos;
539 ipc.addr = iph->saddr; 539 ipc.addr = iph->saddr;
540 ipc.opt = &icmp_param.replyopts; 540 ipc.opt = &icmp_param.replyopts;
541 ipc.shtx.flags = 0; 541 ipc.tx_flags = 0;
542 542
543 { 543 {
544 struct flowi fl = { 544 struct flowi fl = {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e5fa2ddce32..ba804266584 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -425,7 +425,7 @@ static int inet_diag_bc_run(const void *bc, int len,
425 bc += op->no; 425 bc += op->no;
426 } 426 }
427 } 427 }
428 return (len == 0); 428 return len == 0;
429} 429}
430 430
431static int valid_cc(const void *bc, int len, int cc) 431static int valid_cc(const void *bc, int len, int cc)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b7c41654dde..168440834ad 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -116,11 +116,11 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
116 struct ip4_create_arg *arg = a; 116 struct ip4_create_arg *arg = a;
117 117
118 qp = container_of(q, struct ipq, q); 118 qp = container_of(q, struct ipq, q);
119 return (qp->id == arg->iph->id && 119 return qp->id == arg->iph->id &&
120 qp->saddr == arg->iph->saddr && 120 qp->saddr == arg->iph->saddr &&
121 qp->daddr == arg->iph->daddr && 121 qp->daddr == arg->iph->daddr &&
122 qp->protocol == arg->iph->protocol && 122 qp->protocol == arg->iph->protocol &&
123 qp->user == arg->user); 123 qp->user == arg->user;
124} 124}
125 125
126/* Memory Tracking Functions. */ 126/* Memory Tracking Functions. */
@@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
542 /* If the first fragment is fragmented itself, we split 542 /* If the first fragment is fragmented itself, we split
543 * it to two chunks: the first with data and paged part 543 * it to two chunks: the first with data and paged part
544 * and the second, holding only fragments. */ 544 * and the second, holding only fragments. */
545 if (skb_has_frags(head)) { 545 if (skb_has_frag_list(head)) {
546 struct sk_buff *clone; 546 struct sk_buff *clone;
547 int i, plen = 0; 547 int i, plen = 0;
548 548
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 35c93e8b6a4..5d6ddcb7403 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
45#include <net/netns/generic.h> 45#include <net/netns/generic.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/gre.h>
47 48
48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 49#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -128,7 +129,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev);
128 129
129static int ipgre_net_id __read_mostly; 130static int ipgre_net_id __read_mostly;
130struct ipgre_net { 131struct ipgre_net {
131 struct ip_tunnel *tunnels[4][HASH_SIZE]; 132 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
132 133
133 struct net_device *fb_tunnel_dev; 134 struct net_device *fb_tunnel_dev;
134}; 135};
@@ -158,9 +159,8 @@ struct ipgre_net {
158#define tunnels_l tunnels[1] 159#define tunnels_l tunnels[1]
159#define tunnels_wc tunnels[0] 160#define tunnels_wc tunnels[0]
160/* 161/*
161 * Locking : hash tables are protected by RCU and a spinlock 162 * Locking : hash tables are protected by RCU and RTNL
162 */ 163 */
163static DEFINE_SPINLOCK(ipgre_lock);
164 164
165#define for_each_ip_tunnel_rcu(start) \ 165#define for_each_ip_tunnel_rcu(start) \
166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
@@ -173,8 +173,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
173{ 173{
174 struct net *net = dev_net(dev); 174 struct net *net = dev_net(dev);
175 int link = dev->ifindex; 175 int link = dev->ifindex;
176 unsigned h0 = HASH(remote); 176 unsigned int h0 = HASH(remote);
177 unsigned h1 = HASH(key); 177 unsigned int h1 = HASH(key);
178 struct ip_tunnel *t, *cand = NULL; 178 struct ip_tunnel *t, *cand = NULL;
179 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 179 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
@@ -289,13 +289,13 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
289 return NULL; 289 return NULL;
290} 290}
291 291
292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, 292static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms) 293 struct ip_tunnel_parm *parms)
294{ 294{
295 __be32 remote = parms->iph.daddr; 295 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr; 296 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key; 297 __be32 key = parms->i_key;
298 unsigned h = HASH(key); 298 unsigned int h = HASH(key);
299 int prio = 0; 299 int prio = 0;
300 300
301 if (local) 301 if (local)
@@ -308,7 +308,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
308 return &ign->tunnels[prio][h]; 308 return &ign->tunnels[prio][h];
309} 309}
310 310
311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, 311static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
312 struct ip_tunnel *t) 312 struct ip_tunnel *t)
313{ 313{
314 return __ipgre_bucket(ign, &t->parms); 314 return __ipgre_bucket(ign, &t->parms);
@@ -316,23 +316,22 @@ static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
316 316
317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) 317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
318{ 318{
319 struct ip_tunnel **tp = ipgre_bucket(ign, t); 319 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
320 320
321 spin_lock_bh(&ipgre_lock); 321 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
322 t->next = *tp;
323 rcu_assign_pointer(*tp, t); 322 rcu_assign_pointer(*tp, t);
324 spin_unlock_bh(&ipgre_lock);
325} 323}
326 324
327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 325static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
328{ 326{
329 struct ip_tunnel **tp; 327 struct ip_tunnel __rcu **tp;
330 328 struct ip_tunnel *iter;
331 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 329
332 if (t == *tp) { 330 for (tp = ipgre_bucket(ign, t);
333 spin_lock_bh(&ipgre_lock); 331 (iter = rtnl_dereference(*tp)) != NULL;
334 *tp = t->next; 332 tp = &iter->next) {
335 spin_unlock_bh(&ipgre_lock); 333 if (t == iter) {
334 rcu_assign_pointer(*tp, t->next);
336 break; 335 break;
337 } 336 }
338 } 337 }
@@ -346,10 +345,13 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
346 __be32 local = parms->iph.saddr; 345 __be32 local = parms->iph.saddr;
347 __be32 key = parms->i_key; 346 __be32 key = parms->i_key;
348 int link = parms->link; 347 int link = parms->link;
349 struct ip_tunnel *t, **tp; 348 struct ip_tunnel *t;
349 struct ip_tunnel __rcu **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 350 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351 351
352 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) 352 for (tp = __ipgre_bucket(ign, parms);
353 (t = rtnl_dereference(*tp)) != NULL;
354 tp = &t->next)
353 if (local == t->parms.iph.saddr && 355 if (local == t->parms.iph.saddr &&
354 remote == t->parms.iph.daddr && 356 remote == t->parms.iph.daddr &&
355 key == t->parms.i_key && 357 key == t->parms.i_key &&
@@ -360,7 +362,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
360 return t; 362 return t;
361} 363}
362 364
363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, 365static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
364 struct ip_tunnel_parm *parms, int create) 366 struct ip_tunnel_parm *parms, int create)
365{ 367{
366 struct ip_tunnel *t, *nt; 368 struct ip_tunnel *t, *nt;
@@ -645,9 +647,11 @@ static int ipgre_rcv(struct sk_buff *skb)
645 skb_reset_network_header(skb); 647 skb_reset_network_header(skb);
646 ipgre_ecn_decapsulate(iph, skb); 648 ipgre_ecn_decapsulate(iph, skb);
647 649
648 netif_rx(skb); 650 if (netif_rx(skb) == NET_RX_DROP)
651 stats->rx_dropped++;
652
649 rcu_read_unlock(); 653 rcu_read_unlock();
650 return(0); 654 return 0;
651 } 655 }
652 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 656 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
653 657
@@ -655,7 +659,7 @@ drop:
655 rcu_read_unlock(); 659 rcu_read_unlock();
656drop_nolock: 660drop_nolock:
657 kfree_skb(skb); 661 kfree_skb(skb);
658 return(0); 662 return 0;
659} 663}
660 664
661static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 665static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -668,7 +672,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
668 u8 tos; 672 u8 tos;
669 __be16 df; 673 __be16 df;
670 struct rtable *rt; /* Route to the other host */ 674 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */ 675 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */ 676 struct iphdr *iph; /* Our new IP header */
673 unsigned int max_headroom; /* The extra header space needed */ 677 unsigned int max_headroom; /* The extra header space needed */
674 int gre_hlen; 678 int gre_hlen;
@@ -1012,7 +1016,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1012 break; 1016 break;
1013 } 1017 }
1014 } else { 1018 } else {
1015 unsigned nflags = 0; 1019 unsigned int nflags = 0;
1016 1020
1017 t = netdev_priv(dev); 1021 t = netdev_priv(dev);
1018 1022
@@ -1125,7 +1129,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1125 1129
1126static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 1130static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127 unsigned short type, 1131 unsigned short type,
1128 const void *daddr, const void *saddr, unsigned len) 1132 const void *daddr, const void *saddr, unsigned int len)
1129{ 1133{
1130 struct ip_tunnel *t = netdev_priv(dev); 1134 struct ip_tunnel *t = netdev_priv(dev);
1131 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1135 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
@@ -1274,14 +1278,13 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1274 tunnel->hlen = sizeof(struct iphdr) + 4; 1278 tunnel->hlen = sizeof(struct iphdr) + 4;
1275 1279
1276 dev_hold(dev); 1280 dev_hold(dev);
1277 ign->tunnels_wc[0] = tunnel; 1281 rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
1278} 1282}
1279 1283
1280 1284
1281static const struct net_protocol ipgre_protocol = { 1285static const struct gre_protocol ipgre_protocol = {
1282 .handler = ipgre_rcv, 1286 .handler = ipgre_rcv,
1283 .err_handler = ipgre_err, 1287 .err_handler = ipgre_err,
1284 .netns_ok = 1,
1285}; 1288};
1286 1289
1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) 1290static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
@@ -1291,11 +1294,13 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1291 for (prio = 0; prio < 4; prio++) { 1294 for (prio = 0; prio < 4; prio++) {
1292 int h; 1295 int h;
1293 for (h = 0; h < HASH_SIZE; h++) { 1296 for (h = 0; h < HASH_SIZE; h++) {
1294 struct ip_tunnel *t = ign->tunnels[prio][h]; 1297 struct ip_tunnel *t;
1298
1299 t = rtnl_dereference(ign->tunnels[prio][h]);
1295 1300
1296 while (t != NULL) { 1301 while (t != NULL) {
1297 unregister_netdevice_queue(t->dev, head); 1302 unregister_netdevice_queue(t->dev, head);
1298 t = t->next; 1303 t = rtnl_dereference(t->next);
1299 } 1304 }
1300 } 1305 }
1301 } 1306 }
@@ -1522,7 +1527,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1522 t = nt; 1527 t = nt;
1523 1528
1524 if (dev->type != ARPHRD_ETHER) { 1529 if (dev->type != ARPHRD_ETHER) {
1525 unsigned nflags = 0; 1530 unsigned int nflags = 0;
1526 1531
1527 if (ipv4_is_multicast(p.iph.daddr)) 1532 if (ipv4_is_multicast(p.iph.daddr))
1528 nflags = IFF_BROADCAST; 1533 nflags = IFF_BROADCAST;
@@ -1663,7 +1668,7 @@ static int __init ipgre_init(void)
1663 if (err < 0) 1668 if (err < 0)
1664 return err; 1669 return err;
1665 1670
1666 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); 1671 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1667 if (err < 0) { 1672 if (err < 0) {
1668 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1673 printk(KERN_INFO "ipgre init: can't add protocol\n");
1669 goto add_proto_failed; 1674 goto add_proto_failed;
@@ -1683,7 +1688,7 @@ out:
1683tap_ops_failed: 1688tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops); 1689 rtnl_link_unregister(&ipgre_link_ops);
1685rtnl_link_failed: 1690rtnl_link_failed:
1686 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1691 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1687add_proto_failed: 1692add_proto_failed:
1688 unregister_pernet_device(&ipgre_net_ops); 1693 unregister_pernet_device(&ipgre_net_ops);
1689 goto out; 1694 goto out;
@@ -1693,7 +1698,7 @@ static void __exit ipgre_fini(void)
1693{ 1698{
1694 rtnl_link_unregister(&ipgre_tap_ops); 1699 rtnl_link_unregister(&ipgre_tap_ops);
1695 rtnl_link_unregister(&ipgre_link_ops); 1700 rtnl_link_unregister(&ipgre_link_ops);
1696 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1701 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1697 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1702 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1698 unregister_pernet_device(&ipgre_net_ops); 1703 unregister_pernet_device(&ipgre_net_ops);
1699} 1704}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ba9836c488e..1906fa35860 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -466,7 +466,7 @@ error:
466 } 466 }
467 return -EINVAL; 467 return -EINVAL;
468} 468}
469 469EXPORT_SYMBOL(ip_options_compile);
470 470
471/* 471/*
472 * Undo all the changes done by ip_options_compile(). 472 * Undo all the changes done by ip_options_compile().
@@ -646,3 +646,4 @@ int ip_options_rcv_srr(struct sk_buff *skb)
646 } 646 }
647 return 0; 647 return 0;
648} 648}
649EXPORT_SYMBOL(ip_options_rcv_srr);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7649d775007..439d2a34ee4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
487 * LATER: this step can be merged to real generation of fragments, 487 * LATER: this step can be merged to real generation of fragments,
488 * we can switch to copy when see the first bad fragment. 488 * we can switch to copy when see the first bad fragment.
489 */ 489 */
490 if (skb_has_frags(skb)) { 490 if (skb_has_frag_list(skb)) {
491 struct sk_buff *frag, *frag2; 491 struct sk_buff *frag, *frag2;
492 int first_len = skb_pagelen(skb); 492 int first_len = skb_pagelen(skb);
493 493
@@ -844,10 +844,9 @@ int ip_append_data(struct sock *sk,
844 inet->cork.length = 0; 844 inet->cork.length = 0;
845 sk->sk_sndmsg_page = NULL; 845 sk->sk_sndmsg_page = NULL;
846 sk->sk_sndmsg_off = 0; 846 sk->sk_sndmsg_off = 0;
847 if ((exthdrlen = rt->dst.header_len) != 0) { 847 exthdrlen = rt->dst.header_len;
848 length += exthdrlen; 848 length += exthdrlen;
849 transhdrlen += exthdrlen; 849 transhdrlen += exthdrlen;
850 }
851 } else { 850 } else {
852 rt = (struct rtable *)inet->cork.dst; 851 rt = (struct rtable *)inet->cork.dst;
853 if (inet->cork.flags & IPCORK_OPT) 852 if (inet->cork.flags & IPCORK_OPT)
@@ -934,16 +933,19 @@ alloc_new_skb:
934 !(rt->dst.dev->features&NETIF_F_SG)) 933 !(rt->dst.dev->features&NETIF_F_SG))
935 alloclen = mtu; 934 alloclen = mtu;
936 else 935 else
937 alloclen = datalen + fragheaderlen; 936 alloclen = fraglen;
938 937
939 /* The last fragment gets additional space at tail. 938 /* The last fragment gets additional space at tail.
940 * Note, with MSG_MORE we overallocate on fragments, 939 * Note, with MSG_MORE we overallocate on fragments,
941 * because we have no idea what fragment will be 940 * because we have no idea what fragment will be
942 * the last. 941 * the last.
943 */ 942 */
944 if (datalen == length + fraggap) 943 if (datalen == length + fraggap) {
945 alloclen += rt->dst.trailer_len; 944 alloclen += rt->dst.trailer_len;
946 945 /* make sure mtu is not reached */
946 if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
947 datalen -= ALIGN(rt->dst.trailer_len, 8);
948 }
947 if (transhdrlen) { 949 if (transhdrlen) {
948 skb = sock_alloc_send_skb(sk, 950 skb = sock_alloc_send_skb(sk,
949 alloclen + hh_len + 15, 951 alloclen + hh_len + 15,
@@ -960,7 +962,7 @@ alloc_new_skb:
960 else 962 else
961 /* only the initial fragment is 963 /* only the initial fragment is
962 time stamped */ 964 time stamped */
963 ipc->shtx.flags = 0; 965 ipc->tx_flags = 0;
964 } 966 }
965 if (skb == NULL) 967 if (skb == NULL)
966 goto error; 968 goto error;
@@ -971,7 +973,7 @@ alloc_new_skb:
971 skb->ip_summed = csummode; 973 skb->ip_summed = csummode;
972 skb->csum = 0; 974 skb->csum = 0;
973 skb_reserve(skb, hh_len); 975 skb_reserve(skb, hh_len);
974 *skb_tx(skb) = ipc->shtx; 976 skb_shinfo(skb)->tx_flags = ipc->tx_flags;
975 977
976 /* 978 /*
977 * Find where to start putting bytes. 979 * Find where to start putting bytes.
@@ -1391,7 +1393,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1391 1393
1392 daddr = ipc.addr = rt->rt_src; 1394 daddr = ipc.addr = rt->rt_src;
1393 ipc.opt = NULL; 1395 ipc.opt = NULL;
1394 ipc.shtx.flags = 0; 1396 ipc.tx_flags = 0;
1395 1397
1396 if (replyopts.opt.optlen) { 1398 if (replyopts.opt.optlen) {
1397 ipc.opt = &replyopts.opt; 1399 ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ec036731a70..babd2527810 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -122,11 +122,11 @@
122 122
123static int ipip_net_id __read_mostly; 123static int ipip_net_id __read_mostly;
124struct ipip_net { 124struct ipip_net {
125 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 125 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
126 struct ip_tunnel *tunnels_r[HASH_SIZE]; 126 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
127 struct ip_tunnel *tunnels_l[HASH_SIZE]; 127 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
128 struct ip_tunnel *tunnels_wc[1]; 128 struct ip_tunnel __rcu *tunnels_wc[1];
129 struct ip_tunnel **tunnels[4]; 129 struct ip_tunnel __rcu **tunnels[4];
130 130
131 struct net_device *fb_tunnel_dev; 131 struct net_device *fb_tunnel_dev;
132}; 132};
@@ -135,9 +135,8 @@ static void ipip_tunnel_init(struct net_device *dev);
135static void ipip_tunnel_setup(struct net_device *dev); 135static void ipip_tunnel_setup(struct net_device *dev);
136 136
137/* 137/*
138 * Locking : hash tables are protected by RCU and a spinlock 138 * Locking : hash tables are protected by RCU and RTNL
139 */ 139 */
140static DEFINE_SPINLOCK(ipip_lock);
141 140
142#define for_each_ip_tunnel_rcu(start) \ 141#define for_each_ip_tunnel_rcu(start) \
143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 142 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
@@ -145,8 +144,8 @@ static DEFINE_SPINLOCK(ipip_lock);
145static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, 144static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
146 __be32 remote, __be32 local) 145 __be32 remote, __be32 local)
147{ 146{
148 unsigned h0 = HASH(remote); 147 unsigned int h0 = HASH(remote);
149 unsigned h1 = HASH(local); 148 unsigned int h1 = HASH(local);
150 struct ip_tunnel *t; 149 struct ip_tunnel *t;
151 struct ipip_net *ipn = net_generic(net, ipip_net_id); 150 struct ipip_net *ipn = net_generic(net, ipip_net_id);
152 151
@@ -169,12 +168,12 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
169 return NULL; 168 return NULL;
170} 169}
171 170
172static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, 171static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
173 struct ip_tunnel_parm *parms) 172 struct ip_tunnel_parm *parms)
174{ 173{
175 __be32 remote = parms->iph.daddr; 174 __be32 remote = parms->iph.daddr;
176 __be32 local = parms->iph.saddr; 175 __be32 local = parms->iph.saddr;
177 unsigned h = 0; 176 unsigned int h = 0;
178 int prio = 0; 177 int prio = 0;
179 178
180 if (remote) { 179 if (remote) {
@@ -188,7 +187,7 @@ static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
188 return &ipn->tunnels[prio][h]; 187 return &ipn->tunnels[prio][h];
189} 188}
190 189
191static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, 190static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
192 struct ip_tunnel *t) 191 struct ip_tunnel *t)
193{ 192{
194 return __ipip_bucket(ipn, &t->parms); 193 return __ipip_bucket(ipn, &t->parms);
@@ -196,13 +195,14 @@ static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
196 195
197static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) 196static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
198{ 197{
199 struct ip_tunnel **tp; 198 struct ip_tunnel __rcu **tp;
200 199 struct ip_tunnel *iter;
201 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { 200
202 if (t == *tp) { 201 for (tp = ipip_bucket(ipn, t);
203 spin_lock_bh(&ipip_lock); 202 (iter = rtnl_dereference(*tp)) != NULL;
204 *tp = t->next; 203 tp = &iter->next) {
205 spin_unlock_bh(&ipip_lock); 204 if (t == iter) {
205 rcu_assign_pointer(*tp, t->next);
206 break; 206 break;
207 } 207 }
208 } 208 }
@@ -210,12 +210,10 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
210 210
211static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) 211static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
212{ 212{
213 struct ip_tunnel **tp = ipip_bucket(ipn, t); 213 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
214 214
215 spin_lock_bh(&ipip_lock); 215 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
216 t->next = *tp;
217 rcu_assign_pointer(*tp, t); 216 rcu_assign_pointer(*tp, t);
218 spin_unlock_bh(&ipip_lock);
219} 217}
220 218
221static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 219static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -223,12 +221,15 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
223{ 221{
224 __be32 remote = parms->iph.daddr; 222 __be32 remote = parms->iph.daddr;
225 __be32 local = parms->iph.saddr; 223 __be32 local = parms->iph.saddr;
226 struct ip_tunnel *t, **tp, *nt; 224 struct ip_tunnel *t, *nt;
225 struct ip_tunnel __rcu **tp;
227 struct net_device *dev; 226 struct net_device *dev;
228 char name[IFNAMSIZ]; 227 char name[IFNAMSIZ];
229 struct ipip_net *ipn = net_generic(net, ipip_net_id); 228 struct ipip_net *ipn = net_generic(net, ipip_net_id);
230 229
231 for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { 230 for (tp = __ipip_bucket(ipn, parms);
231 (t = rtnl_dereference(*tp)) != NULL;
232 tp = &t->next) {
232 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 233 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
233 return t; 234 return t;
234 } 235 }
@@ -268,16 +269,15 @@ failed_free:
268 return NULL; 269 return NULL;
269} 270}
270 271
272/* called with RTNL */
271static void ipip_tunnel_uninit(struct net_device *dev) 273static void ipip_tunnel_uninit(struct net_device *dev)
272{ 274{
273 struct net *net = dev_net(dev); 275 struct net *net = dev_net(dev);
274 struct ipip_net *ipn = net_generic(net, ipip_net_id); 276 struct ipip_net *ipn = net_generic(net, ipip_net_id);
275 277
276 if (dev == ipn->fb_tunnel_dev) { 278 if (dev == ipn->fb_tunnel_dev)
277 spin_lock_bh(&ipip_lock); 279 rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
278 ipn->tunnels_wc[0] = NULL; 280 else
279 spin_unlock_bh(&ipip_lock);
280 } else
281 ipip_tunnel_unlink(ipn, netdev_priv(dev)); 281 ipip_tunnel_unlink(ipn, netdev_priv(dev));
282 dev_put(dev); 282 dev_put(dev);
283} 283}
@@ -377,7 +377,10 @@ static int ipip_rcv(struct sk_buff *skb)
377 skb_tunnel_rx(skb, tunnel->dev); 377 skb_tunnel_rx(skb, tunnel->dev);
378 378
379 ipip_ecn_decapsulate(iph, skb); 379 ipip_ecn_decapsulate(iph, skb);
380 netif_rx(skb); 380
381 if (netif_rx(skb) == NET_RX_DROP)
382 tunnel->dev->stats.rx_dropped++;
383
381 rcu_read_unlock(); 384 rcu_read_unlock();
382 return 0; 385 return 0;
383 } 386 }
@@ -741,10 +744,10 @@ static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
741 iph->ihl = 5; 744 iph->ihl = 5;
742 745
743 dev_hold(dev); 746 dev_hold(dev);
744 ipn->tunnels_wc[0] = tunnel; 747 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
745} 748}
746 749
747static struct xfrm_tunnel ipip_handler = { 750static struct xfrm_tunnel ipip_handler __read_mostly = {
748 .handler = ipip_rcv, 751 .handler = ipip_rcv,
749 .err_handler = ipip_err, 752 .err_handler = ipip_err,
750 .priority = 1, 753 .priority = 1,
@@ -760,11 +763,12 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
760 for (prio = 1; prio < 4; prio++) { 763 for (prio = 1; prio < 4; prio++) {
761 int h; 764 int h;
762 for (h = 0; h < HASH_SIZE; h++) { 765 for (h = 0; h < HASH_SIZE; h++) {
763 struct ip_tunnel *t = ipn->tunnels[prio][h]; 766 struct ip_tunnel *t;
764 767
768 t = rtnl_dereference(ipn->tunnels[prio][h]);
765 while (t != NULL) { 769 while (t != NULL) {
766 unregister_netdevice_queue(t->dev, head); 770 unregister_netdevice_queue(t->dev, head);
767 t = t->next; 771 t = rtnl_dereference(t->next);
768 } 772 }
769 } 773 }
770 } 774 }
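Both tunnel drivers (ip_gre.c above and ipip.c here) drop their hash-table spinlocks by relying on the rule stated in the updated locking comments: readers stay inside rcu_read_lock()/rcu_dereference(), while writers already hold RTNL and can therefore use rtnl_dereference() plus rcu_assign_pointer() with no additional lock. A condensed sketch of the two sides, under the assumption that the bucket layout and the match() helper are illustrative stand-ins:

#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>

struct tunnel {
	struct tunnel __rcu *next;
	/* ... lookup keys ... */
};

static bool match(const struct tunnel *t);	/* hypothetical key compare */

/* reader side: packet path, caller holds rcu_read_lock() */
static struct tunnel *tunnel_lookup(struct tunnel __rcu **bucket)
{
	struct tunnel *t;

	for (t = rcu_dereference(*bucket); t; t = rcu_dereference(t->next))
		if (match(t))
			return t;
	return NULL;
}

/* writer side: ioctl/netlink paths, already serialized by RTNL */
static void tunnel_unlink(struct tunnel __rcu **bucket, struct tunnel *t)
{
	struct tunnel __rcu **tp;
	struct tunnel *iter;

	ASSERT_RTNL();
	for (tp = bucket; (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next)
		if (iter == t) {
			rcu_assign_pointer(*tp, t->next);	/* unlink in place */
			break;
		}
}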
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 179fcab866f..10b24c02deb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -724,7 +724,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
724 case 0: 724 case 0:
725 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 725 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
726 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 726 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
727 if (dev && dev->ip_ptr == NULL) { 727 if (dev && __in_dev_get_rtnl(dev) == NULL) {
728 dev_put(dev); 728 dev_put(dev);
729 return -EADDRNOTAVAIL; 729 return -EADDRNOTAVAIL;
730 } 730 }
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8f4f9a57f1..8b642f15246 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -72,7 +72,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
72 for (i = 0; i < len; i++) 72 for (i = 0; i < len; i++)
73 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; 73 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
74 74
75 return (ret != 0); 75 return ret != 0;
76} 76}
77 77
78/* 78/*
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 3a43cf36db8..1e26a489765 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,6 +29,7 @@
29#include <net/netfilter/nf_conntrack.h> 29#include <net/netfilter/nf_conntrack.h>
30#include <net/net_namespace.h> 30#include <net/net_namespace.h>
31#include <net/checksum.h> 31#include <net/checksum.h>
32#include <net/ip.h>
32 33
33#define CLUSTERIP_VERSION "0.8" 34#define CLUSTERIP_VERSION "0.8"
34 35
@@ -231,24 +232,22 @@ clusterip_hashfn(const struct sk_buff *skb,
231{ 232{
232 const struct iphdr *iph = ip_hdr(skb); 233 const struct iphdr *iph = ip_hdr(skb);
233 unsigned long hashval; 234 unsigned long hashval;
234 u_int16_t sport, dport; 235 u_int16_t sport = 0, dport = 0;
235 const u_int16_t *ports; 236 int poff;
236 237
237 switch (iph->protocol) { 238 poff = proto_ports_offset(iph->protocol);
238 case IPPROTO_TCP: 239 if (poff >= 0) {
239 case IPPROTO_UDP: 240 const u_int16_t *ports;
240 case IPPROTO_UDPLITE: 241 u16 _ports[2];
241 case IPPROTO_SCTP: 242
242 case IPPROTO_DCCP: 243 ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
243 case IPPROTO_ICMP: 244 if (ports) {
244 ports = (const void *)iph+iph->ihl*4; 245 sport = ports[0];
245 sport = ports[0]; 246 dport = ports[1];
246 dport = ports[1]; 247 }
247 break; 248 } else {
248 default:
249 if (net_ratelimit()) 249 if (net_ratelimit())
250 pr_info("unknown protocol %u\n", iph->protocol); 250 pr_info("unknown protocol %u\n", iph->protocol);
251 sport = dport = 0;
252 } 251 }
253 252
254 switch (config->hash_mode) { 253 switch (config->hash_mode) {
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index f2d29735140..65699c24411 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,8 +28,7 @@
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <net/protocol.h> 29#include <net/protocol.h>
30 30
31const struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; 31const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly;
32static DEFINE_SPINLOCK(inet_proto_lock);
33 32
34/* 33/*
35 * Add a protocol handler to the hash tables 34 * Add a protocol handler to the hash tables
@@ -37,20 +36,9 @@ static DEFINE_SPINLOCK(inet_proto_lock);
37 36
38int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) 37int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
39{ 38{
40 int hash, ret; 39 int hash = protocol & (MAX_INET_PROTOS - 1);
41 40
42 hash = protocol & (MAX_INET_PROTOS - 1); 41 return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1;
43
44 spin_lock_bh(&inet_proto_lock);
45 if (inet_protos[hash]) {
46 ret = -1;
47 } else {
48 inet_protos[hash] = prot;
49 ret = 0;
50 }
51 spin_unlock_bh(&inet_proto_lock);
52
53 return ret;
54} 42}
55EXPORT_SYMBOL(inet_add_protocol); 43EXPORT_SYMBOL(inet_add_protocol);
56 44
@@ -60,18 +48,9 @@ EXPORT_SYMBOL(inet_add_protocol);
60 48
61int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) 49int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
62{ 50{
63 int hash, ret; 51 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
64
65 hash = protocol & (MAX_INET_PROTOS - 1);
66 52
67 spin_lock_bh(&inet_proto_lock); 53 ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1;
68 if (inet_protos[hash] == prot) {
69 inet_protos[hash] = NULL;
70 ret = 0;
71 } else {
72 ret = -1;
73 }
74 spin_unlock_bh(&inet_proto_lock);
75 54
76 synchronize_net(); 55 synchronize_net();
77 56
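Dropping inet_proto_lock works because each table slot only ever moves NULL -> handler -> NULL, and cmpxchg() returns the previous value: registration succeeds only when the slot was empty, deletion only when it still holds the handler being removed. The idiom in isolation (slot and function names are illustrative, not kernel API):

static const struct net_protocol *slot;	/* one inet_protos[] entry */

static int slot_add(const struct net_protocol *p)
{
	/* install p only if the slot was empty */
	return cmpxchg(&slot, NULL, p) == NULL ? 0 : -1;
}

static int slot_del(const struct net_protocol *p)
{
	/* clear the slot only if it still points at p */
	return cmpxchg(&slot, p, NULL) == p ? 0 : -1;
}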
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 009a7b2aa1e..1f85ef28989 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
505 505
506 ipc.addr = inet->inet_saddr; 506 ipc.addr = inet->inet_saddr;
507 ipc.opt = NULL; 507 ipc.opt = NULL;
508 ipc.shtx.flags = 0; 508 ipc.tx_flags = 0;
509 ipc.oif = sk->sk_bound_dev_if; 509 ipc.oif = sk->sk_bound_dev_if;
510 510
511 if (msg->msg_controllen) { 511 if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6298f75d5e9..98beda47bc9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1268,18 +1268,11 @@ skip_hashing:
1268 1268
1269void rt_bind_peer(struct rtable *rt, int create) 1269void rt_bind_peer(struct rtable *rt, int create)
1270{ 1270{
1271 static DEFINE_SPINLOCK(rt_peer_lock);
1272 struct inet_peer *peer; 1271 struct inet_peer *peer;
1273 1272
1274 peer = inet_getpeer(rt->rt_dst, create); 1273 peer = inet_getpeer(rt->rt_dst, create);
1275 1274
1276 spin_lock_bh(&rt_peer_lock); 1275 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1277 if (rt->peer == NULL) {
1278 rt->peer = peer;
1279 peer = NULL;
1280 }
1281 spin_unlock_bh(&rt_peer_lock);
1282 if (peer)
1283 inet_putpeer(peer); 1276 inet_putpeer(peer);
1284} 1277}
1285 1278
@@ -2586,7 +2579,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2586 goto out; 2579 goto out;
2587 2580
2588 /* RACE: Check return value of inet_select_addr instead. */ 2581 /* RACE: Check return value of inet_select_addr instead. */
2589 if (__in_dev_get_rtnl(dev_out) == NULL) { 2582 if (rcu_dereference_raw(dev_out->ip_ptr) == NULL) {
2590 dev_put(dev_out); 2583 dev_put(dev_out);
2591 goto out; /* Wrong error code */ 2584 goto out; /* Wrong error code */
2592 } 2585 }
@@ -2798,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2798 2791
2799 dst_release(&(*rp)->dst); 2792 dst_release(&(*rp)->dst);
2800 *rp = rt; 2793 *rp = rt;
2801 return (rt ? 0 : -ENOMEM); 2794 return rt ? 0 : -ENOMEM;
2802} 2795}
2803 2796
2804int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, 2797int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 95d75d44392..19192c5fe67 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2392,7 +2392,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2392 err = tp->af_specific->md5_parse(sk, optval, optlen); 2392 err = tp->af_specific->md5_parse(sk, optval, optlen);
2393 break; 2393 break;
2394#endif 2394#endif
2395 2395 case TCP_USER_TIMEOUT:
2396 /* Cap the max timeout in ms TCP will retry/retrans
2397 * before giving up and aborting (ETIMEDOUT) a connection.
2398 */
2399 icsk->icsk_user_timeout = msecs_to_jiffies(val);
2400 break;
2396 default: 2401 default:
2397 err = -ENOPROTOOPT; 2402 err = -ENOPROTOOPT;
2398 break; 2403 break;
@@ -2611,6 +2616,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2611 case TCP_THIN_DUPACK: 2616 case TCP_THIN_DUPACK:
2612 val = tp->thin_dupack; 2617 val = tp->thin_dupack;
2613 break; 2618 break;
2619
2620 case TCP_USER_TIMEOUT:
2621 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2622 break;
2614 default: 2623 default:
2615 return -ENOPROTOOPT; 2624 return -ENOPROTOOPT;
2616 } 2625 }
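From user space the new option takes an unsigned int count of milliseconds. A hypothetical caller that wants the kernel to abort (ETIMEDOUT) a connection whose transmitted data stays unacknowledged for more than 30 seconds could use the sketch below; fd is assumed to be a TCP socket, and the fallback define is only needed with headers that predate the option:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_USER_TIMEOUT
#define TCP_USER_TIMEOUT 18	/* option value used by this kernel series */
#endif

static int set_user_timeout(int fd, unsigned int timeout_ms)
{
	/* cap how long sent data may stay unacknowledged before abort */
	return setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
			  &timeout_ms, sizeof(timeout_ms));
}

/* e.g. set_user_timeout(fd, 30000); */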
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 149e79ac289..fabc09a58d7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk)
805 } 805 }
806} 806}
807 807
808/* Numbers are taken from RFC3390.
809 *
810 * John Heffner states:
811 *
812 * The RFC specifies a window of no more than 4380 bytes
813 * unless 2*MSS > 4380. Reading the pseudocode in the RFC
814 * is a bit misleading because they use a clamp at 4380 bytes
815 * rather than use a multiplier in the relevant range.
816 */
817__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) 808__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
818{ 809{
819 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); 810 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
820 811
821 if (!cwnd) { 812 if (!cwnd)
822 if (tp->mss_cache > 1460) 813 cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
823 cwnd = 2;
824 else
825 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
826 }
827 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 814 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
828} 815}
829 816
@@ -2314,7 +2301,7 @@ static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
2314 2301
2315static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) 2302static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
2316{ 2303{
2317 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); 2304 return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
2318} 2305}
2319 2306
2320static inline int tcp_head_timedout(struct sock *sk) 2307static inline int tcp_head_timedout(struct sock *sk)
@@ -3411,8 +3398,8 @@ static void tcp_ack_probe(struct sock *sk)
3411 3398
3412static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) 3399static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3413{ 3400{
3414 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || 3401 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3415 inet_csk(sk)->icsk_ca_state != TCP_CA_Open); 3402 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3416} 3403}
3417 3404
3418static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) 3405static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
@@ -3429,9 +3416,9 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
3429 const u32 ack, const u32 ack_seq, 3416 const u32 ack, const u32 ack_seq,
3430 const u32 nwin) 3417 const u32 nwin)
3431{ 3418{
3432 return (after(ack, tp->snd_una) || 3419 return after(ack, tp->snd_una) ||
3433 after(ack_seq, tp->snd_wl1) || 3420 after(ack_seq, tp->snd_wl1) ||
3434 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd)); 3421 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3435} 3422}
3436 3423
3437/* Update our send window. 3424/* Update our send window.
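Both this receive-side default and the sender-side initial window in tcp_output.c below now defer to rfc3390_bytes_to_packets(). RFC 3390 caps the initial window at min(4*MSS, max(2*MSS, 4380 bytes)); counted in segments that allowance is 4 when the MSS is at most 1095 bytes, 3 up to 2190, and 2 above that. A sketch of a helper computing those thresholds (the in-tree definition may be spelled differently):

static inline u32 rfc3390_bytes_to_packets(const u32 smss)
{
	/* min(4*MSS, max(2*MSS, 4380 bytes)) expressed in whole segments */
	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
}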
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 020766292bb..a0232f3a358 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2571,7 +2571,6 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2571 2571
2572 return tcp_gro_receive(head, skb); 2572 return tcp_gro_receive(head, skb);
2573} 2573}
2574EXPORT_SYMBOL(tcp4_gro_receive);
2575 2574
2576int tcp4_gro_complete(struct sk_buff *skb) 2575int tcp4_gro_complete(struct sk_buff *skb)
2577{ 2576{
@@ -2584,7 +2583,6 @@ int tcp4_gro_complete(struct sk_buff *skb)
2584 2583
2585 return tcp_gro_complete(skb); 2584 return tcp_gro_complete(skb);
2586} 2585}
2587EXPORT_SYMBOL(tcp4_gro_complete);
2588 2586
2589struct proto tcp_prot = { 2587struct proto tcp_prot = {
2590 .name = "TCP", 2588 .name = "TCP",
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f25b56cb85c..43cf901d765 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -55,7 +55,7 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
55 return 1; 55 return 1;
56 if (after(end_seq, s_win) && before(seq, e_win)) 56 if (after(end_seq, s_win) && before(seq, e_win))
57 return 1; 57 return 1;
58 return (seq == e_win && seq == end_seq); 58 return seq == e_win && seq == end_seq;
59} 59}
60 60
61/* 61/*
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index de3bd845858..05b1ecf3676 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -224,16 +224,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
224 } 224 }
225 } 225 }
226 226
227 /* Set initial window to value enough for senders, 227 /* Set initial window to value enough for senders, following RFC5681. */
228 * following RFC2414. Senders, not following this RFC,
229 * will be satisfied with 2.
230 */
231 if (mss > (1 << *rcv_wscale)) { 228 if (mss > (1 << *rcv_wscale)) {
232 int init_cwnd = 4; 229 int init_cwnd = rfc3390_bytes_to_packets(mss);
233 if (mss > 1460 * 3) 230
234 init_cwnd = 2;
235 else if (mss > 1460)
236 init_cwnd = 3;
237 /* when initializing use the value from init_rcv_wnd 231 /* when initializing use the value from init_rcv_wnd
238 * rather than the default from above 232 * rather than the default from above
239 */ 233 */
@@ -1376,9 +1370,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
1376 const struct sk_buff *skb, 1370 const struct sk_buff *skb,
1377 unsigned mss_now, int nonagle) 1371 unsigned mss_now, int nonagle)
1378{ 1372{
1379 return (skb->len < mss_now && 1373 return skb->len < mss_now &&
1380 ((nonagle & TCP_NAGLE_CORK) || 1374 ((nonagle & TCP_NAGLE_CORK) ||
1381 (!nonagle && tp->packets_out && tcp_minshall_check(tp)))); 1375 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1382} 1376}
1383 1377
1384/* Return non-zero if the Nagle test allows this packet to be 1378/* Return non-zero if the Nagle test allows this packet to be
@@ -1449,10 +1443,10 @@ int tcp_may_send_now(struct sock *sk)
1449 struct tcp_sock *tp = tcp_sk(sk); 1443 struct tcp_sock *tp = tcp_sk(sk);
1450 struct sk_buff *skb = tcp_send_head(sk); 1444 struct sk_buff *skb = tcp_send_head(sk);
1451 1445
1452 return (skb && 1446 return skb &&
1453 tcp_snd_test(sk, skb, tcp_current_mss(sk), 1447 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1454 (tcp_skb_is_last(sk, skb) ? 1448 (tcp_skb_is_last(sk, skb) ?
1455 tp->nonagle : TCP_NAGLE_PUSH))); 1449 tp->nonagle : TCP_NAGLE_PUSH));
1456} 1450}
1457 1451
1458/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet 1452/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -2429,6 +2423,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2429 __u8 rcv_wscale; 2423 __u8 rcv_wscale;
2430 /* Set this up on the first call only */ 2424 /* Set this up on the first call only */
2431 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); 2425 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2426
2427 /* limit the window selection if the user enforce a smaller rx buffer */
2428 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2429 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
2430 req->window_clamp = tcp_full_space(sk);
2431
2432 /* tcp_full_space because it is guaranteed to be the first packet */ 2432 /* tcp_full_space because it is guaranteed to be the first packet */
2433 tcp_select_initial_window(tcp_full_space(sk), 2433 tcp_select_initial_window(tcp_full_space(sk),
2434 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 2434 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -2555,6 +2555,11 @@ static void tcp_connect_init(struct sock *sk)
2555 2555
2556 tcp_initialize_rcv_mss(sk); 2556 tcp_initialize_rcv_mss(sk);
2557 2557
2558 /* limit the window selection if the user enforce a smaller rx buffer */
2559 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2560 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2561 tp->window_clamp = tcp_full_space(sk);
2562
2558 tcp_select_initial_window(tcp_full_space(sk), 2563 tcp_select_initial_window(tcp_full_space(sk),
2559 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), 2564 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2560 &tp->rcv_wnd, 2565 &tp->rcv_wnd,
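Both new hunks clamp window_clamp to tcp_full_space() when the application has locked the receive buffer, so the advertised window can never exceed a user-chosen SO_RCVBUF. The lock in question, SOCK_RCVBUF_LOCK in sk->sk_userlocks, is set by an ordinary setsockopt(); a small user-space illustration (standard sockets API only, nothing from this patch is assumed):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        int rcvbuf = 16 * 1024;         /* pin the receive buffer to 16 KiB */
        socklen_t len = sizeof(rcvbuf);

        /* This sets SOCK_RCVBUF_LOCK on the socket; with the hunks above,
         * the offered TCP window is then clamped to what this buffer can
         * actually hold instead of the route's default window. */
        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) < 0)
                perror("setsockopt(SO_RCVBUF)");

        getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, &len);
        printf("effective SO_RCVBUF: %d bytes\n", rcvbuf);  /* kernel doubles it */
        return 0;
}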
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c35b469e851..baea4a12902 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -138,10 +138,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
138 * retransmissions with an initial RTO of TCP_RTO_MIN. 138 * retransmissions with an initial RTO of TCP_RTO_MIN.
139 */ 139 */
140static bool retransmits_timed_out(struct sock *sk, 140static bool retransmits_timed_out(struct sock *sk,
141 unsigned int boundary) 141 unsigned int boundary,
142 unsigned int timeout)
142{ 143{
143 unsigned int timeout, linear_backoff_thresh; 144 unsigned int linear_backoff_thresh, start_ts;
144 unsigned int start_ts;
145 145
146 if (!inet_csk(sk)->icsk_retransmits) 146 if (!inet_csk(sk)->icsk_retransmits)
147 return false; 147 return false;
@@ -151,14 +151,15 @@ static bool retransmits_timed_out(struct sock *sk,
151 else 151 else
152 start_ts = tcp_sk(sk)->retrans_stamp; 152 start_ts = tcp_sk(sk)->retrans_stamp;
153 153
154 linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); 154 if (likely(timeout == 0)) {
155 155 linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
156 if (boundary <= linear_backoff_thresh)
157 timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
158 else
159 timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
160 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
161 156
157 if (boundary <= linear_backoff_thresh)
158 timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
159 else
160 timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
161 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
162 }
162 return (tcp_time_stamp - start_ts) >= timeout; 163 return (tcp_time_stamp - start_ts) >= timeout;
163} 164}
164 165
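The new timeout argument lets callers pass an absolute limit (a user timeout) and bypass the boundary-based estimate; with timeout == 0 the previous exponential-backoff formula is still used. A user-space model of that formula, assuming the stack's usual TCP_RTO_MIN = HZ/5 and TCP_RTO_MAX = 120*HZ (HZ fixed at 1000 here so results read as milliseconds; the function names are illustrative):

#include <stdio.h>

#define HZ          1000u               /* so results print as milliseconds */
#define TCP_RTO_MIN (HZ / 5)            /* 200 ms */
#define TCP_RTO_MAX (120 * HZ)          /* 120 s  */

static unsigned int ilog2_u(unsigned int v)
{
        unsigned int r = 0;

        while (v >>= 1)
                r++;
        return r;
}

/* Time covered by `boundary` retransmissions that start at TCP_RTO_MIN and
 * double until capped at TCP_RTO_MAX -- the same shape as the in-kernel
 * calculation kept for the timeout == 0 case above. */
static unsigned int backoff_timeout(unsigned int boundary)
{
        unsigned int thresh = ilog2_u(TCP_RTO_MAX / TCP_RTO_MIN);

        if (boundary <= thresh)
                return ((2u << boundary) - 1) * TCP_RTO_MIN;
        return ((2u << thresh) - 1) * TCP_RTO_MIN +
               (boundary - thresh) * TCP_RTO_MAX;
}

int main(void)
{
        for (unsigned int b = 1; b <= 15; b++)
                printf("boundary=%2u -> %u ms\n", b, backoff_timeout(b));
        return 0;
}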
@@ -174,7 +175,7 @@ static int tcp_write_timeout(struct sock *sk)
174 dst_negative_advice(sk); 175 dst_negative_advice(sk);
175 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
176 } else { 177 } else {
177 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 178 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) {
178 /* Black hole detection */ 179 /* Black hole detection */
179 tcp_mtu_probing(icsk, sk); 180 tcp_mtu_probing(icsk, sk);
180 181
@@ -187,14 +188,16 @@ static int tcp_write_timeout(struct sock *sk)
187 188
188 retry_until = tcp_orphan_retries(sk, alive); 189 retry_until = tcp_orphan_retries(sk, alive);
189 do_reset = alive || 190 do_reset = alive ||
190 !retransmits_timed_out(sk, retry_until); 191 !retransmits_timed_out(sk, retry_until, 0);
191 192
192 if (tcp_out_of_resources(sk, do_reset)) 193 if (tcp_out_of_resources(sk, do_reset))
193 return 1; 194 return 1;
194 } 195 }
195 } 196 }
196 197
197 if (retransmits_timed_out(sk, retry_until)) { 198 if (retransmits_timed_out(sk, retry_until,
199 (1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) ? 0 :
200 icsk->icsk_user_timeout)) {
198 /* Has it gone just too far? */ 201 /* Has it gone just too far? */
199 tcp_write_err(sk); 202 tcp_write_err(sk);
200 return 1; 203 return 1;
@@ -436,7 +439,7 @@ out_reset_timer:
436 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 439 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
437 } 440 }
438 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 441 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
439 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) 442 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0))
440 __sk_dst_reset(sk); 443 __sk_dst_reset(sk);
441 444
442out:; 445out:;
@@ -556,7 +559,14 @@ static void tcp_keepalive_timer (unsigned long data)
556 elapsed = keepalive_time_elapsed(tp); 559 elapsed = keepalive_time_elapsed(tp);
557 560
558 if (elapsed >= keepalive_time_when(tp)) { 561 if (elapsed >= keepalive_time_when(tp)) {
559 if (icsk->icsk_probes_out >= keepalive_probes(tp)) { 562 /* If the TCP_USER_TIMEOUT option is enabled, use that
563 * to determine when to timeout instead.
564 */
565 if ((icsk->icsk_user_timeout != 0 &&
566 elapsed >= icsk->icsk_user_timeout &&
567 icsk->icsk_probes_out > 0) ||
568 (icsk->icsk_user_timeout == 0 &&
569 icsk->icsk_probes_out >= keepalive_probes(tp))) {
560 tcp_send_active_reset(sk, GFP_ATOMIC); 570 tcp_send_active_reset(sk, GFP_ATOMIC);
561 tcp_write_err(sk); 571 tcp_write_err(sk);
562 goto out; 572 goto out;
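The keepalive path now honours TCP_USER_TIMEOUT: once probes are outstanding, the connection is reset after the option's value in milliseconds has elapsed rather than after a fixed probe count. From user space it is an ordinary socket option; the fallback #define is only needed where the libc headers of the day do not yet carry the constant:

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_USER_TIMEOUT
#define TCP_USER_TIMEOUT 18             /* option value used by this series */
#endif

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        unsigned int timeout_ms = 30 * 1000;  /* abort after 30 s without an ACK */

        if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
                       &timeout_ms, sizeof(timeout_ms)) < 0)
                perror("setsockopt(TCP_USER_TIMEOUT)");
        else
                printf("user timeout set to %u ms\n", timeout_ms);
        return 0;
}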
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 20151d6a624..a534dda5456 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -80,7 +80,7 @@ static void tcp_westwood_init(struct sock *sk)
80 */ 80 */
81static inline u32 westwood_do_filter(u32 a, u32 b) 81static inline u32 westwood_do_filter(u32 a, u32 b)
82{ 82{
83 return (((7 * a) + b) >> 3); 83 return ((7 * a) + b) >> 3;
84} 84}
85 85
86static void westwood_filter(struct westwood *w, u32 delta) 86static void westwood_filter(struct westwood *w, u32 delta)
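The expression losing its extra parentheses is Westwood's fixed-point low-pass filter: an exponentially weighted moving average with gain 1/8, new = (7*old + sample)/8. A stand-alone sketch of the same arithmetic (names and sample values are illustrative):

#include <stdio.h>

/* Same arithmetic as westwood_do_filter(): a 7/8 EWMA in fixed point. */
static unsigned int do_filter(unsigned int avg, unsigned int sample)
{
        return ((7 * avg) + sample) >> 3;
}

int main(void)
{
        unsigned int bw = 1000;         /* illustrative starting estimate */
        unsigned int samples[] = { 800, 1200, 1000, 950, 4000, 1000 };

        for (int i = 0; i < 6; i++) {
                bw = do_filter(bw, samples[i]);
                printf("sample=%4u filtered=%u\n", samples[i], bw);
        }
        return 0;
}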
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 59186ca7808..9a17bd2a0a3 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,8 +14,8 @@
14#include <net/protocol.h> 14#include <net/protocol.h>
15#include <net/xfrm.h> 15#include <net/xfrm.h>
16 16
17static struct xfrm_tunnel *tunnel4_handlers; 17static struct xfrm_tunnel *tunnel4_handlers __read_mostly;
18static struct xfrm_tunnel *tunnel64_handlers; 18static struct xfrm_tunnel *tunnel64_handlers __read_mostly;
19static DEFINE_MUTEX(tunnel4_mutex); 19static DEFINE_MUTEX(tunnel4_mutex);
20 20
21static inline struct xfrm_tunnel **fam_handlers(unsigned short family) 21static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
@@ -39,7 +39,7 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
39 } 39 }
40 40
41 handler->next = *pprev; 41 handler->next = *pprev;
42 *pprev = handler; 42 rcu_assign_pointer(*pprev, handler);
43 43
44 ret = 0; 44 ret = 0;
45 45
@@ -73,6 +73,11 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
73} 73}
74EXPORT_SYMBOL(xfrm4_tunnel_deregister); 74EXPORT_SYMBOL(xfrm4_tunnel_deregister);
75 75
76#define for_each_tunnel_rcu(head, handler) \
77 for (handler = rcu_dereference(head); \
78 handler != NULL; \
79 handler = rcu_dereference(handler->next)) \
80
76static int tunnel4_rcv(struct sk_buff *skb) 81static int tunnel4_rcv(struct sk_buff *skb)
77{ 82{
78 struct xfrm_tunnel *handler; 83 struct xfrm_tunnel *handler;
@@ -80,7 +85,7 @@ static int tunnel4_rcv(struct sk_buff *skb)
80 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 85 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
81 goto drop; 86 goto drop;
82 87
83 for (handler = tunnel4_handlers; handler; handler = handler->next) 88 for_each_tunnel_rcu(tunnel4_handlers, handler)
84 if (!handler->handler(skb)) 89 if (!handler->handler(skb))
85 return 0; 90 return 0;
86 91
@@ -99,7 +104,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
99 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 104 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
100 goto drop; 105 goto drop;
101 106
102 for (handler = tunnel64_handlers; handler; handler = handler->next) 107 for_each_tunnel_rcu(tunnel64_handlers, handler)
103 if (!handler->handler(skb)) 108 if (!handler->handler(skb))
104 return 0; 109 return 0;
105 110
@@ -115,7 +120,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
115{ 120{
116 struct xfrm_tunnel *handler; 121 struct xfrm_tunnel *handler;
117 122
118 for (handler = tunnel4_handlers; handler; handler = handler->next) 123 for_each_tunnel_rcu(tunnel4_handlers, handler)
119 if (!handler->err_handler(skb, info)) 124 if (!handler->err_handler(skb, info))
120 break; 125 break;
121} 126}
@@ -125,7 +130,7 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
125{ 130{
126 struct xfrm_tunnel *handler; 131 struct xfrm_tunnel *handler;
127 132
128 for (handler = tunnel64_handlers; handler; handler = handler->next) 133 for_each_tunnel_rcu(tunnel64_handlers, handler)
129 if (!handler->err_handler(skb, info)) 134 if (!handler->err_handler(skb, info))
130 break; 135 break;
131} 136}
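tunnel4 switches its handler chains to the usual RCU publish/lookup pattern: writers still serialize on tunnel4_mutex, but insertion publishes the new node with rcu_assign_pointer() and for_each_tunnel_rcu() walks the chain under rcu_dereference(); the chain heads also gain __read_mostly so they sit in rarely written cache lines. The sketch below mimics only the publish/traverse ordering, using C11 atomics and a pthread mutex as stand-ins; there are no grace periods, priority handling is simplified, and every name is illustrative rather than a kernel API:

#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

/* Stand-in for the xfrm_tunnel handler chain: next pointers are published
 * with release semantics (the role rcu_assign_pointer() plays above) and
 * read with acquire semantics (the role rcu_dereference() plays inside
 * for_each_tunnel_rcu()).  Real RCU also defers freeing until a grace
 * period; that part is not modelled here. */
struct handler {
        int priority;
        int (*fn)(int pkt);             /* returns 0 when it consumed the packet */
        struct handler *_Atomic next;
};

static struct handler *_Atomic chain;
static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;

static void register_handler(struct handler *h)
{
        struct handler *_Atomic *pprev = &chain;
        struct handler *cur;

        pthread_mutex_lock(&chain_lock);        /* writers still serialize */
        while ((cur = atomic_load_explicit(pprev, memory_order_acquire)) &&
               cur->priority <= h->priority)
                pprev = &cur->next;
        atomic_store_explicit(&h->next, cur, memory_order_relaxed);
        atomic_store_explicit(pprev, h, memory_order_release);  /* publish */
        pthread_mutex_unlock(&chain_lock);
}

static int deliver(int pkt)
{
        struct handler *h;

        for (h = atomic_load_explicit(&chain, memory_order_acquire); h;
             h = atomic_load_explicit(&h->next, memory_order_acquire))
                if (!h->fn(pkt))
                        return 0;
        return -1;                      /* nobody claimed the packet */
}

static int even_only(int pkt) { return pkt % 2; }      /* consumes even packets */
static int decline(int pkt)   { (void)pkt; return 1; } /* consumes nothing */

int main(void)
{
        struct handler a = { .priority = 1, .fn = even_only };
        struct handler b = { .priority = 2, .fn = decline };

        register_handler(&a);
        register_handler(&b);
        printf("pkt 3 -> %d, pkt 4 -> %d\n", deliver(3), deliver(4));
        return 0;
}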
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fb23c2e63b5..b3f7e8cf18a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
797 return -EOPNOTSUPP; 797 return -EOPNOTSUPP;
798 798
799 ipc.opt = NULL; 799 ipc.opt = NULL;
800 ipc.shtx.flags = 0; 800 ipc.tx_flags = 0;
801 801
802 if (up->pending) { 802 if (up->pending) {
803 /* 803 /*
@@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
845 ipc.addr = inet->inet_saddr; 845 ipc.addr = inet->inet_saddr;
846 846
847 ipc.oif = sk->sk_bound_dev_if; 847 ipc.oif = sk->sk_bound_dev_if;
848 err = sock_tx_timestamp(msg, sk, &ipc.shtx); 848 err = sock_tx_timestamp(sk, &ipc.tx_flags);
849 if (err) 849 if (err)
850 return err; 850 return err;
851 if (msg->msg_controllen) { 851 if (msg->msg_controllen) {
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 41f5982d208..82806455e85 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -58,14 +58,14 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
58 return -ENOENT; 58 return -ENOENT;
59} 59}
60 60
61static struct xfrm_tunnel xfrm_tunnel_handler = { 61static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
62 .handler = xfrm_tunnel_rcv, 62 .handler = xfrm_tunnel_rcv,
63 .err_handler = xfrm_tunnel_err, 63 .err_handler = xfrm_tunnel_err,
64 .priority = 2, 64 .priority = 2,
65}; 65};
66 66
67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
68static struct xfrm_tunnel xfrm64_tunnel_handler = { 68static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
69 .handler = xfrm_tunnel_rcv, 69 .handler = xfrm_tunnel_rcv,
70 .err_handler = xfrm_tunnel_err, 70 .err_handler = xfrm_tunnel_err,
71 .priority = 2, 71 .priority = 2,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 324fac3b6c1..8c88340278f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -243,7 +243,7 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev)
243/* Check if a route is valid prefix route */ 243/* Check if a route is valid prefix route */
244static inline int addrconf_is_prefix_route(const struct rt6_info *rt) 244static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
245{ 245{
246 return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0); 246 return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
247} 247}
248 248
249static void addrconf_del_timer(struct inet6_ifaddr *ifp) 249static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -2964,7 +2964,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2964 start sending router solicitations. 2964 start sending router solicitations.
2965 */ 2965 */
2966 2966
2967 if (ifp->idev->cnf.forwarding == 0 && 2967 if ((ifp->idev->cnf.forwarding == 0 ||
2968 ifp->idev->cnf.forwarding == 2) &&
2968 ifp->idev->cnf.rtr_solicits > 0 && 2969 ifp->idev->cnf.rtr_solicits > 0 &&
2969 (dev->flags&IFF_LOOPBACK) == 0 && 2970 (dev->flags&IFF_LOOPBACK) == 0 &&
2970 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { 2971 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8175f802651..c8993e5a337 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -518,10 +518,9 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
518 518
519static inline int ip6addrlbl_msgsize(void) 519static inline int ip6addrlbl_msgsize(void)
520{ 520{
521 return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 521 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
522 + nla_total_size(16) /* IFAL_ADDRESS */ 522 + nla_total_size(16) /* IFAL_ADDRESS */
523 + nla_total_size(4) /* IFAL_LABEL */ 523 + nla_total_size(4); /* IFAL_LABEL */
524 );
525} 524}
526 525
527static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, 526static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 56b9bf2516f..60220985bb8 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -467,7 +467,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
467 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) 467 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
468 sin->sin6_scope_id = sk->sk_bound_dev_if; 468 sin->sin6_scope_id = sk->sk_bound_dev_if;
469 *uaddr_len = sizeof(*sin); 469 *uaddr_len = sizeof(*sin);
470 return(0); 470 return 0;
471} 471}
472 472
473EXPORT_SYMBOL(inet6_getname); 473EXPORT_SYMBOL(inet6_getname);
@@ -488,7 +488,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
488 case SIOCADDRT: 488 case SIOCADDRT:
489 case SIOCDELRT: 489 case SIOCDELRT:
490 490
491 return(ipv6_route_ioctl(net, cmd, (void __user *)arg)); 491 return ipv6_route_ioctl(net, cmd, (void __user *)arg);
492 492
493 case SIOCSIFADDR: 493 case SIOCSIFADDR:
494 return addrconf_add_ifaddr(net, (void __user *) arg); 494 return addrconf_add_ifaddr(net, (void __user *) arg);
@@ -502,7 +502,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
502 return sk->sk_prot->ioctl(sk, cmd, arg); 502 return sk->sk_prot->ioctl(sk, cmd, arg);
503 } 503 }
504 /*NOTREACHED*/ 504 /*NOTREACHED*/
505 return(0); 505 return 0;
506} 506}
507 507
508EXPORT_SYMBOL(inet6_ioctl); 508EXPORT_SYMBOL(inet6_ioctl);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index e1caa5d526c..14ed0a955b5 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -13,12 +13,12 @@ int ipv6_ext_hdr(u8 nexthdr)
13 /* 13 /*
14 * find out if nexthdr is an extension header or a protocol 14 * find out if nexthdr is an extension header or a protocol
15 */ 15 */
16 return ( (nexthdr == NEXTHDR_HOP) || 16 return (nexthdr == NEXTHDR_HOP) ||
17 (nexthdr == NEXTHDR_ROUTING) || 17 (nexthdr == NEXTHDR_ROUTING) ||
18 (nexthdr == NEXTHDR_FRAGMENT) || 18 (nexthdr == NEXTHDR_FRAGMENT) ||
19 (nexthdr == NEXTHDR_AUTH) || 19 (nexthdr == NEXTHDR_AUTH) ||
20 (nexthdr == NEXTHDR_NONE) || 20 (nexthdr == NEXTHDR_NONE) ||
21 (nexthdr == NEXTHDR_DEST) ); 21 (nexthdr == NEXTHDR_DEST);
22} 22}
23 23
24/* 24/*
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 980912ed7a3..99157b4cd56 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -637,7 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
637 } 637 }
638 mtu -= hlen + sizeof(struct frag_hdr); 638 mtu -= hlen + sizeof(struct frag_hdr);
639 639
640 if (skb_has_frags(skb)) { 640 if (skb_has_frag_list(skb)) {
641 int first_len = skb_pagelen(skb); 641 int first_len = skb_pagelen(skb);
642 struct sk_buff *frag2; 642 struct sk_buff *frag2;
643 643
@@ -878,8 +878,8 @@ static inline int ip6_rt_check(struct rt6key *rt_key,
878 struct in6_addr *fl_addr, 878 struct in6_addr *fl_addr,
879 struct in6_addr *addr_cache) 879 struct in6_addr *addr_cache)
880{ 880{
881 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 881 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
882 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); 882 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
883} 883}
884 884
885static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 885static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0fd027f3f47..f6d9f683543 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -83,15 +83,14 @@ struct ip6_tnl_net {
83 /* the IPv6 tunnel fallback device */ 83 /* the IPv6 tunnel fallback device */
84 struct net_device *fb_tnl_dev; 84 struct net_device *fb_tnl_dev;
85 /* lists for storing tunnels in use */ 85 /* lists for storing tunnels in use */
86 struct ip6_tnl *tnls_r_l[HASH_SIZE]; 86 struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
87 struct ip6_tnl *tnls_wc[1]; 87 struct ip6_tnl __rcu *tnls_wc[1];
88 struct ip6_tnl **tnls[2]; 88 struct ip6_tnl __rcu **tnls[2];
89}; 89};
90 90
91/* 91/*
92 * Locking : hash tables are protected by RCU and a spinlock 92 * Locking : hash tables are protected by RCU and RTNL
93 */ 93 */
94static DEFINE_SPINLOCK(ip6_tnl_lock);
95 94
96static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 95static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
97{ 96{
@@ -138,8 +137,8 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
138static struct ip6_tnl * 137static struct ip6_tnl *
139ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) 138ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
140{ 139{
141 unsigned h0 = HASH(remote); 140 unsigned int h0 = HASH(remote);
142 unsigned h1 = HASH(local); 141 unsigned int h1 = HASH(local);
143 struct ip6_tnl *t; 142 struct ip6_tnl *t;
144 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 143 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
145 144
@@ -167,7 +166,7 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
167 * Return: head of IPv6 tunnel list 166 * Return: head of IPv6 tunnel list
168 **/ 167 **/
169 168
170static struct ip6_tnl ** 169static struct ip6_tnl __rcu **
171ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) 170ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
172{ 171{
173 struct in6_addr *remote = &p->raddr; 172 struct in6_addr *remote = &p->raddr;
@@ -190,12 +189,10 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
190static void 189static void
191ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 190ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
192{ 191{
193 struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); 192 struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
194 193
195 spin_lock_bh(&ip6_tnl_lock); 194 rcu_assign_pointer(t->next , rtnl_dereference(*tp));
196 t->next = *tp;
197 rcu_assign_pointer(*tp, t); 195 rcu_assign_pointer(*tp, t);
198 spin_unlock_bh(&ip6_tnl_lock);
199} 196}
200 197
201/** 198/**
@@ -206,13 +203,14 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
206static void 203static void
207ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 204ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
208{ 205{
209 struct ip6_tnl **tp; 206 struct ip6_tnl __rcu **tp;
210 207 struct ip6_tnl *iter;
211 for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { 208
212 if (t == *tp) { 209 for (tp = ip6_tnl_bucket(ip6n, &t->parms);
213 spin_lock_bh(&ip6_tnl_lock); 210 (iter = rtnl_dereference(*tp)) != NULL;
214 *tp = t->next; 211 tp = &iter->next) {
215 spin_unlock_bh(&ip6_tnl_lock); 212 if (t == iter) {
213 rcu_assign_pointer(*tp, t->next);
216 break; 214 break;
217 } 215 }
218 } 216 }
@@ -290,10 +288,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
290{ 288{
291 struct in6_addr *remote = &p->raddr; 289 struct in6_addr *remote = &p->raddr;
292 struct in6_addr *local = &p->laddr; 290 struct in6_addr *local = &p->laddr;
291 struct ip6_tnl __rcu **tp;
293 struct ip6_tnl *t; 292 struct ip6_tnl *t;
294 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 293 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
295 294
296 for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) { 295 for (tp = ip6_tnl_bucket(ip6n, p);
296 (t = rtnl_dereference(*tp)) != NULL;
297 tp = &t->next) {
297 if (ipv6_addr_equal(local, &t->parms.laddr) && 298 if (ipv6_addr_equal(local, &t->parms.laddr) &&
298 ipv6_addr_equal(remote, &t->parms.raddr)) 299 ipv6_addr_equal(remote, &t->parms.raddr))
299 return t; 300 return t;
@@ -318,13 +319,10 @@ ip6_tnl_dev_uninit(struct net_device *dev)
318 struct net *net = dev_net(dev); 319 struct net *net = dev_net(dev);
319 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 320 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
320 321
321 if (dev == ip6n->fb_tnl_dev) { 322 if (dev == ip6n->fb_tnl_dev)
322 spin_lock_bh(&ip6_tnl_lock); 323 rcu_assign_pointer(ip6n->tnls_wc[0], NULL);
323 ip6n->tnls_wc[0] = NULL; 324 else
324 spin_unlock_bh(&ip6_tnl_lock);
325 } else {
326 ip6_tnl_unlink(ip6n, t); 325 ip6_tnl_unlink(ip6n, t);
327 }
328 ip6_tnl_dst_reset(t); 326 ip6_tnl_dst_reset(t);
329 dev_put(dev); 327 dev_put(dev);
330} 328}
@@ -727,7 +725,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
727 skb_tunnel_rx(skb, t->dev); 725 skb_tunnel_rx(skb, t->dev);
728 726
729 dscp_ecn_decapsulate(t, ipv6h, skb); 727 dscp_ecn_decapsulate(t, ipv6h, skb);
730 netif_rx(skb); 728
729 if (netif_rx(skb) == NET_RX_DROP)
730 t->dev->stats.rx_dropped++;
731
731 rcu_read_unlock(); 732 rcu_read_unlock();
732 return 0; 733 return 0;
733 } 734 }
@@ -1369,16 +1370,16 @@ static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1369 ip6_tnl_dev_init_gen(dev); 1370 ip6_tnl_dev_init_gen(dev);
1370 t->parms.proto = IPPROTO_IPV6; 1371 t->parms.proto = IPPROTO_IPV6;
1371 dev_hold(dev); 1372 dev_hold(dev);
1372 ip6n->tnls_wc[0] = t; 1373 rcu_assign_pointer(ip6n->tnls_wc[0], t);
1373} 1374}
1374 1375
1375static struct xfrm6_tunnel ip4ip6_handler = { 1376static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
1376 .handler = ip4ip6_rcv, 1377 .handler = ip4ip6_rcv,
1377 .err_handler = ip4ip6_err, 1378 .err_handler = ip4ip6_err,
1378 .priority = 1, 1379 .priority = 1,
1379}; 1380};
1380 1381
1381static struct xfrm6_tunnel ip6ip6_handler = { 1382static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
1382 .handler = ip6ip6_rcv, 1383 .handler = ip6ip6_rcv,
1383 .err_handler = ip6ip6_err, 1384 .err_handler = ip6ip6_err,
1384 .priority = 1, 1385 .priority = 1,
@@ -1391,14 +1392,14 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1391 LIST_HEAD(list); 1392 LIST_HEAD(list);
1392 1393
1393 for (h = 0; h < HASH_SIZE; h++) { 1394 for (h = 0; h < HASH_SIZE; h++) {
1394 t = ip6n->tnls_r_l[h]; 1395 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1395 while (t != NULL) { 1396 while (t != NULL) {
1396 unregister_netdevice_queue(t->dev, &list); 1397 unregister_netdevice_queue(t->dev, &list);
1397 t = t->next; 1398 t = rtnl_dereference(t->next);
1398 } 1399 }
1399 } 1400 }
1400 1401
1401 t = ip6n->tnls_wc[0]; 1402 t = rtnl_dereference(ip6n->tnls_wc[0]);
1402 unregister_netdevice_queue(t->dev, &list); 1403 unregister_netdevice_queue(t->dev, &list);
1403 unregister_netdevice_many(&list); 1404 unregister_netdevice_many(&list);
1404} 1405}
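ip6_tunnel drops its spinlock and manages the hash chains under RTNL plus RCU: link and unlink now use rtnl_dereference() for the walk and a single rcu_assign_pointer() store to splice a tunnel in or out. The subtle part is walking with a pointer-to-pointer so removal is one store into *tp; a minimal single-threaded sketch of that list shape (__rcu annotations, rtnl_dereference() and grace periods are omitted, and the names below are illustrative):

#include <stdio.h>

/* The unlink loops above walk with a pointer-to-pointer (tp) so that
 * removing a tunnel is a single pointer store into *tp -- exactly what
 * rcu_assign_pointer(*tp, t->next) does under RTNL. */
struct tnl {
        const char *name;
        struct tnl *next;
};

static void unlink_tnl(struct tnl **head, struct tnl *t)
{
        struct tnl **tp;
        struct tnl *iter;

        for (tp = head; (iter = *tp) != NULL; tp = &iter->next) {
                if (iter == t) {
                        *tp = t->next;  /* the single splicing store */
                        break;
                }
        }
}

int main(void)
{
        struct tnl c = { "ip6tnl2", NULL };
        struct tnl b = { "ip6tnl1", &c };
        struct tnl a = { "ip6tnl0", &b };
        struct tnl *head = &a;

        unlink_tnl(&head, &b);
        for (struct tnl *t = head; t; t = t->next)
                printf("%s\n", t->name);
        return 0;
}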
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 66078dad7fe..2640c9be589 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -666,7 +666,9 @@ static int pim6_rcv(struct sk_buff *skb)
666 666
667 skb_tunnel_rx(skb, reg_dev); 667 skb_tunnel_rx(skb, reg_dev);
668 668
669 netif_rx(skb); 669 if (netif_rx(skb) == NET_RX_DROP)
670 reg_dev->stats.rx_dropped++;
671
670 dev_put(reg_dev); 672 dev_put(reg_dev);
671 return 0; 673 return 0;
672 drop: 674 drop:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 58841c4ae94..b3dd844cd34 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -228,12 +228,12 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
228 do { 228 do {
229 cur = ((void *)cur) + (cur->nd_opt_len << 3); 229 cur = ((void *)cur) + (cur->nd_opt_len << 3);
230 } while(cur < end && cur->nd_opt_type != type); 230 } while(cur < end && cur->nd_opt_type != type);
231 return (cur <= end && cur->nd_opt_type == type ? cur : NULL); 231 return cur <= end && cur->nd_opt_type == type ? cur : NULL;
232} 232}
233 233
234static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) 234static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
235{ 235{
236 return (opt->nd_opt_type == ND_OPT_RDNSS); 236 return opt->nd_opt_type == ND_OPT_RDNSS;
237} 237}
238 238
239static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, 239static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
@@ -244,7 +244,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
244 do { 244 do {
245 cur = ((void *)cur) + (cur->nd_opt_len << 3); 245 cur = ((void *)cur) + (cur->nd_opt_len << 3);
246 } while(cur < end && !ndisc_is_useropt(cur)); 246 } while(cur < end && !ndisc_is_useropt(cur));
247 return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL); 247 return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
248} 248}
249 249
250static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, 250static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
@@ -319,7 +319,7 @@ static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
319 int prepad = ndisc_addr_option_pad(dev->type); 319 int prepad = ndisc_addr_option_pad(dev->type);
320 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) 320 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
321 return NULL; 321 return NULL;
322 return (lladdr + prepad); 322 return lladdr + prepad;
323} 323}
324 324
325int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir) 325int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
@@ -1105,6 +1105,18 @@ errout:
1105 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); 1105 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1106} 1106}
1107 1107
1108static inline int accept_ra(struct inet6_dev *in6_dev)
1109{
1110 /*
1111 * If forwarding is enabled, RA are not accepted unless the special
1112 * hybrid mode (accept_ra=2) is enabled.
1113 */
1114 if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
1115 return 0;
1116
1117 return in6_dev->cnf.accept_ra;
1118}
1119
1108static void ndisc_router_discovery(struct sk_buff *skb) 1120static void ndisc_router_discovery(struct sk_buff *skb)
1109{ 1121{
1110 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); 1122 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1158,8 +1170,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1158 return; 1170 return;
1159 } 1171 }
1160 1172
1161 /* skip route and link configuration on routers */ 1173 if (!accept_ra(in6_dev))
1162 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1163 goto skip_linkparms; 1174 goto skip_linkparms;
1164 1175
1165#ifdef CONFIG_IPV6_NDISC_NODETYPE 1176#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1309,8 +1320,7 @@ skip_linkparms:
1309 NEIGH_UPDATE_F_ISROUTER); 1320 NEIGH_UPDATE_F_ISROUTER);
1310 } 1321 }
1311 1322
1312 /* skip route and link configuration on routers */ 1323 if (!accept_ra(in6_dev))
1313 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1314 goto out; 1324 goto out;
1315 1325
1316#ifdef CONFIG_IPV6_ROUTE_INFO 1326#ifdef CONFIG_IPV6_ROUTE_INFO
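Together with the addrconf change earlier in this series (forwarding == 2 keeps sending router solicitations), the new accept_ra() helper defines a hybrid mode: a forwarding interface processes router advertisements only when accept_ra is raised to 2. The resulting truth table, as a tiny self-check (reduced to a yes/no result, whereas the kernel helper returns the raw accept_ra value):

#include <stdio.h>

/* Same decision as the accept_ra() helper added above, reduced to yes/no:
 * a forwarding interface honours router advertisements only in the new
 * hybrid mode accept_ra=2; non-forwarding interfaces behave as before. */
static int ra_processed(int forwarding, int accept_ra)
{
        if (forwarding && accept_ra < 2)
                return 0;
        return accept_ra != 0;
}

int main(void)
{
        printf("forwarding accept_ra  RA processed\n");
        for (int fwd = 0; fwd <= 1; fwd++)
                for (int ra = 0; ra <= 2; ra++)
                        printf("%10d %9d  %d\n", fwd, ra, ra_processed(fwd, ra));
        return 0;
}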
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 8e754be92c2..6b331e9b570 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -82,13 +82,13 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
82int 82int
83ip6t_ext_hdr(u8 nexthdr) 83ip6t_ext_hdr(u8 nexthdr)
84{ 84{
85 return ( (nexthdr == IPPROTO_HOPOPTS) || 85 return (nexthdr == IPPROTO_HOPOPTS) ||
86 (nexthdr == IPPROTO_ROUTING) || 86 (nexthdr == IPPROTO_ROUTING) ||
87 (nexthdr == IPPROTO_FRAGMENT) || 87 (nexthdr == IPPROTO_FRAGMENT) ||
88 (nexthdr == IPPROTO_ESP) || 88 (nexthdr == IPPROTO_ESP) ||
89 (nexthdr == IPPROTO_AH) || 89 (nexthdr == IPPROTO_AH) ||
90 (nexthdr == IPPROTO_NONE) || 90 (nexthdr == IPPROTO_NONE) ||
91 (nexthdr == IPPROTO_DSTOPTS) ); 91 (nexthdr == IPPROTO_DSTOPTS);
92} 92}
93 93
94/* Returns whether matches rule or not. */ 94/* Returns whether matches rule or not. */
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 578f3c1a16d..138a8b36270 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -363,7 +363,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
363 /* If the first fragment is fragmented itself, we split 363 /* If the first fragment is fragmented itself, we split
364 * it to two chunks: the first with data and paged part 364 * it to two chunks: the first with data and paged part
365 * and the second, holding only fragments. */ 365 * and the second, holding only fragments. */
366 if (skb_has_frags(head)) { 366 if (skb_has_frag_list(head)) {
367 struct sk_buff *clone; 367 struct sk_buff *clone;
368 int i, plen = 0; 368 int i, plen = 0;
369 369
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 1fa3468f0f3..9bb936ae245 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,28 +25,14 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <net/protocol.h> 26#include <net/protocol.h>
27 27
28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; 28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly;
29static DEFINE_SPINLOCK(inet6_proto_lock);
30
31 29
32int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
33{ 31{
34 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 32 int hash = protocol & (MAX_INET_PROTOS - 1);
35
36 spin_lock_bh(&inet6_proto_lock);
37
38 if (inet6_protos[hash]) {
39 ret = -1;
40 } else {
41 inet6_protos[hash] = prot;
42 ret = 0;
43 }
44
45 spin_unlock_bh(&inet6_proto_lock);
46 33
47 return ret; 34 return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1;
48} 35}
49
50EXPORT_SYMBOL(inet6_add_protocol); 36EXPORT_SYMBOL(inet6_add_protocol);
51 37
52/* 38/*
@@ -57,20 +43,10 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
57{ 43{
58 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 44 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
59 45
60 spin_lock_bh(&inet6_proto_lock); 46 ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1;
61
62 if (inet6_protos[hash] != prot) {
63 ret = -1;
64 } else {
65 inet6_protos[hash] = NULL;
66 ret = 0;
67 }
68
69 spin_unlock_bh(&inet6_proto_lock);
70 47
71 synchronize_net(); 48 synchronize_net();
72 49
73 return ret; 50 return ret;
74} 51}
75
76EXPORT_SYMBOL(inet6_del_protocol); 52EXPORT_SYMBOL(inet6_del_protocol);
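The inet6 protocol table loses its spinlock: registration becomes a single compare-and-swap from NULL, removal a compare-and-swap back to NULL followed by synchronize_net() so late readers drain. A user-space sketch of the same idea, with C11 atomics standing in for the kernel's cmpxchg() (structure and function names here are illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <stddef.h>

#define MAX_INET_PROTOS 256

struct proto_ops { const char *name; };

/* The spinlock-protected table becomes lock-free: registration is one
 * compare-and-swap from NULL, removal one compare-and-swap back to NULL.
 * The kernel follows removal with synchronize_net(), which has no
 * equivalent in this sketch. */
static _Atomic(const struct proto_ops *) protos[MAX_INET_PROTOS];

static int add_protocol(const struct proto_ops *prot, unsigned char num)
{
        const struct proto_ops *expected = NULL;

        return atomic_compare_exchange_strong(&protos[num], &expected, prot)
                ? 0 : -1;               /* -1: slot already taken */
}

static int del_protocol(const struct proto_ops *prot, unsigned char num)
{
        const struct proto_ops *expected = prot;

        return atomic_compare_exchange_strong(&protos[num], &expected, NULL)
                ? 0 : -1;               /* -1: someone else owns the slot */
}

int main(void)
{
        static const struct proto_ops tcp6 = { "tcp6" };

        printf("add: %d\n", add_protocol(&tcp6, 6));
        printf("add again: %d\n", add_protocol(&tcp6, 6));
        printf("del: %d\n", del_protocol(&tcp6, 6));
        return 0;
}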
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e677937a07f..45e6efb7f17 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -764,7 +764,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
764 return -EINVAL; 764 return -EINVAL;
765 765
766 if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 766 if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
767 return(-EAFNOSUPPORT); 767 return -EAFNOSUPPORT;
768 768
769 /* port is the proto value [0..255] carried in nexthdr */ 769 /* port is the proto value [0..255] carried in nexthdr */
770 proto = ntohs(sin6->sin6_port); 770 proto = ntohs(sin6->sin6_port);
@@ -772,10 +772,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
772 if (!proto) 772 if (!proto)
773 proto = inet->inet_num; 773 proto = inet->inet_num;
774 else if (proto != inet->inet_num) 774 else if (proto != inet->inet_num)
775 return(-EINVAL); 775 return -EINVAL;
776 776
777 if (proto > 255) 777 if (proto > 255)
778 return(-EINVAL); 778 return -EINVAL;
779 779
780 daddr = &sin6->sin6_addr; 780 daddr = &sin6->sin6_addr;
781 if (np->sndflow) { 781 if (np->sndflow) {
@@ -985,7 +985,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
985 /* You may get strange result with a positive odd offset; 985 /* You may get strange result with a positive odd offset;
986 RFC2292bis agrees with me. */ 986 RFC2292bis agrees with me. */
987 if (val > 0 && (val&1)) 987 if (val > 0 && (val&1))
988 return(-EINVAL); 988 return -EINVAL;
989 if (val < 0) { 989 if (val < 0) {
990 rp->checksum = 0; 990 rp->checksum = 0;
991 } else { 991 } else {
@@ -997,7 +997,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
997 break; 997 break;
998 998
999 default: 999 default:
1000 return(-ENOPROTOOPT); 1000 return -ENOPROTOOPT;
1001 } 1001 }
1002} 1002}
1003 1003
@@ -1190,7 +1190,7 @@ static int rawv6_init_sk(struct sock *sk)
1190 default: 1190 default:
1191 break; 1191 break;
1192 } 1192 }
1193 return(0); 1193 return 0;
1194} 1194}
1195 1195
1196struct proto rawv6_prot = { 1196struct proto rawv6_prot = {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 64cfef1b0a4..c7ba3149633 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -458,7 +458,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
458 /* If the first fragment is fragmented itself, we split 458 /* If the first fragment is fragmented itself, we split
459 * it to two chunks: the first with data and paged part 459 * it to two chunks: the first with data and paged part
460 * and the second, holding only fragments. */ 460 * and the second, holding only fragments. */
461 if (skb_has_frags(head)) { 461 if (skb_has_frag_list(head)) {
462 struct sk_buff *clone; 462 struct sk_buff *clone;
463 int i, plen = 0; 463 int i, plen = 0;
464 464
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d126365ac04..25b0beda433 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -217,14 +217,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
217 217
218static __inline__ int rt6_check_expired(const struct rt6_info *rt) 218static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{ 219{
220 return (rt->rt6i_flags & RTF_EXPIRES && 220 return (rt->rt6i_flags & RTF_EXPIRES) &&
221 time_after(jiffies, rt->rt6i_expires)); 221 time_after(jiffies, rt->rt6i_expires);
222} 222}
223 223
224static inline int rt6_need_strict(struct in6_addr *daddr) 224static inline int rt6_need_strict(struct in6_addr *daddr)
225{ 225{
226 return (ipv6_addr_type(daddr) & 226 return ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); 227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
228} 228}
229 229
230/* 230/*
@@ -440,7 +440,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
440 __func__, match); 440 __func__, match);
441 441
442 net = dev_net(rt0->rt6i_dev); 442 net = dev_net(rt0->rt6i_dev);
443 return (match ? match : net->ipv6.ip6_null_entry); 443 return match ? match : net->ipv6.ip6_null_entry;
444} 444}
445 445
446#ifdef CONFIG_IPV6_ROUTE_INFO 446#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -859,7 +859,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
859 859
860 dst_release(*dstp); 860 dst_release(*dstp);
861 *dstp = new; 861 *dstp = new;
862 return (new ? 0 : -ENOMEM); 862 return new ? 0 : -ENOMEM;
863} 863}
864EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 864EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
865 865
@@ -1070,7 +1070,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1071out: 1071out:
1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1073 return (atomic_read(&ops->entries) > rt_max_size); 1073 return atomic_read(&ops->entries) > rt_max_size;
1074} 1074}
1075 1075
1076/* Clean host part of a prefix. Not necessary in radix tree, 1076/* Clean host part of a prefix. Not necessary in radix tree,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4699cd3c311..8a039982223 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -68,19 +68,18 @@ static void ipip6_tunnel_setup(struct net_device *dev);
68 68
69static int sit_net_id __read_mostly; 69static int sit_net_id __read_mostly;
70struct sit_net { 70struct sit_net {
71 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 71 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
72 struct ip_tunnel *tunnels_r[HASH_SIZE]; 72 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
73 struct ip_tunnel *tunnels_l[HASH_SIZE]; 73 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
74 struct ip_tunnel *tunnels_wc[1]; 74 struct ip_tunnel __rcu *tunnels_wc[1];
75 struct ip_tunnel **tunnels[4]; 75 struct ip_tunnel __rcu **tunnels[4];
76 76
77 struct net_device *fb_tunnel_dev; 77 struct net_device *fb_tunnel_dev;
78}; 78};
79 79
80/* 80/*
81 * Locking : hash tables are protected by RCU and a spinlock 81 * Locking : hash tables are protected by RCU and RTNL
82 */ 82 */
83static DEFINE_SPINLOCK(ipip6_lock);
84 83
85#define for_each_ip_tunnel_rcu(start) \ 84#define for_each_ip_tunnel_rcu(start) \
86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 85 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
@@ -91,8 +90,8 @@ static DEFINE_SPINLOCK(ipip6_lock);
91static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, 90static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
92 struct net_device *dev, __be32 remote, __be32 local) 91 struct net_device *dev, __be32 remote, __be32 local)
93{ 92{
94 unsigned h0 = HASH(remote); 93 unsigned int h0 = HASH(remote);
95 unsigned h1 = HASH(local); 94 unsigned int h1 = HASH(local);
96 struct ip_tunnel *t; 95 struct ip_tunnel *t;
97 struct sit_net *sitn = net_generic(net, sit_net_id); 96 struct sit_net *sitn = net_generic(net, sit_net_id);
98 97
@@ -121,12 +120,12 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
121 return NULL; 120 return NULL;
122} 121}
123 122
124static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, 123static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
125 struct ip_tunnel_parm *parms) 124 struct ip_tunnel_parm *parms)
126{ 125{
127 __be32 remote = parms->iph.daddr; 126 __be32 remote = parms->iph.daddr;
128 __be32 local = parms->iph.saddr; 127 __be32 local = parms->iph.saddr;
129 unsigned h = 0; 128 unsigned int h = 0;
130 int prio = 0; 129 int prio = 0;
131 130
132 if (remote) { 131 if (remote) {
@@ -140,7 +139,7 @@ static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn,
140 return &sitn->tunnels[prio][h]; 139 return &sitn->tunnels[prio][h];
141} 140}
142 141
143static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, 142static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
144 struct ip_tunnel *t) 143 struct ip_tunnel *t)
145{ 144{
146 return __ipip6_bucket(sitn, &t->parms); 145 return __ipip6_bucket(sitn, &t->parms);
@@ -148,13 +147,14 @@ static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn,
148 147
149static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) 148static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
150{ 149{
151 struct ip_tunnel **tp; 150 struct ip_tunnel __rcu **tp;
152 151 struct ip_tunnel *iter;
153 for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { 152
154 if (t == *tp) { 153 for (tp = ipip6_bucket(sitn, t);
155 spin_lock_bh(&ipip6_lock); 154 (iter = rtnl_dereference(*tp)) != NULL;
156 *tp = t->next; 155 tp = &iter->next) {
157 spin_unlock_bh(&ipip6_lock); 156 if (t == iter) {
157 rcu_assign_pointer(*tp, t->next);
158 break; 158 break;
159 } 159 }
160 } 160 }
@@ -162,12 +162,10 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
162 162
163static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) 163static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
164{ 164{
165 struct ip_tunnel **tp = ipip6_bucket(sitn, t); 165 struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
166 166
167 spin_lock_bh(&ipip6_lock); 167 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
168 t->next = *tp;
169 rcu_assign_pointer(*tp, t); 168 rcu_assign_pointer(*tp, t);
170 spin_unlock_bh(&ipip6_lock);
171} 169}
172 170
173static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) 171static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
@@ -187,17 +185,20 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
187#endif 185#endif
188} 186}
189 187
190static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, 188static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
191 struct ip_tunnel_parm *parms, int create) 189 struct ip_tunnel_parm *parms, int create)
192{ 190{
193 __be32 remote = parms->iph.daddr; 191 __be32 remote = parms->iph.daddr;
194 __be32 local = parms->iph.saddr; 192 __be32 local = parms->iph.saddr;
195 struct ip_tunnel *t, **tp, *nt; 193 struct ip_tunnel *t, *nt;
194 struct ip_tunnel __rcu **tp;
196 struct net_device *dev; 195 struct net_device *dev;
197 char name[IFNAMSIZ]; 196 char name[IFNAMSIZ];
198 struct sit_net *sitn = net_generic(net, sit_net_id); 197 struct sit_net *sitn = net_generic(net, sit_net_id);
199 198
200 for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) { 199 for (tp = __ipip6_bucket(sitn, parms);
200 (t = rtnl_dereference(*tp)) != NULL;
201 tp = &t->next) {
201 if (local == t->parms.iph.saddr && 202 if (local == t->parms.iph.saddr &&
202 remote == t->parms.iph.daddr && 203 remote == t->parms.iph.daddr &&
203 parms->link == t->parms.link) { 204 parms->link == t->parms.link) {
@@ -340,7 +341,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
340 341
341 ASSERT_RTNL(); 342 ASSERT_RTNL();
342 343
343 for (p = t->prl; p; p = p->next) { 344 for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
344 if (p->addr == a->addr) { 345 if (p->addr == a->addr) {
345 if (chg) { 346 if (chg) {
346 p->flags = a->flags; 347 p->flags = a->flags;
@@ -451,15 +452,12 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
451 struct sit_net *sitn = net_generic(net, sit_net_id); 452 struct sit_net *sitn = net_generic(net, sit_net_id);
452 453
453 if (dev == sitn->fb_tunnel_dev) { 454 if (dev == sitn->fb_tunnel_dev) {
454 spin_lock_bh(&ipip6_lock); 455 rcu_assign_pointer(sitn->tunnels_wc[0], NULL);
455 sitn->tunnels_wc[0] = NULL;
456 spin_unlock_bh(&ipip6_lock);
457 dev_put(dev);
458 } else { 456 } else {
459 ipip6_tunnel_unlink(sitn, netdev_priv(dev)); 457 ipip6_tunnel_unlink(sitn, netdev_priv(dev));
460 ipip6_tunnel_del_prl(netdev_priv(dev), NULL); 458 ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
461 dev_put(dev);
462 } 459 }
460 dev_put(dev);
463} 461}
464 462
465 463
@@ -566,7 +564,10 @@ static int ipip6_rcv(struct sk_buff *skb)
566 skb_tunnel_rx(skb, tunnel->dev); 564 skb_tunnel_rx(skb, tunnel->dev);
567 565
568 ipip6_ecn_decapsulate(iph, skb); 566 ipip6_ecn_decapsulate(iph, skb);
569 netif_rx(skb); 567
568 if (netif_rx(skb) == NET_RX_DROP)
569 tunnel->dev->stats.rx_dropped++;
570
570 rcu_read_unlock(); 571 rcu_read_unlock();
571 return 0; 572 return 0;
572 } 573 }
@@ -590,7 +591,7 @@ __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
590#ifdef CONFIG_IPV6_SIT_6RD 591#ifdef CONFIG_IPV6_SIT_6RD
591 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, 592 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
592 tunnel->ip6rd.prefixlen)) { 593 tunnel->ip6rd.prefixlen)) {
593 unsigned pbw0, pbi0; 594 unsigned int pbw0, pbi0;
594 int pbi1; 595 int pbi1;
595 u32 d; 596 u32 d;
596 597
@@ -1132,7 +1133,7 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1132 sitn->tunnels_wc[0] = tunnel; 1133 sitn->tunnels_wc[0] = tunnel;
1133} 1134}
1134 1135
1135static struct xfrm_tunnel sit_handler = { 1136static struct xfrm_tunnel sit_handler __read_mostly = {
1136 .handler = ipip6_rcv, 1137 .handler = ipip6_rcv,
1137 .err_handler = ipip6_err, 1138 .err_handler = ipip6_err,
1138 .priority = 1, 1139 .priority = 1,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fe6d40418c0..8d93f6d8197 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -139,7 +139,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
139 return -EINVAL; 139 return -EINVAL;
140 140
141 if (usin->sin6_family != AF_INET6) 141 if (usin->sin6_family != AF_INET6)
142 return(-EAFNOSUPPORT); 142 return -EAFNOSUPPORT;
143 143
144 memset(&fl, 0, sizeof(fl)); 144 memset(&fl, 0, sizeof(fl));
145 145
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index fc3c86a4745..d9864725d0c 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,8 +30,8 @@
30#include <net/protocol.h> 30#include <net/protocol.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32 32
33static struct xfrm6_tunnel *tunnel6_handlers; 33static struct xfrm6_tunnel *tunnel6_handlers __read_mostly;
34static struct xfrm6_tunnel *tunnel46_handlers; 34static struct xfrm6_tunnel *tunnel46_handlers __read_mostly;
35static DEFINE_MUTEX(tunnel6_mutex); 35static DEFINE_MUTEX(tunnel6_mutex);
36 36
37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) 37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
@@ -51,7 +51,7 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
51 } 51 }
52 52
53 handler->next = *pprev; 53 handler->next = *pprev;
54 *pprev = handler; 54 rcu_assign_pointer(*pprev, handler);
55 55
56 ret = 0; 56 ret = 0;
57 57
@@ -88,6 +88,11 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
88 88
89EXPORT_SYMBOL(xfrm6_tunnel_deregister); 89EXPORT_SYMBOL(xfrm6_tunnel_deregister);
90 90
91#define for_each_tunnel_rcu(head, handler) \
92 for (handler = rcu_dereference(head); \
93 handler != NULL; \
94 handler = rcu_dereference(handler->next)) \
95
91static int tunnel6_rcv(struct sk_buff *skb) 96static int tunnel6_rcv(struct sk_buff *skb)
92{ 97{
93 struct xfrm6_tunnel *handler; 98 struct xfrm6_tunnel *handler;
@@ -95,7 +100,7 @@ static int tunnel6_rcv(struct sk_buff *skb)
95 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 100 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
96 goto drop; 101 goto drop;
97 102
98 for (handler = tunnel6_handlers; handler; handler = handler->next) 103 for_each_tunnel_rcu(tunnel6_handlers, handler)
99 if (!handler->handler(skb)) 104 if (!handler->handler(skb))
100 return 0; 105 return 0;
101 106
@@ -113,7 +118,7 @@ static int tunnel46_rcv(struct sk_buff *skb)
113 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 118 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
114 goto drop; 119 goto drop;
115 120
116 for (handler = tunnel46_handlers; handler; handler = handler->next) 121 for_each_tunnel_rcu(tunnel46_handlers, handler)
117 if (!handler->handler(skb)) 122 if (!handler->handler(skb))
118 return 0; 123 return 0;
119 124
@@ -129,7 +134,7 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
129{ 134{
130 struct xfrm6_tunnel *handler; 135 struct xfrm6_tunnel *handler;
131 136
132 for (handler = tunnel6_handlers; handler; handler = handler->next) 137 for_each_tunnel_rcu(tunnel6_handlers, handler)
133 if (!handler->err_handler(skb, opt, type, code, offset, info)) 138 if (!handler->err_handler(skb, opt, type, code, offset, info))
134 break; 139 break;
135} 140}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 6baeabbbca8..39676eac3a3 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); 199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
200 200
201 xfrm6_policy_afinfo.garbage_collect(net); 201 xfrm6_policy_afinfo.garbage_collect(net);
202 return (atomic_read(&ops->entries) > ops->gc_thresh * 2); 202 return atomic_read(&ops->entries) > ops->gc_thresh * 2;
203} 203}
204 204
205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) 205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 2ce3a8278f2..ac7584b946a 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -317,13 +317,13 @@ static const struct xfrm_type xfrm6_tunnel_type = {
317 .output = xfrm6_tunnel_output, 317 .output = xfrm6_tunnel_output,
318}; 318};
319 319
320static struct xfrm6_tunnel xfrm6_tunnel_handler = { 320static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = {
321 .handler = xfrm6_tunnel_rcv, 321 .handler = xfrm6_tunnel_rcv,
322 .err_handler = xfrm6_tunnel_err, 322 .err_handler = xfrm6_tunnel_err,
323 .priority = 2, 323 .priority = 2,
324}; 324};
325 325
326static struct xfrm6_tunnel xfrm46_tunnel_handler = { 326static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = {
327 .handler = xfrm6_tunnel_rcv, 327 .handler = xfrm6_tunnel_rcv,
328 .err_handler = xfrm6_tunnel_err, 328 .err_handler = xfrm6_tunnel_err,
329 .priority = 2, 329 .priority = 2,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index fd55b5135de..bf3635129b1 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -573,9 +573,9 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
573 /* Requested object/attribute doesn't exist */ 573 /* Requested object/attribute doesn't exist */
574 if((self->errno == IAS_CLASS_UNKNOWN) || 574 if((self->errno == IAS_CLASS_UNKNOWN) ||
575 (self->errno == IAS_ATTRIB_UNKNOWN)) 575 (self->errno == IAS_ATTRIB_UNKNOWN))
576 return (-EADDRNOTAVAIL); 576 return -EADDRNOTAVAIL;
577 else 577 else
578 return (-EHOSTUNREACH); 578 return -EHOSTUNREACH;
579 } 579 }
580 580
581 /* Get the remote TSAP selector */ 581 /* Get the remote TSAP selector */
@@ -663,7 +663,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
663 __func__, name); 663 __func__, name);
664 self->daddr = DEV_ADDR_ANY; 664 self->daddr = DEV_ADDR_ANY;
665 kfree(discoveries); 665 kfree(discoveries);
666 return(-ENOTUNIQ); 666 return -ENOTUNIQ;
667 } 667 }
668 /* First time we found that one, save it ! */ 668 /* First time we found that one, save it ! */
669 daddr = self->daddr; 669 daddr = self->daddr;
@@ -677,7 +677,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
677 IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __func__); 677 IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __func__);
678 self->daddr = DEV_ADDR_ANY; 678 self->daddr = DEV_ADDR_ANY;
679 kfree(discoveries); 679 kfree(discoveries);
680 return(-EHOSTUNREACH); 680 return -EHOSTUNREACH;
681 break; 681 break;
682 } 682 }
683 } 683 }
@@ -689,7 +689,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
689 IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n", 689 IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n",
690 __func__, name); 690 __func__, name);
691 self->daddr = DEV_ADDR_ANY; 691 self->daddr = DEV_ADDR_ANY;
692 return(-EADDRNOTAVAIL); 692 return -EADDRNOTAVAIL;
693 } 693 }
694 694
695 /* Revert back to discovered device & service */ 695 /* Revert back to discovered device & service */
@@ -2465,9 +2465,9 @@ bed:
2465 /* Requested object/attribute doesn't exist */ 2465 /* Requested object/attribute doesn't exist */
2466 if((self->errno == IAS_CLASS_UNKNOWN) || 2466 if((self->errno == IAS_CLASS_UNKNOWN) ||
2467 (self->errno == IAS_ATTRIB_UNKNOWN)) 2467 (self->errno == IAS_ATTRIB_UNKNOWN))
2468 return (-EADDRNOTAVAIL); 2468 return -EADDRNOTAVAIL;
2469 else 2469 else
2470 return (-EHOSTUNREACH); 2470 return -EHOSTUNREACH;
2471 } 2471 }
2472 2472
2473 /* Translate from internal to user structure */ 2473 /* Translate from internal to user structure */
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index c1c8ae93912..36c3f037f17 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -315,7 +315,7 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
315 315
316 /* Get the actual number of device in the buffer and return */ 316 /* Get the actual number of device in the buffer and return */
317 *pn = i; 317 *pn = i;
318 return(buffer); 318 return buffer;
319} 319}
320 320
321#ifdef CONFIG_PROC_FS 321#ifdef CONFIG_PROC_FS
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index faa82ca2dfd..a39cca8331d 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -449,8 +449,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
449 } 449 }
450 450
451#ifdef SERIAL_DO_RESTART 451#ifdef SERIAL_DO_RESTART
452 return ((self->flags & ASYNC_HUP_NOTIFY) ? 452 return (self->flags & ASYNC_HUP_NOTIFY) ?
453 -EAGAIN : -ERESTARTSYS); 453 -EAGAIN : -ERESTARTSYS;
454#else 454#else
455 return -EAGAIN; 455 return -EAGAIN;
456#endif 456#endif
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 5bb8353105c..8ee1ff6c742 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev);
45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, 45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
46 struct net_device *dev); 46 struct net_device *dev);
47static void irlan_eth_set_multicast_list( struct net_device *dev); 47static void irlan_eth_set_multicast_list( struct net_device *dev);
48static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev);
49 48
50static const struct net_device_ops irlan_eth_netdev_ops = { 49static const struct net_device_ops irlan_eth_netdev_ops = {
51 .ndo_open = irlan_eth_open, 50 .ndo_open = irlan_eth_open,
52 .ndo_stop = irlan_eth_close, 51 .ndo_stop = irlan_eth_close,
53 .ndo_start_xmit = irlan_eth_xmit, 52 .ndo_start_xmit = irlan_eth_xmit,
54 .ndo_get_stats = irlan_eth_get_stats,
55 .ndo_set_multicast_list = irlan_eth_set_multicast_list, 53 .ndo_set_multicast_list = irlan_eth_set_multicast_list,
56 .ndo_change_mtu = eth_change_mtu, 54 .ndo_change_mtu = eth_change_mtu,
57 .ndo_validate_addr = eth_validate_addr, 55 .ndo_validate_addr = eth_validate_addr,
@@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
208 * tried :-) DB 206 * tried :-) DB
209 */ 207 */
210 /* irttp_data_request already free the packet */ 208 /* irttp_data_request already free the packet */
211 self->stats.tx_dropped++; 209 dev->stats.tx_dropped++;
212 } else { 210 } else {
213 self->stats.tx_packets++; 211 dev->stats.tx_packets++;
214 self->stats.tx_bytes += len; 212 dev->stats.tx_bytes += len;
215 } 213 }
216 214
217 return NETDEV_TX_OK; 215 return NETDEV_TX_OK;
@@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
226int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) 224int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
227{ 225{
228 struct irlan_cb *self = instance; 226 struct irlan_cb *self = instance;
227 struct net_device *dev = self->dev;
229 228
230 if (skb == NULL) { 229 if (skb == NULL) {
231 ++self->stats.rx_dropped; 230 dev->stats.rx_dropped++;
232 return 0; 231 return 0;
233 } 232 }
234 if (skb->len < ETH_HLEN) { 233 if (skb->len < ETH_HLEN) {
235 IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n", 234 IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n",
236 __func__, skb->len); 235 __func__, skb->len);
237 ++self->stats.rx_dropped; 236 dev->stats.rx_dropped++;
238 dev_kfree_skb(skb); 237 dev_kfree_skb(skb);
239 return 0; 238 return 0;
240 } 239 }
@@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
244 * might have been previously set by the low level IrDA network 243 * might have been previously set by the low level IrDA network
245 * device driver 244 * device driver
246 */ 245 */
247 skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */ 246 skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */
248 247
249 self->stats.rx_packets++; 248 dev->stats.rx_packets++;
250 self->stats.rx_bytes += skb->len; 249 dev->stats.rx_bytes += skb->len;
251 250
252 netif_rx(skb); /* Eat it! */ 251 netif_rx(skb); /* Eat it! */
253 252
@@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev)
348 else 347 else
349 irlan_set_broadcast_filter(self, FALSE); 348 irlan_set_broadcast_filter(self, FALSE);
350} 349}
351
352/*
353 * Function irlan_get_stats (dev)
354 *
355 * Get the current statistics for this device
356 *
357 */
358static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev)
359{
360 struct irlan_cb *self = netdev_priv(dev);
361
362 return &self->stats;
363}
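The irlan_eth.c change drops the driver-private struct net_device_stats and the .ndo_get_stats callback in favour of the counters embedded in struct net_device; when a driver provides no get_stats hook, dev_get_stats() falls back to dev->stats, so the callback becomes dead code. A minimal sketch of the pattern for a hypothetical driver (demo_hw_queue is an assumed helper that consumes the skb):

static netdev_tx_t demo_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int len = skb->len;

	if (demo_hw_queue(skb) < 0) {
		/* the helper already freed the skb; just account the drop */
		dev->stats.tx_dropped++;
	} else {
		dev->stats.tx_packets++;
		dev->stats.tx_bytes += len;
	}
	return NETDEV_TX_OK;
}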
diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c
index cbcb4eb5403..43f16040a6f 100644
--- a/net/irda/irlan/irlan_event.c
+++ b/net/irda/irlan/irlan_event.c
@@ -24,7 +24,7 @@
24 24
25#include <net/irda/irlan_event.h> 25#include <net/irda/irlan_event.h>
26 26
27char *irlan_state[] = { 27const char * const irlan_state[] = {
28 "IRLAN_IDLE", 28 "IRLAN_IDLE",
29 "IRLAN_QUERY", 29 "IRLAN_QUERY",
30 "IRLAN_CONN", 30 "IRLAN_CONN",
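Making the state-name table const char * const marks both the pointer array and the strings it references immutable, so the table lands in rodata and an accidental write fails at compile time. As a sketch:

/* neither the array of pointers nor the strings can be modified */
static const char * const demo_state_names[] = {
	"IDLE", "QUERY", "CONN",
};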
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 0e7d8bde145..6115a44c0a2 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -939,7 +939,7 @@ struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots)
939 } 939 }
940 940
941 /* Return current cached discovery log */ 941 /* Return current cached discovery log */
942 return(irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE)); 942 return irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE);
943} 943}
944EXPORT_SYMBOL(irlmp_get_discoveries); 944EXPORT_SYMBOL(irlmp_get_discoveries);
945 945
diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c
index 3750884094d..062e63b1c5c 100644
--- a/net/irda/irlmp_frame.c
+++ b/net/irda/irlmp_frame.c
@@ -448,7 +448,7 @@ static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel,
448 (self->cache.slsap_sel == slsap_sel) && 448 (self->cache.slsap_sel == slsap_sel) &&
449 (self->cache.dlsap_sel == dlsap_sel)) 449 (self->cache.dlsap_sel == dlsap_sel))
450 { 450 {
451 return (self->cache.lsap); 451 return self->cache.lsap;
452 } 452 }
453#endif 453#endif
454 454
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index e98e40d76f4..7f17a8020e8 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -238,7 +238,7 @@ irnet_ias_to_tsap(irnet_socket * self,
238 DEXIT(IRDA_SR_TRACE, "\n"); 238 DEXIT(IRDA_SR_TRACE, "\n");
239 239
240 /* Return the TSAP */ 240 /* Return the TSAP */
241 return(dtsap_sel); 241 return dtsap_sel;
242} 242}
243 243
244/*------------------------------------------------------------------*/ 244/*------------------------------------------------------------------*/
@@ -301,7 +301,7 @@ irnet_connect_tsap(irnet_socket * self)
301 { 301 {
302 clear_bit(0, &self->ttp_connect); 302 clear_bit(0, &self->ttp_connect);
303 DERROR(IRDA_SR_ERROR, "connect aborted!\n"); 303 DERROR(IRDA_SR_ERROR, "connect aborted!\n");
304 return(err); 304 return err;
305 } 305 }
306 306
307 /* Connect to remote device */ 307 /* Connect to remote device */
@@ -312,7 +312,7 @@ irnet_connect_tsap(irnet_socket * self)
312 { 312 {
313 clear_bit(0, &self->ttp_connect); 313 clear_bit(0, &self->ttp_connect);
314 DERROR(IRDA_SR_ERROR, "connect aborted!\n"); 314 DERROR(IRDA_SR_ERROR, "connect aborted!\n");
315 return(err); 315 return err;
316 } 316 }
317 317
318 /* The above call is non-blocking. 318 /* The above call is non-blocking.
@@ -321,7 +321,7 @@ irnet_connect_tsap(irnet_socket * self)
321 * See you there ;-) */ 321 * See you there ;-) */
322 322
323 DEXIT(IRDA_SR_TRACE, "\n"); 323 DEXIT(IRDA_SR_TRACE, "\n");
324 return(err); 324 return err;
325} 325}
326 326
327/*------------------------------------------------------------------*/ 327/*------------------------------------------------------------------*/
@@ -362,10 +362,10 @@ irnet_discover_next_daddr(irnet_socket * self)
362 /* The above request is non-blocking. 362 /* The above request is non-blocking.
363 * After a while, IrDA will call us back in irnet_discovervalue_confirm() 363 * After a while, IrDA will call us back in irnet_discovervalue_confirm()
364 * We will then call irnet_ias_to_tsap() and come back here again... */ 364 * We will then call irnet_ias_to_tsap() and come back here again... */
365 return(0); 365 return 0;
366 } 366 }
367 else 367 else
368 return(1); 368 return 1;
369} 369}
370 370
371/*------------------------------------------------------------------*/ 371/*------------------------------------------------------------------*/
@@ -436,7 +436,7 @@ irnet_discover_daddr_and_lsap_sel(irnet_socket * self)
436 /* Follow me in irnet_discovervalue_confirm() */ 436 /* Follow me in irnet_discovervalue_confirm() */
437 437
438 DEXIT(IRDA_SR_TRACE, "\n"); 438 DEXIT(IRDA_SR_TRACE, "\n");
439 return(0); 439 return 0;
440} 440}
441 441
442/*------------------------------------------------------------------*/ 442/*------------------------------------------------------------------*/
@@ -485,7 +485,7 @@ irnet_dname_to_daddr(irnet_socket * self)
485 /* No luck ! */ 485 /* No luck ! */
486 DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname); 486 DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname);
487 kfree(discoveries); 487 kfree(discoveries);
488 return(-EADDRNOTAVAIL); 488 return -EADDRNOTAVAIL;
489} 489}
490 490
491 491
@@ -527,7 +527,7 @@ irda_irnet_create(irnet_socket * self)
527 INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect); 527 INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect);
528 528
529 DEXIT(IRDA_SOCK_TRACE, "\n"); 529 DEXIT(IRDA_SOCK_TRACE, "\n");
530 return(0); 530 return 0;
531} 531}
532 532
533/*------------------------------------------------------------------*/ 533/*------------------------------------------------------------------*/
@@ -601,7 +601,7 @@ irda_irnet_connect(irnet_socket * self)
601 * We will finish the connection procedure in irnet_connect_tsap(). 601 * We will finish the connection procedure in irnet_connect_tsap().
602 */ 602 */
603 DEXIT(IRDA_SOCK_TRACE, "\n"); 603 DEXIT(IRDA_SOCK_TRACE, "\n");
604 return(0); 604 return 0;
605} 605}
606 606
607/*------------------------------------------------------------------*/ 607/*------------------------------------------------------------------*/
@@ -733,7 +733,7 @@ irnet_daddr_to_dname(irnet_socket * self)
733 /* No luck ! */ 733 /* No luck ! */
734 DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr); 734 DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr);
735 kfree(discoveries); 735 kfree(discoveries);
736 return(-EADDRNOTAVAIL); 736 return -EADDRNOTAVAIL;
737} 737}
738 738
739/*------------------------------------------------------------------*/ 739/*------------------------------------------------------------------*/
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index dfe7b38dd4a..69f1fa64994 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -166,7 +166,7 @@ irnet_ctrl_write(irnet_socket * ap,
166 } 166 }
167 167
168 /* Success : we have parsed all commands successfully */ 168 /* Success : we have parsed all commands successfully */
169 return(count); 169 return count;
170} 170}
171 171
172#ifdef INITIAL_DISCOVERY 172#ifdef INITIAL_DISCOVERY
@@ -300,7 +300,7 @@ irnet_ctrl_read(irnet_socket * ap,
300 } 300 }
301 301
302 DEXIT(CTRL_TRACE, "\n"); 302 DEXIT(CTRL_TRACE, "\n");
303 return(strlen(event)); 303 return strlen(event);
304 } 304 }
305#endif /* INITIAL_DISCOVERY */ 305#endif /* INITIAL_DISCOVERY */
306 306
@@ -409,7 +409,7 @@ irnet_ctrl_read(irnet_socket * ap,
409 } 409 }
410 410
411 DEXIT(CTRL_TRACE, "\n"); 411 DEXIT(CTRL_TRACE, "\n");
412 return(strlen(event)); 412 return strlen(event);
413} 413}
414 414
415/*------------------------------------------------------------------*/ 415/*------------------------------------------------------------------*/
@@ -623,7 +623,7 @@ dev_irnet_poll(struct file * file,
623 mask |= irnet_ctrl_poll(ap, file, wait); 623 mask |= irnet_ctrl_poll(ap, file, wait);
624 624
625 DEXIT(FS_TRACE, " - mask=0x%X\n", mask); 625 DEXIT(FS_TRACE, " - mask=0x%X\n", mask);
626 return(mask); 626 return mask;
627} 627}
628 628
629/*------------------------------------------------------------------*/ 629/*------------------------------------------------------------------*/
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
index b5df2418f90..940225866da 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/net/irda/irnet/irnet_ppp.h
@@ -103,7 +103,8 @@ static const struct file_operations irnet_device_fops =
103 .poll = dev_irnet_poll, 103 .poll = dev_irnet_poll,
104 .unlocked_ioctl = dev_irnet_ioctl, 104 .unlocked_ioctl = dev_irnet_ioctl,
105 .open = dev_irnet_open, 105 .open = dev_irnet_open,
106 .release = dev_irnet_close 106 .release = dev_irnet_close,
107 .llseek = noop_llseek,
107 /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */ 108 /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */
108}; 109};
109 110
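Spelling out .llseek = noop_llseek documents that seeking on this control device is accepted but meaningless: noop_llseek() simply returns the current file position and never fails. A hedged sketch of a complete fops table with hypothetical demo_* handlers assumed to exist elsewhere:

static const struct file_operations demo_fops = {
	.owner		= THIS_MODULE,
	.read		= demo_read,
	.write		= demo_write,
	.open		= demo_open,
	.release	= demo_release,
	.llseek		= noop_llseek,	/* explicit: seeking is a no-op */
};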
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 43040e97c47..d87c22df6f1 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -565,12 +565,12 @@ pfkey_proto2satype(uint16_t proto)
565 565
566static uint8_t pfkey_proto_to_xfrm(uint8_t proto) 566static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
567{ 567{
568 return (proto == IPSEC_PROTO_ANY ? 0 : proto); 568 return proto == IPSEC_PROTO_ANY ? 0 : proto;
569} 569}
570 570
571static uint8_t pfkey_proto_from_xfrm(uint8_t proto) 571static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
572{ 572{
573 return (proto ? proto : IPSEC_PROTO_ANY); 573 return proto ? proto : IPSEC_PROTO_ANY;
574} 574}
575 575
576static inline int pfkey_sockaddr_len(sa_family_t family) 576static inline int pfkey_sockaddr_len(sa_family_t family)
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 1ae697681bc..8d9ce0accc9 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -144,7 +144,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
144 nf_reset(skb); 144 nf_reset(skb);
145 145
146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { 146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
147 dev->last_rx = jiffies;
148 dev->stats.rx_packets++; 147 dev->stats.rx_packets++;
149 dev->stats.rx_bytes += data_len; 148 dev->stats.rx_bytes += data_len;
150 } else 149 } else
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index ff954b3e94b..39a21d0c61c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1768,7 +1768,7 @@ static const struct proto_ops pppol2tp_ops = {
1768 .ioctl = pppox_ioctl, 1768 .ioctl = pppox_ioctl,
1769}; 1769};
1770 1770
1771static struct pppox_proto pppol2tp_proto = { 1771static const struct pppox_proto pppol2tp_proto = {
1772 .create = pppol2tp_create, 1772 .create = pppol2tp_create,
1773 .ioctl = pppol2tp_ioctl 1773 .ioctl = pppol2tp_ioctl
1774}; 1774};
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index a87cb3ba2df..d2b03e0851e 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -138,10 +138,8 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
138 struct crypto_cipher *tfm; 138 struct crypto_cipher *tfm;
139 139
140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
141 if (IS_ERR(tfm)) 141 if (!IS_ERR(tfm))
142 return NULL; 142 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
143
144 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
145 143
146 return tfm; 144 return tfm;
147} 145}
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index 3d097b3d7b6..b4d66cca76d 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -119,10 +119,8 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
119 struct crypto_cipher *tfm; 119 struct crypto_cipher *tfm;
120 120
121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
122 if (IS_ERR(tfm)) 122 if (!IS_ERR(tfm))
123 return NULL; 123 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
124
125 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
126 124
127 return tfm; 125 return tfm;
128} 126}
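Both AES key-setup helpers now hand any crypto_alloc_cipher() failure straight back to the caller as an ERR_PTR instead of flattening it to NULL, so the caller can report the real error code. A minimal sketch of the convention, using a hypothetical demo_key_setup():

#include <linux/err.h>
#include <linux/crypto.h>

static struct crypto_cipher *demo_key_setup(const u8 *key, unsigned int len)
{
	struct crypto_cipher *tfm;

	tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
	if (!IS_ERR(tfm))
		crypto_cipher_setkey(tfm, key, len);

	return tfm;	/* either a live tfm or an ERR_PTR() code */
}

Callers then test the result with IS_ERR() and extract the errno with PTR_ERR() rather than comparing against NULL.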
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 965b272499f..58eab9e8e4e 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -86,6 +86,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
86 tid, 0, reason); 86 tid, 0, reason);
87 87
88 del_timer_sync(&tid_rx->session_timer); 88 del_timer_sync(&tid_rx->session_timer);
89 del_timer_sync(&tid_rx->reorder_timer);
89 90
90 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); 91 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
91} 92}
@@ -120,6 +121,20 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
120 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); 121 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
121} 122}
122 123
124static void sta_rx_agg_reorder_timer_expired(unsigned long data)
125{
126 u8 *ptid = (u8 *)data;
127 u8 *timer_to_id = ptid - *ptid;
128 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
129 timer_to_tid[0]);
130
131 rcu_read_lock();
132 spin_lock(&sta->lock);
133 ieee80211_release_reorder_timeout(sta, *ptid);
134 spin_unlock(&sta->lock);
135 rcu_read_unlock();
136}
137
123static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, 138static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
124 u8 dialog_token, u16 status, u16 policy, 139 u8 dialog_token, u16 status, u16 policy,
125 u16 buf_size, u16 timeout) 140 u16 buf_size, u16 timeout)
@@ -251,11 +266,18 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
251 goto end; 266 goto end;
252 } 267 }
253 268
269 spin_lock_init(&tid_agg_rx->reorder_lock);
270
254 /* rx timer */ 271 /* rx timer */
255 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; 272 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired;
256 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; 273 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
257 init_timer(&tid_agg_rx->session_timer); 274 init_timer(&tid_agg_rx->session_timer);
258 275
276 /* rx reorder timer */
277 tid_agg_rx->reorder_timer.function = sta_rx_agg_reorder_timer_expired;
278 tid_agg_rx->reorder_timer.data = (unsigned long)&sta->timer_to_tid[tid];
279 init_timer(&tid_agg_rx->reorder_timer);
280
259 /* prepare reordering buffer */ 281 /* prepare reordering buffer */
260 tid_agg_rx->reorder_buf = 282 tid_agg_rx->reorder_buf =
261 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); 283 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC);
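The new reorder timer reuses the existing timer_to_tid[] trick: the array is initialised so that timer_to_tid[i] == i, the timer's data points at one element, and subtracting the stored TID from that pointer lands back on element 0, which container_of() maps to the owning station entry. A single unsigned long of timer data therefore encodes both the station and the TID. A generic sketch with a hypothetical demo_sta standing in for sta_info:

struct demo_sta {
	spinlock_t lock;
	u8 timer_to_tid[16];	/* filled so timer_to_tid[i] == i */
};

static void demo_reorder_timer_expired(unsigned long data)
{
	u8 *ptid = (u8 *)data;		/* &sta->timer_to_tid[tid] */
	u8 *tid0 = ptid - *ptid;	/* step back tid slots: element 0 */
	struct demo_sta *sta = container_of(tid0, struct demo_sta,
					    timer_to_tid[0]);

	spin_lock(&sta->lock);
	/* ... release buffered frames for TID *ptid ... */
	spin_unlock(&sta->lock);
}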
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 29ac8e1a509..c981604b71e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -19,33 +19,6 @@
19#include "rate.h" 19#include "rate.h"
20#include "mesh.h" 20#include "mesh.h"
21 21
22static bool nl80211_type_check(enum nl80211_iftype type)
23{
24 switch (type) {
25 case NL80211_IFTYPE_ADHOC:
26 case NL80211_IFTYPE_STATION:
27 case NL80211_IFTYPE_MONITOR:
28#ifdef CONFIG_MAC80211_MESH
29 case NL80211_IFTYPE_MESH_POINT:
30#endif
31 case NL80211_IFTYPE_AP:
32 case NL80211_IFTYPE_AP_VLAN:
33 case NL80211_IFTYPE_WDS:
34 return true;
35 default:
36 return false;
37 }
38}
39
40static bool nl80211_params_check(enum nl80211_iftype type,
41 struct vif_params *params)
42{
43 if (!nl80211_type_check(type))
44 return false;
45
46 return true;
47}
48
49static int ieee80211_add_iface(struct wiphy *wiphy, char *name, 22static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
50 enum nl80211_iftype type, u32 *flags, 23 enum nl80211_iftype type, u32 *flags,
51 struct vif_params *params) 24 struct vif_params *params)
@@ -55,9 +28,6 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
55 struct ieee80211_sub_if_data *sdata; 28 struct ieee80211_sub_if_data *sdata;
56 int err; 29 int err;
57 30
58 if (!nl80211_params_check(type, params))
59 return -EINVAL;
60
61 err = ieee80211_if_add(local, name, &dev, type, params); 31 err = ieee80211_if_add(local, name, &dev, type, params);
62 if (err || type != NL80211_IFTYPE_MONITOR || !flags) 32 if (err || type != NL80211_IFTYPE_MONITOR || !flags)
63 return err; 33 return err;
@@ -82,12 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
82 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 52 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
83 int ret; 53 int ret;
84 54
85 if (ieee80211_sdata_running(sdata))
86 return -EBUSY;
87
88 if (!nl80211_params_check(type, params))
89 return -EINVAL;
90
91 ret = ieee80211_if_change_type(sdata, type); 55 ret = ieee80211_if_change_type(sdata, type);
92 if (ret) 56 if (ret)
93 return ret; 57 return ret;
@@ -114,44 +78,30 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
114 u8 key_idx, const u8 *mac_addr, 78 u8 key_idx, const u8 *mac_addr,
115 struct key_params *params) 79 struct key_params *params)
116{ 80{
117 struct ieee80211_sub_if_data *sdata; 81 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
118 struct sta_info *sta = NULL; 82 struct sta_info *sta = NULL;
119 enum ieee80211_key_alg alg;
120 struct ieee80211_key *key; 83 struct ieee80211_key *key;
121 int err; 84 int err;
122 85
123 if (!netif_running(dev)) 86 if (!ieee80211_sdata_running(sdata))
124 return -ENETDOWN; 87 return -ENETDOWN;
125 88
126 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 89 /* reject WEP and TKIP keys if WEP failed to initialize */
127
128 switch (params->cipher) { 90 switch (params->cipher) {
129 case WLAN_CIPHER_SUITE_WEP40: 91 case WLAN_CIPHER_SUITE_WEP40:
130 case WLAN_CIPHER_SUITE_WEP104:
131 alg = ALG_WEP;
132 break;
133 case WLAN_CIPHER_SUITE_TKIP: 92 case WLAN_CIPHER_SUITE_TKIP:
134 alg = ALG_TKIP; 93 case WLAN_CIPHER_SUITE_WEP104:
135 break; 94 if (IS_ERR(sdata->local->wep_tx_tfm))
136 case WLAN_CIPHER_SUITE_CCMP: 95 return -EINVAL;
137 alg = ALG_CCMP;
138 break;
139 case WLAN_CIPHER_SUITE_AES_CMAC:
140 alg = ALG_AES_CMAC;
141 break; 96 break;
142 default: 97 default:
143 return -EINVAL; 98 break;
144 } 99 }
145 100
146 /* reject WEP and TKIP keys if WEP failed to initialize */ 101 key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len,
147 if ((alg == ALG_WEP || alg == ALG_TKIP) && 102 params->key, params->seq_len, params->seq);
148 IS_ERR(sdata->local->wep_tx_tfm)) 103 if (IS_ERR(key))
149 return -EINVAL; 104 return PTR_ERR(key);
150
151 key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key,
152 params->seq_len, params->seq);
153 if (!key)
154 return -ENOMEM;
155 105
156 mutex_lock(&sdata->local->sta_mtx); 106 mutex_lock(&sdata->local->sta_mtx);
157 107
@@ -164,9 +114,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
164 } 114 }
165 } 115 }
166 116
167 ieee80211_key_link(key, sdata, sta); 117 err = ieee80211_key_link(key, sdata, sta);
118 if (err)
119 ieee80211_key_free(sdata->local, key);
168 120
169 err = 0;
170 out_unlock: 121 out_unlock:
171 mutex_unlock(&sdata->local->sta_mtx); 122 mutex_unlock(&sdata->local->sta_mtx);
172 123
@@ -247,10 +198,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
247 198
248 memset(&params, 0, sizeof(params)); 199 memset(&params, 0, sizeof(params));
249 200
250 switch (key->conf.alg) { 201 params.cipher = key->conf.cipher;
251 case ALG_TKIP:
252 params.cipher = WLAN_CIPHER_SUITE_TKIP;
253 202
203 switch (key->conf.cipher) {
204 case WLAN_CIPHER_SUITE_TKIP:
254 iv32 = key->u.tkip.tx.iv32; 205 iv32 = key->u.tkip.tx.iv32;
255 iv16 = key->u.tkip.tx.iv16; 206 iv16 = key->u.tkip.tx.iv16;
256 207
@@ -268,8 +219,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
268 params.seq = seq; 219 params.seq = seq;
269 params.seq_len = 6; 220 params.seq_len = 6;
270 break; 221 break;
271 case ALG_CCMP: 222 case WLAN_CIPHER_SUITE_CCMP:
272 params.cipher = WLAN_CIPHER_SUITE_CCMP;
273 seq[0] = key->u.ccmp.tx_pn[5]; 223 seq[0] = key->u.ccmp.tx_pn[5];
274 seq[1] = key->u.ccmp.tx_pn[4]; 224 seq[1] = key->u.ccmp.tx_pn[4];
275 seq[2] = key->u.ccmp.tx_pn[3]; 225 seq[2] = key->u.ccmp.tx_pn[3];
@@ -279,14 +229,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
279 params.seq = seq; 229 params.seq = seq;
280 params.seq_len = 6; 230 params.seq_len = 6;
281 break; 231 break;
282 case ALG_WEP: 232 case WLAN_CIPHER_SUITE_AES_CMAC:
283 if (key->conf.keylen == 5)
284 params.cipher = WLAN_CIPHER_SUITE_WEP40;
285 else
286 params.cipher = WLAN_CIPHER_SUITE_WEP104;
287 break;
288 case ALG_AES_CMAC:
289 params.cipher = WLAN_CIPHER_SUITE_AES_CMAC;
290 seq[0] = key->u.aes_cmac.tx_pn[5]; 233 seq[0] = key->u.aes_cmac.tx_pn[5];
291 seq[1] = key->u.aes_cmac.tx_pn[4]; 234 seq[1] = key->u.aes_cmac.tx_pn[4];
292 seq[2] = key->u.aes_cmac.tx_pn[3]; 235 seq[2] = key->u.aes_cmac.tx_pn[3];
@@ -634,6 +577,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
634 struct sta_info *sta, 577 struct sta_info *sta,
635 struct station_parameters *params) 578 struct station_parameters *params)
636{ 579{
580 unsigned long flags;
637 u32 rates; 581 u32 rates;
638 int i, j; 582 int i, j;
639 struct ieee80211_supported_band *sband; 583 struct ieee80211_supported_band *sband;
@@ -642,7 +586,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
642 586
643 sband = local->hw.wiphy->bands[local->oper_channel->band]; 587 sband = local->hw.wiphy->bands[local->oper_channel->band];
644 588
645 spin_lock_bh(&sta->lock); 589 spin_lock_irqsave(&sta->flaglock, flags);
646 mask = params->sta_flags_mask; 590 mask = params->sta_flags_mask;
647 set = params->sta_flags_set; 591 set = params->sta_flags_set;
648 592
@@ -669,7 +613,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
669 if (set & BIT(NL80211_STA_FLAG_MFP)) 613 if (set & BIT(NL80211_STA_FLAG_MFP))
670 sta->flags |= WLAN_STA_MFP; 614 sta->flags |= WLAN_STA_MFP;
671 } 615 }
672 spin_unlock_bh(&sta->lock); 616 spin_unlock_irqrestore(&sta->flaglock, flags);
673 617
674 /* 618 /*
675 * cfg80211 validates this (1-2007) and allows setting the AID 619 * cfg80211 validates this (1-2007) and allows setting the AID
@@ -1143,9 +1087,9 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
1143 p.uapsd = false; 1087 p.uapsd = false;
1144 1088
1145 if (drv_conf_tx(local, params->queue, &p)) { 1089 if (drv_conf_tx(local, params->queue, &p)) {
1146 printk(KERN_DEBUG "%s: failed to set TX queue " 1090 wiphy_debug(local->hw.wiphy,
1147 "parameters for queue %d\n", 1091 "failed to set TX queue parameters for queue %d\n",
1148 wiphy_name(local->hw.wiphy), params->queue); 1092 params->queue);
1149 return -EINVAL; 1093 return -EINVAL;
1150 } 1094 }
1151 1095
@@ -1207,15 +1151,26 @@ static int ieee80211_scan(struct wiphy *wiphy,
1207 struct net_device *dev, 1151 struct net_device *dev,
1208 struct cfg80211_scan_request *req) 1152 struct cfg80211_scan_request *req)
1209{ 1153{
1210 struct ieee80211_sub_if_data *sdata; 1154 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1211
1212 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1213 1155
1214 if (sdata->vif.type != NL80211_IFTYPE_STATION && 1156 switch (ieee80211_vif_type_p2p(&sdata->vif)) {
1215 sdata->vif.type != NL80211_IFTYPE_ADHOC && 1157 case NL80211_IFTYPE_STATION:
1216 sdata->vif.type != NL80211_IFTYPE_MESH_POINT && 1158 case NL80211_IFTYPE_ADHOC:
1217 (sdata->vif.type != NL80211_IFTYPE_AP || sdata->u.ap.beacon)) 1159 case NL80211_IFTYPE_MESH_POINT:
1160 case NL80211_IFTYPE_P2P_CLIENT:
1161 break;
1162 case NL80211_IFTYPE_P2P_GO:
1163 if (sdata->local->ops->hw_scan)
1164 break;
1165 /* FIXME: implement NoA while scanning in software */
1166 return -EOPNOTSUPP;
1167 case NL80211_IFTYPE_AP:
1168 if (sdata->u.ap.beacon)
1169 return -EOPNOTSUPP;
1170 break;
1171 default:
1218 return -EOPNOTSUPP; 1172 return -EOPNOTSUPP;
1173 }
1219 1174
1220 return ieee80211_request_scan(sdata, req); 1175 return ieee80211_request_scan(sdata, req);
1221} 1176}
@@ -1541,11 +1496,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
1541 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); 1496 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
1542} 1497}
1543 1498
1544static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, 1499static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
1545 struct ieee80211_channel *chan, 1500 struct ieee80211_channel *chan,
1546 enum nl80211_channel_type channel_type, 1501 enum nl80211_channel_type channel_type,
1547 bool channel_type_valid, 1502 bool channel_type_valid,
1548 const u8 *buf, size_t len, u64 *cookie) 1503 const u8 *buf, size_t len, u64 *cookie)
1549{ 1504{
1550 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1505 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1551 struct ieee80211_local *local = sdata->local; 1506 struct ieee80211_local *local = sdata->local;
@@ -1575,8 +1530,6 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1575 return -ENOLINK; 1530 return -ENOLINK;
1576 break; 1531 break;
1577 case NL80211_IFTYPE_STATION: 1532 case NL80211_IFTYPE_STATION:
1578 if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED))
1579 flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
1580 break; 1533 break;
1581 default: 1534 default:
1582 return -EOPNOTSUPP; 1535 return -EOPNOTSUPP;
@@ -1647,6 +1600,6 @@ struct cfg80211_ops mac80211_config_ops = {
1647 .set_bitrate_mask = ieee80211_set_bitrate_mask, 1600 .set_bitrate_mask = ieee80211_set_bitrate_mask,
1648 .remain_on_channel = ieee80211_remain_on_channel, 1601 .remain_on_channel = ieee80211_remain_on_channel,
1649 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, 1602 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
1650 .action = ieee80211_action, 1603 .mgmt_tx = ieee80211_mgmt_tx,
1651 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, 1604 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
1652}; 1605};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 32be11e4c4d..5b24740fc0b 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -11,7 +11,7 @@ __ieee80211_get_channel_mode(struct ieee80211_local *local,
11{ 11{
12 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
13 13
14 WARN_ON(!mutex_is_locked(&local->iflist_mtx)); 14 lockdep_assert_held(&local->iflist_mtx);
15 15
16 list_for_each_entry(sdata, &local->interfaces, list) { 16 list_for_each_entry(sdata, &local->interfaces, list) {
17 if (sdata == ignore) 17 if (sdata == ignore)
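lockdep_assert_held() is stronger than the WARN_ON(!mutex_is_locked()) it replaces: with lockdep enabled it verifies that the current task holds the mutex, not merely that someone does, and with lockdep disabled it compiles away entirely. A sketch with a hypothetical list walker:

static DEFINE_MUTEX(demo_list_mtx);
static LIST_HEAD(demo_list);

static void demo_walk_list(void)
{
	/* caller must hold demo_list_mtx; verified only under lockdep */
	lockdep_assert_held(&demo_list_mtx);

	/* ... list_for_each_entry() over demo_list ... */
}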
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index a694c593ff6..e81ef4e8cb3 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -85,13 +85,15 @@ static ssize_t tsf_write(struct file *file,
85 if (strncmp(buf, "reset", 5) == 0) { 85 if (strncmp(buf, "reset", 5) == 0) {
86 if (local->ops->reset_tsf) { 86 if (local->ops->reset_tsf) {
87 drv_reset_tsf(local); 87 drv_reset_tsf(local);
88 printk(KERN_INFO "%s: debugfs reset TSF\n", wiphy_name(local->hw.wiphy)); 88 wiphy_info(local->hw.wiphy, "debugfs reset TSF\n");
89 } 89 }
90 } else { 90 } else {
91 tsf = simple_strtoul(buf, NULL, 0); 91 tsf = simple_strtoul(buf, NULL, 0);
92 if (local->ops->set_tsf) { 92 if (local->ops->set_tsf) {
93 drv_set_tsf(local, tsf); 93 drv_set_tsf(local, tsf);
94 printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n", wiphy_name(local->hw.wiphy), tsf); 94 wiphy_info(local->hw.wiphy,
95 "debugfs set TSF to %#018llx\n", tsf);
96
95 } 97 }
96 } 98 }
97 99
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index fa5e76e658e..1647f8dc5cd 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -64,26 +64,13 @@ static ssize_t key_algorithm_read(struct file *file,
64 char __user *userbuf, 64 char __user *userbuf,
65 size_t count, loff_t *ppos) 65 size_t count, loff_t *ppos)
66{ 66{
67 char *alg; 67 char buf[15];
68 struct ieee80211_key *key = file->private_data; 68 struct ieee80211_key *key = file->private_data;
69 u32 c = key->conf.cipher;
69 70
70 switch (key->conf.alg) { 71 sprintf(buf, "%.2x-%.2x-%.2x:%d\n",
71 case ALG_WEP: 72 c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff);
72 alg = "WEP\n"; 73 return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
73 break;
74 case ALG_TKIP:
75 alg = "TKIP\n";
76 break;
77 case ALG_CCMP:
78 alg = "CCMP\n";
79 break;
80 case ALG_AES_CMAC:
81 alg = "AES-128-CMAC\n";
82 break;
83 default:
84 return 0;
85 }
86 return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg));
87} 74}
88KEY_OPS(algorithm); 75KEY_OPS(algorithm);
89 76
@@ -95,21 +82,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
95 int len; 82 int len;
96 struct ieee80211_key *key = file->private_data; 83 struct ieee80211_key *key = file->private_data;
97 84
98 switch (key->conf.alg) { 85 switch (key->conf.cipher) {
99 case ALG_WEP: 86 case WLAN_CIPHER_SUITE_WEP40:
87 case WLAN_CIPHER_SUITE_WEP104:
100 len = scnprintf(buf, sizeof(buf), "\n"); 88 len = scnprintf(buf, sizeof(buf), "\n");
101 break; 89 break;
102 case ALG_TKIP: 90 case WLAN_CIPHER_SUITE_TKIP:
103 len = scnprintf(buf, sizeof(buf), "%08x %04x\n", 91 len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
104 key->u.tkip.tx.iv32, 92 key->u.tkip.tx.iv32,
105 key->u.tkip.tx.iv16); 93 key->u.tkip.tx.iv16);
106 break; 94 break;
107 case ALG_CCMP: 95 case WLAN_CIPHER_SUITE_CCMP:
108 tpn = key->u.ccmp.tx_pn; 96 tpn = key->u.ccmp.tx_pn;
109 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 97 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
110 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); 98 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
111 break; 99 break;
112 case ALG_AES_CMAC: 100 case WLAN_CIPHER_SUITE_AES_CMAC:
113 tpn = key->u.aes_cmac.tx_pn; 101 tpn = key->u.aes_cmac.tx_pn;
114 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 102 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
115 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], 103 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4],
@@ -130,11 +118,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
130 int i, len; 118 int i, len;
131 const u8 *rpn; 119 const u8 *rpn;
132 120
133 switch (key->conf.alg) { 121 switch (key->conf.cipher) {
134 case ALG_WEP: 122 case WLAN_CIPHER_SUITE_WEP40:
123 case WLAN_CIPHER_SUITE_WEP104:
135 len = scnprintf(buf, sizeof(buf), "\n"); 124 len = scnprintf(buf, sizeof(buf), "\n");
136 break; 125 break;
137 case ALG_TKIP: 126 case WLAN_CIPHER_SUITE_TKIP:
138 for (i = 0; i < NUM_RX_DATA_QUEUES; i++) 127 for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
139 p += scnprintf(p, sizeof(buf)+buf-p, 128 p += scnprintf(p, sizeof(buf)+buf-p,
140 "%08x %04x\n", 129 "%08x %04x\n",
@@ -142,7 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
142 key->u.tkip.rx[i].iv16); 131 key->u.tkip.rx[i].iv16);
143 len = p - buf; 132 len = p - buf;
144 break; 133 break;
145 case ALG_CCMP: 134 case WLAN_CIPHER_SUITE_CCMP:
146 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { 135 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) {
147 rpn = key->u.ccmp.rx_pn[i]; 136 rpn = key->u.ccmp.rx_pn[i];
148 p += scnprintf(p, sizeof(buf)+buf-p, 137 p += scnprintf(p, sizeof(buf)+buf-p,
@@ -152,7 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
152 } 141 }
153 len = p - buf; 142 len = p - buf;
154 break; 143 break;
155 case ALG_AES_CMAC: 144 case WLAN_CIPHER_SUITE_AES_CMAC:
156 rpn = key->u.aes_cmac.rx_pn; 145 rpn = key->u.aes_cmac.rx_pn;
157 p += scnprintf(p, sizeof(buf)+buf-p, 146 p += scnprintf(p, sizeof(buf)+buf-p,
158 "%02x%02x%02x%02x%02x%02x\n", 147 "%02x%02x%02x%02x%02x%02x\n",
@@ -174,11 +163,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf,
174 char buf[20]; 163 char buf[20];
175 int len; 164 int len;
176 165
177 switch (key->conf.alg) { 166 switch (key->conf.cipher) {
178 case ALG_CCMP: 167 case WLAN_CIPHER_SUITE_CCMP:
179 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); 168 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
180 break; 169 break;
181 case ALG_AES_CMAC: 170 case WLAN_CIPHER_SUITE_AES_CMAC:
182 len = scnprintf(buf, sizeof(buf), "%u\n", 171 len = scnprintf(buf, sizeof(buf), "%u\n",
183 key->u.aes_cmac.replays); 172 key->u.aes_cmac.replays);
184 break; 173 break;
@@ -196,8 +185,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf,
196 char buf[20]; 185 char buf[20];
197 int len; 186 int len;
198 187
199 switch (key->conf.alg) { 188 switch (key->conf.cipher) {
200 case ALG_AES_CMAC: 189 case WLAN_CIPHER_SUITE_AES_CMAC:
201 len = scnprintf(buf, sizeof(buf), "%u\n", 190 len = scnprintf(buf, sizeof(buf), "%u\n",
202 key->u.aes_cmac.icverrors); 191 key->u.aes_cmac.icverrors);
203 break; 192 break;
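With the key API switched from the ieee80211_key_alg enum to raw cipher suite selectors, debugfs can print the selector directly: the top three bytes are the OUI and the low byte is the suite type, so WLAN_CIPHER_SUITE_CCMP (0x000fac04) renders as 00-0f-ac:4, replacing the old per-algorithm string table. A sketch of the decoding, assuming a caller-supplied buffer:

static void demo_show_cipher(char *buf, size_t size, u32 c)
{
	/* OUI in the high 24 bits, suite type in the low 8 bits */
	snprintf(buf, size, "%.2x-%.2x-%.2x:%d\n",
		 c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff);
}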
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 14123dce544..16983825f8e 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local,
54 return ret; 54 return ret;
55} 55}
56 56
57static inline int drv_change_interface(struct ieee80211_local *local,
58 struct ieee80211_sub_if_data *sdata,
59 enum nl80211_iftype type, bool p2p)
60{
61 int ret;
62
63 might_sleep();
64
65 trace_drv_change_interface(local, sdata, type, p2p);
66 ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
67 trace_drv_return_int(local, ret);
68 return ret;
69}
70
57static inline void drv_remove_interface(struct ieee80211_local *local, 71static inline void drv_remove_interface(struct ieee80211_local *local,
58 struct ieee80211_vif *vif) 72 struct ieee80211_vif *vif)
59{ 73{
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 5d5d2a97466..6831fb1641c 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -25,12 +25,14 @@ static inline void trace_ ## name(proto) {}
25#define STA_PR_FMT " sta:%pM" 25#define STA_PR_FMT " sta:%pM"
26#define STA_PR_ARG __entry->sta_addr 26#define STA_PR_ARG __entry->sta_addr
27 27
28#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ 28#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \
29 __field(bool, p2p) \
29 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") 30 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
30#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ 31#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
32 __entry->p2p = sdata->vif.p2p; \
31 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") 33 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
32#define VIF_PR_FMT " vif:%s(%d)" 34#define VIF_PR_FMT " vif:%s(%d%s)"
33#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type 35#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
34 36
35/* 37/*
36 * Tracing for driver callbacks. 38 * Tracing for driver callbacks.
@@ -136,6 +138,34 @@ TRACE_EVENT(drv_add_interface,
136 ) 138 )
137); 139);
138 140
141TRACE_EVENT(drv_change_interface,
142 TP_PROTO(struct ieee80211_local *local,
143 struct ieee80211_sub_if_data *sdata,
144 enum nl80211_iftype type, bool p2p),
145
146 TP_ARGS(local, sdata, type, p2p),
147
148 TP_STRUCT__entry(
149 LOCAL_ENTRY
150 VIF_ENTRY
151 __field(u32, new_type)
152 __field(bool, new_p2p)
153 ),
154
155 TP_fast_assign(
156 LOCAL_ASSIGN;
157 VIF_ASSIGN;
158 __entry->new_type = type;
159 __entry->new_p2p = p2p;
160 ),
161
162 TP_printk(
163 LOCAL_PR_FMT VIF_PR_FMT " new type:%d%s",
164 LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type,
165 __entry->new_p2p ? "/p2p" : ""
166 )
167);
168
139TRACE_EVENT(drv_remove_interface, 169TRACE_EVENT(drv_remove_interface,
140 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), 170 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata),
141 171
@@ -336,7 +366,7 @@ TRACE_EVENT(drv_set_key,
336 LOCAL_ENTRY 366 LOCAL_ENTRY
337 VIF_ENTRY 367 VIF_ENTRY
338 STA_ENTRY 368 STA_ENTRY
339 __field(enum ieee80211_key_alg, alg) 369 __field(u32, cipher)
340 __field(u8, hw_key_idx) 370 __field(u8, hw_key_idx)
341 __field(u8, flags) 371 __field(u8, flags)
342 __field(s8, keyidx) 372 __field(s8, keyidx)
@@ -346,7 +376,7 @@ TRACE_EVENT(drv_set_key,
346 LOCAL_ASSIGN; 376 LOCAL_ASSIGN;
347 VIF_ASSIGN; 377 VIF_ASSIGN;
348 STA_ASSIGN; 378 STA_ASSIGN;
349 __entry->alg = key->alg; 379 __entry->cipher = key->cipher;
350 __entry->flags = key->flags; 380 __entry->flags = key->flags;
351 __entry->keyidx = key->keyidx; 381 __entry->keyidx = key->keyidx;
352 __entry->hw_key_idx = key->hw_key_idx; 382 __entry->hw_key_idx = key->hw_key_idx;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 9d101fb3386..11f74f5f7b2 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -265,3 +265,31 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
265 265
266 return 0; 266 return 0;
267} 267}
268
269void ieee80211_request_smps_work(struct work_struct *work)
270{
271 struct ieee80211_sub_if_data *sdata =
272 container_of(work, struct ieee80211_sub_if_data,
273 u.mgd.request_smps_work);
274
275 mutex_lock(&sdata->u.mgd.mtx);
276 __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode);
277 mutex_unlock(&sdata->u.mgd.mtx);
278}
279
280void ieee80211_request_smps(struct ieee80211_vif *vif,
281 enum ieee80211_smps_mode smps_mode)
282{
283 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
284
285 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
286 return;
287
288 if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF))
289 smps_mode = IEEE80211_SMPS_AUTOMATIC;
290
291 ieee80211_queue_work(&sdata->local->hw,
292 &sdata->u.mgd.request_smps_work);
293}
294/* this might change ... don't want non-open drivers using it */
295EXPORT_SYMBOL_GPL(ieee80211_request_smps);
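ieee80211_request_smps() follows the usual defer-to-work pattern: the exported entry point only queues a work item, and the handler later runs in process context where it may sleep and take the management mutex, recovering its owner via container_of(). A generic sketch with a hypothetical demo_iface (request_work assumed initialised with INIT_WORK()):

struct demo_iface {
	struct mutex mtx;
	int requested_mode;
	struct work_struct request_work;
};

static void demo_request_work(struct work_struct *work)
{
	struct demo_iface *ifc = container_of(work, struct demo_iface,
					      request_work);

	mutex_lock(&ifc->mtx);
	/* ... apply ifc->requested_mode while holding the mutex ... */
	mutex_unlock(&ifc->mtx);
}

static void demo_request(struct demo_iface *ifc, int mode)
{
	ifc->requested_mode = mode;
	schedule_work(&ifc->request_work);	/* safe from atomic context */
}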
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index c691780725a..1a3aae54f0c 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -427,8 +427,8 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
427 return NULL; 427 return NULL;
428 428
429#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 429#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
430 printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", 430 wiphy_debug(local->hw.wiphy, "Adding new IBSS station %pM (dev=%s)\n",
431 wiphy_name(local->hw.wiphy), addr, sdata->name); 431 addr, sdata->name);
432#endif 432#endif
433 433
434 sta = sta_info_alloc(sdata, addr, gfp); 434 sta = sta_info_alloc(sdata, addr, gfp);
@@ -920,12 +920,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); 920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN);
921 sdata->u.ibss.ssid_len = params->ssid_len; 921 sdata->u.ibss.ssid_len = params->ssid_len;
922 922
923 mutex_unlock(&sdata->u.ibss.mtx);
924
925 mutex_lock(&sdata->local->mtx);
923 ieee80211_recalc_idle(sdata->local); 926 ieee80211_recalc_idle(sdata->local);
927 mutex_unlock(&sdata->local->mtx);
924 928
925 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 929 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
926 930
927 mutex_unlock(&sdata->u.ibss.mtx);
928
929 return 0; 931 return 0;
930} 932}
931 933
@@ -980,7 +982,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
980 982
981 mutex_unlock(&sdata->u.ibss.mtx); 983 mutex_unlock(&sdata->u.ibss.mtx);
982 984
985 mutex_lock(&local->mtx);
983 ieee80211_recalc_idle(sdata->local); 986 ieee80211_recalc_idle(sdata->local);
987 mutex_unlock(&local->mtx);
984 988
985 return 0; 989 return 0;
986} 990}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 65e0ed6c297..9346a6b0f40 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
50 * increased memory use (about 2 kB of RAM per entry). */ 50 * increased memory use (about 2 kB of RAM per entry). */
51#define IEEE80211_FRAGMENT_MAX 4 51#define IEEE80211_FRAGMENT_MAX 4
52 52
53/*
54 * Time after which we ignore scan results and no longer report/use
55 * them in any way.
56 */
57#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
58
59#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) 53#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024))
60 54
61#define IEEE80211_DEFAULT_UAPSD_QUEUES \ 55#define IEEE80211_DEFAULT_UAPSD_QUEUES \
@@ -170,6 +164,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
170#define IEEE80211_RX_RA_MATCH BIT(1) 164#define IEEE80211_RX_RA_MATCH BIT(1)
171#define IEEE80211_RX_AMSDU BIT(2) 165#define IEEE80211_RX_AMSDU BIT(2)
172#define IEEE80211_RX_FRAGMENTED BIT(3) 166#define IEEE80211_RX_FRAGMENTED BIT(3)
167#define IEEE80211_MALFORMED_ACTION_FRM BIT(4)
173/* only add flags here that do not change with subframes of an aMPDU */ 168/* only add flags here that do not change with subframes of an aMPDU */
174 169
175struct ieee80211_rx_data { 170struct ieee80211_rx_data {
@@ -343,7 +338,10 @@ struct ieee80211_if_managed {
343 unsigned long timers_running; /* used for quiesce/restart */ 338 unsigned long timers_running; /* used for quiesce/restart */
344 bool powersave; /* powersave requested for this iface */ 339 bool powersave; /* powersave requested for this iface */
345 enum ieee80211_smps_mode req_smps, /* requested smps mode */ 340 enum ieee80211_smps_mode req_smps, /* requested smps mode */
346 ap_smps; /* smps mode AP thinks we're in */ 341 ap_smps, /* smps mode AP thinks we're in */
342 driver_smps_mode; /* smps mode request */
343
344 struct work_struct request_smps_work;
347 345
348 unsigned int flags; 346 unsigned int flags;
349 347
@@ -371,6 +369,13 @@ struct ieee80211_if_managed {
371 int ave_beacon_signal; 369 int ave_beacon_signal;
372 370
373 /* 371 /*
372 * Number of Beacon frames used in ave_beacon_signal. This can be used
373 * to avoid generating less reliable cqm events that would be based
374 * only on couple of received frames.
375 */
376 unsigned int count_beacon_signal;
377
378 /*
374 * Last Beacon frame signal strength average (ave_beacon_signal / 16) 379 * Last Beacon frame signal strength average (ave_beacon_signal / 16)
375 * that triggered a cqm event. 0 indicates that no event has been 380 * that triggered a cqm event. 0 indicates that no event has been
376 * generated for the current association. 381 * generated for the current association.
@@ -474,6 +479,19 @@ enum ieee80211_sub_if_data_flags {
474 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), 479 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
475}; 480};
476 481
482/**
483 * enum ieee80211_sdata_state_bits - virtual interface state bits
484 * @SDATA_STATE_RUNNING: virtual interface is up & running; this
485 * mirrors netif_running() but is separate for interface type
486 * change handling while the interface is up
487 * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel
488 * mode, so queues are stopped
489 */
490enum ieee80211_sdata_state_bits {
491 SDATA_STATE_RUNNING,
492 SDATA_STATE_OFFCHANNEL,
493};
494
477struct ieee80211_sub_if_data { 495struct ieee80211_sub_if_data {
478 struct list_head list; 496 struct list_head list;
479 497
@@ -487,6 +505,8 @@ struct ieee80211_sub_if_data {
487 505
488 unsigned int flags; 506 unsigned int flags;
489 507
508 unsigned long state;
509
490 int drop_unencrypted; 510 int drop_unencrypted;
491 511
492 char name[IFNAMSIZ]; 512 char name[IFNAMSIZ];
@@ -497,6 +517,9 @@ struct ieee80211_sub_if_data {
497 */ 517 */
498 bool ht_opmode_valid; 518 bool ht_opmode_valid;
499 519
520 /* to detect idle changes */
521 bool old_idle;
522
500 /* Fragment table for host-based reassembly */ 523 /* Fragment table for host-based reassembly */
501 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; 524 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
502 unsigned int fragment_next; 525 unsigned int fragment_next;
@@ -508,6 +531,8 @@ struct ieee80211_sub_if_data {
508 struct ieee80211_key *default_mgmt_key; 531 struct ieee80211_key *default_mgmt_key;
509 532
510 u16 sequence_number; 533 u16 sequence_number;
534 __be16 control_port_protocol;
535 bool control_port_no_encrypt;
511 536
512 struct work_struct work; 537 struct work_struct work;
513 struct sk_buff_head skb_queue; 538 struct sk_buff_head skb_queue;
@@ -595,11 +620,17 @@ enum queue_stop_reason {
595 * determine if we are on the operating channel or not 620 * determine if we are on the operating channel or not
596 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, 621 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning,
597 * gets only set in conjunction with SCAN_SW_SCANNING 622 * gets only set in conjunction with SCAN_SW_SCANNING
623 * @SCAN_COMPLETED: Set for our scan work function when the driver reported
624 * that the scan completed.
625 * @SCAN_ABORTED: Set for our scan work function when the driver reported
626 * a scan complete for an aborted scan.
598 */ 627 */
599enum { 628enum {
600 SCAN_SW_SCANNING, 629 SCAN_SW_SCANNING,
601 SCAN_HW_SCANNING, 630 SCAN_HW_SCANNING,
602 SCAN_OFF_CHANNEL, 631 SCAN_OFF_CHANNEL,
632 SCAN_COMPLETED,
633 SCAN_ABORTED,
603}; 634};
604 635
605/** 636/**
@@ -634,7 +665,6 @@ struct ieee80211_local {
634 /* 665 /*
635 * work stuff, potentially off-channel (in the future) 666 * work stuff, potentially off-channel (in the future)
636 */ 667 */
637 struct mutex work_mtx;
638 struct list_head work_list; 668 struct list_head work_list;
639 struct timer_list work_timer; 669 struct timer_list work_timer;
640 struct work_struct work_work; 670 struct work_struct work_work;
@@ -656,6 +686,8 @@ struct ieee80211_local {
656 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll; 686 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll;
657 unsigned int filter_flags; /* FIF_* */ 687 unsigned int filter_flags; /* FIF_* */
658 688
689 bool wiphy_ciphers_allocated;
690
659 /* protects the aggregated multicast list and filter calls */ 691 /* protects the aggregated multicast list and filter calls */
660 spinlock_t filter_lock; 692 spinlock_t filter_lock;
661 693
@@ -746,9 +778,10 @@ struct ieee80211_local {
746 */ 778 */
747 struct mutex key_mtx; 779 struct mutex key_mtx;
748 780
781 /* mutex for scan and work locking */
782 struct mutex mtx;
749 783
750 /* Scanning and BSS list */ 784 /* Scanning and BSS list */
751 struct mutex scan_mtx;
752 unsigned long scanning; 785 unsigned long scanning;
753 struct cfg80211_ssid scan_ssid; 786 struct cfg80211_ssid scan_ssid;
754 struct cfg80211_scan_request *int_scan_req; 787 struct cfg80211_scan_request *int_scan_req;
@@ -870,6 +903,11 @@ struct ieee80211_local {
870 struct dentry *keys; 903 struct dentry *keys;
871 } debugfs; 904 } debugfs;
872#endif 905#endif
906
907 /* dummy netdev for use w/ NAPI */
908 struct net_device napi_dev;
909
910 struct napi_struct napi;
873}; 911};
874 912
875static inline struct ieee80211_sub_if_data * 913static inline struct ieee80211_sub_if_data *
@@ -1003,6 +1041,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
1003void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); 1041void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
1004void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, 1042void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1005 struct sk_buff *skb); 1043 struct sk_buff *skb);
1044void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata);
1045void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata);
1006 1046
1007/* IBSS code */ 1047/* IBSS code */
1008void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); 1048void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
@@ -1071,7 +1111,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local);
1071 1111
1072static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) 1112static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
1073{ 1113{
1074 return netif_running(sdata->dev); 1114 return test_bit(SDATA_STATE_RUNNING, &sdata->state);
1075} 1115}
1076 1116
1077/* tx handling */ 1117/* tx handling */
@@ -1105,6 +1145,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
1105int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, 1145int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
1106 enum ieee80211_smps_mode smps, const u8 *da, 1146 enum ieee80211_smps_mode smps, const u8 *da,
1107 const u8 *bssid); 1147 const u8 *bssid);
1148void ieee80211_request_smps_work(struct work_struct *work);
1108 1149
1109void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 1150void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1110 u16 initiator, u16 reason); 1151 u16 initiator, u16 reason);
@@ -1131,6 +1172,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
1131void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); 1172void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
1132void ieee80211_ba_session_work(struct work_struct *work); 1173void ieee80211_ba_session_work(struct work_struct *work);
1133void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); 1174void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
1175void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
1134 1176
1135/* Spectrum management */ 1177/* Spectrum management */
1136void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 1178void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1146,6 +1188,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw);
1146 1188
1147static inline int __ieee80211_resume(struct ieee80211_hw *hw) 1189static inline int __ieee80211_resume(struct ieee80211_hw *hw)
1148{ 1190{
1191 struct ieee80211_local *local = hw_to_local(hw);
1192
1193 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
1194 "%s: resume with hardware scan still in progress\n",
1195 wiphy_name(hw->wiphy));
1196
1149 return ieee80211_reconfig(hw_to_local(hw)); 1197 return ieee80211_reconfig(hw_to_local(hw));
1150} 1198}
1151#else 1199#else
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ebbe264e2b0..66785739dad 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -94,21 +94,14 @@ static inline int identical_mac_addr_allowed(int type1, int type2)
94 type2 == NL80211_IFTYPE_AP_VLAN)); 94 type2 == NL80211_IFTYPE_AP_VLAN));
95} 95}
96 96
97static int ieee80211_open(struct net_device *dev) 97static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
98 enum nl80211_iftype iftype)
98{ 99{
99 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
100 struct ieee80211_sub_if_data *nsdata;
101 struct ieee80211_local *local = sdata->local; 100 struct ieee80211_local *local = sdata->local;
102 struct sta_info *sta; 101 struct ieee80211_sub_if_data *nsdata;
103 u32 changed = 0; 102 struct net_device *dev = sdata->dev;
104 int res;
105 u32 hw_reconf_flags = 0;
106 u8 null_addr[ETH_ALEN] = {0};
107 103
108 /* fail early if user set an invalid address */ 104 ASSERT_RTNL();
109 if (compare_ether_addr(dev->dev_addr, null_addr) &&
110 !is_valid_ether_addr(dev->dev_addr))
111 return -EADDRNOTAVAIL;
112 105
113 /* we hold the RTNL here so can safely walk the list */ 106 /* we hold the RTNL here so can safely walk the list */
114 list_for_each_entry(nsdata, &local->interfaces, list) { 107 list_for_each_entry(nsdata, &local->interfaces, list) {
@@ -125,7 +118,7 @@ static int ieee80211_open(struct net_device *dev)
125 * belonging to the same hardware. Then, however, we're 118 * belonging to the same hardware. Then, however, we're
126 * faced with having to adopt two different TSF timers... 119 * faced with having to adopt two different TSF timers...
127 */ 120 */
128 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 121 if (iftype == NL80211_IFTYPE_ADHOC &&
129 nsdata->vif.type == NL80211_IFTYPE_ADHOC) 122 nsdata->vif.type == NL80211_IFTYPE_ADHOC)
130 return -EBUSY; 123 return -EBUSY;
131 124
@@ -139,19 +132,36 @@ static int ieee80211_open(struct net_device *dev)
139 /* 132 /*
140 * check whether it may have the same address 133 * check whether it may have the same address
141 */ 134 */
142 if (!identical_mac_addr_allowed(sdata->vif.type, 135 if (!identical_mac_addr_allowed(iftype,
143 nsdata->vif.type)) 136 nsdata->vif.type))
144 return -ENOTUNIQ; 137 return -ENOTUNIQ;
145 138
146 /* 139 /*
147 * can only add VLANs to enabled APs 140 * can only add VLANs to enabled APs
148 */ 141 */
149 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && 142 if (iftype == NL80211_IFTYPE_AP_VLAN &&
150 nsdata->vif.type == NL80211_IFTYPE_AP) 143 nsdata->vif.type == NL80211_IFTYPE_AP)
151 sdata->bss = &nsdata->u.ap; 144 sdata->bss = &nsdata->u.ap;
152 } 145 }
153 } 146 }
154 147
148 return 0;
149}
150
151/*
152 * NOTE: Be very careful when changing this function, it must NOT return
153 * an error on interface type changes that have been pre-checked, so most
154 * checks should be in ieee80211_check_concurrent_iface.
155 */
156static int ieee80211_do_open(struct net_device *dev, bool coming_up)
157{
158 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
159 struct ieee80211_local *local = sdata->local;
160 struct sta_info *sta;
161 u32 changed = 0;
162 int res;
163 u32 hw_reconf_flags = 0;
164
155 switch (sdata->vif.type) { 165 switch (sdata->vif.type) {
156 case NL80211_IFTYPE_WDS: 166 case NL80211_IFTYPE_WDS:
157 if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) 167 if (!is_valid_ether_addr(sdata->u.wds.remote_addr))
@@ -177,7 +187,9 @@ static int ieee80211_open(struct net_device *dev)
177 /* no special treatment */ 187 /* no special treatment */
178 break; 188 break;
179 case NL80211_IFTYPE_UNSPECIFIED: 189 case NL80211_IFTYPE_UNSPECIFIED:
180 case __NL80211_IFTYPE_AFTER_LAST: 190 case NUM_NL80211_IFTYPES:
191 case NL80211_IFTYPE_P2P_CLIENT:
192 case NL80211_IFTYPE_P2P_GO:
181 /* cannot happen */ 193 /* cannot happen */
182 WARN_ON(1); 194 WARN_ON(1);
183 break; 195 break;
@@ -187,39 +199,30 @@ static int ieee80211_open(struct net_device *dev)
187 res = drv_start(local); 199 res = drv_start(local);
188 if (res) 200 if (res)
189 goto err_del_bss; 201 goto err_del_bss;
202 if (local->ops->napi_poll)
203 napi_enable(&local->napi);
190 /* we're brought up, everything changes */ 204 /* we're brought up, everything changes */
191 hw_reconf_flags = ~0; 205 hw_reconf_flags = ~0;
192 ieee80211_led_radio(local, true); 206 ieee80211_led_radio(local, true);
193 } 207 }
194 208
195 /* 209 /*
196 * Check all interfaces and copy the hopefully now-present 210 * Copy the hopefully now-present MAC address to
197 * MAC address to those that have the special null one. 211 * this interface, if it has the special null one.
198 */ 212 */
199 list_for_each_entry(nsdata, &local->interfaces, list) { 213 if (is_zero_ether_addr(dev->dev_addr)) {
200 struct net_device *ndev = nsdata->dev; 214 memcpy(dev->dev_addr,
201 215 local->hw.wiphy->perm_addr,
202 /* 216 ETH_ALEN);
203 * No need to check running since we do not allow 217 memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN);
204 * it to start up with this invalid address. 218
205 */ 219 if (!is_valid_ether_addr(dev->dev_addr)) {
206 if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) { 220 if (!local->open_count)
207 memcpy(ndev->dev_addr, 221 drv_stop(local);
208 local->hw.wiphy->perm_addr, 222 return -EADDRNOTAVAIL;
209 ETH_ALEN);
210 memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN);
211 } 223 }
212 } 224 }
213 225
214 /*
215 * Validate the MAC address for this device.
216 */
217 if (!is_valid_ether_addr(dev->dev_addr)) {
218 if (!local->open_count)
219 drv_stop(local);
220 return -EADDRNOTAVAIL;
221 }
222
223 switch (sdata->vif.type) { 226 switch (sdata->vif.type) {
224 case NL80211_IFTYPE_AP_VLAN: 227 case NL80211_IFTYPE_AP_VLAN:
225 /* no need to tell driver */ 228 /* no need to tell driver */
@@ -253,9 +256,11 @@ static int ieee80211_open(struct net_device *dev)
253 netif_carrier_on(dev); 256 netif_carrier_on(dev);
254 break; 257 break;
255 default: 258 default:
256 res = drv_add_interface(local, &sdata->vif); 259 if (coming_up) {
257 if (res) 260 res = drv_add_interface(local, &sdata->vif);
258 goto err_stop; 261 if (res)
262 goto err_stop;
263 }
259 264
260 if (ieee80211_vif_is_mesh(&sdata->vif)) { 265 if (ieee80211_vif_is_mesh(&sdata->vif)) {
261 local->fif_other_bss++; 266 local->fif_other_bss++;
@@ -277,6 +282,8 @@ static int ieee80211_open(struct net_device *dev)
277 netif_carrier_on(dev); 282 netif_carrier_on(dev);
278 } 283 }
279 284
285 set_bit(SDATA_STATE_RUNNING, &sdata->state);
286
280 if (sdata->vif.type == NL80211_IFTYPE_WDS) { 287 if (sdata->vif.type == NL80211_IFTYPE_WDS) {
281 /* Create STA entry for the WDS peer */ 288 /* Create STA entry for the WDS peer */
282 sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, 289 sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
@@ -307,9 +314,13 @@ static int ieee80211_open(struct net_device *dev)
307 if (sdata->flags & IEEE80211_SDATA_PROMISC) 314 if (sdata->flags & IEEE80211_SDATA_PROMISC)
308 atomic_inc(&local->iff_promiscs); 315 atomic_inc(&local->iff_promiscs);
309 316
317 mutex_lock(&local->mtx);
310 hw_reconf_flags |= __ieee80211_recalc_idle(local); 318 hw_reconf_flags |= __ieee80211_recalc_idle(local);
319 mutex_unlock(&local->mtx);
320
321 if (coming_up)
322 local->open_count++;
311 323
312 local->open_count++;
313 if (hw_reconf_flags) { 324 if (hw_reconf_flags) {
314 ieee80211_hw_config(local, hw_reconf_flags); 325 ieee80211_hw_config(local, hw_reconf_flags);
315 /* 326 /*
@@ -334,22 +345,42 @@ static int ieee80211_open(struct net_device *dev)
334 sdata->bss = NULL; 345 sdata->bss = NULL;
335 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 346 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
336 list_del(&sdata->u.vlan.list); 347 list_del(&sdata->u.vlan.list);
348 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
337 return res; 349 return res;
338} 350}
339 351
340static int ieee80211_stop(struct net_device *dev) 352static int ieee80211_open(struct net_device *dev)
341{ 353{
342 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 354 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
355 int err;
356
357 /* fail early if user set an invalid address */
358 if (!is_zero_ether_addr(dev->dev_addr) &&
359 !is_valid_ether_addr(dev->dev_addr))
360 return -EADDRNOTAVAIL;
361
362 err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type);
363 if (err)
364 return err;
365
366 return ieee80211_do_open(dev, true);
367}
368
369static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
370 bool going_down)
371{
343 struct ieee80211_local *local = sdata->local; 372 struct ieee80211_local *local = sdata->local;
344 unsigned long flags; 373 unsigned long flags;
345 struct sk_buff *skb, *tmp; 374 struct sk_buff *skb, *tmp;
346 u32 hw_reconf_flags = 0; 375 u32 hw_reconf_flags = 0;
347 int i; 376 int i;
348 377
378 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
379
349 /* 380 /*
350 * Stop TX on this interface first. 381 * Stop TX on this interface first.
351 */ 382 */
352 netif_tx_stop_all_queues(dev); 383 netif_tx_stop_all_queues(sdata->dev);
353 384
354 /* 385 /*
355 * Purge work for this interface. 386 * Purge work for this interface.
@@ -366,12 +397,9 @@ static int ieee80211_stop(struct net_device *dev)
366 * (because if we remove a STA after ops->remove_interface() 397 * (because if we remove a STA after ops->remove_interface()
367 * the driver will have removed the vif info already!) 398 * the driver will have removed the vif info already!)
368 * 399 *
369 * We could relax this and only unlink the stations from the 400 * This is relevant only in AP, WDS and mesh modes, since in
370 * hash table and list but keep them on a per-sdata list that 401 * all other modes we've already removed all stations when
371 * will be inserted back again when the interface is brought 402 * disconnecting etc.
372 * up again, but I don't currently see a use case for that,
373 * except with WDS which gets a STA entry created when it is
374 * brought up.
375 */ 403 */
376 sta_info_flush(local, sdata); 404 sta_info_flush(local, sdata);
377 405
@@ -390,11 +418,12 @@ static int ieee80211_stop(struct net_device *dev)
390 if (sdata->vif.type == NL80211_IFTYPE_AP) 418 if (sdata->vif.type == NL80211_IFTYPE_AP)
391 local->fif_pspoll--; 419 local->fif_pspoll--;
392 420
393 netif_addr_lock_bh(dev); 421 netif_addr_lock_bh(sdata->dev);
394 spin_lock_bh(&local->filter_lock); 422 spin_lock_bh(&local->filter_lock);
395 __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); 423 __hw_addr_unsync(&local->mc_list, &sdata->dev->mc,
424 sdata->dev->addr_len);
396 spin_unlock_bh(&local->filter_lock); 425 spin_unlock_bh(&local->filter_lock);
397 netif_addr_unlock_bh(dev); 426 netif_addr_unlock_bh(sdata->dev);
398 427
399 ieee80211_configure_filter(local); 428 ieee80211_configure_filter(local);
400 429
@@ -406,11 +435,21 @@ static int ieee80211_stop(struct net_device *dev)
406 struct ieee80211_sub_if_data *vlan, *tmpsdata; 435 struct ieee80211_sub_if_data *vlan, *tmpsdata;
407 struct beacon_data *old_beacon = sdata->u.ap.beacon; 436 struct beacon_data *old_beacon = sdata->u.ap.beacon;
408 437
438 /* sdata_running will return false, so this will disable */
439 ieee80211_bss_info_change_notify(sdata,
440 BSS_CHANGED_BEACON_ENABLED);
441
409 /* remove beacon */ 442 /* remove beacon */
410 rcu_assign_pointer(sdata->u.ap.beacon, NULL); 443 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
411 synchronize_rcu(); 444 synchronize_rcu();
412 kfree(old_beacon); 445 kfree(old_beacon);
413 446
447 /* free all potentially still buffered bcast frames */
448 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
449 local->total_ps_buffered--;
450 dev_kfree_skb(skb);
451 }
452
414 /* down all dependent devices, that is VLANs */ 453 /* down all dependent devices, that is VLANs */
415 list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, 454 list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
416 u.vlan.list) 455 u.vlan.list)
@@ -418,7 +457,8 @@ static int ieee80211_stop(struct net_device *dev)
418 WARN_ON(!list_empty(&sdata->u.ap.vlans)); 457 WARN_ON(!list_empty(&sdata->u.ap.vlans));
419 } 458 }
420 459
421 local->open_count--; 460 if (going_down)
461 local->open_count--;
422 462
423 switch (sdata->vif.type) { 463 switch (sdata->vif.type) {
424 case NL80211_IFTYPE_AP_VLAN: 464 case NL80211_IFTYPE_AP_VLAN:
@@ -450,27 +490,6 @@ static int ieee80211_stop(struct net_device *dev)
450 490
451 ieee80211_configure_filter(local); 491 ieee80211_configure_filter(local);
452 break; 492 break;
453 case NL80211_IFTYPE_STATION:
454 del_timer_sync(&sdata->u.mgd.chswitch_timer);
455 del_timer_sync(&sdata->u.mgd.timer);
456 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
457 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
458 /*
459 * If any of the timers fired while we waited for it, it will
460 * have queued its work. Now the work will be running again
461 * but will not rearm the timer again because it checks
462 * whether the interface is running, which, at this point,
463 * it no longer is.
464 */
465 cancel_work_sync(&sdata->u.mgd.chswitch_work);
466 cancel_work_sync(&sdata->u.mgd.monitor_work);
467 cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work);
468
469 /* fall through */
470 case NL80211_IFTYPE_ADHOC:
471 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
472 del_timer_sync(&sdata->u.ibss.timer);
473 /* fall through */
474 case NL80211_IFTYPE_MESH_POINT: 493 case NL80211_IFTYPE_MESH_POINT:
475 if (ieee80211_vif_is_mesh(&sdata->vif)) { 494 if (ieee80211_vif_is_mesh(&sdata->vif)) {
476 /* other_bss and allmulti are always set on mesh 495 /* other_bss and allmulti are always set on mesh
@@ -498,27 +517,34 @@ static int ieee80211_stop(struct net_device *dev)
498 ieee80211_scan_cancel(local); 517 ieee80211_scan_cancel(local);
499 518
500 /* 519 /*
501 * Disable beaconing for AP and mesh, IBSS can't 520 * Disable beaconing here for mesh only, AP and IBSS
502 * still be joined to a network at this point. 521 * are already taken care of.
503 */ 522 */
504 if (sdata->vif.type == NL80211_IFTYPE_AP || 523 if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
505 sdata->vif.type == NL80211_IFTYPE_MESH_POINT) {
506 ieee80211_bss_info_change_notify(sdata, 524 ieee80211_bss_info_change_notify(sdata,
507 BSS_CHANGED_BEACON_ENABLED); 525 BSS_CHANGED_BEACON_ENABLED);
508 }
509 526
510 /* free all remaining keys, there shouldn't be any */ 527 /*
528 * Free all remaining keys, there shouldn't be any,
529 * except maybe group keys in AP mode or WDS?
530 */
511 ieee80211_free_keys(sdata); 531 ieee80211_free_keys(sdata);
512 drv_remove_interface(local, &sdata->vif); 532
533 if (going_down)
534 drv_remove_interface(local, &sdata->vif);
513 } 535 }
514 536
515 sdata->bss = NULL; 537 sdata->bss = NULL;
516 538
539 mutex_lock(&local->mtx);
517 hw_reconf_flags |= __ieee80211_recalc_idle(local); 540 hw_reconf_flags |= __ieee80211_recalc_idle(local);
541 mutex_unlock(&local->mtx);
518 542
519 ieee80211_recalc_ps(local, -1); 543 ieee80211_recalc_ps(local, -1);
520 544
521 if (local->open_count == 0) { 545 if (local->open_count == 0) {
546 if (local->ops->napi_poll)
547 napi_disable(&local->napi);
522 ieee80211_clear_tx_pending(local); 548 ieee80211_clear_tx_pending(local);
523 ieee80211_stop_device(local); 549 ieee80211_stop_device(local);
524 550
@@ -541,6 +567,13 @@ static int ieee80211_stop(struct net_device *dev)
541 } 567 }
542 } 568 }
543 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 569 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
570}
571
572static int ieee80211_stop(struct net_device *dev)
573{
574 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
575
576 ieee80211_do_stop(sdata, true);
544 577
545 return 0; 578 return 0;
546} 579}
@@ -585,8 +618,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
585{ 618{
586 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 619 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
587 struct ieee80211_local *local = sdata->local; 620 struct ieee80211_local *local = sdata->local;
588 struct beacon_data *beacon;
589 struct sk_buff *skb;
590 int flushed; 621 int flushed;
591 int i; 622 int i;
592 623
@@ -599,37 +630,8 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
599 __skb_queue_purge(&sdata->fragments[i].skb_list); 630 __skb_queue_purge(&sdata->fragments[i].skb_list);
600 sdata->fragment_next = 0; 631 sdata->fragment_next = 0;
601 632
602 switch (sdata->vif.type) { 633 if (ieee80211_vif_is_mesh(&sdata->vif))
603 case NL80211_IFTYPE_AP: 634 mesh_rmc_free(sdata);
604 beacon = sdata->u.ap.beacon;
605 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
606 synchronize_rcu();
607 kfree(beacon);
608
609 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
610 local->total_ps_buffered--;
611 dev_kfree_skb(skb);
612 }
613
614 break;
615 case NL80211_IFTYPE_MESH_POINT:
616 if (ieee80211_vif_is_mesh(&sdata->vif))
617 mesh_rmc_free(sdata);
618 break;
619 case NL80211_IFTYPE_ADHOC:
620 if (WARN_ON(sdata->u.ibss.presp))
621 kfree_skb(sdata->u.ibss.presp);
622 break;
623 case NL80211_IFTYPE_STATION:
624 case NL80211_IFTYPE_WDS:
625 case NL80211_IFTYPE_AP_VLAN:
626 case NL80211_IFTYPE_MONITOR:
627 break;
628 case NL80211_IFTYPE_UNSPECIFIED:
629 case __NL80211_IFTYPE_AFTER_LAST:
630 BUG();
631 break;
632 }
633 635
634 flushed = sta_info_flush(local, sdata); 636 flushed = sta_info_flush(local, sdata);
635 WARN_ON(flushed); 637 WARN_ON(flushed);
@@ -844,9 +846,13 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
844 846
845 /* and set some type-dependent values */ 847 /* and set some type-dependent values */
846 sdata->vif.type = type; 848 sdata->vif.type = type;
849 sdata->vif.p2p = false;
847 sdata->dev->netdev_ops = &ieee80211_dataif_ops; 850 sdata->dev->netdev_ops = &ieee80211_dataif_ops;
848 sdata->wdev.iftype = type; 851 sdata->wdev.iftype = type;
849 852
853 sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
854 sdata->control_port_no_encrypt = false;
855
850 /* only monitor differs */ 856 /* only monitor differs */
851 sdata->dev->type = ARPHRD_ETHER; 857 sdata->dev->type = ARPHRD_ETHER;
852 858
@@ -854,10 +860,20 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
854 INIT_WORK(&sdata->work, ieee80211_iface_work); 860 INIT_WORK(&sdata->work, ieee80211_iface_work);
855 861
856 switch (type) { 862 switch (type) {
863 case NL80211_IFTYPE_P2P_GO:
864 type = NL80211_IFTYPE_AP;
865 sdata->vif.type = type;
866 sdata->vif.p2p = true;
867 /* fall through */
857 case NL80211_IFTYPE_AP: 868 case NL80211_IFTYPE_AP:
858 skb_queue_head_init(&sdata->u.ap.ps_bc_buf); 869 skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
859 INIT_LIST_HEAD(&sdata->u.ap.vlans); 870 INIT_LIST_HEAD(&sdata->u.ap.vlans);
860 break; 871 break;
872 case NL80211_IFTYPE_P2P_CLIENT:
873 type = NL80211_IFTYPE_STATION;
874 sdata->vif.type = type;
875 sdata->vif.p2p = true;
876 /* fall through */
861 case NL80211_IFTYPE_STATION: 877 case NL80211_IFTYPE_STATION:
862 ieee80211_sta_setup_sdata(sdata); 878 ieee80211_sta_setup_sdata(sdata);
863 break; 879 break;
@@ -878,7 +894,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
878 case NL80211_IFTYPE_AP_VLAN: 894 case NL80211_IFTYPE_AP_VLAN:
879 break; 895 break;
880 case NL80211_IFTYPE_UNSPECIFIED: 896 case NL80211_IFTYPE_UNSPECIFIED:
881 case __NL80211_IFTYPE_AFTER_LAST: 897 case NUM_NL80211_IFTYPES:
882 BUG(); 898 BUG();
883 break; 899 break;
884 } 900 }
@@ -886,12 +902,85 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
886 ieee80211_debugfs_add_netdev(sdata); 902 ieee80211_debugfs_add_netdev(sdata);
887} 903}
888 904
905static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
906 enum nl80211_iftype type)
907{
908 struct ieee80211_local *local = sdata->local;
909 int ret, err;
910 enum nl80211_iftype internal_type = type;
911 bool p2p = false;
912
913 ASSERT_RTNL();
914
915 if (!local->ops->change_interface)
916 return -EBUSY;
917
918 switch (sdata->vif.type) {
919 case NL80211_IFTYPE_AP:
920 case NL80211_IFTYPE_STATION:
921 case NL80211_IFTYPE_ADHOC:
922 /*
923 * Could maybe also all others here?
924 * Just not sure how that interacts
925 * with the RX/config path e.g. for
926 * mesh.
927 */
928 break;
929 default:
930 return -EBUSY;
931 }
932
933 switch (type) {
934 case NL80211_IFTYPE_AP:
935 case NL80211_IFTYPE_STATION:
936 case NL80211_IFTYPE_ADHOC:
937 /*
938 * Could probably support everything
939 * but WDS here (WDS do_open can fail
940 * under memory pressure, which this
941 * code isn't prepared to handle).
942 */
943 break;
944 case NL80211_IFTYPE_P2P_CLIENT:
945 p2p = true;
946 internal_type = NL80211_IFTYPE_STATION;
947 break;
948 case NL80211_IFTYPE_P2P_GO:
949 p2p = true;
950 internal_type = NL80211_IFTYPE_AP;
951 break;
952 default:
953 return -EBUSY;
954 }
955
956 ret = ieee80211_check_concurrent_iface(sdata, internal_type);
957 if (ret)
958 return ret;
959
960 ieee80211_do_stop(sdata, false);
961
962 ieee80211_teardown_sdata(sdata->dev);
963
964 ret = drv_change_interface(local, sdata, internal_type, p2p);
965 if (ret)
966 type = sdata->vif.type;
967
968 ieee80211_setup_sdata(sdata, type);
969
970 err = ieee80211_do_open(sdata->dev, false);
971 WARN(err, "type change: do_open returned %d", err);
972
973 return ret;
974}
975
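
The runtime type-change path above tears the interface down without removing it from the driver, asks the driver to morph the vif via drv_change_interface(), and then re-runs ieee80211_do_open() with coming_up = false. A minimal sketch of the driver-side counterpart, assuming the callback mirrors the drv_change_interface() arguments used here; the mydrv_* names and the hardware reprogramming helper are hypothetical:

static int mydrv_change_interface(struct ieee80211_hw *hw,
				  struct ieee80211_vif *vif,
				  enum nl80211_iftype new_type, bool p2p)
{
	/* reprogram the hardware vif slot for its new role */
	return mydrv_program_vif(hw->priv, vif, new_type, p2p);
}
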
889int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, 976int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
890 enum nl80211_iftype type) 977 enum nl80211_iftype type)
891{ 978{
979 int ret;
980
892 ASSERT_RTNL(); 981 ASSERT_RTNL();
893 982
894 if (type == sdata->vif.type) 983 if (type == ieee80211_vif_type_p2p(&sdata->vif))
895 return 0; 984 return 0;
896 985
897 /* Setting ad-hoc mode on non-IBSS channel is not supported. */ 986 /* Setting ad-hoc mode on non-IBSS channel is not supported. */
@@ -899,18 +988,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
899 type == NL80211_IFTYPE_ADHOC) 988 type == NL80211_IFTYPE_ADHOC)
900 return -EOPNOTSUPP; 989 return -EOPNOTSUPP;
901 990
902 /* 991 if (ieee80211_sdata_running(sdata)) {
903 * We could, here, on changes between IBSS/STA/MESH modes, 992 ret = ieee80211_runtime_change_iftype(sdata, type);
904 * invoke an MLME function instead that disassociates etc. 993 if (ret)
905 * and goes into the requested mode. 994 return ret;
906 */ 995 } else {
907 996 /* Purge and reset type-dependent state. */
908 if (ieee80211_sdata_running(sdata)) 997 ieee80211_teardown_sdata(sdata->dev);
909 return -EBUSY; 998 ieee80211_setup_sdata(sdata, type);
910 999 }
911 /* Purge and reset type-dependent state. */
912 ieee80211_teardown_sdata(sdata->dev);
913 ieee80211_setup_sdata(sdata, type);
914 1000
915 /* reset some values that shouldn't be kept across type changes */ 1001 /* reset some values that shouldn't be kept across type changes */
916 sdata->vif.bss_conf.basic_rates = 1002 sdata->vif.bss_conf.basic_rates =
@@ -1167,8 +1253,7 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local,
1167 return 0; 1253 return 0;
1168 1254
1169#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1255#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1170 printk(KERN_DEBUG "%s: device no longer idle - %s\n", 1256 wiphy_debug(local->hw.wiphy, "device no longer idle - %s\n", reason);
1171 wiphy_name(local->hw.wiphy), reason);
1172#endif 1257#endif
1173 1258
1174 local->hw.conf.flags &= ~IEEE80211_CONF_IDLE; 1259 local->hw.conf.flags &= ~IEEE80211_CONF_IDLE;
@@ -1181,8 +1266,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local)
1181 return 0; 1266 return 0;
1182 1267
1183#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1268#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1184 printk(KERN_DEBUG "%s: device now idle\n", 1269 wiphy_debug(local->hw.wiphy, "device now idle\n");
1185 wiphy_name(local->hw.wiphy));
1186#endif 1270#endif
1187 1271
1188 drv_flush(local, false); 1272 drv_flush(local, false);
@@ -1195,28 +1279,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
1195{ 1279{
1196 struct ieee80211_sub_if_data *sdata; 1280 struct ieee80211_sub_if_data *sdata;
1197 int count = 0; 1281 int count = 0;
1282 bool working = false, scanning = false;
1283 struct ieee80211_work *wk;
1198 1284
1199 if (!list_empty(&local->work_list)) 1285#ifdef CONFIG_PROVE_LOCKING
1200 return ieee80211_idle_off(local, "working"); 1286 WARN_ON(debug_locks && !lockdep_rtnl_is_held() &&
1201 1287 !lockdep_is_held(&local->iflist_mtx));
1202 if (local->scanning) 1288#endif
1203 return ieee80211_idle_off(local, "scanning"); 1289 lockdep_assert_held(&local->mtx);
1204 1290
1205 list_for_each_entry(sdata, &local->interfaces, list) { 1291 list_for_each_entry(sdata, &local->interfaces, list) {
1206 if (!ieee80211_sdata_running(sdata)) 1292 if (!ieee80211_sdata_running(sdata)) {
1293 sdata->vif.bss_conf.idle = true;
1207 continue; 1294 continue;
1295 }
1296
1297 sdata->old_idle = sdata->vif.bss_conf.idle;
1298
1208 /* do not count disabled managed interfaces */ 1299 /* do not count disabled managed interfaces */
1209 if (sdata->vif.type == NL80211_IFTYPE_STATION && 1300 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
1210 !sdata->u.mgd.associated) 1301 !sdata->u.mgd.associated) {
1302 sdata->vif.bss_conf.idle = true;
1211 continue; 1303 continue;
1304 }
1212 /* do not count unused IBSS interfaces */ 1305 /* do not count unused IBSS interfaces */
1213 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 1306 if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
1214 !sdata->u.ibss.ssid_len) 1307 !sdata->u.ibss.ssid_len) {
1308 sdata->vif.bss_conf.idle = true;
1215 continue; 1309 continue;
1310 }
1216 /* count everything else */ 1311 /* count everything else */
1217 count++; 1312 count++;
1218 } 1313 }
1219 1314
1315 list_for_each_entry(wk, &local->work_list, list) {
1316 working = true;
1317 wk->sdata->vif.bss_conf.idle = false;
1318 }
1319
1320 if (local->scan_sdata) {
1321 scanning = true;
1322 local->scan_sdata->vif.bss_conf.idle = false;
1323 }
1324
1325 list_for_each_entry(sdata, &local->interfaces, list) {
1326 if (sdata->old_idle == sdata->vif.bss_conf.idle)
1327 continue;
1328 if (!ieee80211_sdata_running(sdata))
1329 continue;
1330 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
1331 }
1332
1333 if (working)
1334 return ieee80211_idle_off(local, "working");
1335 if (scanning)
1336 return ieee80211_idle_off(local, "scanning");
1220 if (!count) 1337 if (!count)
1221 return ieee80211_idle_on(local); 1338 return ieee80211_idle_on(local);
1222 else 1339 else
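
__ieee80211_recalc_idle() now tracks a per-vif bss_conf.idle flag alongside the global IEEE80211_CONF_IDLE state, and sends BSS_CHANGED_IDLE only to running interfaces whose idle state actually changed. A hedged sketch of a driver consuming that notification; the mydrv_* names and the per-vif power-save hook are assumptions:

static void mydrv_bss_info_changed(struct ieee80211_hw *hw,
				   struct ieee80211_vif *vif,
				   struct ieee80211_bss_conf *info,
				   u32 changed)
{
	if (changed & BSS_CHANGED_IDLE)
		mydrv_set_vif_powersave(hw->priv, vif, info->idle);
}
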
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 1b9d87ed143..6a63d1abd14 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -49,7 +49,7 @@ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
49 49
50static void assert_key_lock(struct ieee80211_local *local) 50static void assert_key_lock(struct ieee80211_local *local)
51{ 51{
52 WARN_ON(!mutex_is_locked(&local->key_mtx)); 52 lockdep_assert_held(&local->key_mtx);
53} 53}
54 54
55static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) 55static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
@@ -60,7 +60,7 @@ static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
60 return NULL; 60 return NULL;
61} 61}
62 62
63static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) 63static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
64{ 64{
65 struct ieee80211_sub_if_data *sdata; 65 struct ieee80211_sub_if_data *sdata;
66 struct ieee80211_sta *sta; 66 struct ieee80211_sta *sta;
@@ -68,8 +68,10 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
68 68
69 might_sleep(); 69 might_sleep();
70 70
71 if (!key->local->ops->set_key) 71 if (!key->local->ops->set_key) {
72 return; 72 ret = -EOPNOTSUPP;
73 goto out_unsupported;
74 }
73 75
74 assert_key_lock(key->local); 76 assert_key_lock(key->local);
75 77
@@ -87,10 +89,27 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
87 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; 89 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
88 90
89 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) 91 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP)
90 printk(KERN_ERR "mac80211-%s: failed to set key " 92 wiphy_err(key->local->hw.wiphy,
91 "(%d, %pM) to hardware (%d)\n", 93 "failed to set key (%d, %pM) to hardware (%d)\n",
92 wiphy_name(key->local->hw.wiphy), 94 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
93 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); 95
96out_unsupported:
97 if (ret) {
98 switch (key->conf.cipher) {
99 case WLAN_CIPHER_SUITE_WEP40:
100 case WLAN_CIPHER_SUITE_WEP104:
101 case WLAN_CIPHER_SUITE_TKIP:
102 case WLAN_CIPHER_SUITE_CCMP:
103 case WLAN_CIPHER_SUITE_AES_CMAC:
104 /* all of these we can do in software */
105 ret = 0;
106 break;
107 default:
108 ret = -EINVAL;
109 }
110 }
111
112 return ret;
94} 113}
95 114
96static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) 115static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
@@ -121,10 +140,9 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
121 sta, &key->conf); 140 sta, &key->conf);
122 141
123 if (ret) 142 if (ret)
124 printk(KERN_ERR "mac80211-%s: failed to remove key " 143 wiphy_err(key->local->hw.wiphy,
125 "(%d, %pM) from hardware (%d)\n", 144 "failed to remove key (%d, %pM) from hardware (%d)\n",
126 wiphy_name(key->local->hw.wiphy), 145 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
127 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
128 146
129 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 147 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
130} 148}
@@ -227,20 +245,18 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
227 } 245 }
228} 246}
229 247
230struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, 248struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
231 int idx,
232 size_t key_len,
233 const u8 *key_data, 249 const u8 *key_data,
234 size_t seq_len, const u8 *seq) 250 size_t seq_len, const u8 *seq)
235{ 251{
236 struct ieee80211_key *key; 252 struct ieee80211_key *key;
237 int i, j; 253 int i, j, err;
238 254
239 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS); 255 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
240 256
241 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); 257 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
242 if (!key) 258 if (!key)
243 return NULL; 259 return ERR_PTR(-ENOMEM);
244 260
245 /* 261 /*
246 * Default to software encryption; we'll later upload the 262 * Default to software encryption; we'll later upload the
@@ -249,15 +265,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
249 key->conf.flags = 0; 265 key->conf.flags = 0;
250 key->flags = 0; 266 key->flags = 0;
251 267
252 key->conf.alg = alg; 268 key->conf.cipher = cipher;
253 key->conf.keyidx = idx; 269 key->conf.keyidx = idx;
254 key->conf.keylen = key_len; 270 key->conf.keylen = key_len;
255 switch (alg) { 271 switch (cipher) {
256 case ALG_WEP: 272 case WLAN_CIPHER_SUITE_WEP40:
273 case WLAN_CIPHER_SUITE_WEP104:
257 key->conf.iv_len = WEP_IV_LEN; 274 key->conf.iv_len = WEP_IV_LEN;
258 key->conf.icv_len = WEP_ICV_LEN; 275 key->conf.icv_len = WEP_ICV_LEN;
259 break; 276 break;
260 case ALG_TKIP: 277 case WLAN_CIPHER_SUITE_TKIP:
261 key->conf.iv_len = TKIP_IV_LEN; 278 key->conf.iv_len = TKIP_IV_LEN;
262 key->conf.icv_len = TKIP_ICV_LEN; 279 key->conf.icv_len = TKIP_ICV_LEN;
263 if (seq) { 280 if (seq) {
@@ -269,7 +286,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
269 } 286 }
270 } 287 }
271 break; 288 break;
272 case ALG_CCMP: 289 case WLAN_CIPHER_SUITE_CCMP:
273 key->conf.iv_len = CCMP_HDR_LEN; 290 key->conf.iv_len = CCMP_HDR_LEN;
274 key->conf.icv_len = CCMP_MIC_LEN; 291 key->conf.icv_len = CCMP_MIC_LEN;
275 if (seq) { 292 if (seq) {
@@ -278,42 +295,38 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
278 key->u.ccmp.rx_pn[i][j] = 295 key->u.ccmp.rx_pn[i][j] =
279 seq[CCMP_PN_LEN - j - 1]; 296 seq[CCMP_PN_LEN - j - 1];
280 } 297 }
281 break;
282 case ALG_AES_CMAC:
283 key->conf.iv_len = 0;
284 key->conf.icv_len = sizeof(struct ieee80211_mmie);
285 if (seq)
286 for (j = 0; j < 6; j++)
287 key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
288 break;
289 }
290 memcpy(key->conf.key, key_data, key_len);
291 INIT_LIST_HEAD(&key->list);
292
293 if (alg == ALG_CCMP) {
294 /* 298 /*
295 * Initialize AES key state here as an optimization so that 299 * Initialize AES key state here as an optimization so that
296 * it does not need to be initialized for every packet. 300 * it does not need to be initialized for every packet.
297 */ 301 */
298 key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); 302 key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data);
299 if (!key->u.ccmp.tfm) { 303 if (IS_ERR(key->u.ccmp.tfm)) {
304 err = PTR_ERR(key->u.ccmp.tfm);
300 kfree(key); 305 kfree(key);
301 return NULL; 306 key = ERR_PTR(err);
302 } 307 }
303 } 308 break;
304 309 case WLAN_CIPHER_SUITE_AES_CMAC:
305 if (alg == ALG_AES_CMAC) { 310 key->conf.iv_len = 0;
311 key->conf.icv_len = sizeof(struct ieee80211_mmie);
312 if (seq)
313 for (j = 0; j < 6; j++)
314 key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
306 /* 315 /*
307 * Initialize AES key state here as an optimization so that 316 * Initialize AES key state here as an optimization so that
308 * it does not need to be initialized for every packet. 317 * it does not need to be initialized for every packet.
309 */ 318 */
310 key->u.aes_cmac.tfm = 319 key->u.aes_cmac.tfm =
311 ieee80211_aes_cmac_key_setup(key_data); 320 ieee80211_aes_cmac_key_setup(key_data);
312 if (!key->u.aes_cmac.tfm) { 321 if (IS_ERR(key->u.aes_cmac.tfm)) {
322 err = PTR_ERR(key->u.aes_cmac.tfm);
313 kfree(key); 323 kfree(key);
314 return NULL; 324 key = ERR_PTR(err);
315 } 325 }
326 break;
316 } 327 }
328 memcpy(key->conf.key, key_data, key_len);
329 INIT_LIST_HEAD(&key->list);
317 330
318 return key; 331 return key;
319} 332}
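
Besides taking a cipher-suite selector, ieee80211_key_alloc() now reports failure as an ERR_PTR() value rather than NULL, and ieee80211_key_link() returns an error code (see the key.h change below). A rough caller sketch under those conventions; example_add_key and the NULL station pointer are illustrative only:

static int example_add_key(struct ieee80211_sub_if_data *sdata,
			   u32 cipher, int idx,
			   const u8 *key_data, size_t key_len)
{
	struct ieee80211_key *key;

	key = ieee80211_key_alloc(cipher, idx, key_len, key_data, 0, NULL);
	if (IS_ERR(key))
		return PTR_ERR(key);	/* was: if (!key) return -ENOMEM; */

	return ieee80211_key_link(key, sdata, NULL);
}
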
@@ -326,9 +339,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
326 if (key->local) 339 if (key->local)
327 ieee80211_key_disable_hw_accel(key); 340 ieee80211_key_disable_hw_accel(key);
328 341
329 if (key->conf.alg == ALG_CCMP) 342 if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
330 ieee80211_aes_key_free(key->u.ccmp.tfm); 343 ieee80211_aes_key_free(key->u.ccmp.tfm);
331 if (key->conf.alg == ALG_AES_CMAC) 344 if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
332 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); 345 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
333 if (key->local) 346 if (key->local)
334 ieee80211_debugfs_key_remove(key); 347 ieee80211_debugfs_key_remove(key);
@@ -336,12 +349,12 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
336 kfree(key); 349 kfree(key);
337} 350}
338 351
339void ieee80211_key_link(struct ieee80211_key *key, 352int ieee80211_key_link(struct ieee80211_key *key,
340 struct ieee80211_sub_if_data *sdata, 353 struct ieee80211_sub_if_data *sdata,
341 struct sta_info *sta) 354 struct sta_info *sta)
342{ 355{
343 struct ieee80211_key *old_key; 356 struct ieee80211_key *old_key;
344 int idx; 357 int idx, ret;
345 358
346 BUG_ON(!sdata); 359 BUG_ON(!sdata);
347 BUG_ON(!key); 360 BUG_ON(!key);
@@ -396,9 +409,11 @@ void ieee80211_key_link(struct ieee80211_key *key,
396 409
397 ieee80211_debugfs_key_add(key); 410 ieee80211_debugfs_key_add(key);
398 411
399 ieee80211_key_enable_hw_accel(key); 412 ret = ieee80211_key_enable_hw_accel(key);
400 413
401 mutex_unlock(&sdata->local->key_mtx); 414 mutex_unlock(&sdata->local->key_mtx);
415
416 return ret;
402} 417}
403 418
404static void __ieee80211_key_free(struct ieee80211_key *key) 419static void __ieee80211_key_free(struct ieee80211_key *key)
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index b665bbb7a47..cb9a4a65cc6 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -123,18 +123,16 @@ struct ieee80211_key {
123 struct ieee80211_key_conf conf; 123 struct ieee80211_key_conf conf;
124}; 124};
125 125
126struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, 126struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
127 int idx,
128 size_t key_len,
129 const u8 *key_data, 127 const u8 *key_data,
130 size_t seq_len, const u8 *seq); 128 size_t seq_len, const u8 *seq);
131/* 129/*
132 * Insert a key into data structures (sdata, sta if necessary) 130 * Insert a key into data structures (sdata, sta if necessary)
133 * to make it used, free old key. 131 * to make it used, free old key.
134 */ 132 */
135void ieee80211_key_link(struct ieee80211_key *key, 133int __must_check ieee80211_key_link(struct ieee80211_key *key,
136 struct ieee80211_sub_if_data *sdata, 134 struct ieee80211_sub_if_data *sdata,
137 struct sta_info *sta); 135 struct sta_info *sta);
138void ieee80211_key_free(struct ieee80211_local *local, 136void ieee80211_key_free(struct ieee80211_local *local,
139 struct ieee80211_key *key); 137 struct ieee80211_key *key);
140void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); 138void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ded5c3843e0..fda97bb0018 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -99,11 +99,13 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
99 int ret = 0; 99 int ret = 0;
100 int power; 100 int power;
101 enum nl80211_channel_type channel_type; 101 enum nl80211_channel_type channel_type;
102 u32 offchannel_flag;
102 103
103 might_sleep(); 104 might_sleep();
104 105
105 scan_chan = local->scan_channel; 106 scan_chan = local->scan_channel;
106 107
108 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
107 if (scan_chan) { 109 if (scan_chan) {
108 chan = scan_chan; 110 chan = scan_chan;
109 channel_type = NL80211_CHAN_NO_HT; 111 channel_type = NL80211_CHAN_NO_HT;
@@ -117,8 +119,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
117 channel_type = local->_oper_channel_type; 119 channel_type = local->_oper_channel_type;
118 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; 120 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
119 } 121 }
122 offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
120 123
121 if (chan != local->hw.conf.channel || 124 if (offchannel_flag || chan != local->hw.conf.channel ||
122 channel_type != local->hw.conf.channel_type) { 125 channel_type != local->hw.conf.channel_type) {
123 local->hw.conf.channel = chan; 126 local->hw.conf.channel = chan;
124 local->hw.conf.channel_type = channel_type; 127 local->hw.conf.channel_type = channel_type;
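
The offchannel_flag handling above is a change-detection XOR: snapshot the IEEE80211_CONF_OFFCHANNEL bit before the scan/operating-channel logic may toggle it, XOR it with the bit afterwards, and a nonzero result means the flag flipped in either direction, so the channel-change path runs even when the channel pointer itself is unchanged. A stand-alone illustration of the idiom, with an arbitrary flag value:

#include <stdio.h>

#define CONF_OFFCHANNEL	0x4	/* stand-in for the real flag bit */

int main(void)
{
	unsigned int flags = 0;				/* start on-channel */
	unsigned int offchannel_flag;

	offchannel_flag = flags & CONF_OFFCHANNEL;	/* snapshot */
	flags |= CONF_OFFCHANNEL;			/* e.g. a scan goes off-channel */
	offchannel_flag ^= flags & CONF_OFFCHANNEL;	/* nonzero iff it changed */

	printf("offchannel flag changed: %s\n", offchannel_flag ? "yes" : "no");
	return 0;
}
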
@@ -302,7 +305,16 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
302 305
303 trace_api_restart_hw(local); 306 trace_api_restart_hw(local);
304 307
305 /* use this reason, __ieee80211_resume will unblock it */ 308 /* wait for scan work complete */
309 flush_workqueue(local->workqueue);
310
311 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
312 "%s called with hardware scan in progress\n", __func__);
313
314 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)))
315 ieee80211_scan_cancel(local);
316
317 /* use this reason, ieee80211_reconfig will unblock it */
306 ieee80211_stop_queues_by_reason(hw, 318 ieee80211_stop_queues_by_reason(hw,
307 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 319 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
308 320
@@ -336,9 +348,6 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
336 struct ieee80211_if_managed *ifmgd; 348 struct ieee80211_if_managed *ifmgd;
337 int c = 0; 349 int c = 0;
338 350
339 if (!netif_running(ndev))
340 return NOTIFY_DONE;
341
342 /* Make sure it's our interface that got changed */ 351 /* Make sure it's our interface that got changed */
343 if (!wdev) 352 if (!wdev)
344 return NOTIFY_DONE; 353 return NOTIFY_DONE;
@@ -349,11 +358,14 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
349 sdata = IEEE80211_DEV_TO_SUB_IF(ndev); 358 sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
350 bss_conf = &sdata->vif.bss_conf; 359 bss_conf = &sdata->vif.bss_conf;
351 360
361 if (!ieee80211_sdata_running(sdata))
362 return NOTIFY_DONE;
363
352 /* ARP filtering is only supported in managed mode */ 364 /* ARP filtering is only supported in managed mode */
353 if (sdata->vif.type != NL80211_IFTYPE_STATION) 365 if (sdata->vif.type != NL80211_IFTYPE_STATION)
354 return NOTIFY_DONE; 366 return NOTIFY_DONE;
355 367
356 idev = sdata->dev->ip_ptr; 368 idev = __in_dev_get_rtnl(sdata->dev);
357 if (!idev) 369 if (!idev)
358 return NOTIFY_DONE; 370 return NOTIFY_DONE;
359 371
@@ -390,6 +402,80 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
390} 402}
391#endif 403#endif
392 404
405static int ieee80211_napi_poll(struct napi_struct *napi, int budget)
406{
407 struct ieee80211_local *local =
408 container_of(napi, struct ieee80211_local, napi);
409
410 return local->ops->napi_poll(&local->hw, budget);
411}
412
413void ieee80211_napi_schedule(struct ieee80211_hw *hw)
414{
415 struct ieee80211_local *local = hw_to_local(hw);
416
417 napi_schedule(&local->napi);
418}
419EXPORT_SYMBOL(ieee80211_napi_schedule);
420
421void ieee80211_napi_complete(struct ieee80211_hw *hw)
422{
423 struct ieee80211_local *local = hw_to_local(hw);
424
425 napi_complete(&local->napi);
426}
427EXPORT_SYMBOL(ieee80211_napi_complete);
428
429/* There isn't a lot of sense in it, but you can transmit anything you like */
430static const struct ieee80211_txrx_stypes
431ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
432 [NL80211_IFTYPE_ADHOC] = {
433 .tx = 0xffff,
434 .rx = BIT(IEEE80211_STYPE_ACTION >> 4),
435 },
436 [NL80211_IFTYPE_STATION] = {
437 .tx = 0xffff,
438 .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
439 BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
440 },
441 [NL80211_IFTYPE_AP] = {
442 .tx = 0xffff,
443 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
444 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
445 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
446 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
447 BIT(IEEE80211_STYPE_AUTH >> 4) |
448 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
449 BIT(IEEE80211_STYPE_ACTION >> 4),
450 },
451 [NL80211_IFTYPE_AP_VLAN] = {
452 /* copy AP */
453 .tx = 0xffff,
454 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
455 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
456 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
457 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
458 BIT(IEEE80211_STYPE_AUTH >> 4) |
459 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
460 BIT(IEEE80211_STYPE_ACTION >> 4),
461 },
462 [NL80211_IFTYPE_P2P_CLIENT] = {
463 .tx = 0xffff,
464 .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
465 BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
466 },
467 [NL80211_IFTYPE_P2P_GO] = {
468 .tx = 0xffff,
469 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
470 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
471 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
472 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
473 BIT(IEEE80211_STYPE_AUTH >> 4) |
474 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
475 BIT(IEEE80211_STYPE_ACTION >> 4),
476 },
477};
478
393struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, 479struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
394 const struct ieee80211_ops *ops) 480 const struct ieee80211_ops *ops)
395{ 481{
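
The NAPI glue above lets mac80211 own a NAPI context on a dummy netdev and poll the driver through the new napi_poll op, while ieee80211_napi_schedule() and ieee80211_napi_complete() are exported for the driver's RX path. A hedged sketch of driver usage; the mydrv_* helpers and the interrupt wiring are assumptions, only the two exported calls and the napi_poll signature come from this change:

static irqreturn_t mydrv_interrupt(int irq, void *data)
{
	struct ieee80211_hw *hw = data;

	mydrv_disable_rx_interrupts(hw->priv);	/* hypothetical helper */
	ieee80211_napi_schedule(hw);		/* defer RX processing to poll */
	return IRQ_HANDLED;
}

static int mydrv_napi_poll(struct ieee80211_hw *hw, int budget)
{
	int done = mydrv_rx_frames(hw->priv, budget);	/* hypothetical helper */

	if (done < budget) {
		ieee80211_napi_complete(hw);
		mydrv_enable_rx_interrupts(hw->priv);
	}
	return done;
}
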
@@ -419,6 +505,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
419 if (!wiphy) 505 if (!wiphy)
420 return NULL; 506 return NULL;
421 507
508 wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes;
509
422 wiphy->flags |= WIPHY_FLAG_NETNS_OK | 510 wiphy->flags |= WIPHY_FLAG_NETNS_OK |
423 WIPHY_FLAG_4ADDR_AP | 511 WIPHY_FLAG_4ADDR_AP |
424 WIPHY_FLAG_4ADDR_STATION; 512 WIPHY_FLAG_4ADDR_STATION;
@@ -455,7 +543,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
455 __hw_addr_init(&local->mc_list); 543 __hw_addr_init(&local->mc_list);
456 544
457 mutex_init(&local->iflist_mtx); 545 mutex_init(&local->iflist_mtx);
458 mutex_init(&local->scan_mtx); 546 mutex_init(&local->mtx);
459 547
460 mutex_init(&local->key_mtx); 548 mutex_init(&local->key_mtx);
461 spin_lock_init(&local->filter_lock); 549 spin_lock_init(&local->filter_lock);
@@ -494,6 +582,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
494 skb_queue_head_init(&local->skb_queue); 582 skb_queue_head_init(&local->skb_queue);
495 skb_queue_head_init(&local->skb_queue_unreliable); 583 skb_queue_head_init(&local->skb_queue_unreliable);
496 584
585 /* init dummy netdev for use w/ NAPI */
586 init_dummy_netdev(&local->napi_dev);
587
497 return local_to_hw(local); 588 return local_to_hw(local);
498} 589}
499EXPORT_SYMBOL(ieee80211_alloc_hw); 590EXPORT_SYMBOL(ieee80211_alloc_hw);
@@ -506,6 +597,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
506 int channels, max_bitrates; 597 int channels, max_bitrates;
507 bool supp_ht; 598 bool supp_ht;
508 static const u32 cipher_suites[] = { 599 static const u32 cipher_suites[] = {
600 /* keep WEP first, it may be removed below */
509 WLAN_CIPHER_SUITE_WEP40, 601 WLAN_CIPHER_SUITE_WEP40,
510 WLAN_CIPHER_SUITE_WEP104, 602 WLAN_CIPHER_SUITE_WEP104,
511 WLAN_CIPHER_SUITE_TKIP, 603 WLAN_CIPHER_SUITE_TKIP,
@@ -554,6 +646,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
554 /* mac80211 always supports monitor */ 646 /* mac80211 always supports monitor */
555 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); 647 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
556 648
649#ifndef CONFIG_MAC80211_MESH
650 /* mesh depends on Kconfig, but drivers should set it if they want */
651 local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT);
652#endif
653
654 /* mac80211 supports control port protocol changing */
655 local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
656
557 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 657 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
558 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; 658 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
559 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) 659 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
@@ -589,10 +689,41 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
589 if (local->hw.wiphy->max_scan_ie_len) 689 if (local->hw.wiphy->max_scan_ie_len)
590 local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; 690 local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
591 691
592 local->hw.wiphy->cipher_suites = cipher_suites; 692 /* Set up cipher suites unless driver already did */
593 local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); 693 if (!local->hw.wiphy->cipher_suites) {
594 if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) 694 local->hw.wiphy->cipher_suites = cipher_suites;
595 local->hw.wiphy->n_cipher_suites--; 695 local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
696 if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE))
697 local->hw.wiphy->n_cipher_suites--;
698 }
699 if (IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)) {
700 if (local->hw.wiphy->cipher_suites == cipher_suites) {
701 local->hw.wiphy->cipher_suites += 2;
702 local->hw.wiphy->n_cipher_suites -= 2;
703 } else {
704 u32 *suites;
705 int r, w = 0;
706
707 /* Filter out WEP */
708
709 suites = kmemdup(
710 local->hw.wiphy->cipher_suites,
711 sizeof(u32) * local->hw.wiphy->n_cipher_suites,
712 GFP_KERNEL);
713 if (!suites)
714 return -ENOMEM;
715 for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) {
716 u32 suite = local->hw.wiphy->cipher_suites[r];
717 if (suite == WLAN_CIPHER_SUITE_WEP40 ||
718 suite == WLAN_CIPHER_SUITE_WEP104)
719 continue;
720 suites[w++] = suite;
721 }
722 local->hw.wiphy->cipher_suites = suites;
723 local->hw.wiphy->n_cipher_suites = w;
724 local->wiphy_ciphers_allocated = true;
725 }
726 }
596 727
597 result = wiphy_register(local->hw.wiphy); 728 result = wiphy_register(local->hw.wiphy);
598 if (result < 0) 729 if (result < 0)
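
The default cipher-suite table is now installed only when the driver did not provide its own, and WEP is filtered out (by offsetting into the static table, or via kmemdup() for a driver-supplied one) when the WEP crypto transforms failed to initialize. A hedged sketch of a driver advertising its own suites before registration; the array contents are an example, not a requirement:

static const u32 mydrv_cipher_suites[] = {
	WLAN_CIPHER_SUITE_TKIP,
	WLAN_CIPHER_SUITE_CCMP,
	WLAN_CIPHER_SUITE_AES_CMAC,
};

static int mydrv_setup(struct ieee80211_hw *hw)
{
	hw->wiphy->cipher_suites = mydrv_cipher_suites;
	hw->wiphy->n_cipher_suites = ARRAY_SIZE(mydrv_cipher_suites);
	return ieee80211_register_hw(hw);
}
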
@@ -641,16 +772,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
641 772
642 result = ieee80211_wep_init(local); 773 result = ieee80211_wep_init(local);
643 if (result < 0) 774 if (result < 0)
644 printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", 775 wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
645 wiphy_name(local->hw.wiphy), result); 776 result);
646 777
647 rtnl_lock(); 778 rtnl_lock();
648 779
649 result = ieee80211_init_rate_ctrl_alg(local, 780 result = ieee80211_init_rate_ctrl_alg(local,
650 hw->rate_control_algorithm); 781 hw->rate_control_algorithm);
651 if (result < 0) { 782 if (result < 0) {
652 printk(KERN_DEBUG "%s: Failed to initialize rate control " 783 wiphy_debug(local->hw.wiphy,
653 "algorithm\n", wiphy_name(local->hw.wiphy)); 784 "Failed to initialize rate control algorithm\n");
654 goto fail_rate; 785 goto fail_rate;
655 } 786 }
656 787
@@ -659,8 +790,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
659 result = ieee80211_if_add(local, "wlan%d", NULL, 790 result = ieee80211_if_add(local, "wlan%d", NULL,
660 NL80211_IFTYPE_STATION, NULL); 791 NL80211_IFTYPE_STATION, NULL);
661 if (result) 792 if (result)
662 printk(KERN_WARNING "%s: Failed to add default virtual iface\n", 793 wiphy_warn(local->hw.wiphy,
663 wiphy_name(local->hw.wiphy)); 794 "Failed to add default virtual iface\n");
664 } 795 }
665 796
666 rtnl_unlock(); 797 rtnl_unlock();
@@ -683,6 +814,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
683 goto fail_ifa; 814 goto fail_ifa;
684#endif 815#endif
685 816
817 netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll,
818 local->hw.napi_weight);
819
686 return 0; 820 return 0;
687 821
688#ifdef CONFIG_INET 822#ifdef CONFIG_INET
@@ -703,6 +837,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
703 fail_workqueue: 837 fail_workqueue:
704 wiphy_unregister(local->hw.wiphy); 838 wiphy_unregister(local->hw.wiphy);
705 fail_wiphy_register: 839 fail_wiphy_register:
840 if (local->wiphy_ciphers_allocated)
841 kfree(local->hw.wiphy->cipher_suites);
706 kfree(local->int_scan_req); 842 kfree(local->int_scan_req);
707 return result; 843 return result;
708} 844}
@@ -738,6 +874,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
738 */ 874 */
739 del_timer_sync(&local->work_timer); 875 del_timer_sync(&local->work_timer);
740 876
877 cancel_work_sync(&local->restart_work);
741 cancel_work_sync(&local->reconfig_filter); 878 cancel_work_sync(&local->reconfig_filter);
742 879
743 ieee80211_clear_tx_pending(local); 880 ieee80211_clear_tx_pending(local);
@@ -746,8 +883,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
746 883
747 if (skb_queue_len(&local->skb_queue) || 884 if (skb_queue_len(&local->skb_queue) ||
748 skb_queue_len(&local->skb_queue_unreliable)) 885 skb_queue_len(&local->skb_queue_unreliable))
749 printk(KERN_WARNING "%s: skb_queue not empty\n", 886 wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
750 wiphy_name(local->hw.wiphy));
751 skb_queue_purge(&local->skb_queue); 887 skb_queue_purge(&local->skb_queue);
752 skb_queue_purge(&local->skb_queue_unreliable); 888 skb_queue_purge(&local->skb_queue_unreliable);
753 889
@@ -764,7 +900,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
764 struct ieee80211_local *local = hw_to_local(hw); 900 struct ieee80211_local *local = hw_to_local(hw);
765 901
766 mutex_destroy(&local->iflist_mtx); 902 mutex_destroy(&local->iflist_mtx);
767 mutex_destroy(&local->scan_mtx); 903 mutex_destroy(&local->mtx);
904
905 if (local->wiphy_ciphers_allocated)
906 kfree(local->hw.wiphy->cipher_suites);
768 907
769 wiphy_free(local->hw.wiphy); 908 wiphy_free(local->hw.wiphy);
770} 909}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b6c163ac22d..8b733cf6f3e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -54,6 +54,12 @@
54 */ 54 */
55#define IEEE80211_SIGNAL_AVE_WEIGHT 3 55#define IEEE80211_SIGNAL_AVE_WEIGHT 3
56 56
57/*
58 * How many Beacon frames need to have contributed to the signal average
59 * before signal change events start being indicated.
60 */
61#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
62
57#define TMR_RUNNING_TIMER 0 63#define TMR_RUNNING_TIMER 0
58#define TMR_RUNNING_CHANSW 1 64#define TMR_RUNNING_CHANSW 1
59 65
@@ -86,7 +92,7 @@ enum rx_mgmt_action {
86/* utils */ 92/* utils */
87static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) 93static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd)
88{ 94{
89 WARN_ON(!mutex_is_locked(&ifmgd->mtx)); 95 lockdep_assert_held(&ifmgd->mtx);
90} 96}
91 97
92/* 98/*
@@ -109,7 +115,7 @@ static void run_again(struct ieee80211_if_managed *ifmgd,
109 mod_timer(&ifmgd->timer, timeout); 115 mod_timer(&ifmgd->timer, timeout);
110} 116}
111 117
112static void mod_beacon_timer(struct ieee80211_sub_if_data *sdata) 118void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
113{ 119{
114 if (sdata->local->hw.flags & IEEE80211_HW_BEACON_FILTER) 120 if (sdata->local->hw.flags & IEEE80211_HW_BEACON_FILTER)
115 return; 121 return;
@@ -118,6 +124,19 @@ static void mod_beacon_timer(struct ieee80211_sub_if_data *sdata)
118 round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME)); 124 round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME));
119} 125}
120 126
127void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
128{
129 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
130
131 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
132 return;
133
134 mod_timer(&sdata->u.mgd.conn_mon_timer,
135 round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
136
137 ifmgd->probe_send_count = 0;
138}
139
121static int ecw2cw(int ecw) 140static int ecw2cw(int ecw)
122{ 141{
123 return (1 << ecw) - 1; 142 return (1 << ecw) - 1;
@@ -778,16 +797,17 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
778 params.uapsd = uapsd; 797 params.uapsd = uapsd;
779 798
780#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 799#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
781 printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " 800 wiphy_debug(local->hw.wiphy,
782 "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", 801 "WMM queue=%d aci=%d acm=%d aifs=%d "
783 wiphy_name(local->hw.wiphy), queue, aci, acm, 802 "cWmin=%d cWmax=%d txop=%d uapsd=%d\n",
784 params.aifs, params.cw_min, params.cw_max, params.txop, 803 queue, aci, acm,
785 params.uapsd); 804 params.aifs, params.cw_min, params.cw_max,
805 params.txop, params.uapsd);
786#endif 806#endif
787 if (drv_conf_tx(local, queue, &params)) 807 if (drv_conf_tx(local, queue, &params))
788 printk(KERN_DEBUG "%s: failed to set TX queue " 808 wiphy_debug(local->hw.wiphy,
789 "parameters for queue %d\n", 809 "failed to set TX queue parameters for queue %d\n",
790 wiphy_name(local->hw.wiphy), queue); 810 queue);
791 } 811 }
792 812
793 /* enable WMM or activate new settings */ 813 /* enable WMM or activate new settings */
@@ -990,6 +1010,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
990 1010
991 if (remove_sta) 1011 if (remove_sta)
992 sta_info_destroy_addr(sdata, bssid); 1012 sta_info_destroy_addr(sdata, bssid);
1013
1014 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
1015 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
1016 del_timer_sync(&sdata->u.mgd.timer);
1017 del_timer_sync(&sdata->u.mgd.chswitch_timer);
993} 1018}
994 1019
995void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, 1020void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
@@ -1006,21 +1031,26 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1006 if (is_multicast_ether_addr(hdr->addr1)) 1031 if (is_multicast_ether_addr(hdr->addr1))
1007 return; 1032 return;
1008 1033
1009 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) 1034 ieee80211_sta_reset_conn_monitor(sdata);
1010 return;
1011
1012 mod_timer(&sdata->u.mgd.conn_mon_timer,
1013 round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
1014} 1035}
1015 1036
1016static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) 1037static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1017{ 1038{
1018 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1039 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1019 const u8 *ssid; 1040 const u8 *ssid;
1041 u8 *dst = ifmgd->associated->bssid;
1042 u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3);
1043
1044 /*
1045 * Try sending broadcast probe requests for the last three
1046 * attempts after the initial unicast ones have failed, since
1047 * some buggy APs only support broadcast probe requests.
1048 */
1049 if (ifmgd->probe_send_count >= unicast_limit)
1050 dst = NULL;
1020 1051
1021 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); 1052 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
1022 ieee80211_send_probe_req(sdata, ifmgd->associated->bssid, 1053 ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0);
1023 ssid + 2, ssid[1], NULL, 0);
1024 1054
1025 ifmgd->probe_send_count++; 1055 ifmgd->probe_send_count++;
1026 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; 1056 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT;
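
With the hunk above, only the first max(1, IEEE80211_MAX_PROBE_TRIES - 3) probe requests are sent unicast to the associated BSSID; the remaining tries fall back to broadcast (dst = NULL). A stand-alone sketch of that selection, assuming an illustrative value of 5 for the maximum number of tries:

#include <stdio.h>

#define MAX_PROBE_TRIES	5			/* illustrative stand-in */
#define max(a, b)	((a) > (b) ? (a) : (b))

int main(void)
{
	int unicast_limit = max(1, MAX_PROBE_TRIES - 3);
	int probe_send_count;

	for (probe_send_count = 0; probe_send_count < MAX_PROBE_TRIES;
	     probe_send_count++)
		printf("probe %d -> %s\n", probe_send_count + 1,
		       probe_send_count >= unicast_limit ?
				"broadcast" : "unicast to BSSID");
	return 0;
}
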
@@ -1103,8 +1133,11 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
1103 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); 1133 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
1104 1134
1105 ieee80211_set_disassoc(sdata, true); 1135 ieee80211_set_disassoc(sdata, true);
1106 ieee80211_recalc_idle(local);
1107 mutex_unlock(&ifmgd->mtx); 1136 mutex_unlock(&ifmgd->mtx);
1137
1138 mutex_lock(&local->mtx);
1139 ieee80211_recalc_idle(local);
1140 mutex_unlock(&local->mtx);
1108 /* 1141 /*
1109 * must be outside lock due to cfg80211, 1142 * must be outside lock due to cfg80211,
1110 * but that's not a problem. 1143 * but that's not a problem.
@@ -1173,7 +1206,9 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
1173 sdata->name, bssid, reason_code); 1206 sdata->name, bssid, reason_code);
1174 1207
1175 ieee80211_set_disassoc(sdata, true); 1208 ieee80211_set_disassoc(sdata, true);
1209 mutex_lock(&sdata->local->mtx);
1176 ieee80211_recalc_idle(sdata->local); 1210 ieee80211_recalc_idle(sdata->local);
1211 mutex_unlock(&sdata->local->mtx);
1177 1212
1178 return RX_MGMT_CFG80211_DEAUTH; 1213 return RX_MGMT_CFG80211_DEAUTH;
1179} 1214}
@@ -1203,7 +1238,9 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
1203 sdata->name, mgmt->sa, reason_code); 1238 sdata->name, mgmt->sa, reason_code);
1204 1239
1205 ieee80211_set_disassoc(sdata, true); 1240 ieee80211_set_disassoc(sdata, true);
1241 mutex_lock(&sdata->local->mtx);
1206 ieee80211_recalc_idle(sdata->local); 1242 ieee80211_recalc_idle(sdata->local);
1243 mutex_unlock(&sdata->local->mtx);
1207 return RX_MGMT_CFG80211_DISASSOC; 1244 return RX_MGMT_CFG80211_DISASSOC;
1208} 1245}
1209 1246
@@ -1362,7 +1399,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
1362 * Also start the timer that will detect beacon loss. 1399 * Also start the timer that will detect beacon loss.
1363 */ 1400 */
1364 ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt); 1401 ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt);
1365 mod_beacon_timer(sdata); 1402 ieee80211_sta_reset_beacon_monitor(sdata);
1366 1403
1367 return true; 1404 return true;
1368} 1405}
@@ -1465,7 +1502,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
1465 * we have or will be receiving any beacons or data, so let's 1502 * we have or will be receiving any beacons or data, so let's
1466 * schedule the timers again, just in case. 1503 * schedule the timers again, just in case.
1467 */ 1504 */
1468 mod_beacon_timer(sdata); 1505 ieee80211_sta_reset_beacon_monitor(sdata);
1469 1506
1470 mod_timer(&ifmgd->conn_mon_timer, 1507 mod_timer(&ifmgd->conn_mon_timer,
1471 round_jiffies_up(jiffies + 1508 round_jiffies_up(jiffies +
@@ -1540,15 +1577,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1540 ifmgd->last_beacon_signal = rx_status->signal; 1577 ifmgd->last_beacon_signal = rx_status->signal;
1541 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) { 1578 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
1542 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE; 1579 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
1543 ifmgd->ave_beacon_signal = rx_status->signal; 1580 ifmgd->ave_beacon_signal = rx_status->signal * 16;
1544 ifmgd->last_cqm_event_signal = 0; 1581 ifmgd->last_cqm_event_signal = 0;
1582 ifmgd->count_beacon_signal = 1;
1545 } else { 1583 } else {
1546 ifmgd->ave_beacon_signal = 1584 ifmgd->ave_beacon_signal =
1547 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 + 1585 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
1548 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) * 1586 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
1549 ifmgd->ave_beacon_signal) / 16; 1587 ifmgd->ave_beacon_signal) / 16;
1588 ifmgd->count_beacon_signal++;
1550 } 1589 }
1551 if (bss_conf->cqm_rssi_thold && 1590 if (bss_conf->cqm_rssi_thold &&
1591 ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
1552 !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) { 1592 !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
1553 int sig = ifmgd->ave_beacon_signal / 16; 1593 int sig = ifmgd->ave_beacon_signal / 16;
1554 int last_event = ifmgd->last_cqm_event_signal; 1594 int last_event = ifmgd->last_cqm_event_signal;
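The beacon-RSSI averaging above keeps ave_beacon_signal scaled by 16 so the exponentially weighted moving average can be computed entirely in integer arithmetic, and the new count_beacon_signal field makes sure CQM events only fire once enough samples have been folded in. A standalone sketch of that fixed-point EWMA; the weight and minimum count are illustrative values, not the kernel constants:

        #include <stdio.h>

        #define SIGNAL_AVE_WEIGHT    3   /* illustrative; mac80211 uses IEEE80211_SIGNAL_AVE_WEIGHT */
        #define SIGNAL_AVE_MIN_COUNT 4   /* illustrative minimum sample count before acting on it */

        struct signal_ave {
                int ave;    /* average * 16 (fixed point) */
                int count;  /* number of samples folded in */
        };

        static void signal_ave_reset(struct signal_ave *s, int signal)
        {
                s->ave = signal * 16;
                s->count = 1;
        }

        static void signal_ave_add(struct signal_ave *s, int signal)
        {
                s->ave = (SIGNAL_AVE_WEIGHT * signal * 16 +
                          (16 - SIGNAL_AVE_WEIGHT) * s->ave) / 16;
                s->count++;
        }

        int main(void)
        {
                struct signal_ave s;
                int samples[] = { -50, -52, -55, -60, -58 };

                signal_ave_reset(&s, samples[0]);
                for (unsigned i = 1; i < sizeof(samples) / sizeof(samples[0]); i++)
                        signal_ave_add(&s, samples[i]);

                if (s.count >= SIGNAL_AVE_MIN_COUNT)
                        printf("average signal: %d dBm\n", s.ave / 16);
                return 0;
        }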
@@ -1588,7 +1628,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1588 * Push the beacon loss detection into the future since 1628 * Push the beacon loss detection into the future since
1589 * we are processing a beacon from the AP just now. 1629 * we are processing a beacon from the AP just now.
1590 */ 1630 */
1591 mod_beacon_timer(sdata); 1631 ieee80211_sta_reset_beacon_monitor(sdata);
1592 1632
1593 ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); 1633 ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
1594 ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable, 1634 ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable,
@@ -1751,7 +1791,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1751 struct ieee80211_local *local = sdata->local; 1791 struct ieee80211_local *local = sdata->local;
1752 struct ieee80211_work *wk; 1792 struct ieee80211_work *wk;
1753 1793
1754 mutex_lock(&local->work_mtx); 1794 mutex_lock(&local->mtx);
1755 list_for_each_entry(wk, &local->work_list, list) { 1795 list_for_each_entry(wk, &local->work_list, list) {
1756 if (wk->sdata != sdata) 1796 if (wk->sdata != sdata)
1757 continue; 1797 continue;
@@ -1783,7 +1823,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1783 free_work(wk); 1823 free_work(wk);
1784 break; 1824 break;
1785 } 1825 }
1786 mutex_unlock(&local->work_mtx); 1826 mutex_unlock(&local->mtx);
1787 1827
1788 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); 1828 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len);
1789 } 1829 }
@@ -1840,8 +1880,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1840 " after %dms, disconnecting.\n", 1880 " after %dms, disconnecting.\n",
1841 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 1881 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
1842 ieee80211_set_disassoc(sdata, true); 1882 ieee80211_set_disassoc(sdata, true);
1843 ieee80211_recalc_idle(local);
1844 mutex_unlock(&ifmgd->mtx); 1883 mutex_unlock(&ifmgd->mtx);
1884 mutex_lock(&local->mtx);
1885 ieee80211_recalc_idle(local);
1886 mutex_unlock(&local->mtx);
1845 /* 1887 /*
1846 * must be outside lock due to cfg80211, 1888 * must be outside lock due to cfg80211,
1847 * but that's not a problem. 1889 * but that's not a problem.
@@ -1917,6 +1959,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
1917 * time -- the code here is properly synchronised. 1959 * time -- the code here is properly synchronised.
1918 */ 1960 */
1919 1961
1962 cancel_work_sync(&ifmgd->request_smps_work);
1963
1920 cancel_work_sync(&ifmgd->beacon_connection_loss_work); 1964 cancel_work_sync(&ifmgd->beacon_connection_loss_work);
1921 if (del_timer_sync(&ifmgd->timer)) 1965 if (del_timer_sync(&ifmgd->timer))
1922 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); 1966 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
@@ -1952,6 +1996,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
1952 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); 1996 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1953 INIT_WORK(&ifmgd->beacon_connection_loss_work, 1997 INIT_WORK(&ifmgd->beacon_connection_loss_work,
1954 ieee80211_beacon_connection_loss_work); 1998 ieee80211_beacon_connection_loss_work);
1999 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work);
1955 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 2000 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
1956 (unsigned long) sdata); 2001 (unsigned long) sdata);
1957 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 2002 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
@@ -2249,6 +2294,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2249 else 2294 else
2250 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT; 2295 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT;
2251 2296
2297 sdata->control_port_protocol = req->crypto.control_port_ethertype;
2298 sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt;
2299
2252 ieee80211_add_work(wk); 2300 ieee80211_add_work(wk);
2253 return 0; 2301 return 0;
2254} 2302}
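The two lines added above copy the control-port EtherType and the "no encrypt" flag negotiated through cfg80211 into the sdata; a later rx.c hunk in this patch then matches received frames against sdata->control_port_protocol instead of hard-coding ETH_P_PAE. A small sketch of that check, assuming a structure with the same two fields and the EtherType stored in network byte order as in the kernel (types and helper names here are illustrative):

        #include <stdbool.h>
        #include <stdint.h>
        #include <arpa/inet.h>

        #define ETH_P_PAE 0x888E   /* IEEE 802.1X EAPOL */

        struct port_config {
                uint16_t control_port_protocol;   /* network byte order */
                bool control_port_no_encrypt;
        };

        /* Would this EtherType be treated as a control-port (e.g. EAPOL) frame? */
        static bool is_control_port_frame(const struct port_config *cfg, uint16_t h_proto_be)
        {
                return h_proto_be == cfg->control_port_protocol;
        }

        int main(void)
        {
                struct port_config cfg = {
                        .control_port_protocol = htons(ETH_P_PAE),
                        .control_port_no_encrypt = false,
                };

                return is_control_port_frame(&cfg, htons(ETH_P_PAE)) ? 0 : 1;
        }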
@@ -2275,7 +2323,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2275 2323
2276 mutex_unlock(&ifmgd->mtx); 2324 mutex_unlock(&ifmgd->mtx);
2277 2325
2278 mutex_lock(&local->work_mtx); 2326 mutex_lock(&local->mtx);
2279 list_for_each_entry(wk, &local->work_list, list) { 2327 list_for_each_entry(wk, &local->work_list, list) {
2280 if (wk->sdata != sdata) 2328 if (wk->sdata != sdata)
2281 continue; 2329 continue;
@@ -2294,7 +2342,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2294 free_work(wk); 2342 free_work(wk);
2295 break; 2343 break;
2296 } 2344 }
2297 mutex_unlock(&local->work_mtx); 2345 mutex_unlock(&local->mtx);
2298 2346
2299 /* 2347 /*
2300 * If somebody requests authentication and we haven't 2348 * If somebody requests authentication and we haven't
@@ -2319,7 +2367,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2319 if (assoc_bss) 2367 if (assoc_bss)
2320 sta_info_destroy_addr(sdata, bssid); 2368 sta_info_destroy_addr(sdata, bssid);
2321 2369
2370 mutex_lock(&sdata->local->mtx);
2322 ieee80211_recalc_idle(sdata->local); 2371 ieee80211_recalc_idle(sdata->local);
2372 mutex_unlock(&sdata->local->mtx);
2323 2373
2324 return 0; 2374 return 0;
2325} 2375}
@@ -2357,7 +2407,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2357 cookie, !req->local_state_change); 2407 cookie, !req->local_state_change);
2358 sta_info_destroy_addr(sdata, bssid); 2408 sta_info_destroy_addr(sdata, bssid);
2359 2409
2410 mutex_lock(&sdata->local->mtx);
2360 ieee80211_recalc_idle(sdata->local); 2411 ieee80211_recalc_idle(sdata->local);
2412 mutex_unlock(&sdata->local->mtx);
2361 2413
2362 return 0; 2414 return 0;
2363} 2415}
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index c36b1911987..4b564091e51 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -22,12 +22,16 @@
22static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) 22static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
23{ 23{
24 struct ieee80211_local *local = sdata->local; 24 struct ieee80211_local *local = sdata->local;
25 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
25 26
26 local->offchannel_ps_enabled = false; 27 local->offchannel_ps_enabled = false;
27 28
28 /* FIXME: what to do when local->pspolling is true? */ 29 /* FIXME: what to do when local->pspolling is true? */
29 30
30 del_timer_sync(&local->dynamic_ps_timer); 31 del_timer_sync(&local->dynamic_ps_timer);
32 del_timer_sync(&ifmgd->bcn_mon_timer);
33 del_timer_sync(&ifmgd->conn_mon_timer);
34
31 cancel_work_sync(&local->dynamic_ps_enable_work); 35 cancel_work_sync(&local->dynamic_ps_enable_work);
32 36
33 if (local->hw.conf.flags & IEEE80211_CONF_PS) { 37 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
@@ -85,6 +89,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
85 mod_timer(&local->dynamic_ps_timer, jiffies + 89 mod_timer(&local->dynamic_ps_timer, jiffies +
86 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); 90 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
87 } 91 }
92
93 ieee80211_sta_reset_beacon_monitor(sdata);
94 ieee80211_sta_reset_conn_monitor(sdata);
88} 95}
89 96
90void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) 97void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
@@ -112,8 +119,10 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
112 * used from user space controlled off-channel operations. 119 * used from user space controlled off-channel operations.
113 */ 120 */
114 if (sdata->vif.type != NL80211_IFTYPE_STATION && 121 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
115 sdata->vif.type != NL80211_IFTYPE_MONITOR) 122 sdata->vif.type != NL80211_IFTYPE_MONITOR) {
123 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
116 netif_tx_stop_all_queues(sdata->dev); 124 netif_tx_stop_all_queues(sdata->dev);
125 }
117 } 126 }
118 mutex_unlock(&local->iflist_mtx); 127 mutex_unlock(&local->iflist_mtx);
119} 128}
@@ -131,6 +140,7 @@ void ieee80211_offchannel_stop_station(struct ieee80211_local *local)
131 continue; 140 continue;
132 141
133 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 142 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
143 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
134 netif_tx_stop_all_queues(sdata->dev); 144 netif_tx_stop_all_queues(sdata->dev);
135 if (sdata->u.mgd.associated) 145 if (sdata->u.mgd.associated)
136 ieee80211_offchannel_ps_enable(sdata); 146 ieee80211_offchannel_ps_enable(sdata);
@@ -155,8 +165,20 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
155 ieee80211_offchannel_ps_disable(sdata); 165 ieee80211_offchannel_ps_disable(sdata);
156 } 166 }
157 167
158 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) 168 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
169 clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
170 /*
171 * This may wake up queues even though the driver
172 * currently has them stopped. This is not very
173 * likely, since the driver won't have gotten any
174 * (or hardly any) new packets while we weren't
175 * on the right channel, and even if it happens
176 * it will at most lead to queueing up one more
177 * packet per queue in mac80211 rather than on
178 * the interface qdisc.
179 */
159 netif_tx_wake_all_queues(sdata->dev); 180 netif_tx_wake_all_queues(sdata->dev);
181 }
160 182
161 /* re-enable beaconing */ 183 /* re-enable beaconing */
162 if (enable_beaconing && 184 if (enable_beaconing &&
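The offchannel.c changes above bracket the off-channel period with an SDATA_STATE_OFFCHANNEL bit: it is set when the TX queues are stopped before leaving the operating channel and cleared before the queues are woken again on return. A standalone sketch of that set-bit/stop, clear-bit/wake pairing, using C11 atomics in place of the kernel's set_bit()/clear_bit() and netif helpers (all names are illustrative):

        #include <stdatomic.h>
        #include <stdio.h>

        #define STATE_OFFCHANNEL (1u << 0)   /* stand-in for SDATA_STATE_OFFCHANNEL */

        static atomic_uint sdata_state;

        static void tx_stop_all_queues(void) { puts("queues stopped"); }
        static void tx_wake_all_queues(void) { puts("queues woken"); }

        static void offchannel_enter(void)
        {
                atomic_fetch_or(&sdata_state, STATE_OFFCHANNEL);
                tx_stop_all_queues();
        }

        static void offchannel_return(void)
        {
                atomic_fetch_and(&sdata_state, ~STATE_OFFCHANNEL);
                /* Waking the queues only after clearing the bit mirrors the patch above. */
                tx_wake_all_queues();
        }

        int main(void)
        {
                offchannel_enter();
                offchannel_return();
                return 0;
        }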
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d287fde0431..ce671dfd238 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -12,7 +12,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
12 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
13 struct sta_info *sta; 13 struct sta_info *sta;
14 14
15 ieee80211_scan_cancel(local); 15 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)))
16 ieee80211_scan_cancel(local);
16 17
17 ieee80211_stop_queues_by_reason(hw, 18 ieee80211_stop_queues_by_reason(hw,
18 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 19 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index be04d46110f..b0cc385bf98 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -207,7 +207,7 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc)
207 207
208 fc = hdr->frame_control; 208 fc = hdr->frame_control;
209 209
210 return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); 210 return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc);
211} 211}
212 212
213static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) 213static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx)
@@ -368,8 +368,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
368 368
369 ref = rate_control_alloc(name, local); 369 ref = rate_control_alloc(name, local);
370 if (!ref) { 370 if (!ref) {
371 printk(KERN_WARNING "%s: Failed to select rate control " 371 wiphy_warn(local->hw.wiphy,
372 "algorithm\n", wiphy_name(local->hw.wiphy)); 372 "Failed to select rate control algorithm\n");
373 return -ENOENT; 373 return -ENOENT;
374 } 374 }
375 375
@@ -380,9 +380,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
380 sta_info_flush(local, NULL); 380 sta_info_flush(local, NULL);
381 } 381 }
382 382
383 printk(KERN_DEBUG "%s: Selected rate control " 383 wiphy_debug(local->hw.wiphy, "Selected rate control algorithm '%s'\n",
384 "algorithm '%s'\n", wiphy_name(local->hw.wiphy), 384 ref->ops->name);
385 ref->ops->name);
386 385
387 return 0; 386 return 0;
388} 387}
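The rate.c hunks above replace open-coded printk(KERN_DEBUG "%s: ...", wiphy_name(...)) calls with the wiphy_warn()/wiphy_debug() helpers, which prepend the wiphy name automatically. A minimal userspace model of that kind of wrapper macro, only to illustrate the pattern rather than reproduce the kernel definition:

        #include <stdio.h>

        struct wiphy { const char *name; };

        /* Illustrative wrapper: prefix every message with the device name. */
        #define wiphy_dbg(w, fmt, ...) \
                fprintf(stderr, "%s: " fmt, (w)->name, ##__VA_ARGS__)

        int main(void)
        {
                struct wiphy phy = { .name = "phy0" };

                wiphy_dbg(&phy, "Selected rate control algorithm '%s'\n", "minstrel_ht");
                return 0;
        }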
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index 47438b4a9af..135f36fd4d5 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -162,7 +162,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
162 file_info->next_entry = (file_info->next_entry + 1) % 162 file_info->next_entry = (file_info->next_entry + 1) %
163 RC_PID_EVENT_RING_SIZE; 163 RC_PID_EVENT_RING_SIZE;
164 164
165 /* Print information about the event. Note that userpace needs to 165 /* Print information about the event. Note that userspace needs to
166 * provide large enough buffers. */ 166 * provide large enough buffers. */
167 length = length < RC_PID_PRINT_BUF_SIZE ? 167 length = length < RC_PID_PRINT_BUF_SIZE ?
168 length : RC_PID_PRINT_BUF_SIZE; 168 length : RC_PID_PRINT_BUF_SIZE;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fa0f37e4afe..c0368152b72 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -538,20 +538,12 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
538 int index, 538 int index,
539 struct sk_buff_head *frames) 539 struct sk_buff_head *frames)
540{ 540{
541 struct ieee80211_supported_band *sband;
542 struct ieee80211_rate *rate = NULL;
543 struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; 541 struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
544 struct ieee80211_rx_status *status;
545 542
546 if (!skb) 543 if (!skb)
547 goto no_frame; 544 goto no_frame;
548 545
549 status = IEEE80211_SKB_RXCB(skb); 546 /* release the frame from the reorder ring buffer */
550
551 /* release the reordered frames to stack */
552 sband = hw->wiphy->bands[status->band];
553 if (!(status->flag & RX_FLAG_HT))
554 rate = &sband->bitrates[status->rate_idx];
555 tid_agg_rx->stored_mpdu_num--; 547 tid_agg_rx->stored_mpdu_num--;
556 tid_agg_rx->reorder_buf[index] = NULL; 548 tid_agg_rx->reorder_buf[index] = NULL;
557 __skb_queue_tail(frames, skb); 549 __skb_queue_tail(frames, skb);
@@ -580,9 +572,78 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
580 * frames that have not yet been received are assumed to be lost and the skb 572 * frames that have not yet been received are assumed to be lost and the skb
581 * can be released for processing. This may also release other skb's from the 573 * can be released for processing. This may also release other skb's from the
582 * reorder buffer if there are no additional gaps between the frames. 574 * reorder buffer if there are no additional gaps between the frames.
575 *
576 * Callers must hold tid_agg_rx->reorder_lock.
583 */ 577 */
584#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) 578#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10)
585 579
580static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
581 struct tid_ampdu_rx *tid_agg_rx,
582 struct sk_buff_head *frames)
583{
584 int index, j;
585
586 /* release the buffer until next missing frame */
587 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
588 tid_agg_rx->buf_size;
589 if (!tid_agg_rx->reorder_buf[index] &&
590 tid_agg_rx->stored_mpdu_num > 1) {
591 /*
592 * No buffers ready to be released, but check whether any
593 * frames in the reorder buffer have timed out.
594 */
595 int skipped = 1;
596 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
597 j = (j + 1) % tid_agg_rx->buf_size) {
598 if (!tid_agg_rx->reorder_buf[j]) {
599 skipped++;
600 continue;
601 }
602 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
603 HT_RX_REORDER_BUF_TIMEOUT))
604 goto set_release_timer;
605
606#ifdef CONFIG_MAC80211_HT_DEBUG
607 if (net_ratelimit())
608 wiphy_debug(hw->wiphy,
609 "release an RX reorder frame due to timeout on earlier frames\n");
610#endif
611 ieee80211_release_reorder_frame(hw, tid_agg_rx,
612 j, frames);
613
614 /*
615 * Increment the head seq# also for the skipped slots.
616 */
617 tid_agg_rx->head_seq_num =
618 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
619 skipped = 0;
620 }
621 } else while (tid_agg_rx->reorder_buf[index]) {
622 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
623 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
624 tid_agg_rx->buf_size;
625 }
626
627 if (tid_agg_rx->stored_mpdu_num) {
628 j = index = seq_sub(tid_agg_rx->head_seq_num,
629 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
630
631 for (; j != (index - 1) % tid_agg_rx->buf_size;
632 j = (j + 1) % tid_agg_rx->buf_size) {
633 if (tid_agg_rx->reorder_buf[j])
634 break;
635 }
636
637 set_release_timer:
638
639 mod_timer(&tid_agg_rx->reorder_timer,
640 tid_agg_rx->reorder_time[j] +
641 HT_RX_REORDER_BUF_TIMEOUT);
642 } else {
643 del_timer(&tid_agg_rx->reorder_timer);
644 }
645}
646
586/* 647/*
587 * As this function belongs to the RX path it must be under 648 * As this function belongs to the RX path it must be under
588 * rcu_read_lock protection. It returns false if the frame 649 * rcu_read_lock protection. It returns false if the frame
@@ -598,14 +659,16 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
598 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; 659 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
599 u16 head_seq_num, buf_size; 660 u16 head_seq_num, buf_size;
600 int index; 661 int index;
662 bool ret = true;
601 663
602 buf_size = tid_agg_rx->buf_size; 664 buf_size = tid_agg_rx->buf_size;
603 head_seq_num = tid_agg_rx->head_seq_num; 665 head_seq_num = tid_agg_rx->head_seq_num;
604 666
667 spin_lock(&tid_agg_rx->reorder_lock);
605 /* frame with out of date sequence number */ 668 /* frame with out of date sequence number */
606 if (seq_less(mpdu_seq_num, head_seq_num)) { 669 if (seq_less(mpdu_seq_num, head_seq_num)) {
607 dev_kfree_skb(skb); 670 dev_kfree_skb(skb);
608 return true; 671 goto out;
609 } 672 }
610 673
611 /* 674 /*
@@ -626,7 +689,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
626 /* check if we already stored this frame */ 689 /* check if we already stored this frame */
627 if (tid_agg_rx->reorder_buf[index]) { 690 if (tid_agg_rx->reorder_buf[index]) {
628 dev_kfree_skb(skb); 691 dev_kfree_skb(skb);
629 return true; 692 goto out;
630 } 693 }
631 694
632 /* 695 /*
@@ -636,58 +699,19 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
636 if (mpdu_seq_num == tid_agg_rx->head_seq_num && 699 if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
637 tid_agg_rx->stored_mpdu_num == 0) { 700 tid_agg_rx->stored_mpdu_num == 0) {
638 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); 701 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
639 return false; 702 ret = false;
703 goto out;
640 } 704 }
641 705
642 /* put the frame in the reordering buffer */ 706 /* put the frame in the reordering buffer */
643 tid_agg_rx->reorder_buf[index] = skb; 707 tid_agg_rx->reorder_buf[index] = skb;
644 tid_agg_rx->reorder_time[index] = jiffies; 708 tid_agg_rx->reorder_time[index] = jiffies;
645 tid_agg_rx->stored_mpdu_num++; 709 tid_agg_rx->stored_mpdu_num++;
646 /* release the buffer until next missing frame */ 710 ieee80211_sta_reorder_release(hw, tid_agg_rx, frames);
647 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
648 tid_agg_rx->buf_size;
649 if (!tid_agg_rx->reorder_buf[index] &&
650 tid_agg_rx->stored_mpdu_num > 1) {
651 /*
652 * No buffers ready to be released, but check whether any
653 * frames in the reorder buffer have timed out.
654 */
655 int j;
656 int skipped = 1;
657 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
658 j = (j + 1) % tid_agg_rx->buf_size) {
659 if (!tid_agg_rx->reorder_buf[j]) {
660 skipped++;
661 continue;
662 }
663 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
664 HT_RX_REORDER_BUF_TIMEOUT))
665 break;
666
667#ifdef CONFIG_MAC80211_HT_DEBUG
668 if (net_ratelimit())
669 printk(KERN_DEBUG "%s: release an RX reorder "
670 "frame due to timeout on earlier "
671 "frames\n",
672 wiphy_name(hw->wiphy));
673#endif
674 ieee80211_release_reorder_frame(hw, tid_agg_rx,
675 j, frames);
676
677 /*
678 * Increment the head seq# also for the skipped slots.
679 */
680 tid_agg_rx->head_seq_num =
681 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
682 skipped = 0;
683 }
684 } else while (tid_agg_rx->reorder_buf[index]) {
685 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
686 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
687 tid_agg_rx->buf_size;
688 }
689 711
690 return true; 712 out:
713 spin_unlock(&tid_agg_rx->reorder_lock);
714 return ret;
691} 715}
692 716
693/* 717/*
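The reordering rework above factors the release logic into ieee80211_sta_reorder_release(), which must run under the new per-TID reorder_lock and indexes the ring buffer with seq_sub(head_seq_num, ssn) % buf_size. A small standalone sketch of that modulo-4096 sequence arithmetic and the in-order release branch (deliberately simplified: no timeout or timer handling, and all names are illustrative):

        #include <stdio.h>
        #include <stdbool.h>

        #define SEQ_MASK 0xfff   /* 802.11 sequence numbers are 12 bits */

        static unsigned seq_sub(unsigned a, unsigned b) { return (a - b) & SEQ_MASK; }
        static unsigned seq_inc(unsigned a)             { return (a + 1) & SEQ_MASK; }

        struct reorder {
                bool filled[8];          /* stand-in for reorder_buf[] entries */
                unsigned buf_size;       /* 8 here */
                unsigned ssn;            /* starting sequence number of the session */
                unsigned head_seq_num;   /* next sequence number expected to be released */
        };

        /* Release consecutive frames starting at head_seq_num, like the in-order branch above. */
        static void reorder_release(struct reorder *r)
        {
                unsigned index = seq_sub(r->head_seq_num, r->ssn) % r->buf_size;

                while (r->filled[index]) {
                        printf("release seq %u\n", r->head_seq_num);
                        r->filled[index] = false;
                        r->head_seq_num = seq_inc(r->head_seq_num);
                        index = seq_sub(r->head_seq_num, r->ssn) % r->buf_size;
                }
        }

        int main(void)
        {
                struct reorder r = { .buf_size = 8, .ssn = 4094, .head_seq_num = 4094 };

                /* Frames 4094, 4095 and 0 arrive; the sequence space wraps at 4096. */
                r.filled[seq_sub(4094, r.ssn) % r.buf_size] = true;
                r.filled[seq_sub(4095, r.ssn) % r.buf_size] = true;
                r.filled[seq_sub(0, r.ssn) % r.buf_size] = true;
                reorder_release(&r);
                return 0;
        }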
@@ -873,6 +897,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
873 897
874 if (!is_multicast_ether_addr(hdr->addr1) && stakey) { 898 if (!is_multicast_ether_addr(hdr->addr1) && stakey) {
875 rx->key = stakey; 899 rx->key = stakey;
900 if ((status->flag & RX_FLAG_DECRYPTED) &&
901 (status->flag & RX_FLAG_IV_STRIPPED))
902 return RX_CONTINUE;
876 /* Skip decryption if the frame is not protected. */ 903 /* Skip decryption if the frame is not protected. */
877 if (!ieee80211_has_protected(fc)) 904 if (!ieee80211_has_protected(fc))
878 return RX_CONTINUE; 905 return RX_CONTINUE;
@@ -935,7 +962,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
935 * pairwise or station-to-station keys, but for WEP we allow 962 * pairwise or station-to-station keys, but for WEP we allow
936 * using a key index as well. 963 * using a key index as well.
937 */ 964 */
938 if (rx->key && rx->key->conf.alg != ALG_WEP && 965 if (rx->key && rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
966 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
939 !is_multicast_ether_addr(hdr->addr1)) 967 !is_multicast_ether_addr(hdr->addr1))
940 rx->key = NULL; 968 rx->key = NULL;
941 } 969 }
@@ -951,8 +979,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
951 return RX_DROP_UNUSABLE; 979 return RX_DROP_UNUSABLE;
952 /* the hdr variable is invalid now! */ 980 /* the hdr variable is invalid now! */
953 981
954 switch (rx->key->conf.alg) { 982 switch (rx->key->conf.cipher) {
955 case ALG_WEP: 983 case WLAN_CIPHER_SUITE_WEP40:
984 case WLAN_CIPHER_SUITE_WEP104:
956 /* Check for weak IVs if possible */ 985 /* Check for weak IVs if possible */
957 if (rx->sta && ieee80211_is_data(fc) && 986 if (rx->sta && ieee80211_is_data(fc) &&
958 (!(status->flag & RX_FLAG_IV_STRIPPED) || 987 (!(status->flag & RX_FLAG_IV_STRIPPED) ||
@@ -962,15 +991,21 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
962 991
963 result = ieee80211_crypto_wep_decrypt(rx); 992 result = ieee80211_crypto_wep_decrypt(rx);
964 break; 993 break;
965 case ALG_TKIP: 994 case WLAN_CIPHER_SUITE_TKIP:
966 result = ieee80211_crypto_tkip_decrypt(rx); 995 result = ieee80211_crypto_tkip_decrypt(rx);
967 break; 996 break;
968 case ALG_CCMP: 997 case WLAN_CIPHER_SUITE_CCMP:
969 result = ieee80211_crypto_ccmp_decrypt(rx); 998 result = ieee80211_crypto_ccmp_decrypt(rx);
970 break; 999 break;
971 case ALG_AES_CMAC: 1000 case WLAN_CIPHER_SUITE_AES_CMAC:
972 result = ieee80211_crypto_aes_cmac_decrypt(rx); 1001 result = ieee80211_crypto_aes_cmac_decrypt(rx);
973 break; 1002 break;
1003 default:
1004 /*
1005 * We can reach here only with HW-only algorithms
1006 * but why didn't it decrypt the frame?!
1007 */
1008 return RX_DROP_UNUSABLE;
974 } 1009 }
975 1010
976 /* either the frame has been decrypted or will be dropped */ 1011 /* either the frame has been decrypted or will be dropped */
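The decrypt handler above now dispatches on rx->key->conf.cipher using the WLAN_CIPHER_SUITE_* selectors instead of the old ALG_* enum, and returns RX_DROP_UNUSABLE for any cipher it has no software fallback for. The selectors are the standard IEEE 802.11 cipher suite values (OUI 00-0F-AC plus a type byte); a compact standalone sketch of such a dispatch, where the macro names and return handling are illustrative rather than the kernel's:

        #include <stdint.h>
        #include <stdio.h>

        /* IEEE 802.11 cipher suite selectors: 00-0F-AC:<type> packed into 32 bits. */
        #define CIPHER_WEP40    0x000FAC01
        #define CIPHER_TKIP     0x000FAC02
        #define CIPHER_CCMP     0x000FAC04
        #define CIPHER_WEP104   0x000FAC05
        #define CIPHER_AES_CMAC 0x000FAC06

        enum rx_result { RX_CONTINUE, RX_DROP_UNUSABLE };

        static enum rx_result decrypt(uint32_t cipher)
        {
                switch (cipher) {
                case CIPHER_WEP40:
                case CIPHER_WEP104:
                        puts("WEP decrypt");
                        return RX_CONTINUE;
                case CIPHER_TKIP:
                        puts("TKIP decrypt");
                        return RX_CONTINUE;
                case CIPHER_CCMP:
                        puts("CCMP decrypt");
                        return RX_CONTINUE;
                case CIPHER_AES_CMAC:
                        puts("BIP (AES-CMAC) decrypt");
                        return RX_CONTINUE;
                default:
                        /* Hardware-only cipher that was not decrypted: drop the frame. */
                        return RX_DROP_UNUSABLE;
                }
        }

        int main(void)
        {
                return decrypt(CIPHER_CCMP) == RX_CONTINUE ? 0 : 1;
        }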
@@ -1265,7 +1300,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1265 /* This is the first fragment of a new frame. */ 1300 /* This is the first fragment of a new frame. */
1266 entry = ieee80211_reassemble_add(rx->sdata, frag, seq, 1301 entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
1267 rx->queue, &(rx->skb)); 1302 rx->queue, &(rx->skb));
1268 if (rx->key && rx->key->conf.alg == ALG_CCMP && 1303 if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP &&
1269 ieee80211_has_protected(fc)) { 1304 ieee80211_has_protected(fc)) {
1270 int queue = ieee80211_is_mgmt(fc) ? 1305 int queue = ieee80211_is_mgmt(fc) ?
1271 NUM_RX_DATA_QUEUES : rx->queue; 1306 NUM_RX_DATA_QUEUES : rx->queue;
@@ -1294,7 +1329,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1294 int i; 1329 int i;
1295 u8 pn[CCMP_PN_LEN], *rpn; 1330 u8 pn[CCMP_PN_LEN], *rpn;
1296 int queue; 1331 int queue;
1297 if (!rx->key || rx->key->conf.alg != ALG_CCMP) 1332 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP)
1298 return RX_DROP_UNUSABLE; 1333 return RX_DROP_UNUSABLE;
1299 memcpy(pn, entry->last_pn, CCMP_PN_LEN); 1334 memcpy(pn, entry->last_pn, CCMP_PN_LEN);
1300 for (i = CCMP_PN_LEN - 1; i >= 0; i--) { 1335 for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -1492,7 +1527,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
1492 * Allow EAPOL frames to us/the PAE group address regardless 1527 * Allow EAPOL frames to us/the PAE group address regardless
1493 * of whether the frame was encrypted or not. 1528 * of whether the frame was encrypted or not.
1494 */ 1529 */
1495 if (ehdr->h_proto == htons(ETH_P_PAE) && 1530 if (ehdr->h_proto == rx->sdata->control_port_protocol &&
1496 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 || 1531 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 ||
1497 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0)) 1532 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0))
1498 return true; 1533 return true;
@@ -1909,13 +1944,36 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
1909} 1944}
1910 1945
1911static ieee80211_rx_result debug_noinline 1946static ieee80211_rx_result debug_noinline
1947ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
1948{
1949 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1950
1951 /*
1952 * From here on, look only at management frames.
1953 * Data and control frames are already handled,
1954 * and unknown (reserved) frames are useless.
1955 */
1956 if (rx->skb->len < 24)
1957 return RX_DROP_MONITOR;
1958
1959 if (!ieee80211_is_mgmt(mgmt->frame_control))
1960 return RX_DROP_MONITOR;
1961
1962 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
1963 return RX_DROP_MONITOR;
1964
1965 if (ieee80211_drop_unencrypted_mgmt(rx))
1966 return RX_DROP_UNUSABLE;
1967
1968 return RX_CONTINUE;
1969}
1970
1971static ieee80211_rx_result debug_noinline
1912ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 1972ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1913{ 1973{
1914 struct ieee80211_local *local = rx->local; 1974 struct ieee80211_local *local = rx->local;
1915 struct ieee80211_sub_if_data *sdata = rx->sdata; 1975 struct ieee80211_sub_if_data *sdata = rx->sdata;
1916 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; 1976 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1917 struct sk_buff *nskb;
1918 struct ieee80211_rx_status *status;
1919 int len = rx->skb->len; 1977 int len = rx->skb->len;
1920 1978
1921 if (!ieee80211_is_action(mgmt->frame_control)) 1979 if (!ieee80211_is_action(mgmt->frame_control))
@@ -1931,9 +1989,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1931 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1989 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
1932 return RX_DROP_UNUSABLE; 1990 return RX_DROP_UNUSABLE;
1933 1991
1934 if (ieee80211_drop_unencrypted_mgmt(rx))
1935 return RX_DROP_UNUSABLE;
1936
1937 switch (mgmt->u.action.category) { 1992 switch (mgmt->u.action.category) {
1938 case WLAN_CATEGORY_BACK: 1993 case WLAN_CATEGORY_BACK:
1939 /* 1994 /*
@@ -2024,17 +2079,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2024 goto queue; 2079 goto queue;
2025 } 2080 }
2026 2081
2082 return RX_CONTINUE;
2083
2027 invalid: 2084 invalid:
2028 /* 2085 rx->flags |= IEEE80211_MALFORMED_ACTION_FRM;
2029 * For AP mode, hostapd is responsible for handling any action 2086 /* will return in the next handlers */
2030 * frames that we didn't handle, including returning unknown 2087 return RX_CONTINUE;
2031 * ones. For all other modes we will return them to the sender, 2088
2032 * setting the 0x80 bit in the action category, as required by 2089 handled:
2033 * 802.11-2007 7.3.1.11. 2090 if (rx->sta)
2034 */ 2091 rx->sta->rx_packets++;
2035 if (sdata->vif.type == NL80211_IFTYPE_AP || 2092 dev_kfree_skb(rx->skb);
2036 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 2093 return RX_QUEUED;
2037 return RX_DROP_MONITOR; 2094
2095 queue:
2096 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2097 skb_queue_tail(&sdata->skb_queue, rx->skb);
2098 ieee80211_queue_work(&local->hw, &sdata->work);
2099 if (rx->sta)
2100 rx->sta->rx_packets++;
2101 return RX_QUEUED;
2102}
2103
2104static ieee80211_rx_result debug_noinline
2105ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
2106{
2107 struct ieee80211_rx_status *status;
2108
2109 /* skip known-bad action frames and return them in the next handler */
2110 if (rx->flags & IEEE80211_MALFORMED_ACTION_FRM)
2111 return RX_CONTINUE;
2038 2112
2039 /* 2113 /*
2040 * Getting here means the kernel doesn't know how to handle 2114 * Getting here means the kernel doesn't know how to handle
@@ -2044,10 +2118,44 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2044 */ 2118 */
2045 status = IEEE80211_SKB_RXCB(rx->skb); 2119 status = IEEE80211_SKB_RXCB(rx->skb);
2046 2120
2047 if (cfg80211_rx_action(rx->sdata->dev, status->freq, 2121 if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq,
2048 rx->skb->data, rx->skb->len, 2122 rx->skb->data, rx->skb->len,
2049 GFP_ATOMIC)) 2123 GFP_ATOMIC)) {
2050 goto handled; 2124 if (rx->sta)
2125 rx->sta->rx_packets++;
2126 dev_kfree_skb(rx->skb);
2127 return RX_QUEUED;
2128 }
2129
2130
2131 return RX_CONTINUE;
2132}
2133
2134static ieee80211_rx_result debug_noinline
2135ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
2136{
2137 struct ieee80211_local *local = rx->local;
2138 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
2139 struct sk_buff *nskb;
2140 struct ieee80211_sub_if_data *sdata = rx->sdata;
2141
2142 if (!ieee80211_is_action(mgmt->frame_control))
2143 return RX_CONTINUE;
2144
2145 /*
2146 * For AP mode, hostapd is responsible for handling any action
2147 * frames that we didn't handle, including returning unknown
2148 * ones. For all other modes we will return them to the sender,
2149 * setting the 0x80 bit in the action category, as required by
2150 * 802.11-2007 7.3.1.11.
2151 * Newer versions of hostapd shall also use the management frame
2152 * registration mechanisms, but older ones still use cooked
2153 * monitor interfaces so push all frames there.
2154 */
2155 if (!(rx->flags & IEEE80211_MALFORMED_ACTION_FRM) &&
2156 (sdata->vif.type == NL80211_IFTYPE_AP ||
2157 sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
2158 return RX_DROP_MONITOR;
2051 2159
2052 /* do not return rejected action frames */ 2160 /* do not return rejected action frames */
2053 if (mgmt->u.action.category & 0x80) 2161 if (mgmt->u.action.category & 0x80)
@@ -2066,20 +2174,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2066 2174
2067 ieee80211_tx_skb(rx->sdata, nskb); 2175 ieee80211_tx_skb(rx->sdata, nskb);
2068 } 2176 }
2069
2070 handled:
2071 if (rx->sta)
2072 rx->sta->rx_packets++;
2073 dev_kfree_skb(rx->skb); 2177 dev_kfree_skb(rx->skb);
2074 return RX_QUEUED; 2178 return RX_QUEUED;
2075
2076 queue:
2077 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2078 skb_queue_tail(&sdata->skb_queue, rx->skb);
2079 ieee80211_queue_work(&local->hw, &sdata->work);
2080 if (rx->sta)
2081 rx->sta->rx_packets++;
2082 return RX_QUEUED;
2083} 2179}
2084 2180
2085static ieee80211_rx_result debug_noinline 2181static ieee80211_rx_result debug_noinline
@@ -2090,15 +2186,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
2090 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; 2186 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
2091 __le16 stype; 2187 __le16 stype;
2092 2188
2093 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
2094 return RX_DROP_MONITOR;
2095
2096 if (rx->skb->len < 24)
2097 return RX_DROP_MONITOR;
2098
2099 if (ieee80211_drop_unencrypted_mgmt(rx))
2100 return RX_DROP_UNUSABLE;
2101
2102 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); 2189 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb);
2103 if (rxs != RX_CONTINUE) 2190 if (rxs != RX_CONTINUE)
2104 return rxs; 2191 return rxs;
@@ -2267,19 +2354,46 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2267 dev_kfree_skb(skb); 2354 dev_kfree_skb(skb);
2268} 2355}
2269 2356
2357static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
2358 ieee80211_rx_result res)
2359{
2360 switch (res) {
2361 case RX_DROP_MONITOR:
2362 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2363 if (rx->sta)
2364 rx->sta->rx_dropped++;
2365 /* fall through */
2366 case RX_CONTINUE: {
2367 struct ieee80211_rate *rate = NULL;
2368 struct ieee80211_supported_band *sband;
2369 struct ieee80211_rx_status *status;
2370
2371 status = IEEE80211_SKB_RXCB((rx->skb));
2372
2373 sband = rx->local->hw.wiphy->bands[status->band];
2374 if (!(status->flag & RX_FLAG_HT))
2375 rate = &sband->bitrates[status->rate_idx];
2376
2377 ieee80211_rx_cooked_monitor(rx, rate);
2378 break;
2379 }
2380 case RX_DROP_UNUSABLE:
2381 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2382 if (rx->sta)
2383 rx->sta->rx_dropped++;
2384 dev_kfree_skb(rx->skb);
2385 break;
2386 case RX_QUEUED:
2387 I802_DEBUG_INC(rx->sdata->local->rx_handlers_queued);
2388 break;
2389 }
2390}
2270 2391
2271static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, 2392static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
2272 struct ieee80211_rx_data *rx, 2393 struct sk_buff_head *frames)
2273 struct sk_buff *skb,
2274 struct ieee80211_rate *rate)
2275{ 2394{
2276 struct sk_buff_head reorder_release;
2277 ieee80211_rx_result res = RX_DROP_MONITOR; 2395 ieee80211_rx_result res = RX_DROP_MONITOR;
2278 2396 struct sk_buff *skb;
2279 __skb_queue_head_init(&reorder_release);
2280
2281 rx->skb = skb;
2282 rx->sdata = sdata;
2283 2397
2284#define CALL_RXH(rxh) \ 2398#define CALL_RXH(rxh) \
2285 do { \ 2399 do { \
@@ -2288,17 +2402,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2288 goto rxh_next; \ 2402 goto rxh_next; \
2289 } while (0); 2403 } while (0);
2290 2404
2291 /* 2405 while ((skb = __skb_dequeue(frames))) {
2292 * NB: the rxh_next label works even if we jump
2293 * to it from here because then the list will
2294 * be empty, which is a trivial check
2295 */
2296 CALL_RXH(ieee80211_rx_h_passive_scan)
2297 CALL_RXH(ieee80211_rx_h_check)
2298
2299 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2300
2301 while ((skb = __skb_dequeue(&reorder_release))) {
2302 /* 2406 /*
2303 * all the other fields are valid across frames 2407 * all the other fields are valid across frames
2304 * that belong to an aMPDU since they are on the 2408 * that belong to an aMPDU since they are on the
@@ -2316,42 +2420,95 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2316 CALL_RXH(ieee80211_rx_h_remove_qos_control) 2420 CALL_RXH(ieee80211_rx_h_remove_qos_control)
2317 CALL_RXH(ieee80211_rx_h_amsdu) 2421 CALL_RXH(ieee80211_rx_h_amsdu)
2318#ifdef CONFIG_MAC80211_MESH 2422#ifdef CONFIG_MAC80211_MESH
2319 if (ieee80211_vif_is_mesh(&sdata->vif)) 2423 if (ieee80211_vif_is_mesh(&rx->sdata->vif))
2320 CALL_RXH(ieee80211_rx_h_mesh_fwding); 2424 CALL_RXH(ieee80211_rx_h_mesh_fwding);
2321#endif 2425#endif
2322 CALL_RXH(ieee80211_rx_h_data) 2426 CALL_RXH(ieee80211_rx_h_data)
2323 2427
2324 /* special treatment -- needs the queue */ 2428 /* special treatment -- needs the queue */
2325 res = ieee80211_rx_h_ctrl(rx, &reorder_release); 2429 res = ieee80211_rx_h_ctrl(rx, frames);
2326 if (res != RX_CONTINUE) 2430 if (res != RX_CONTINUE)
2327 goto rxh_next; 2431 goto rxh_next;
2328 2432
2433 CALL_RXH(ieee80211_rx_h_mgmt_check)
2329 CALL_RXH(ieee80211_rx_h_action) 2434 CALL_RXH(ieee80211_rx_h_action)
2435 CALL_RXH(ieee80211_rx_h_userspace_mgmt)
2436 CALL_RXH(ieee80211_rx_h_action_return)
2330 CALL_RXH(ieee80211_rx_h_mgmt) 2437 CALL_RXH(ieee80211_rx_h_mgmt)
2331 2438
2439 rxh_next:
2440 ieee80211_rx_handlers_result(rx, res);
2441
2332#undef CALL_RXH 2442#undef CALL_RXH
2443 }
2444}
2445
2446static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2447 struct ieee80211_rx_data *rx,
2448 struct sk_buff *skb)
2449{
2450 struct sk_buff_head reorder_release;
2451 ieee80211_rx_result res = RX_DROP_MONITOR;
2452
2453 __skb_queue_head_init(&reorder_release);
2454
2455 rx->skb = skb;
2456 rx->sdata = sdata;
2457
2458#define CALL_RXH(rxh) \
2459 do { \
2460 res = rxh(rx); \
2461 if (res != RX_CONTINUE) \
2462 goto rxh_next; \
2463 } while (0);
2464
2465 CALL_RXH(ieee80211_rx_h_passive_scan)
2466 CALL_RXH(ieee80211_rx_h_check)
2467
2468 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2469
2470 ieee80211_rx_handlers(rx, &reorder_release);
2471 return;
2333 2472
2334 rxh_next: 2473 rxh_next:
2335 switch (res) { 2474 ieee80211_rx_handlers_result(rx, res);
2336 case RX_DROP_MONITOR: 2475
2337 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2476#undef CALL_RXH
2338 if (rx->sta) 2477}
2339 rx->sta->rx_dropped++; 2478
2340 /* fall through */ 2479/*
2341 case RX_CONTINUE: 2480 * This function makes calls into the RX path. Therefore the
2342 ieee80211_rx_cooked_monitor(rx, rate); 2481 * caller must hold the sta_info->lock and everything has to
2343 break; 2482 * be under rcu_read_lock protection as well.
2344 case RX_DROP_UNUSABLE: 2483 */
2345 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2484void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
2346 if (rx->sta) 2485{
2347 rx->sta->rx_dropped++; 2486 struct sk_buff_head frames;
2348 dev_kfree_skb(rx->skb); 2487 struct ieee80211_rx_data rx = { };
2349 break; 2488 struct tid_ampdu_rx *tid_agg_rx;
2350 case RX_QUEUED: 2489
2351 I802_DEBUG_INC(sdata->local->rx_handlers_queued); 2490 tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
2352 break; 2491 if (!tid_agg_rx)
2353 } 2492 return;
2354 } 2493
2494 __skb_queue_head_init(&frames);
2495
2496 /* construct rx struct */
2497 rx.sta = sta;
2498 rx.sdata = sta->sdata;
2499 rx.local = sta->local;
2500 rx.queue = tid;
2501 rx.flags |= IEEE80211_RX_RA_MATCH;
2502
2503 if (unlikely(test_bit(SCAN_HW_SCANNING, &sta->local->scanning) ||
2504 test_bit(SCAN_OFF_CHANNEL, &sta->local->scanning)))
2505 rx.flags |= IEEE80211_RX_IN_SCAN;
2506
2507 spin_lock(&tid_agg_rx->reorder_lock);
2508 ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx, &frames);
2509 spin_unlock(&tid_agg_rx->reorder_lock);
2510
2511 ieee80211_rx_handlers(&rx, &frames);
2355} 2512}
2356 2513
2357/* main receive path */ 2514/* main receive path */
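The refactor above splits the handler chain so that frames released from the reorder buffer, including those released later by the new reorder timer via ieee80211_release_reorder_timeout(), go through the same per-frame handlers without re-running the early passive-scan/check steps. The CALL_RXH macro simply runs each handler and leaves the chain on the first result other than RX_CONTINUE; a standalone sketch of that short-circuiting pipeline pattern, with illustrative handler names:

        #include <stdio.h>

        enum rx_result { RX_CONTINUE, RX_QUEUED, RX_DROP };

        struct rx_data { int frame_id; };

        static enum rx_result h_decrypt(struct rx_data *rx) { (void)rx; return RX_CONTINUE; }
        static enum rx_result h_data(struct rx_data *rx)
        {
                printf("frame %d handled\n", rx->frame_id);
                return RX_QUEUED;
        }
        static enum rx_result h_mgmt(struct rx_data *rx) { (void)rx; return RX_CONTINUE; }

        /* Run a handler; leave the chain as soon as one consumes or drops the frame. */
        #define CALL_RXH(rxh)                           \
                do {                                    \
                        res = rxh(rx);                  \
                        if (res != RX_CONTINUE)         \
                                goto done;              \
                } while (0)

        static enum rx_result run_handlers(struct rx_data *rx)
        {
                enum rx_result res = RX_DROP;

                CALL_RXH(h_decrypt);
                CALL_RXH(h_data);
                CALL_RXH(h_mgmt);       /* not reached for data frames */
        done:
                return res;
        }

        int main(void)
        {
                struct rx_data rx = { .frame_id = 1 };

                return run_handlers(&rx) == RX_QUEUED ? 0 : 1;
        }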
@@ -2431,9 +2588,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2431 if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) 2588 if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2))
2432 return 0; 2589 return 0;
2433 break; 2590 break;
2434 case NL80211_IFTYPE_MONITOR: 2591 default:
2435 case NL80211_IFTYPE_UNSPECIFIED:
2436 case __NL80211_IFTYPE_AFTER_LAST:
2437 /* should never get here */ 2592 /* should never get here */
2438 WARN_ON(1); 2593 WARN_ON(1);
2439 break; 2594 break;
@@ -2447,8 +2602,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2447 * be called with rcu_read_lock protection. 2602 * be called with rcu_read_lock protection.
2448 */ 2603 */
2449static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, 2604static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2450 struct sk_buff *skb, 2605 struct sk_buff *skb)
2451 struct ieee80211_rate *rate)
2452{ 2606{
2453 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2607 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2454 struct ieee80211_local *local = hw_to_local(hw); 2608 struct ieee80211_local *local = hw_to_local(hw);
@@ -2550,13 +2704,12 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2550 skb_new = skb_copy(skb, GFP_ATOMIC); 2704 skb_new = skb_copy(skb, GFP_ATOMIC);
2551 if (!skb_new) { 2705 if (!skb_new) {
2552 if (net_ratelimit()) 2706 if (net_ratelimit())
2553 printk(KERN_DEBUG "%s: failed to copy " 2707 wiphy_debug(local->hw.wiphy,
2554 "multicast frame for %s\n", 2708 "failed to copy multicast frame for %s\n",
2555 wiphy_name(local->hw.wiphy), 2709 prev->name);
2556 prev->name);
2557 goto next; 2710 goto next;
2558 } 2711 }
2559 ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate); 2712 ieee80211_invoke_rx_handlers(prev, &rx, skb_new);
2560next: 2713next:
2561 prev = sdata; 2714 prev = sdata;
2562 } 2715 }
@@ -2572,7 +2725,7 @@ next:
2572 } 2725 }
2573 } 2726 }
2574 if (prev) 2727 if (prev)
2575 ieee80211_invoke_rx_handlers(prev, &rx, skb, rate); 2728 ieee80211_invoke_rx_handlers(prev, &rx, skb);
2576 else 2729 else
2577 dev_kfree_skb(skb); 2730 dev_kfree_skb(skb);
2578} 2731}
@@ -2615,28 +2768,37 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
2615 if (WARN_ON(!local->started)) 2768 if (WARN_ON(!local->started))
2616 goto drop; 2769 goto drop;
2617 2770
2618 if (status->flag & RX_FLAG_HT) { 2771 if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) {
2619 /* 2772 /*
2620 * rate_idx is MCS index, which can be [0-76] as documented on: 2773 * Validate the rate, unless a PLCP error means that
2621 * 2774 * we probably can't have a valid rate here anyway.
2622 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
2623 *
2624 * Anything else would be some sort of driver or hardware error.
2625 * The driver should catch hardware errors.
2626 */ 2775 */
2627 if (WARN((status->rate_idx < 0 || 2776
2628 status->rate_idx > 76), 2777 if (status->flag & RX_FLAG_HT) {
2629 "Rate marked as an HT rate but passed " 2778 /*
2630 "status->rate_idx is not " 2779 * rate_idx is MCS index, which can be [0-76]
2631 "an MCS index [0-76]: %d (0x%02x)\n", 2780 * as documented on:
2632 status->rate_idx, 2781 *
2633 status->rate_idx)) 2782 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
2634 goto drop; 2783 *
2635 } else { 2784 * Anything else would be some sort of driver or
2636 if (WARN_ON(status->rate_idx < 0 || 2785 * hardware error. The driver should catch hardware
2637 status->rate_idx >= sband->n_bitrates)) 2786 * errors.
2638 goto drop; 2787 */
2639 rate = &sband->bitrates[status->rate_idx]; 2788 if (WARN((status->rate_idx < 0 ||
2789 status->rate_idx > 76),
2790 "Rate marked as an HT rate but passed "
2791 "status->rate_idx is not "
2792 "an MCS index [0-76]: %d (0x%02x)\n",
2793 status->rate_idx,
2794 status->rate_idx))
2795 goto drop;
2796 } else {
2797 if (WARN_ON(status->rate_idx < 0 ||
2798 status->rate_idx >= sband->n_bitrates))
2799 goto drop;
2800 rate = &sband->bitrates[status->rate_idx];
2801 }
2640 } 2802 }
2641 2803
2642 /* 2804 /*
@@ -2658,7 +2820,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
2658 return; 2820 return;
2659 } 2821 }
2660 2822
2661 __ieee80211_rx_handle_packet(hw, skb, rate); 2823 __ieee80211_rx_handle_packet(hw, skb);
2662 2824
2663 rcu_read_unlock(); 2825 rcu_read_unlock();
2664 2826
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 872d7b6ef6b..d60389ba9b9 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -248,14 +248,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
248 return true; 248 return true;
249} 249}
250 250
251void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) 251static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
252{ 252{
253 struct ieee80211_local *local = hw_to_local(hw); 253 struct ieee80211_local *local = hw_to_local(hw);
254 bool was_hw_scan; 254 bool was_hw_scan;
255 255
256 trace_api_scan_completed(local, aborted); 256 mutex_lock(&local->mtx);
257
258 mutex_lock(&local->scan_mtx);
259 257
260 /* 258 /*
261 * It's ok to abort a not-yet-running scan (that 259 * It's ok to abort a not-yet-running scan (that
@@ -267,7 +265,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
267 aborted = true; 265 aborted = true;
268 266
269 if (WARN_ON(!local->scan_req)) { 267 if (WARN_ON(!local->scan_req)) {
270 mutex_unlock(&local->scan_mtx); 268 mutex_unlock(&local->mtx);
271 return; 269 return;
272 } 270 }
273 271
@@ -275,7 +273,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
275 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { 273 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) {
276 ieee80211_queue_delayed_work(&local->hw, 274 ieee80211_queue_delayed_work(&local->hw,
277 &local->scan_work, 0); 275 &local->scan_work, 0);
278 mutex_unlock(&local->scan_mtx); 276 mutex_unlock(&local->mtx);
279 return; 277 return;
280 } 278 }
281 279
@@ -291,7 +289,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
291 local->scan_channel = NULL; 289 local->scan_channel = NULL;
292 290
293 /* we only have to protect scan_req and hw/sw scan */ 291 /* we only have to protect scan_req and hw/sw scan */
294 mutex_unlock(&local->scan_mtx); 292 mutex_unlock(&local->mtx);
295 293
296 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 294 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
297 if (was_hw_scan) 295 if (was_hw_scan)
@@ -304,12 +302,26 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
304 ieee80211_offchannel_return(local, true); 302 ieee80211_offchannel_return(local, true);
305 303
306 done: 304 done:
305 mutex_lock(&local->mtx);
307 ieee80211_recalc_idle(local); 306 ieee80211_recalc_idle(local);
307 mutex_unlock(&local->mtx);
308 ieee80211_mlme_notify_scan_completed(local); 308 ieee80211_mlme_notify_scan_completed(local);
309 ieee80211_ibss_notify_scan_completed(local); 309 ieee80211_ibss_notify_scan_completed(local);
310 ieee80211_mesh_notify_scan_completed(local); 310 ieee80211_mesh_notify_scan_completed(local);
311 ieee80211_queue_work(&local->hw, &local->work_work); 311 ieee80211_queue_work(&local->hw, &local->work_work);
312} 312}
313
314void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
315{
316 struct ieee80211_local *local = hw_to_local(hw);
317
318 trace_api_scan_completed(local, aborted);
319
320 set_bit(SCAN_COMPLETED, &local->scanning);
321 if (aborted)
322 set_bit(SCAN_ABORTED, &local->scanning);
323 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
324}
313EXPORT_SYMBOL(ieee80211_scan_completed); 325EXPORT_SYMBOL(ieee80211_scan_completed);
314 326
315static int ieee80211_start_sw_scan(struct ieee80211_local *local) 327static int ieee80211_start_sw_scan(struct ieee80211_local *local)
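With the change above, ieee80211_scan_completed() no longer performs the completion work in the caller's context; it only records the SCAN_COMPLETED/SCAN_ABORTED bits and schedules the scan work, and ieee80211_scan_work() later picks the bits up and calls __ieee80211_scan_completed() under local->mtx. A standalone sketch of that "set flags, defer to a worker" handoff, using C11 atomics in place of the kernel bitops and workqueue (the bit names and helpers are illustrative):

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdio.h>

        #define SCAN_COMPLETED (1u << 0)
        #define SCAN_ABORTED   (1u << 1)

        static atomic_uint scanning;

        static void queue_scan_work(void) { /* stand-in for ieee80211_queue_delayed_work() */ }

        /* Called from (possibly atomic) driver context: only record state and defer. */
        static void scan_completed(bool aborted)
        {
                unsigned bits = SCAN_COMPLETED | (aborted ? SCAN_ABORTED : 0);

                atomic_fetch_or(&scanning, bits);
                queue_scan_work();
        }

        /* Runs later in process context, where sleeping locks may be taken. */
        static void scan_work(void)
        {
                unsigned prev = atomic_fetch_and(&scanning,
                                                 ~(SCAN_COMPLETED | SCAN_ABORTED));

                if (prev & SCAN_COMPLETED)
                        printf("finish scan, aborted=%d\n", !!(prev & SCAN_ABORTED));
        }

        int main(void)
        {
                scan_completed(false);
                scan_work();
                return 0;
        }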
@@ -447,7 +459,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
447 459
448 /* if no more bands/channels left, complete scan and advance to the idle state */ 460 /* if no more bands/channels left, complete scan and advance to the idle state */
449 if (local->scan_channel_idx >= local->scan_req->n_channels) { 461 if (local->scan_channel_idx >= local->scan_req->n_channels) {
450 ieee80211_scan_completed(&local->hw, false); 462 __ieee80211_scan_completed(&local->hw, false);
451 return 1; 463 return 1;
452 } 464 }
453 465
@@ -639,17 +651,25 @@ void ieee80211_scan_work(struct work_struct *work)
639 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 651 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
640 unsigned long next_delay = 0; 652 unsigned long next_delay = 0;
641 653
642 mutex_lock(&local->scan_mtx); 654 if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) {
655 bool aborted;
656
657 aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning);
658 __ieee80211_scan_completed(&local->hw, aborted);
659 return;
660 }
661
662 mutex_lock(&local->mtx);
643 if (!sdata || !local->scan_req) { 663 if (!sdata || !local->scan_req) {
644 mutex_unlock(&local->scan_mtx); 664 mutex_unlock(&local->mtx);
645 return; 665 return;
646 } 666 }
647 667
648 if (local->hw_scan_req) { 668 if (local->hw_scan_req) {
649 int rc = drv_hw_scan(local, sdata, local->hw_scan_req); 669 int rc = drv_hw_scan(local, sdata, local->hw_scan_req);
650 mutex_unlock(&local->scan_mtx); 670 mutex_unlock(&local->mtx);
651 if (rc) 671 if (rc)
652 ieee80211_scan_completed(&local->hw, true); 672 __ieee80211_scan_completed(&local->hw, true);
653 return; 673 return;
654 } 674 }
655 675
@@ -661,20 +681,20 @@ void ieee80211_scan_work(struct work_struct *work)
661 local->scan_sdata = NULL; 681 local->scan_sdata = NULL;
662 682
663 rc = __ieee80211_start_scan(sdata, req); 683 rc = __ieee80211_start_scan(sdata, req);
664 mutex_unlock(&local->scan_mtx); 684 mutex_unlock(&local->mtx);
665 685
666 if (rc) 686 if (rc)
667 ieee80211_scan_completed(&local->hw, true); 687 __ieee80211_scan_completed(&local->hw, true);
668 return; 688 return;
669 } 689 }
670 690
671 mutex_unlock(&local->scan_mtx); 691 mutex_unlock(&local->mtx);
672 692
673 /* 693 /*
674 * Avoid re-scheduling when the sdata is going away. 694 * Avoid re-scheduling when the sdata is going away.
675 */ 695 */
676 if (!ieee80211_sdata_running(sdata)) { 696 if (!ieee80211_sdata_running(sdata)) {
677 ieee80211_scan_completed(&local->hw, true); 697 __ieee80211_scan_completed(&local->hw, true);
678 return; 698 return;
679 } 699 }
680 700
@@ -711,9 +731,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
711{ 731{
712 int res; 732 int res;
713 733
714 mutex_lock(&sdata->local->scan_mtx); 734 mutex_lock(&sdata->local->mtx);
715 res = __ieee80211_start_scan(sdata, req); 735 res = __ieee80211_start_scan(sdata, req);
716 mutex_unlock(&sdata->local->scan_mtx); 736 mutex_unlock(&sdata->local->mtx);
717 737
718 return res; 738 return res;
719} 739}
@@ -726,7 +746,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
726 int ret = -EBUSY; 746 int ret = -EBUSY;
727 enum ieee80211_band band; 747 enum ieee80211_band band;
728 748
729 mutex_lock(&local->scan_mtx); 749 mutex_lock(&local->mtx);
730 750
731 /* busy scanning */ 751 /* busy scanning */
732 if (local->scan_req) 752 if (local->scan_req)
@@ -761,7 +781,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
761 781
762 ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req); 782 ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
763 unlock: 783 unlock:
764 mutex_unlock(&local->scan_mtx); 784 mutex_unlock(&local->mtx);
765 return ret; 785 return ret;
766} 786}
767 787
@@ -775,11 +795,11 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
775 * Only call this function when a scan can't be 795 * Only call this function when a scan can't be
776 * queued -- mostly at suspend under RTNL. 796 * queued -- mostly at suspend under RTNL.
777 */ 797 */
778 mutex_lock(&local->scan_mtx); 798 mutex_lock(&local->mtx);
779 abortscan = test_bit(SCAN_SW_SCANNING, &local->scanning) || 799 abortscan = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
780 (!local->scanning && local->scan_req); 800 (!local->scanning && local->scan_req);
781 mutex_unlock(&local->scan_mtx); 801 mutex_unlock(&local->mtx);
782 802
783 if (abortscan) 803 if (abortscan)
784 ieee80211_scan_completed(&local->hw, true); 804 __ieee80211_scan_completed(&local->hw, true);
785} 805}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6d86f0c1ad0..44e10a9de0a 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -125,7 +125,7 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
125 lockdep_is_held(&local->sta_mtx)); 125 lockdep_is_held(&local->sta_mtx));
126 while (sta) { 126 while (sta) {
127 if ((sta->sdata == sdata || 127 if ((sta->sdata == sdata ||
128 sta->sdata->bss == sdata->bss) && 128 (sta->sdata->bss && sta->sdata->bss == sdata->bss)) &&
129 memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) 129 memcmp(sta->sta.addr, addr, ETH_ALEN) == 0)
130 break; 130 break;
131 sta = rcu_dereference_check(sta->hnext, 131 sta = rcu_dereference_check(sta->hnext,
@@ -174,8 +174,7 @@ static void __sta_info_free(struct ieee80211_local *local,
174 } 174 }
175 175
176#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 176#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
177 printk(KERN_DEBUG "%s: Destroyed STA %pM\n", 177 wiphy_debug(local->hw.wiphy, "Destroyed STA %pM\n", sta->sta.addr);
178 wiphy_name(local->hw.wiphy), sta->sta.addr);
179#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 178#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
180 179
181 kfree(sta); 180 kfree(sta);
@@ -262,8 +261,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
262 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); 261 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
263 262
264#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 263#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
265 printk(KERN_DEBUG "%s: Allocated STA %pM\n", 264 wiphy_debug(local->hw.wiphy, "Allocated STA %pM\n", sta->sta.addr);
266 wiphy_name(local->hw.wiphy), sta->sta.addr);
267#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 265#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
268 266
269#ifdef CONFIG_MAC80211_MESH 267#ifdef CONFIG_MAC80211_MESH
@@ -282,7 +280,7 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
282 unsigned long flags; 280 unsigned long flags;
283 int err = 0; 281 int err = 0;
284 282
285 WARN_ON(!mutex_is_locked(&local->sta_mtx)); 283 lockdep_assert_held(&local->sta_mtx);
286 284
287 /* notify driver */ 285 /* notify driver */
288 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 286 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -300,8 +298,9 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
300 sta->uploaded = true; 298 sta->uploaded = true;
301#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 299#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
302 if (async) 300 if (async)
303 printk(KERN_DEBUG "%s: Finished adding IBSS STA %pM\n", 301 wiphy_debug(local->hw.wiphy,
304 wiphy_name(local->hw.wiphy), sta->sta.addr); 302 "Finished adding IBSS STA %pM\n",
303 sta->sta.addr);
305#endif 304#endif
306 } 305 }
307 306
@@ -411,8 +410,8 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
411 spin_unlock_irqrestore(&local->sta_lock, flags); 410 spin_unlock_irqrestore(&local->sta_lock, flags);
412 411
413#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 412#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
414 printk(KERN_DEBUG "%s: Added IBSS STA %pM\n", 413 wiphy_debug(local->hw.wiphy, "Added IBSS STA %pM\n",
415 wiphy_name(local->hw.wiphy), sta->sta.addr); 414 sta->sta.addr);
416#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 415#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
417 416
418 ieee80211_queue_work(&local->hw, &local->sta_finish_work); 417 ieee80211_queue_work(&local->hw, &local->sta_finish_work);
@@ -459,8 +458,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
459 } 458 }
460 459
461#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 460#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
462 printk(KERN_DEBUG "%s: Inserted STA %pM\n", 461 wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr);
463 wiphy_name(local->hw.wiphy), sta->sta.addr);
464#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 462#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
465 463
466 /* move reference to rcu-protected */ 464 /* move reference to rcu-protected */
@@ -690,8 +688,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
690#endif 688#endif
691 689
692#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 690#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
693 printk(KERN_DEBUG "%s: Removed STA %pM\n", 691 wiphy_debug(local->hw.wiphy, "Removed STA %pM\n", sta->sta.addr);
694 wiphy_name(local->hw.wiphy), sta->sta.addr);
695#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 692#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
696 cancel_work_sync(&sta->drv_unblock_wk); 693 cancel_work_sync(&sta->drv_unblock_wk);
697 694
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 54262e72376..810c5ce9831 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -103,6 +103,7 @@ struct tid_ampdu_tx {
103 * @reorder_buf: buffer to reorder incoming aggregated MPDUs 103 * @reorder_buf: buffer to reorder incoming aggregated MPDUs
104 * @reorder_time: jiffies when skb was added 104 * @reorder_time: jiffies when skb was added
105 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) 105 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
106 * @reorder_timer: releases expired frames from the reorder buffer.
106 * @head_seq_num: head sequence number in reordering buffer. 107 * @head_seq_num: head sequence number in reordering buffer.
107 * @stored_mpdu_num: number of MPDUs in reordering buffer 108 * @stored_mpdu_num: number of MPDUs in reordering buffer
108 * @ssn: Starting Sequence Number expected to be aggregated. 109 * @ssn: Starting Sequence Number expected to be aggregated.
@@ -110,20 +111,25 @@ struct tid_ampdu_tx {
110 * @timeout: reset timer value (in TUs). 111 * @timeout: reset timer value (in TUs).
111 * @dialog_token: dialog token for aggregation session 112 * @dialog_token: dialog token for aggregation session
112 * @rcu_head: RCU head used for freeing this struct 113 * @rcu_head: RCU head used for freeing this struct
114 * @reorder_lock: serializes access to reorder buffer, see below.
113 * 115 *
114 * This structure is protected by RCU and the per-station 116 * This structure is protected by RCU and the per-station
115 * spinlock. Assignments to the array holding it must hold 117 * spinlock. Assignments to the array holding it must hold
116 * the spinlock, only the RX path can access it under RCU 118 * the spinlock.
117 * lock-free. The RX path, since it is single-threaded, 119 *
118 * can even modify the structure without locking since the 120 * The @reorder_lock is used to protect the variables and
119 * only other modifications to it are done when the struct 121 * arrays such as @reorder_buf, @reorder_time, @head_seq_num,
120 * can not yet or no longer be found by the RX path. 122 * @stored_mpdu_num and @reorder_time from being corrupted by
123 * concurrent access of the RX path and the expired frame
124 * release timer.
121 */ 125 */
122struct tid_ampdu_rx { 126struct tid_ampdu_rx {
123 struct rcu_head rcu_head; 127 struct rcu_head rcu_head;
128 spinlock_t reorder_lock;
124 struct sk_buff **reorder_buf; 129 struct sk_buff **reorder_buf;
125 unsigned long *reorder_time; 130 unsigned long *reorder_time;
126 struct timer_list session_timer; 131 struct timer_list session_timer;
132 struct timer_list reorder_timer;
127 u16 head_seq_num; 133 u16 head_seq_num;
128 u16 stored_mpdu_num; 134 u16 stored_mpdu_num;
129 u16 ssn; 135 u16 ssn;
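
The @reorder_lock added above is what allows the new @reorder_timer and the RX path to share the reorder state safely. A minimal sketch (not part of this patch) of the locking discipline both sides are expected to follow; struct tid_ampdu_rx is the structure declared in this hunk:

#include <linux/spinlock.h>

/* Sketch only: both the RX reorder path and the expired-frame release timer
 * run in softirq context, so they take the lock BH-safe before touching
 * reorder_buf, reorder_time, head_seq_num or stored_mpdu_num. */
static void tid_rx_release_expired(struct tid_ampdu_rx *tid_rx)
{
        spin_lock_bh(&tid_rx->reorder_lock);
        /* ... walk reorder_buf/reorder_time, hand timed-out frames up,
         *     advance head_seq_num and decrement stored_mpdu_num ... */
        spin_unlock_bh(&tid_rx->reorder_lock);
}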
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 10caec5ea8f..571b32bfc54 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -114,11 +114,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
114 114
115#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 115#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
116 if (net_ratelimit()) 116 if (net_ratelimit())
117 printk(KERN_DEBUG "%s: dropped TX filtered frame, " 117 wiphy_debug(local->hw.wiphy,
118 "queue_len=%d PS=%d @%lu\n", 118 "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n",
119 wiphy_name(local->hw.wiphy), 119 skb_queue_len(&sta->tx_filtered),
120 skb_queue_len(&sta->tx_filtered), 120 !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
121 !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
122#endif 121#endif
123 dev_kfree_skb(skb); 122 dev_kfree_skb(skb);
124} 123}
@@ -296,7 +295,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
296 } 295 }
297 296
298 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) 297 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX)
299 cfg80211_action_tx_status( 298 cfg80211_mgmt_tx_status(
300 skb->dev, (unsigned long) skb, skb->data, skb->len, 299 skb->dev, (unsigned long) skb, skb->data, skb->len,
301 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); 300 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
302 301
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c54db966926..e1733dcb58a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -351,8 +351,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
351 351
352 local->total_ps_buffered = total; 352 local->total_ps_buffered = total;
353#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 353#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
354 printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", 354 wiphy_debug(local->hw.wiphy, "PS buffers full - purged %d frames\n",
355 wiphy_name(local->hw.wiphy), purged); 355 purged);
356#endif 356#endif
357} 357}
358 358
@@ -509,6 +509,18 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
509} 509}
510 510
511static ieee80211_tx_result debug_noinline 511static ieee80211_tx_result debug_noinline
512ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
513{
514 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
515
516 if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol &&
517 tx->sdata->control_port_no_encrypt))
518 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
519
520 return TX_CONTINUE;
521}
522
523static ieee80211_tx_result debug_noinline
512ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) 524ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
513{ 525{
514 struct ieee80211_key *key = NULL; 526 struct ieee80211_key *key = NULL;
@@ -527,7 +539,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
527 else if ((key = rcu_dereference(tx->sdata->default_key))) 539 else if ((key = rcu_dereference(tx->sdata->default_key)))
528 tx->key = key; 540 tx->key = key;
529 else if (tx->sdata->drop_unencrypted && 541 else if (tx->sdata->drop_unencrypted &&
530 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && 542 (tx->skb->protocol != tx->sdata->control_port_protocol) &&
531 !(info->flags & IEEE80211_TX_CTL_INJECTED) && 543 !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
532 (!ieee80211_is_robust_mgmt_frame(hdr) || 544 (!ieee80211_is_robust_mgmt_frame(hdr) ||
533 (ieee80211_is_action(hdr->frame_control) && 545 (ieee80211_is_action(hdr->frame_control) &&
@@ -543,15 +555,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
543 tx->key->tx_rx_count++; 555 tx->key->tx_rx_count++;
544 /* TODO: add threshold stuff again */ 556 /* TODO: add threshold stuff again */
545 557
546 switch (tx->key->conf.alg) { 558 switch (tx->key->conf.cipher) {
547 case ALG_WEP: 559 case WLAN_CIPHER_SUITE_WEP40:
560 case WLAN_CIPHER_SUITE_WEP104:
548 if (ieee80211_is_auth(hdr->frame_control)) 561 if (ieee80211_is_auth(hdr->frame_control))
549 break; 562 break;
550 case ALG_TKIP: 563 case WLAN_CIPHER_SUITE_TKIP:
551 if (!ieee80211_is_data_present(hdr->frame_control)) 564 if (!ieee80211_is_data_present(hdr->frame_control))
552 tx->key = NULL; 565 tx->key = NULL;
553 break; 566 break;
554 case ALG_CCMP: 567 case WLAN_CIPHER_SUITE_CCMP:
555 if (!ieee80211_is_data_present(hdr->frame_control) && 568 if (!ieee80211_is_data_present(hdr->frame_control) &&
556 !ieee80211_use_mfp(hdr->frame_control, tx->sta, 569 !ieee80211_use_mfp(hdr->frame_control, tx->sta,
557 tx->skb)) 570 tx->skb))
@@ -561,7 +574,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
561 IEEE80211_KEY_FLAG_SW_MGMT) && 574 IEEE80211_KEY_FLAG_SW_MGMT) &&
562 ieee80211_is_mgmt(hdr->frame_control); 575 ieee80211_is_mgmt(hdr->frame_control);
563 break; 576 break;
564 case ALG_AES_CMAC: 577 case WLAN_CIPHER_SUITE_AES_CMAC:
565 if (!ieee80211_is_mgmt(hdr->frame_control)) 578 if (!ieee80211_is_mgmt(hdr->frame_control))
566 tx->key = NULL; 579 tx->key = NULL;
567 break; 580 break;
@@ -946,22 +959,31 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
946static ieee80211_tx_result debug_noinline 959static ieee80211_tx_result debug_noinline
947ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) 960ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
948{ 961{
962 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
963
949 if (!tx->key) 964 if (!tx->key)
950 return TX_CONTINUE; 965 return TX_CONTINUE;
951 966
952 switch (tx->key->conf.alg) { 967 switch (tx->key->conf.cipher) {
953 case ALG_WEP: 968 case WLAN_CIPHER_SUITE_WEP40:
969 case WLAN_CIPHER_SUITE_WEP104:
954 return ieee80211_crypto_wep_encrypt(tx); 970 return ieee80211_crypto_wep_encrypt(tx);
955 case ALG_TKIP: 971 case WLAN_CIPHER_SUITE_TKIP:
956 return ieee80211_crypto_tkip_encrypt(tx); 972 return ieee80211_crypto_tkip_encrypt(tx);
957 case ALG_CCMP: 973 case WLAN_CIPHER_SUITE_CCMP:
958 return ieee80211_crypto_ccmp_encrypt(tx); 974 return ieee80211_crypto_ccmp_encrypt(tx);
959 case ALG_AES_CMAC: 975 case WLAN_CIPHER_SUITE_AES_CMAC:
960 return ieee80211_crypto_aes_cmac_encrypt(tx); 976 return ieee80211_crypto_aes_cmac_encrypt(tx);
977 default:
978 /* handle hw-only algorithm */
979 if (info->control.hw_key) {
980 ieee80211_tx_set_protected(tx);
981 return TX_CONTINUE;
982 }
983 break;
984
961 } 985 }
962 986
963 /* not reached */
964 WARN_ON(1);
965 return TX_DROP; 987 return TX_DROP;
966} 988}
967 989
@@ -1339,6 +1361,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1339 CALL_TXH(ieee80211_tx_h_dynamic_ps); 1361 CALL_TXH(ieee80211_tx_h_dynamic_ps);
1340 CALL_TXH(ieee80211_tx_h_check_assoc); 1362 CALL_TXH(ieee80211_tx_h_check_assoc);
1341 CALL_TXH(ieee80211_tx_h_ps_buf); 1363 CALL_TXH(ieee80211_tx_h_ps_buf);
1364 CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
1342 CALL_TXH(ieee80211_tx_h_select_key); 1365 CALL_TXH(ieee80211_tx_h_select_key);
1343 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) 1366 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
1344 CALL_TXH(ieee80211_tx_h_rate_ctrl); 1367 CALL_TXH(ieee80211_tx_h_rate_ctrl);
@@ -1511,8 +1534,8 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
1511 I802_DEBUG_INC(local->tx_expand_skb_head); 1534 I802_DEBUG_INC(local->tx_expand_skb_head);
1512 1535
1513 if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) { 1536 if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) {
1514 printk(KERN_DEBUG "%s: failed to reallocate TX buffer\n", 1537 wiphy_debug(local->hw.wiphy,
1515 wiphy_name(local->hw.wiphy)); 1538 "failed to reallocate TX buffer\n");
1516 return -ENOMEM; 1539 return -ENOMEM;
1517 } 1540 }
1518 1541
@@ -1586,6 +1609,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
1586 return; 1609 return;
1587 } 1610 }
1588 1611
1612 hdr = (struct ieee80211_hdr *) skb->data;
1589 info->control.vif = &sdata->vif; 1613 info->control.vif = &sdata->vif;
1590 1614
1591 if (ieee80211_vif_is_mesh(&sdata->vif) && 1615 if (ieee80211_vif_is_mesh(&sdata->vif) &&
@@ -1699,7 +1723,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1699 u16 ethertype, hdrlen, meshhdrlen = 0; 1723 u16 ethertype, hdrlen, meshhdrlen = 0;
1700 __le16 fc; 1724 __le16 fc;
1701 struct ieee80211_hdr hdr; 1725 struct ieee80211_hdr hdr;
1702 struct ieee80211s_hdr mesh_hdr; 1726 struct ieee80211s_hdr mesh_hdr __maybe_unused;
1703 const u8 *encaps_data; 1727 const u8 *encaps_data;
1704 int encaps_len, skip_header_bytes; 1728 int encaps_len, skip_header_bytes;
1705 int nh_pos, h_pos; 1729 int nh_pos, h_pos;
@@ -1816,7 +1840,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1816#endif 1840#endif
1817 case NL80211_IFTYPE_STATION: 1841 case NL80211_IFTYPE_STATION:
1818 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); 1842 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
1819 if (sdata->u.mgd.use_4addr && ethertype != ETH_P_PAE) { 1843 if (sdata->u.mgd.use_4addr &&
1844 cpu_to_be16(ethertype) != sdata->control_port_protocol) {
1820 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); 1845 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
1821 /* RA TA DA SA */ 1846 /* RA TA DA SA */
1822 memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); 1847 memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN);
@@ -1869,7 +1894,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1869 if (!ieee80211_vif_is_mesh(&sdata->vif) && 1894 if (!ieee80211_vif_is_mesh(&sdata->vif) &&
1870 unlikely(!is_multicast_ether_addr(hdr.addr1) && 1895 unlikely(!is_multicast_ether_addr(hdr.addr1) &&
1871 !(sta_flags & WLAN_STA_AUTHORIZED) && 1896 !(sta_flags & WLAN_STA_AUTHORIZED) &&
1872 !(ethertype == ETH_P_PAE && 1897 !(cpu_to_be16(ethertype) == sdata->control_port_protocol &&
1873 compare_ether_addr(sdata->vif.addr, 1898 compare_ether_addr(sdata->vif.addr,
1874 skb->data + ETH_ALEN) == 0))) { 1899 skb->data + ETH_ALEN) == 0))) {
1875#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1900#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -2068,8 +2093,7 @@ void ieee80211_tx_pending(unsigned long data)
2068 2093
2069 if (skb_queue_empty(&local->pending[i])) 2094 if (skb_queue_empty(&local->pending[i]))
2070 list_for_each_entry_rcu(sdata, &local->interfaces, list) 2095 list_for_each_entry_rcu(sdata, &local->interfaces, list)
2071 netif_tx_wake_queue( 2096 netif_wake_subqueue(sdata->dev, i);
2072 netdev_get_tx_queue(sdata->dev, i));
2073 } 2097 }
2074 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 2098 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
2075 2099
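
Several hunks above replace the hard-coded ETH_P_PAE test with a comparison against sdata->control_port_protocol, which is stored as a __be16 while the locally parsed ethertype is host order; that is why every comparison site gains a cpu_to_be16(). A minimal sketch of the check, assuming the per-interface value defaults to the EAPOL ethertype:

#include <linux/types.h>
#include <linux/if_ether.h>
#include <asm/byteorder.h>

/* Sketch only: 'configured' is the interface's __be16 control-port protocol
 * (assumed to default to cpu_to_be16(ETH_P_PAE)); 'ethertype' is the
 * host-order value parsed from the frame. */
static bool is_control_port_frame(__be16 configured, u16 ethertype)
{
        return cpu_to_be16(ethertype) == configured;
}

In the hunks above this test is what lets a control-port frame skip 4-address encapsulation, pass before the station is authorized, and be flagged IEEE80211_TX_INTFL_DONT_ENCRYPT when control_port_no_encrypt is set.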
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 748387d45bc..737f4267c33 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -283,8 +283,11 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
283 283
284 if (skb_queue_empty(&local->pending[queue])) { 284 if (skb_queue_empty(&local->pending[queue])) {
285 rcu_read_lock(); 285 rcu_read_lock();
286 list_for_each_entry_rcu(sdata, &local->interfaces, list) 286 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
287 netif_tx_wake_queue(netdev_get_tx_queue(sdata->dev, queue)); 287 if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
288 continue;
289 netif_wake_subqueue(sdata->dev, queue);
290 }
288 rcu_read_unlock(); 291 rcu_read_unlock();
289 } else 292 } else
290 tasklet_schedule(&local->tx_pending_tasklet); 293 tasklet_schedule(&local->tx_pending_tasklet);
@@ -323,7 +326,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
323 326
324 rcu_read_lock(); 327 rcu_read_lock();
325 list_for_each_entry_rcu(sdata, &local->interfaces, list) 328 list_for_each_entry_rcu(sdata, &local->interfaces, list)
326 netif_tx_stop_queue(netdev_get_tx_queue(sdata->dev, queue)); 329 netif_stop_subqueue(sdata->dev, queue);
327 rcu_read_unlock(); 330 rcu_read_unlock();
328} 331}
329 332
@@ -471,16 +474,10 @@ void ieee80211_iterate_active_interfaces(
471 474
472 list_for_each_entry(sdata, &local->interfaces, list) { 475 list_for_each_entry(sdata, &local->interfaces, list) {
473 switch (sdata->vif.type) { 476 switch (sdata->vif.type) {
474 case __NL80211_IFTYPE_AFTER_LAST:
475 case NL80211_IFTYPE_UNSPECIFIED:
476 case NL80211_IFTYPE_MONITOR: 477 case NL80211_IFTYPE_MONITOR:
477 case NL80211_IFTYPE_AP_VLAN: 478 case NL80211_IFTYPE_AP_VLAN:
478 continue; 479 continue;
479 case NL80211_IFTYPE_AP: 480 default:
480 case NL80211_IFTYPE_STATION:
481 case NL80211_IFTYPE_ADHOC:
482 case NL80211_IFTYPE_WDS:
483 case NL80211_IFTYPE_MESH_POINT:
484 break; 481 break;
485 } 482 }
486 if (ieee80211_sdata_running(sdata)) 483 if (ieee80211_sdata_running(sdata))
@@ -505,16 +502,10 @@ void ieee80211_iterate_active_interfaces_atomic(
505 502
506 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 503 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
507 switch (sdata->vif.type) { 504 switch (sdata->vif.type) {
508 case __NL80211_IFTYPE_AFTER_LAST:
509 case NL80211_IFTYPE_UNSPECIFIED:
510 case NL80211_IFTYPE_MONITOR: 505 case NL80211_IFTYPE_MONITOR:
511 case NL80211_IFTYPE_AP_VLAN: 506 case NL80211_IFTYPE_AP_VLAN:
512 continue; 507 continue;
513 case NL80211_IFTYPE_AP: 508 default:
514 case NL80211_IFTYPE_STATION:
515 case NL80211_IFTYPE_ADHOC:
516 case NL80211_IFTYPE_WDS:
517 case NL80211_IFTYPE_MESH_POINT:
518 break; 509 break;
519 } 510 }
520 if (ieee80211_sdata_running(sdata)) 511 if (ieee80211_sdata_running(sdata))
@@ -1189,7 +1180,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1189 /* ignore virtual */ 1180 /* ignore virtual */
1190 break; 1181 break;
1191 case NL80211_IFTYPE_UNSPECIFIED: 1182 case NL80211_IFTYPE_UNSPECIFIED:
1192 case __NL80211_IFTYPE_AFTER_LAST: 1183 case NUM_NL80211_IFTYPES:
1184 case NL80211_IFTYPE_P2P_CLIENT:
1185 case NL80211_IFTYPE_P2P_GO:
1193 WARN_ON(1); 1186 WARN_ON(1);
1194 break; 1187 break;
1195 } 1188 }
@@ -1293,9 +1286,9 @@ void ieee80211_recalc_smps(struct ieee80211_local *local,
1293 int count = 0; 1286 int count = 0;
1294 1287
1295 if (forsdata) 1288 if (forsdata)
1296 WARN_ON(!mutex_is_locked(&forsdata->u.mgd.mtx)); 1289 lockdep_assert_held(&forsdata->u.mgd.mtx);
1297 1290
1298 WARN_ON(!mutex_is_locked(&local->iflist_mtx)); 1291 lockdep_assert_held(&local->iflist_mtx);
1299 1292
1300 /* 1293 /*
1301 * This function could be improved to handle multiple 1294 * This function could be improved to handle multiple
@@ -1308,7 +1301,7 @@ void ieee80211_recalc_smps(struct ieee80211_local *local,
1308 */ 1301 */
1309 1302
1310 list_for_each_entry(sdata, &local->interfaces, list) { 1303 list_for_each_entry(sdata, &local->interfaces, list) {
1311 if (!netif_running(sdata->dev)) 1304 if (!ieee80211_sdata_running(sdata))
1312 continue; 1305 continue;
1313 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1306 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1314 goto set; 1307 goto set;
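
The WARN_ON(!mutex_is_locked(...)) assertions replaced here and in sta_info.c only proved that some task held the mutex; lockdep_assert_held() asserts that the current task holds it, and compiles away when lockdep is not configured. A minimal sketch of the difference:

#include <linux/mutex.h>
#include <linux/lockdep.h>
#include <linux/bug.h>

/* Sketch only: the old check passes as long as anyone owns the mutex; the
 * new one is a genuine ownership assertion under CONFIG_PROVE_LOCKING and
 * costs nothing otherwise. */
static void assert_owner(struct mutex *mtx)
{
        WARN_ON(!mutex_is_locked(mtx));  /* old: "is it locked at all?" */
        lockdep_assert_held(mtx);        /* new: "does *this* task hold it?" */
}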
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 9ebc8d8a1f5..f27484c22b9 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -240,7 +240,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
240 240
241 keyidx = skb->data[hdrlen + 3] >> 6; 241 keyidx = skb->data[hdrlen + 3] >> 6;
242 242
243 if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) 243 if (!key || keyidx != key->conf.keyidx)
244 return -1; 244 return -1;
245 245
246 klen = 3 + key->conf.keylen; 246 klen = 3 + key->conf.keylen;
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 81d4ad64184..ae344d1ba05 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -43,7 +43,7 @@ enum work_action {
43/* utils */ 43/* utils */
44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local) 44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local)
45{ 45{
46 WARN_ON(!mutex_is_locked(&local->work_mtx)); 46 lockdep_assert_held(&local->mtx);
47} 47}
48 48
49/* 49/*
@@ -757,7 +757,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
757 mgmt = (struct ieee80211_mgmt *) skb->data; 757 mgmt = (struct ieee80211_mgmt *) skb->data;
758 fc = le16_to_cpu(mgmt->frame_control); 758 fc = le16_to_cpu(mgmt->frame_control);
759 759
760 mutex_lock(&local->work_mtx); 760 mutex_lock(&local->mtx);
761 761
762 list_for_each_entry(wk, &local->work_list, list) { 762 list_for_each_entry(wk, &local->work_list, list) {
763 const u8 *bssid = NULL; 763 const u8 *bssid = NULL;
@@ -833,7 +833,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
833 WARN(1, "unexpected: %d", rma); 833 WARN(1, "unexpected: %d", rma);
834 } 834 }
835 835
836 mutex_unlock(&local->work_mtx); 836 mutex_unlock(&local->mtx);
837 837
838 if (rma != WORK_ACT_DONE) 838 if (rma != WORK_ACT_DONE)
839 goto out; 839 goto out;
@@ -845,9 +845,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
845 case WORK_DONE_REQUEUE: 845 case WORK_DONE_REQUEUE:
846 synchronize_rcu(); 846 synchronize_rcu();
847 wk->started = false; /* restart */ 847 wk->started = false; /* restart */
848 mutex_lock(&local->work_mtx); 848 mutex_lock(&local->mtx);
849 list_add_tail(&wk->list, &local->work_list); 849 list_add_tail(&wk->list, &local->work_list);
850 mutex_unlock(&local->work_mtx); 850 mutex_unlock(&local->mtx);
851 } 851 }
852 852
853 out: 853 out:
@@ -888,9 +888,9 @@ static void ieee80211_work_work(struct work_struct *work)
888 while ((skb = skb_dequeue(&local->work_skb_queue))) 888 while ((skb = skb_dequeue(&local->work_skb_queue)))
889 ieee80211_work_rx_queued_mgmt(local, skb); 889 ieee80211_work_rx_queued_mgmt(local, skb);
890 890
891 ieee80211_recalc_idle(local); 891 mutex_lock(&local->mtx);
892 892
893 mutex_lock(&local->work_mtx); 893 ieee80211_recalc_idle(local);
894 894
895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
896 bool started = wk->started; 896 bool started = wk->started;
@@ -995,20 +995,16 @@ static void ieee80211_work_work(struct work_struct *work)
995 run_again(local, jiffies + HZ/2); 995 run_again(local, jiffies + HZ/2);
996 } 996 }
997 997
998 mutex_lock(&local->scan_mtx);
999
1000 if (list_empty(&local->work_list) && local->scan_req && 998 if (list_empty(&local->work_list) && local->scan_req &&
1001 !local->scanning) 999 !local->scanning)
1002 ieee80211_queue_delayed_work(&local->hw, 1000 ieee80211_queue_delayed_work(&local->hw,
1003 &local->scan_work, 1001 &local->scan_work,
1004 round_jiffies_relative(0)); 1002 round_jiffies_relative(0));
1005 1003
1006 mutex_unlock(&local->scan_mtx);
1007
1008 mutex_unlock(&local->work_mtx);
1009
1010 ieee80211_recalc_idle(local); 1004 ieee80211_recalc_idle(local);
1011 1005
1006 mutex_unlock(&local->mtx);
1007
1012 list_for_each_entry_safe(wk, tmp, &free_work, list) { 1008 list_for_each_entry_safe(wk, tmp, &free_work, list) {
1013 wk->done(wk, NULL); 1009 wk->done(wk, NULL);
1014 list_del(&wk->list); 1010 list_del(&wk->list);
@@ -1035,16 +1031,15 @@ void ieee80211_add_work(struct ieee80211_work *wk)
1035 wk->started = false; 1031 wk->started = false;
1036 1032
1037 local = wk->sdata->local; 1033 local = wk->sdata->local;
1038 mutex_lock(&local->work_mtx); 1034 mutex_lock(&local->mtx);
1039 list_add_tail(&wk->list, &local->work_list); 1035 list_add_tail(&wk->list, &local->work_list);
1040 mutex_unlock(&local->work_mtx); 1036 mutex_unlock(&local->mtx);
1041 1037
1042 ieee80211_queue_work(&local->hw, &local->work_work); 1038 ieee80211_queue_work(&local->hw, &local->work_work);
1043} 1039}
1044 1040
1045void ieee80211_work_init(struct ieee80211_local *local) 1041void ieee80211_work_init(struct ieee80211_local *local)
1046{ 1042{
1047 mutex_init(&local->work_mtx);
1048 INIT_LIST_HEAD(&local->work_list); 1043 INIT_LIST_HEAD(&local->work_list);
1049 setup_timer(&local->work_timer, ieee80211_work_timer, 1044 setup_timer(&local->work_timer, ieee80211_work_timer,
1050 (unsigned long)local); 1045 (unsigned long)local);
@@ -1057,7 +1052,7 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1057 struct ieee80211_local *local = sdata->local; 1052 struct ieee80211_local *local = sdata->local;
1058 struct ieee80211_work *wk; 1053 struct ieee80211_work *wk;
1059 1054
1060 mutex_lock(&local->work_mtx); 1055 mutex_lock(&local->mtx);
1061 list_for_each_entry(wk, &local->work_list, list) { 1056 list_for_each_entry(wk, &local->work_list, list) {
1062 if (wk->sdata != sdata) 1057 if (wk->sdata != sdata)
1063 continue; 1058 continue;
@@ -1065,19 +1060,19 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1065 wk->started = true; 1060 wk->started = true;
1066 wk->timeout = jiffies; 1061 wk->timeout = jiffies;
1067 } 1062 }
1068 mutex_unlock(&local->work_mtx); 1063 mutex_unlock(&local->mtx);
1069 1064
1070 /* run cleanups etc. */ 1065 /* run cleanups etc. */
1071 ieee80211_work_work(&local->work_work); 1066 ieee80211_work_work(&local->work_work);
1072 1067
1073 mutex_lock(&local->work_mtx); 1068 mutex_lock(&local->mtx);
1074 list_for_each_entry(wk, &local->work_list, list) { 1069 list_for_each_entry(wk, &local->work_list, list) {
1075 if (wk->sdata != sdata) 1070 if (wk->sdata != sdata)
1076 continue; 1071 continue;
1077 WARN_ON(1); 1072 WARN_ON(1);
1078 break; 1073 break;
1079 } 1074 }
1080 mutex_unlock(&local->work_mtx); 1075 mutex_unlock(&local->mtx);
1081} 1076}
1082 1077
1083ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, 1078ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
@@ -1163,7 +1158,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1163 struct ieee80211_work *wk, *tmp; 1158 struct ieee80211_work *wk, *tmp;
1164 bool found = false; 1159 bool found = false;
1165 1160
1166 mutex_lock(&local->work_mtx); 1161 mutex_lock(&local->mtx);
1167 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 1162 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
1168 if ((unsigned long) wk == cookie) { 1163 if ((unsigned long) wk == cookie) {
1169 wk->timeout = jiffies; 1164 wk->timeout = jiffies;
@@ -1171,7 +1166,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1171 break; 1166 break;
1172 } 1167 }
1173 } 1168 }
1174 mutex_unlock(&local->work_mtx); 1169 mutex_unlock(&local->mtx);
1175 1170
1176 if (!found) 1171 if (!found)
1177 return -ENOENT; 1172 return -ENOENT;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 8d59d27d887..43882b36da5 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
36 int tail; 36 int tail;
37 37
38 hdr = (struct ieee80211_hdr *)skb->data; 38 hdr = (struct ieee80211_hdr *)skb->data;
39 if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || 39 if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
40 !ieee80211_is_data_present(hdr->frame_control)) 40 skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control))
41 return TX_CONTINUE; 41 return TX_CONTINUE;
42 42
43 hdrlen = ieee80211_hdrlen(hdr->frame_control); 43 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
94 if (status->flag & RX_FLAG_MMIC_STRIPPED) 94 if (status->flag & RX_FLAG_MMIC_STRIPPED)
95 return RX_CONTINUE; 95 return RX_CONTINUE;
96 96
97 if (!rx->key || rx->key->conf.alg != ALG_TKIP || 97 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
98 !ieee80211_has_protected(hdr->frame_control) || 98 !ieee80211_has_protected(hdr->frame_control) ||
99 !ieee80211_is_data_present(hdr->frame_control)) 99 !ieee80211_is_data_present(hdr->frame_control))
100 return RX_CONTINUE; 100 return RX_CONTINUE;
@@ -221,19 +221,13 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
221 if (!rx->sta || skb->len - hdrlen < 12) 221 if (!rx->sta || skb->len - hdrlen < 12)
222 return RX_DROP_UNUSABLE; 222 return RX_DROP_UNUSABLE;
223 223
224 if (status->flag & RX_FLAG_DECRYPTED) { 224 /*
225 if (status->flag & RX_FLAG_IV_STRIPPED) { 225 * Let TKIP code verify IV, but skip decryption.
226 /* 226 * In the case where hardware checks the IV as well,
227 * Hardware took care of all processing, including 227 * we don't even get here, see ieee80211_rx_h_decrypt()
228 * replay protection, and stripped the ICV/IV so 228 */
229 * we cannot do any checks here. 229 if (status->flag & RX_FLAG_DECRYPTED)
230 */
231 return RX_CONTINUE;
232 }
233
234 /* let TKIP code verify IV, but skip decryption */
235 hwaccel = 1; 230 hwaccel = 1;
236 }
237 231
238 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, 232 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
239 key, skb->data + hdrlen, 233 key, skb->data + hdrlen,
@@ -447,10 +441,6 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
447 if (!rx->sta || data_len < 0) 441 if (!rx->sta || data_len < 0)
448 return RX_DROP_UNUSABLE; 442 return RX_DROP_UNUSABLE;
449 443
450 if ((status->flag & RX_FLAG_DECRYPTED) &&
451 (status->flag & RX_FLAG_IV_STRIPPED))
452 return RX_CONTINUE;
453
454 ccmp_hdr2pn(pn, skb->data + hdrlen); 444 ccmp_hdr2pn(pn, skb->data + hdrlen);
455 445
456 queue = ieee80211_is_mgmt(hdr->frame_control) ? 446 queue = ieee80211_is_mgmt(hdr->frame_control) ?
@@ -564,10 +554,6 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
564 if (!ieee80211_is_mgmt(hdr->frame_control)) 554 if (!ieee80211_is_mgmt(hdr->frame_control))
565 return RX_CONTINUE; 555 return RX_CONTINUE;
566 556
567 if ((status->flag & RX_FLAG_DECRYPTED) &&
568 (status->flag & RX_FLAG_IV_STRIPPED))
569 return RX_CONTINUE;
570
571 if (skb->len < 24 + sizeof(*mmie)) 557 if (skb->len < 24 + sizeof(*mmie))
572 return RX_DROP_UNUSABLE; 558 return RX_DROP_UNUSABLE;
573 559
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4c2f89df5cc..0c043b6ce65 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -40,6 +40,7 @@
40#include <net/udp.h> 40#include <net/udp.h>
41#include <net/icmp.h> /* for icmp_send */ 41#include <net/icmp.h> /* for icmp_send */
42#include <net/route.h> 42#include <net/route.h>
43#include <net/ip6_checksum.h>
43 44
44#include <linux/netfilter.h> 45#include <linux/netfilter.h>
45#include <linux/netfilter_ipv4.h> 46#include <linux/netfilter_ipv4.h>
@@ -637,10 +638,12 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
637 } 638 }
638 639
639 /* And finally the ICMP checksum */ 640 /* And finally the ICMP checksum */
640 icmph->icmp6_cksum = 0; 641 icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
641 /* TODO IPv6: is this correct for ICMPv6? */ 642 skb->len - icmp_offset,
642 ip_vs_checksum_complete(skb, icmp_offset); 643 IPPROTO_ICMPV6, 0);
643 skb->ip_summed = CHECKSUM_UNNECESSARY; 644 skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset;
645 skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
646 skb->ip_summed = CHECKSUM_PARTIAL;
644 647
645 if (inout) 648 if (inout)
646 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 649 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
@@ -1381,8 +1384,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1381 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1384 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1382 cp->protocol == IPPROTO_SCTP) { 1385 cp->protocol == IPPROTO_SCTP) {
1383 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1386 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1384 (atomic_read(&cp->in_pkts) % 1387 (pkts % sysctl_ip_vs_sync_threshold[1]
1385 sysctl_ip_vs_sync_threshold[1]
1386 == sysctl_ip_vs_sync_threshold[0])) || 1388 == sysctl_ip_vs_sync_threshold[0])) ||
1387 (cp->old_state != cp->state && 1389 (cp->old_state != cp->state &&
1388 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1390 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
@@ -1393,7 +1395,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1393 } 1395 }
1394 } 1396 }
1395 1397
1396 if (af == AF_INET && 1398 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1399 else if (af == AF_INET &&
1397 (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1400 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1398 (((cp->protocol != IPPROTO_TCP || 1401 (((cp->protocol != IPPROTO_TCP ||
1399 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1402 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
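
The ip_vs_nat_icmp_v6() hunk above switches from computing a full checksum to the standard ICMPv6 method: seed the checksum field with the negated pseudo-header sum and let CHECKSUM_PARTIAL completion (hardware offload or skb_checksum_help()) fold in the payload from csum_start. A minimal sketch of that setup, assuming a linear skb whose ICMPv6 header sits icmp_offset bytes past the network header:

#include <linux/stddef.h>
#include <linux/skbuff.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <net/ip6_checksum.h>

/* Sketch only: prepare an ICMPv6 checksum for partial-checksum completion. */
static void icmpv6_csum_prepare(struct sk_buff *skb, struct ipv6hdr *iph,
                                unsigned int icmp_offset)
{
        struct icmp6hdr *icmph =
                (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset);

        /* negated pseudo-header sum goes into the header field ... */
        icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
                                              skb->len - icmp_offset,
                                              IPPROTO_ICMPV6, 0);
        /* ... and the payload sum is added later, starting at csum_start
         * and stored at csum_start + csum_offset. */
        skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset;
        skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
        skb->ip_summed = CHECKSUM_PARTIAL;
}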
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 0f0c079c422..ca8ec8c4f31 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -61,7 +61,7 @@ static DEFINE_RWLOCK(__ip_vs_svc_lock);
61static DEFINE_RWLOCK(__ip_vs_rs_lock); 61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62 62
63/* lock for state and timeout tables */ 63/* lock for state and timeout tables */
64static DEFINE_RWLOCK(__ip_vs_securetcp_lock); 64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65 65
66/* lock for drop entry handling */ 66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); 67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
@@ -204,7 +204,7 @@ static void update_defense_level(void)
204 spin_unlock(&__ip_vs_droppacket_lock); 204 spin_unlock(&__ip_vs_droppacket_lock);
205 205
206 /* secure_tcp */ 206 /* secure_tcp */
207 write_lock(&__ip_vs_securetcp_lock); 207 spin_lock(&ip_vs_securetcp_lock);
208 switch (sysctl_ip_vs_secure_tcp) { 208 switch (sysctl_ip_vs_secure_tcp) {
209 case 0: 209 case 0:
210 if (old_secure_tcp >= 2) 210 if (old_secure_tcp >= 2)
@@ -238,7 +238,7 @@ static void update_defense_level(void)
238 old_secure_tcp = sysctl_ip_vs_secure_tcp; 238 old_secure_tcp = sysctl_ip_vs_secure_tcp;
239 if (to_change >= 0) 239 if (to_change >= 0)
240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
241 write_unlock(&__ip_vs_securetcp_lock); 241 spin_unlock(&ip_vs_securetcp_lock);
242 242
243 local_bh_enable(); 243 local_bh_enable();
244} 244}
@@ -843,7 +843,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843 return -EINVAL; 843 return -EINVAL;
844 } 844 }
845 845
846 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); 846 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
847 if (dest == NULL) { 847 if (dest == NULL) {
848 pr_err("%s(): no memory.\n", __func__); 848 pr_err("%s(): no memory.\n", __func__);
849 return -ENOMEM; 849 return -ENOMEM;
@@ -1177,7 +1177,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1177 } 1177 }
1178#endif 1178#endif
1179 1179
1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); 1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1181 if (svc == NULL) { 1181 if (svc == NULL) {
1182 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1182 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1183 ret = -ENOMEM; 1183 ret = -ENOMEM;
@@ -2155,7 +2155,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2155 if (cmd != IP_VS_SO_SET_ADD 2155 if (cmd != IP_VS_SO_SET_ADD
2156 && (svc == NULL || svc->protocol != usvc.protocol)) { 2156 && (svc == NULL || svc->protocol != usvc.protocol)) {
2157 ret = -ESRCH; 2157 ret = -ESRCH;
2158 goto out_unlock; 2158 goto out_drop_service;
2159 } 2159 }
2160 2160
2161 switch (cmd) { 2161 switch (cmd) {
@@ -2189,6 +2189,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2189 ret = -EINVAL; 2189 ret = -EINVAL;
2190 } 2190 }
2191 2191
2192out_drop_service:
2192 if (svc) 2193 if (svc)
2193 ip_vs_service_put(svc); 2194 ip_vs_service_put(svc);
2194 2195
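
The two kzalloc() conversions above (ip_vs_new_dest() and ip_vs_add_service()) run in process context on the configuration path, hold no spinlocks, and may therefore sleep, which is what makes GFP_KERNEL the appropriate flag rather than GFP_ATOMIC. A minimal sketch of the distinction:

#include <linux/slab.h>
#include <linux/types.h>

/* Sketch only: GFP_KERNEL may block and reclaim memory, so it succeeds far
 * more reliably; GFP_ATOMIC is reserved for contexts that cannot sleep. */
static void *config_alloc(size_t size, bool may_sleep)
{
        return kzalloc(size, may_sleep ? GFP_KERNEL : GFP_ATOMIC);
}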
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index bbc1ac79595..727e45b6695 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,7 +35,7 @@
35static LIST_HEAD(ip_vs_schedulers); 35static LIST_HEAD(ip_vs_schedulers);
36 36
37/* lock for service table */ 37/* lock for service table */
38static DEFINE_RWLOCK(__ip_vs_sched_lock); 38static DEFINE_SPINLOCK(ip_vs_sched_lock);
39 39
40 40
41/* 41/*
@@ -108,7 +108,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
108 108
109 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); 109 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
110 110
111 read_lock_bh(&__ip_vs_sched_lock); 111 spin_lock_bh(&ip_vs_sched_lock);
112 112
113 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 113 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
114 /* 114 /*
@@ -122,14 +122,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
122 } 122 }
123 if (strcmp(sched_name, sched->name)==0) { 123 if (strcmp(sched_name, sched->name)==0) {
124 /* HIT */ 124 /* HIT */
125 read_unlock_bh(&__ip_vs_sched_lock); 125 spin_unlock_bh(&ip_vs_sched_lock);
126 return sched; 126 return sched;
127 } 127 }
128 if (sched->module) 128 if (sched->module)
129 module_put(sched->module); 129 module_put(sched->module);
130 } 130 }
131 131
132 read_unlock_bh(&__ip_vs_sched_lock); 132 spin_unlock_bh(&ip_vs_sched_lock);
133 return NULL; 133 return NULL;
134} 134}
135 135
@@ -184,10 +184,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
184 /* increase the module use count */ 184 /* increase the module use count */
185 ip_vs_use_count_inc(); 185 ip_vs_use_count_inc();
186 186
187 write_lock_bh(&__ip_vs_sched_lock); 187 spin_lock_bh(&ip_vs_sched_lock);
188 188
189 if (!list_empty(&scheduler->n_list)) { 189 if (!list_empty(&scheduler->n_list)) {
190 write_unlock_bh(&__ip_vs_sched_lock); 190 spin_unlock_bh(&ip_vs_sched_lock);
191 ip_vs_use_count_dec(); 191 ip_vs_use_count_dec();
192 pr_err("%s(): [%s] scheduler already linked\n", 192 pr_err("%s(): [%s] scheduler already linked\n",
193 __func__, scheduler->name); 193 __func__, scheduler->name);
@@ -200,7 +200,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
200 */ 200 */
201 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 201 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
202 if (strcmp(scheduler->name, sched->name) == 0) { 202 if (strcmp(scheduler->name, sched->name) == 0) {
203 write_unlock_bh(&__ip_vs_sched_lock); 203 spin_unlock_bh(&ip_vs_sched_lock);
204 ip_vs_use_count_dec(); 204 ip_vs_use_count_dec();
205 pr_err("%s(): [%s] scheduler already existed " 205 pr_err("%s(): [%s] scheduler already existed "
206 "in the system\n", __func__, scheduler->name); 206 "in the system\n", __func__, scheduler->name);
@@ -211,7 +211,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
211 * Add it into the d-linked scheduler list 211 * Add it into the d-linked scheduler list
212 */ 212 */
213 list_add(&scheduler->n_list, &ip_vs_schedulers); 213 list_add(&scheduler->n_list, &ip_vs_schedulers);
214 write_unlock_bh(&__ip_vs_sched_lock); 214 spin_unlock_bh(&ip_vs_sched_lock);
215 215
216 pr_info("[%s] scheduler registered.\n", scheduler->name); 216 pr_info("[%s] scheduler registered.\n", scheduler->name);
217 217
@@ -229,9 +229,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
229 return -EINVAL; 229 return -EINVAL;
230 } 230 }
231 231
232 write_lock_bh(&__ip_vs_sched_lock); 232 spin_lock_bh(&ip_vs_sched_lock);
233 if (list_empty(&scheduler->n_list)) { 233 if (list_empty(&scheduler->n_list)) {
234 write_unlock_bh(&__ip_vs_sched_lock); 234 spin_unlock_bh(&ip_vs_sched_lock);
235 pr_err("%s(): [%s] scheduler is not in the list. failed\n", 235 pr_err("%s(): [%s] scheduler is not in the list. failed\n",
236 __func__, scheduler->name); 236 __func__, scheduler->name);
237 return -EINVAL; 237 return -EINVAL;
@@ -241,7 +241,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
241 * Remove it from the d-linked scheduler list 241 * Remove it from the d-linked scheduler list
242 */ 242 */
243 list_del(&scheduler->n_list); 243 list_del(&scheduler->n_list);
244 write_unlock_bh(&__ip_vs_sched_lock); 244 spin_unlock_bh(&ip_vs_sched_lock);
245 245
246 /* decrease the module use count */ 246 /* decrease the module use count */
247 ip_vs_use_count_dec(); 247 ip_vs_use_count_dec();
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b46a8390896..9228ee0dc11 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -448,6 +448,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
448{ 448{
449 __be16 _ports[2], *ports; 449 __be16 _ports[2], *ports;
450 u8 nexthdr; 450 u8 nexthdr;
451 int poff;
451 452
452 memset(dst, 0, sizeof(*dst)); 453 memset(dst, 0, sizeof(*dst));
453 454
@@ -492,19 +493,13 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
492 return 0; 493 return 0;
493 } 494 }
494 495
495 switch (nexthdr) { 496 poff = proto_ports_offset(nexthdr);
496 case IPPROTO_TCP: 497 if (poff >= 0) {
497 case IPPROTO_UDP: 498 ports = skb_header_pointer(skb, protoff + poff, sizeof(_ports),
498 case IPPROTO_UDPLITE:
499 case IPPROTO_SCTP:
500 case IPPROTO_DCCP:
501 ports = skb_header_pointer(skb, protoff, sizeof(_ports),
502 &_ports); 499 &_ports);
503 break; 500 } else {
504 default:
505 _ports[0] = _ports[1] = 0; 501 _ports[0] = _ports[1] = 0;
506 ports = _ports; 502 ports = _ports;
507 break;
508 } 503 }
509 if (!ports) 504 if (!ports)
510 return -1; 505 return -1;
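
proto_ports_offset() (added elsewhere in this series, in net/core/utils.c per the diffstat) returns the byte offset of the 16-bit source/destination port pair within the transport header for TCP, UDP, UDP-Lite, SCTP and DCCP, and a negative value for port-less protocols, which is what lets the per-protocol switch above collapse. A minimal sketch of using it; get_ports() is a hypothetical helper and the declaring header is an assumption:

#include <linux/skbuff.h>
#include <net/ip.h>             /* proto_ports_offset(), assumed location */

/* Sketch only: fetch the port pair for any supported transport protocol. */
static int get_ports(const struct sk_buff *skb, u8 proto, unsigned int thoff,
                     __be16 ports[2])
{
        int poff = proto_ports_offset(proto);

        if (poff < 0) {                         /* e.g. ICMP: no ports */
                ports[0] = ports[1] = 0;
                return 0;
        }
        /* copes with non-linear skbs, unlike a bare pointer dereference */
        return skb_copy_bits(skb, thoff + poff, ports, 2 * sizeof(__be16));
}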
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9a17f28b125..3616f27b9d4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -488,7 +488,7 @@ retry:
488 skb->dev = dev; 488 skb->dev = dev;
489 skb->priority = sk->sk_priority; 489 skb->priority = sk->sk_priority;
490 skb->mark = sk->sk_mark; 490 skb->mark = sk->sk_mark;
491 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 491 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
492 if (err < 0) 492 if (err < 0)
493 goto out_unlock; 493 goto out_unlock;
494 494
@@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock,
1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); 1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1210 if (err) 1210 if (err)
1211 goto out_free; 1211 goto out_free;
1212 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 1212 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1213 if (err < 0) 1213 if (err < 0)
1214 goto out_free; 1214 goto out_free;
1215 1215
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 73aee7f2fcd..fd95beb72f5 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -251,6 +251,16 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
251 else if (phonet_address_lookup(net, daddr) == 0) { 251 else if (phonet_address_lookup(net, daddr) == 0) {
252 dev = phonet_device_get(net); 252 dev = phonet_device_get(net);
253 skb->pkt_type = PACKET_LOOPBACK; 253 skb->pkt_type = PACKET_LOOPBACK;
254 } else if (pn_sockaddr_get_object(target) == 0) {
255 /* Resource routing (small race until phonet_rcv()) */
256 struct sock *sk = pn_find_sock_by_res(net,
257 target->spn_resource);
258 if (sk) {
259 sock_put(sk);
260 dev = phonet_device_get(net);
261 skb->pkt_type = PACKET_LOOPBACK;
262 } else
263 dev = phonet_route_output(net, daddr);
254 } else 264 } else
255 dev = phonet_route_output(net, daddr); 265 dev = phonet_route_output(net, daddr);
256 266
@@ -383,6 +393,13 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
383 goto out; 393 goto out;
384 } 394 }
385 395
396 /* resource routing */
397 if (pn_sockaddr_get_object(&sa) == 0) {
398 struct sock *sk = pn_find_sock_by_res(net, sa.spn_resource);
399 if (sk)
400 return sk_receive_skb(sk, skb, 0);
401 }
402
386 /* check if we are the destination */ 403 /* check if we are the destination */
387 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) { 404 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) {
388 /* Phonet packet input */ 405 /* Phonet packet input */
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 1bd38db4fe1..2f032381bd4 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -52,6 +52,19 @@ static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg)
52 answ = skb ? skb->len : 0; 52 answ = skb ? skb->len : 0;
53 release_sock(sk); 53 release_sock(sk);
54 return put_user(answ, (int __user *)arg); 54 return put_user(answ, (int __user *)arg);
55
56 case SIOCPNADDRESOURCE:
57 case SIOCPNDELRESOURCE: {
58 u32 res;
59 if (get_user(res, (u32 __user *)arg))
60 return -EFAULT;
61 if (res >= 256)
62 return -EINVAL;
63 if (cmd == SIOCPNADDRESOURCE)
64 return pn_sock_bind_res(sk, res);
65 else
66 return pn_sock_unbind_res(sk, res);
67 }
55 } 68 }
56 69
57 return -ENOIOCTLCMD; 70 return -ENOIOCTLCMD;
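
The new SIOCPNADDRESOURCE/SIOCPNDELRESOURCE ioctls let a privileged Phonet datagram socket claim a resource number (0-255), so that requests addressed to object 0 with that resource are routed to it (see the af_phonet.c and socket.c hunks). A minimal userspace sketch, assuming AF_PHONET is exposed by the libc headers and the caller has CAP_SYS_ADMIN:

#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/phonet.h>

int main(void)
{
        uint32_t res = 0x10;    /* hypothetical resource number */
        int fd = socket(AF_PHONET, SOCK_DGRAM, PN_PROTO_PHONET);

        if (fd < 0 || ioctl(fd, SIOCPNADDRESOURCE, &res) < 0) {
                perror("phonet resource bind");  /* needs CAP_SYS_ADMIN */
                return 1;
        }
        /* ... recvfrom() now also delivers requests sent to object 0,
         *     resource 0x10, on this node ... */
        ioctl(fd, SIOCPNDELRESOURCE, &res);
        return 0;
}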
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index b2a3ae6cad7..d0e7eb24c8b 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -620,6 +620,28 @@ drop:
620 return err; 620 return err;
621} 621}
622 622
623static int pipe_do_remove(struct sock *sk)
624{
625 struct pep_sock *pn = pep_sk(sk);
626 struct pnpipehdr *ph;
627 struct sk_buff *skb;
628
629 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL);
630 if (!skb)
631 return -ENOMEM;
632
633 skb_reserve(skb, MAX_PNPIPE_HEADER);
634 __skb_push(skb, sizeof(*ph));
635 skb_reset_transport_header(skb);
636 ph = pnp_hdr(skb);
637 ph->utid = 0;
638 ph->message_id = PNS_PIPE_REMOVE_REQ;
639 ph->pipe_handle = pn->pipe_handle;
640 ph->data[0] = PAD;
641
642 return pn_skb_send(sk, skb, &pipe_srv);
643}
644
623/* associated socket ceases to exist */ 645/* associated socket ceases to exist */
624static void pep_sock_close(struct sock *sk, long timeout) 646static void pep_sock_close(struct sock *sk, long timeout)
625{ 647{
@@ -638,7 +660,10 @@ static void pep_sock_close(struct sock *sk, long timeout)
638 sk_for_each_safe(sknode, p, n, &pn->ackq) 660 sk_for_each_safe(sknode, p, n, &pn->ackq)
639 sk_del_node_init(sknode); 661 sk_del_node_init(sknode);
640 sk->sk_state = TCP_CLOSE; 662 sk->sk_state = TCP_CLOSE;
641 } 663 } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
664 /* Forcefully remove dangling Phonet pipe */
665 pipe_do_remove(sk);
666
642 ifindex = pn->ifindex; 667 ifindex = pn->ifindex;
643 pn->ifindex = 0; 668 pn->ifindex = 0;
644 release_sock(sk); 669 release_sock(sk);
@@ -834,6 +859,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
834{ 859{
835 struct pep_sock *pn = pep_sk(sk); 860 struct pep_sock *pn = pep_sk(sk);
836 struct pnpipehdr *ph; 861 struct pnpipehdr *ph;
862 int err;
837 863
838 if (pn_flow_safe(pn->tx_fc) && 864 if (pn_flow_safe(pn->tx_fc) &&
839 !atomic_add_unless(&pn->tx_credits, -1, 0)) { 865 !atomic_add_unless(&pn->tx_credits, -1, 0)) {
@@ -852,7 +878,10 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
852 ph->message_id = PNS_PIPE_DATA; 878 ph->message_id = PNS_PIPE_DATA;
853 ph->pipe_handle = pn->pipe_handle; 879 ph->pipe_handle = pn->pipe_handle;
854 880
855 return pn_skb_send(sk, skb, &pipe_srv); 881 err = pn_skb_send(sk, skb, &pipe_srv);
882 if (err && pn_flow_safe(pn->tx_fc))
883 atomic_inc(&pn->tx_credits);
884 return err;
856} 885}
857 886
858static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, 887static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -872,7 +901,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
872 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, 901 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len,
873 flags & MSG_DONTWAIT, &err); 902 flags & MSG_DONTWAIT, &err);
874 if (!skb) 903 if (!skb)
875 return -ENOBUFS; 904 return err;
876 905
877 skb_reserve(skb, MAX_PHONET_HEADER + 3); 906 skb_reserve(skb, MAX_PHONET_HEADER + 3);
878 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 907 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
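
The pipe_skb_send() change above closes a credit leak: one flow-control credit is consumed before pn_skb_send(), so a failed send must give it back. A minimal sketch of the pattern in isolation; send_with_credit() is a hypothetical helper:

#include <linux/atomic.h>
#include <linux/errno.h>

/* Sketch only: consume one credit if any remain, and return it if the send
 * fails so the credit count never drifts. */
static int send_with_credit(atomic_t *credits, int (*xmit)(void *), void *pkt)
{
        int err;

        if (!atomic_add_unless(credits, -1, 0))
                return -ENOBUFS;                /* flow-controlled */

        err = xmit(pkt);
        if (err)
                atomic_inc(credits);            /* undo: nothing was sent */
        return err;
}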
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index b18e48fae97..947038ddd04 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -292,8 +292,7 @@ static void phonet_route_autodel(struct net_device *dev)
292 if (bitmap_empty(deleted, 64)) 292 if (bitmap_empty(deleted, 64))
293 return; /* short-circuit RCU */ 293 return; /* short-circuit RCU */
294 synchronize_rcu(); 294 synchronize_rcu();
295 for (i = find_first_bit(deleted, 64); i < 64; 295 for_each_set_bit(i, deleted, 64) {
296 i = find_next_bit(deleted, 64, i + 1)) {
297 rtm_phonet_notify(RTM_DELROUTE, dev, i); 296 rtm_phonet_notify(RTM_DELROUTE, dev, i);
298 dev_put(dev); 297 dev_put(dev);
299 } 298 }
@@ -374,6 +373,7 @@ int __init phonet_device_init(void)
374 if (err) 373 if (err)
375 return err; 374 return err;
376 375
376 proc_net_fops_create(&init_net, "pnresource", 0, &pn_res_seq_fops);
377 register_netdevice_notifier(&phonet_device_notifier); 377 register_netdevice_notifier(&phonet_device_notifier);
378 err = phonet_netlink_register(); 378 err = phonet_netlink_register();
379 if (err) 379 if (err)
@@ -386,6 +386,7 @@ void phonet_device_exit(void)
386 rtnl_unregister_all(PF_PHONET); 386 rtnl_unregister_all(PF_PHONET);
387 unregister_netdevice_notifier(&phonet_device_notifier); 387 unregister_netdevice_notifier(&phonet_device_notifier);
388 unregister_pernet_device(&phonet_net_ops); 388 unregister_pernet_device(&phonet_net_ops);
389 proc_net_remove(&init_net, "pnresource");
389} 390}
390 391
391int phonet_route_add(struct net_device *dev, u8 daddr) 392int phonet_route_add(struct net_device *dev, u8 daddr)
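
for_each_set_bit() is the idiomatic replacement for the open-coded find_first_bit()/find_next_bit() loop it removes above: it visits only the set bit positions of a bitmap, up to the given size. A minimal sketch over a 64-entry bitmap like the one phonet_route_autodel() uses:

#include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/kernel.h>

/* Sketch only: report every route index whose bit is set in 'deleted'. */
static void log_deleted_routes(const unsigned long *deleted)
{
        unsigned int i;

        for_each_set_bit(i, deleted, 64)
                pr_debug("phonet route %u removed\n", i);
}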
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 6e9848bf037..aca8fba099e 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -158,6 +158,7 @@ void pn_sock_unhash(struct sock *sk)
158 spin_lock_bh(&pnsocks.lock); 158 spin_lock_bh(&pnsocks.lock);
159 sk_del_node_init(sk); 159 sk_del_node_init(sk);
160 spin_unlock_bh(&pnsocks.lock); 160 spin_unlock_bh(&pnsocks.lock);
161 pn_sock_unbind_all_res(sk);
161} 162}
162EXPORT_SYMBOL(pn_sock_unhash); 163EXPORT_SYMBOL(pn_sock_unhash);
163 164
@@ -281,7 +282,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
281 if (!mask && sk->sk_state == TCP_CLOSE_WAIT) 282 if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
282 return POLLHUP; 283 return POLLHUP;
283 284
284 if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits)) 285 if (sk->sk_state == TCP_ESTABLISHED &&
286 atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
287 atomic_read(&pn->tx_credits))
285 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 288 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
286 289
287 return mask; 290 return mask;
@@ -563,3 +566,188 @@ const struct file_operations pn_sock_seq_fops = {
563 .release = seq_release_net, 566 .release = seq_release_net,
564}; 567};
565#endif 568#endif
569
570static struct {
571 struct sock *sk[256];
572} pnres;
573
574/*
575 * Find and hold socket based on resource.
576 */
577struct sock *pn_find_sock_by_res(struct net *net, u8 res)
578{
579 struct sock *sk;
580
581 if (!net_eq(net, &init_net))
582 return NULL;
583
584 rcu_read_lock();
585 sk = rcu_dereference(pnres.sk[res]);
586 if (sk)
587 sock_hold(sk);
588 rcu_read_unlock();
589 return sk;
590}
591
592static DEFINE_MUTEX(resource_mutex);
593
594int pn_sock_bind_res(struct sock *sk, u8 res)
595{
596 int ret = -EADDRINUSE;
597
598 if (!net_eq(sock_net(sk), &init_net))
599 return -ENOIOCTLCMD;
600 if (!capable(CAP_SYS_ADMIN))
601 return -EPERM;
602 if (pn_socket_autobind(sk->sk_socket))
603 return -EAGAIN;
604
605 mutex_lock(&resource_mutex);
606 if (pnres.sk[res] == NULL) {
607 sock_hold(sk);
608 rcu_assign_pointer(pnres.sk[res], sk);
609 ret = 0;
610 }
611 mutex_unlock(&resource_mutex);
612 return ret;
613}
614
615int pn_sock_unbind_res(struct sock *sk, u8 res)
616{
617 int ret = -ENOENT;
618
619 if (!capable(CAP_SYS_ADMIN))
620 return -EPERM;
621
622 mutex_lock(&resource_mutex);
623 if (pnres.sk[res] == sk) {
624 rcu_assign_pointer(pnres.sk[res], NULL);
625 ret = 0;
626 }
627 mutex_unlock(&resource_mutex);
628
629 if (ret == 0) {
630 synchronize_rcu();
631 sock_put(sk);
632 }
633 return ret;
634}
635
636void pn_sock_unbind_all_res(struct sock *sk)
637{
638 unsigned res, match = 0;
639
640 mutex_lock(&resource_mutex);
641 for (res = 0; res < 256; res++) {
642 if (pnres.sk[res] == sk) {
643 rcu_assign_pointer(pnres.sk[res], NULL);
644 match++;
645 }
646 }
647 mutex_unlock(&resource_mutex);
648
649 if (match == 0)
650 return;
651 synchronize_rcu();
652 while (match > 0) {
653 sock_put(sk);
654 match--;
655 }
656}
657
658#ifdef CONFIG_PROC_FS
659static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos)
660{
661 struct net *net = seq_file_net(seq);
662 unsigned i;
663
664 if (!net_eq(net, &init_net))
665 return NULL;
666
667 for (i = 0; i < 256; i++) {
668 if (pnres.sk[i] == NULL)
669 continue;
670 if (!pos)
671 return pnres.sk + i;
672 pos--;
673 }
674 return NULL;
675}
676
677static struct sock **pn_res_get_next(struct seq_file *seq, struct sock **sk)
678{
679 struct net *net = seq_file_net(seq);
680 unsigned i;
681
682 BUG_ON(!net_eq(net, &init_net));
683
684 for (i = (sk - pnres.sk) + 1; i < 256; i++)
685 if (pnres.sk[i])
686 return pnres.sk + i;
687 return NULL;
688}
689
690static void *pn_res_seq_start(struct seq_file *seq, loff_t *pos)
691 __acquires(resource_mutex)
692{
693 mutex_lock(&resource_mutex);
694 return *pos ? pn_res_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
695}
696
697static void *pn_res_seq_next(struct seq_file *seq, void *v, loff_t *pos)
698{
699 struct sock **sk;
700
701 if (v == SEQ_START_TOKEN)
702 sk = pn_res_get_idx(seq, 0);
703 else
704 sk = pn_res_get_next(seq, v);
705 (*pos)++;
706 return sk;
707}
708
709static void pn_res_seq_stop(struct seq_file *seq, void *v)
710 __releases(resource_mutex)
711{
712 mutex_unlock(&resource_mutex);
713}
714
715static int pn_res_seq_show(struct seq_file *seq, void *v)
716{
717 int len;
718
719 if (v == SEQ_START_TOKEN)
720 seq_printf(seq, "%s%n", "rs uid inode", &len);
721 else {
722 struct sock **psk = v;
723 struct sock *sk = *psk;
724
725 seq_printf(seq, "%02X %5d %lu%n",
726 (int) (psk - pnres.sk), sock_i_uid(sk),
727 sock_i_ino(sk), &len);
728 }
729 seq_printf(seq, "%*s\n", 63 - len, "");
730 return 0;
731}
732
733static const struct seq_operations pn_res_seq_ops = {
734 .start = pn_res_seq_start,
735 .next = pn_res_seq_next,
736 .stop = pn_res_seq_stop,
737 .show = pn_res_seq_show,
738};
739
740static int pn_res_open(struct inode *inode, struct file *file)
741{
742 return seq_open_net(inode, file, &pn_res_seq_ops,
743 sizeof(struct seq_net_private));
744}
745
746const struct file_operations pn_res_seq_fops = {
747 .owner = THIS_MODULE,
748 .open = pn_res_open,
749 .read = seq_read,
750 .llseek = seq_lseek,
751 .release = seq_release_net,
752};
753#endif
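
The resource table added above is an RCU-protected pointer array: writers serialize on resource_mutex and publish with rcu_assign_pointer(), readers take a socket reference inside rcu_read_lock(), and unbinding waits out readers with synchronize_rcu() before dropping the published reference. A minimal sketch of the caller-side contract for the lookup; resource_is_bound() is a hypothetical caller and the declaring header is an assumption:

#include <net/sock.h>
#include <net/phonet/phonet.h>  /* pn_find_sock_by_res(), assumed location */

/* Sketch only: the lookup returns a held socket (or NULL); the caller must
 * release that reference, either directly with sock_put() or by passing it
 * to a consumer such as sk_receive_skb(), which drops it when done. */
static bool resource_is_bound(struct net *net, u8 res)
{
        struct sock *sk = pn_find_sock_by_res(net, res);

        if (!sk)
                return false;
        sock_put(sk);   /* only existence mattered, as in pn_skb_send() */
        return true;
}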
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index aebfecbdb84..bb6ad81b671 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -39,7 +39,15 @@
39#include <net/sock.h> 39#include <net/sock.h>
40 40
41#include "rds.h" 41#include "rds.h"
42#include "rdma.h" 42
43char *rds_str_array(char **array, size_t elements, size_t index)
44{
45 if ((index < elements) && array[index])
46 return array[index];
47 else
48 return "unknown";
49}
50EXPORT_SYMBOL(rds_str_array);
43 51
44/* this is just used for stats gathering :/ */ 52/* this is just used for stats gathering :/ */
45static DEFINE_SPINLOCK(rds_sock_lock); 53static DEFINE_SPINLOCK(rds_sock_lock);
@@ -62,7 +70,7 @@ static int rds_release(struct socket *sock)
62 struct rds_sock *rs; 70 struct rds_sock *rs;
63 unsigned long flags; 71 unsigned long flags;
64 72
65 if (sk == NULL) 73 if (!sk)
66 goto out; 74 goto out;
67 75
68 rs = rds_sk_to_rs(sk); 76 rs = rds_sk_to_rs(sk);
@@ -73,7 +81,15 @@ static int rds_release(struct socket *sock)
73 * with the socket. */ 81 * with the socket. */
74 rds_clear_recv_queue(rs); 82 rds_clear_recv_queue(rs);
75 rds_cong_remove_socket(rs); 83 rds_cong_remove_socket(rs);
84
85 /*
86 * the binding lookup hash uses rcu, we need to
87 * make sure we sychronize_rcu before we free our
88 * entry
89 */
76 rds_remove_bound(rs); 90 rds_remove_bound(rs);
91 synchronize_rcu();
92
77 rds_send_drop_to(rs, NULL); 93 rds_send_drop_to(rs, NULL);
78 rds_rdma_drop_keys(rs); 94 rds_rdma_drop_keys(rs);
79 rds_notify_queue_get(rs, NULL); 95 rds_notify_queue_get(rs, NULL);
@@ -83,6 +99,8 @@ static int rds_release(struct socket *sock)
83 rds_sock_count--; 99 rds_sock_count--;
84 spin_unlock_irqrestore(&rds_sock_lock, flags); 100 spin_unlock_irqrestore(&rds_sock_lock, flags);
85 101
102 rds_trans_put(rs->rs_transport);
103
86 sock->sk = NULL; 104 sock->sk = NULL;
87 sock_put(sk); 105 sock_put(sk);
88out: 106out:
@@ -514,7 +532,7 @@ out:
514 spin_unlock_irqrestore(&rds_sock_lock, flags); 532 spin_unlock_irqrestore(&rds_sock_lock, flags);
515} 533}
516 534
517static void __exit rds_exit(void) 535static void rds_exit(void)
518{ 536{
519 sock_unregister(rds_family_ops.family); 537 sock_unregister(rds_family_ops.family);
520 proto_unregister(&rds_proto); 538 proto_unregister(&rds_proto);
@@ -529,7 +547,7 @@ static void __exit rds_exit(void)
529} 547}
530module_exit(rds_exit); 548module_exit(rds_exit);
531 549
532static int __init rds_init(void) 550static int rds_init(void)
533{ 551{
534 int ret; 552 int ret;
535 553
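
The new synchronize_rcu() in rds_release() above exists because the bind lookup hash (see bind.c below) is now traversed under RCU: the socket must first be unpublished from the hash, then a grace period must pass before its state can be torn down. A minimal kernel-style sketch of that unpublish-then-wait pattern, with a hypothetical ex_entry structure rather than the RDS socket:

    #include <linux/rculist.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct ex_entry {
        struct hlist_node node;
        int key;
    };

    static DEFINE_SPINLOCK(ex_lock);

    static void ex_remove_and_free(struct ex_entry *e)
    {
        /* writers serialize among themselves with a spinlock */
        spin_lock(&ex_lock);
        hlist_del_init_rcu(&e->node);
        spin_unlock(&ex_lock);

        /* wait for every reader that might still see the old chain */
        synchronize_rcu();
        kfree(e);
    }
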
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 5d95fc007f1..2f6b3fcc79f 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -34,45 +34,52 @@
34#include <net/sock.h> 34#include <net/sock.h>
35#include <linux/in.h> 35#include <linux/in.h>
36#include <linux/if_arp.h> 36#include <linux/if_arp.h>
37#include <linux/jhash.h>
37#include "rds.h" 38#include "rds.h"
38 39
39/* 40#define BIND_HASH_SIZE 1024
40 * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't 41static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
41 * particularly zippy.
42 *
43 * This is now called for every incoming frame so we arguably care much more
44 * about it than we used to.
45 */
46static DEFINE_SPINLOCK(rds_bind_lock); 42static DEFINE_SPINLOCK(rds_bind_lock);
47static struct rb_root rds_bind_tree = RB_ROOT;
48 43
49static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port, 44static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
50 struct rds_sock *insert) 45{
46 return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
47 (BIND_HASH_SIZE - 1));
48}
49
50static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
51 struct rds_sock *insert)
51{ 52{
52 struct rb_node **p = &rds_bind_tree.rb_node;
53 struct rb_node *parent = NULL;
54 struct rds_sock *rs; 53 struct rds_sock *rs;
54 struct hlist_node *node;
55 struct hlist_head *head = hash_to_bucket(addr, port);
55 u64 cmp; 56 u64 cmp;
56 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); 57 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
57 58
58 while (*p) { 59 rcu_read_lock();
59 parent = *p; 60 hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
60 rs = rb_entry(parent, struct rds_sock, rs_bound_node);
61
62 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | 61 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
63 be16_to_cpu(rs->rs_bound_port); 62 be16_to_cpu(rs->rs_bound_port);
64 63
65 if (needle < cmp) 64 if (cmp == needle) {
66 p = &(*p)->rb_left; 65 rcu_read_unlock();
67 else if (needle > cmp)
68 p = &(*p)->rb_right;
69 else
70 return rs; 66 return rs;
67 }
71 } 68 }
69 rcu_read_unlock();
72 70
73 if (insert) { 71 if (insert) {
74 rb_link_node(&insert->rs_bound_node, parent, p); 72 /*
75 rb_insert_color(&insert->rs_bound_node, &rds_bind_tree); 73 * make sure our addr and port are set before
74 * we are added to the list, other people
75 * in rcu will find us as soon as the
76 * hlist_add_head_rcu is done
77 */
78 insert->rs_bound_addr = addr;
79 insert->rs_bound_port = port;
80 rds_sock_addref(insert);
81
82 hlist_add_head_rcu(&insert->rs_bound_node, head);
76 } 83 }
77 return NULL; 84 return NULL;
78} 85}
@@ -86,15 +93,13 @@ static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
86struct rds_sock *rds_find_bound(__be32 addr, __be16 port) 93struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
87{ 94{
88 struct rds_sock *rs; 95 struct rds_sock *rs;
89 unsigned long flags;
90 96
91 spin_lock_irqsave(&rds_bind_lock, flags); 97 rs = rds_bind_lookup(addr, port, NULL);
92 rs = rds_bind_tree_walk(addr, port, NULL); 98
93 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) 99 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
94 rds_sock_addref(rs); 100 rds_sock_addref(rs);
95 else 101 else
96 rs = NULL; 102 rs = NULL;
97 spin_unlock_irqrestore(&rds_bind_lock, flags);
98 103
99 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, 104 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
100 ntohs(port)); 105 ntohs(port));
@@ -121,22 +126,15 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
121 do { 126 do {
122 if (rover == 0) 127 if (rover == 0)
123 rover++; 128 rover++;
124 if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) { 129 if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
125 *port = cpu_to_be16(rover); 130 *port = rs->rs_bound_port;
126 ret = 0; 131 ret = 0;
132 rdsdebug("rs %p binding to %pI4:%d\n",
133 rs, &addr, (int)ntohs(*port));
127 break; 134 break;
128 } 135 }
129 } while (rover++ != last); 136 } while (rover++ != last);
130 137
131 if (ret == 0) {
132 rs->rs_bound_addr = addr;
133 rs->rs_bound_port = *port;
134 rds_sock_addref(rs);
135
136 rdsdebug("rs %p binding to %pI4:%d\n",
137 rs, &addr, (int)ntohs(*port));
138 }
139
140 spin_unlock_irqrestore(&rds_bind_lock, flags); 138 spin_unlock_irqrestore(&rds_bind_lock, flags);
141 139
142 return ret; 140 return ret;
@@ -153,7 +151,7 @@ void rds_remove_bound(struct rds_sock *rs)
153 rs, &rs->rs_bound_addr, 151 rs, &rs->rs_bound_addr,
154 ntohs(rs->rs_bound_port)); 152 ntohs(rs->rs_bound_port));
155 153
156 rb_erase(&rs->rs_bound_node, &rds_bind_tree); 154 hlist_del_init_rcu(&rs->rs_bound_node);
157 rds_sock_put(rs); 155 rds_sock_put(rs);
158 rs->rs_bound_addr = 0; 156 rs->rs_bound_addr = 0;
159 } 157 }
@@ -184,7 +182,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
184 goto out; 182 goto out;
185 183
186 trans = rds_trans_get_preferred(sin->sin_addr.s_addr); 184 trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
187 if (trans == NULL) { 185 if (!trans) {
188 ret = -EADDRNOTAVAIL; 186 ret = -EADDRNOTAVAIL;
189 rds_remove_bound(rs); 187 rds_remove_bound(rs);
190 if (printk_ratelimit()) 188 if (printk_ratelimit())
@@ -198,5 +196,9 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
198 196
199out: 197out:
200 release_sock(sk); 198 release_sock(sk);
199
200 /* we might have called rds_remove_bound on error */
201 if (ret)
202 synchronize_rcu();
201 return ret; 203 return ret;
202} 204}
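
The bind.c rewrite above replaces the rb-tree with a 1024-bucket hash table: jhash_2words() mixes the bound address and port, and since BIND_HASH_SIZE is a power of two the bucket is selected by masking with (BIND_HASH_SIZE - 1) instead of a modulo. A standalone sketch of that bucket selection, using a hypothetical table name:

    #include <linux/jhash.h>
    #include <linux/list.h>
    #include <linux/types.h>

    #define EX_HASH_SIZE 1024    /* must remain a power of two for the mask */
    static struct hlist_head ex_hash_table[EX_HASH_SIZE];

    static struct hlist_head *ex_bucket(__be32 addr, __be16 port)
    {
        /* mix both words into one 32-bit hash, then mask down to a bucket */
        u32 hash = jhash_2words((u32)addr, (u32)port, 0);

        return ex_hash_table + (hash & (EX_HASH_SIZE - 1));
    }
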
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 0871a29f078..75ea686f27d 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -141,7 +141,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
141 unsigned long flags; 141 unsigned long flags;
142 142
143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); 143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
144 if (map == NULL) 144 if (!map)
145 return NULL; 145 return NULL;
146 146
147 map->m_addr = addr; 147 map->m_addr = addr;
@@ -159,7 +159,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
159 ret = rds_cong_tree_walk(addr, map); 159 ret = rds_cong_tree_walk(addr, map);
160 spin_unlock_irqrestore(&rds_cong_lock, flags); 160 spin_unlock_irqrestore(&rds_cong_lock, flags);
161 161
162 if (ret == NULL) { 162 if (!ret) {
163 ret = map; 163 ret = map;
164 map = NULL; 164 map = NULL;
165 } 165 }
@@ -205,7 +205,7 @@ int rds_cong_get_maps(struct rds_connection *conn)
205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr); 205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr); 206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
207 207
208 if (conn->c_lcong == NULL || conn->c_fcong == NULL) 208 if (!(conn->c_lcong && conn->c_fcong))
209 return -ENOMEM; 209 return -ENOMEM;
210 210
211 return 0; 211 return 0;
@@ -221,7 +221,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) { 221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (!test_and_set_bit(0, &conn->c_map_queued)) { 222 if (!test_and_set_bit(0, &conn->c_map_queued)) {
223 rds_stats_inc(s_cong_update_queued); 223 rds_stats_inc(s_cong_update_queued);
224 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 224 rds_send_xmit(conn);
225 } 225 }
226 } 226 }
227 227
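
In the cong.c hunk above, test_and_set_bit() on c_map_queued works as a latch: only the caller that flips the bit from 0 to 1 issues the (now direct) rds_send_xmit() for the congestion-map update; the bit is cleared elsewhere once the update has been sent. The general shape of that latch, with hypothetical names:

    #include <linux/bitops.h>

    static unsigned long ex_flags;

    static void ex_do_update(void)
    {
        /* stand-in for the real work, e.g. rds_send_xmit() in the patch */
    }

    static void ex_kick_update(void)
    {
        /* atomic test-and-set: exactly one caller wins the 0 -> 1 transition */
        if (!test_and_set_bit(0, &ex_flags))
            ex_do_update();
    }

    static void ex_update_done(void)
    {
        /* the completion path re-arms the latch */
        clear_bit(0, &ex_flags);
    }
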
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7619b671ca2..870992e08ca 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -37,7 +37,6 @@
37 37
38#include "rds.h" 38#include "rds.h"
39#include "loop.h" 39#include "loop.h"
40#include "rdma.h"
41 40
42#define RDS_CONNECTION_HASH_BITS 12 41#define RDS_CONNECTION_HASH_BITS 12
43#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) 42#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
@@ -63,18 +62,7 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
63 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ 62 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \
64} while (0) 63} while (0)
65 64
66static inline int rds_conn_is_sending(struct rds_connection *conn) 65/* rcu read lock must be held or the connection spinlock */
67{
68 int ret = 0;
69
70 if (!mutex_trylock(&conn->c_send_lock))
71 ret = 1;
72 else
73 mutex_unlock(&conn->c_send_lock);
74
75 return ret;
76}
77
78static struct rds_connection *rds_conn_lookup(struct hlist_head *head, 66static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
79 __be32 laddr, __be32 faddr, 67 __be32 laddr, __be32 faddr,
80 struct rds_transport *trans) 68 struct rds_transport *trans)
@@ -82,7 +70,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
82 struct rds_connection *conn, *ret = NULL; 70 struct rds_connection *conn, *ret = NULL;
83 struct hlist_node *pos; 71 struct hlist_node *pos;
84 72
85 hlist_for_each_entry(conn, pos, head, c_hash_node) { 73 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
86 if (conn->c_faddr == faddr && conn->c_laddr == laddr && 74 if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
87 conn->c_trans == trans) { 75 conn->c_trans == trans) {
88 ret = conn; 76 ret = conn;
@@ -129,10 +117,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
129{ 117{
130 struct rds_connection *conn, *parent = NULL; 118 struct rds_connection *conn, *parent = NULL;
131 struct hlist_head *head = rds_conn_bucket(laddr, faddr); 119 struct hlist_head *head = rds_conn_bucket(laddr, faddr);
120 struct rds_transport *loop_trans;
132 unsigned long flags; 121 unsigned long flags;
133 int ret; 122 int ret;
134 123
135 spin_lock_irqsave(&rds_conn_lock, flags); 124 rcu_read_lock();
136 conn = rds_conn_lookup(head, laddr, faddr, trans); 125 conn = rds_conn_lookup(head, laddr, faddr, trans);
137 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && 126 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
138 !is_outgoing) { 127 !is_outgoing) {
@@ -143,12 +132,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
143 parent = conn; 132 parent = conn;
144 conn = parent->c_passive; 133 conn = parent->c_passive;
145 } 134 }
146 spin_unlock_irqrestore(&rds_conn_lock, flags); 135 rcu_read_unlock();
147 if (conn) 136 if (conn)
148 goto out; 137 goto out;
149 138
150 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 139 conn = kmem_cache_zalloc(rds_conn_slab, gfp);
151 if (conn == NULL) { 140 if (!conn) {
152 conn = ERR_PTR(-ENOMEM); 141 conn = ERR_PTR(-ENOMEM);
153 goto out; 142 goto out;
154 } 143 }
@@ -159,7 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
159 spin_lock_init(&conn->c_lock); 148 spin_lock_init(&conn->c_lock);
160 conn->c_next_tx_seq = 1; 149 conn->c_next_tx_seq = 1;
161 150
162 mutex_init(&conn->c_send_lock); 151 init_waitqueue_head(&conn->c_waitq);
163 INIT_LIST_HEAD(&conn->c_send_queue); 152 INIT_LIST_HEAD(&conn->c_send_queue);
164 INIT_LIST_HEAD(&conn->c_retrans); 153 INIT_LIST_HEAD(&conn->c_retrans);
165 154
@@ -175,7 +164,9 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
175 * can bind to the destination address then we'd rather the messages 164 * can bind to the destination address then we'd rather the messages
176 * flow through loopback rather than either transport. 165 * flow through loopback rather than either transport.
177 */ 166 */
178 if (rds_trans_get_preferred(faddr)) { 167 loop_trans = rds_trans_get_preferred(faddr);
168 if (loop_trans) {
169 rds_trans_put(loop_trans);
179 conn->c_loopback = 1; 170 conn->c_loopback = 1;
180 if (is_outgoing && trans->t_prefer_loopback) { 171 if (is_outgoing && trans->t_prefer_loopback) {
181 /* "outgoing" connection - and the transport 172 /* "outgoing" connection - and the transport
@@ -238,7 +229,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
238 kmem_cache_free(rds_conn_slab, conn); 229 kmem_cache_free(rds_conn_slab, conn);
239 conn = found; 230 conn = found;
240 } else { 231 } else {
241 hlist_add_head(&conn->c_hash_node, head); 232 hlist_add_head_rcu(&conn->c_hash_node, head);
242 rds_cong_add_conn(conn); 233 rds_cong_add_conn(conn);
243 rds_conn_count++; 234 rds_conn_count++;
244 } 235 }
@@ -263,21 +254,91 @@ struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
263} 254}
264EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); 255EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
265 256
257void rds_conn_shutdown(struct rds_connection *conn)
258{
259 /* shut it down unless it's down already */
260 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
261 /*
262 * Quiesce the connection mgmt handlers before we start tearing
263 * things down. We don't hold the mutex for the entire
264 * duration of the shutdown operation, else we may be
265 * deadlocking with the CM handler. Instead, the CM event
266 * handler is supposed to check for state DISCONNECTING
267 */
268 mutex_lock(&conn->c_cm_lock);
269 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
270 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
271 rds_conn_error(conn, "shutdown called in state %d\n",
272 atomic_read(&conn->c_state));
273 mutex_unlock(&conn->c_cm_lock);
274 return;
275 }
276 mutex_unlock(&conn->c_cm_lock);
277
278 wait_event(conn->c_waitq,
279 !test_bit(RDS_IN_XMIT, &conn->c_flags));
280
281 conn->c_trans->conn_shutdown(conn);
282 rds_conn_reset(conn);
283
284 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
285 /* This can happen - eg when we're in the middle of tearing
286 * down the connection, and someone unloads the rds module.
287 * Quite reproducible with loopback connections.
288 * Mostly harmless.
289 */
290 rds_conn_error(conn,
291 "%s: failed to transition to state DOWN, "
292 "current state is %d\n",
293 __func__,
294 atomic_read(&conn->c_state));
295 return;
296 }
297 }
298
299 /* Then reconnect if it's still live.
300 * The passive side of an IB loopback connection is never added
301 * to the conn hash, so we never trigger a reconnect on this
302 * conn - the reconnect is always triggered by the active peer. */
303 cancel_delayed_work_sync(&conn->c_conn_w);
304 rcu_read_lock();
305 if (!hlist_unhashed(&conn->c_hash_node)) {
306 rcu_read_unlock();
307 rds_queue_reconnect(conn);
308 } else {
309 rcu_read_unlock();
310 }
311}
312
313/*
314 * Stop and free a connection.
315 *
316 * This can only be used in very limited circumstances. It assumes that once
317 * the conn has been shutdown that no one else is referencing the connection.
318 * We can only ensure this in the rmmod path in the current code.
319 */
266void rds_conn_destroy(struct rds_connection *conn) 320void rds_conn_destroy(struct rds_connection *conn)
267{ 321{
268 struct rds_message *rm, *rtmp; 322 struct rds_message *rm, *rtmp;
323 unsigned long flags;
269 324
270 rdsdebug("freeing conn %p for %pI4 -> " 325 rdsdebug("freeing conn %p for %pI4 -> "
271 "%pI4\n", conn, &conn->c_laddr, 326 "%pI4\n", conn, &conn->c_laddr,
272 &conn->c_faddr); 327 &conn->c_faddr);
273 328
274 hlist_del_init(&conn->c_hash_node); 329 /* Ensure conn will not be scheduled for reconnect */
330 spin_lock_irq(&rds_conn_lock);
331 hlist_del_init_rcu(&conn->c_hash_node);
332 spin_unlock_irq(&rds_conn_lock);
333 synchronize_rcu();
275 334
276 /* wait for the rds thread to shut it down */ 335 /* shut the connection down */
277 atomic_set(&conn->c_state, RDS_CONN_ERROR); 336 rds_conn_drop(conn);
278 cancel_delayed_work(&conn->c_conn_w); 337 flush_work(&conn->c_down_w);
279 queue_work(rds_wq, &conn->c_down_w); 338
280 flush_workqueue(rds_wq); 339 /* make sure lingering queued work won't try to ref the conn */
340 cancel_delayed_work_sync(&conn->c_send_w);
341 cancel_delayed_work_sync(&conn->c_recv_w);
281 342
282 /* tear down queued messages */ 343 /* tear down queued messages */
283 list_for_each_entry_safe(rm, rtmp, 344 list_for_each_entry_safe(rm, rtmp,
@@ -302,7 +363,9 @@ void rds_conn_destroy(struct rds_connection *conn)
302 BUG_ON(!list_empty(&conn->c_retrans)); 363 BUG_ON(!list_empty(&conn->c_retrans));
303 kmem_cache_free(rds_conn_slab, conn); 364 kmem_cache_free(rds_conn_slab, conn);
304 365
366 spin_lock_irqsave(&rds_conn_lock, flags);
305 rds_conn_count--; 367 rds_conn_count--;
368 spin_unlock_irqrestore(&rds_conn_lock, flags);
306} 369}
307EXPORT_SYMBOL_GPL(rds_conn_destroy); 370EXPORT_SYMBOL_GPL(rds_conn_destroy);
308 371
@@ -316,23 +379,23 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
316 struct list_head *list; 379 struct list_head *list;
317 struct rds_connection *conn; 380 struct rds_connection *conn;
318 struct rds_message *rm; 381 struct rds_message *rm;
319 unsigned long flags;
320 unsigned int total = 0; 382 unsigned int total = 0;
383 unsigned long flags;
321 size_t i; 384 size_t i;
322 385
323 len /= sizeof(struct rds_info_message); 386 len /= sizeof(struct rds_info_message);
324 387
325 spin_lock_irqsave(&rds_conn_lock, flags); 388 rcu_read_lock();
326 389
327 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
328 i++, head++) { 391 i++, head++) {
329 hlist_for_each_entry(conn, pos, head, c_hash_node) { 392 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
330 if (want_send) 393 if (want_send)
331 list = &conn->c_send_queue; 394 list = &conn->c_send_queue;
332 else 395 else
333 list = &conn->c_retrans; 396 list = &conn->c_retrans;
334 397
335 spin_lock(&conn->c_lock); 398 spin_lock_irqsave(&conn->c_lock, flags);
336 399
337 /* XXX too lazy to maintain counts.. */ 400 /* XXX too lazy to maintain counts.. */
338 list_for_each_entry(rm, list, m_conn_item) { 401 list_for_each_entry(rm, list, m_conn_item) {
@@ -343,11 +406,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
343 conn->c_faddr, 0); 406 conn->c_faddr, 0);
344 } 407 }
345 408
346 spin_unlock(&conn->c_lock); 409 spin_unlock_irqrestore(&conn->c_lock, flags);
347 } 410 }
348 } 411 }
349 412 rcu_read_unlock();
350 spin_unlock_irqrestore(&rds_conn_lock, flags);
351 413
352 lens->nr = total; 414 lens->nr = total;
353 lens->each = sizeof(struct rds_info_message); 415 lens->each = sizeof(struct rds_info_message);
@@ -377,19 +439,17 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
377 uint64_t buffer[(item_len + 7) / 8]; 439 uint64_t buffer[(item_len + 7) / 8];
378 struct hlist_head *head; 440 struct hlist_head *head;
379 struct hlist_node *pos; 441 struct hlist_node *pos;
380 struct hlist_node *tmp;
381 struct rds_connection *conn; 442 struct rds_connection *conn;
382 unsigned long flags;
383 size_t i; 443 size_t i;
384 444
385 spin_lock_irqsave(&rds_conn_lock, flags); 445 rcu_read_lock();
386 446
387 lens->nr = 0; 447 lens->nr = 0;
388 lens->each = item_len; 448 lens->each = item_len;
389 449
390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 450 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
391 i++, head++) { 451 i++, head++) {
392 hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { 452 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
393 453
394 /* XXX no c_lock usage.. */ 454 /* XXX no c_lock usage.. */
395 if (!visitor(conn, buffer)) 455 if (!visitor(conn, buffer))
@@ -405,8 +465,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
405 lens->nr++; 465 lens->nr++;
406 } 466 }
407 } 467 }
408 468 rcu_read_unlock();
409 spin_unlock_irqrestore(&rds_conn_lock, flags);
410} 469}
411EXPORT_SYMBOL_GPL(rds_for_each_conn_info); 470EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
412 471
@@ -423,8 +482,8 @@ static int rds_conn_info_visitor(struct rds_connection *conn,
423 sizeof(cinfo->transport)); 482 sizeof(cinfo->transport));
424 cinfo->flags = 0; 483 cinfo->flags = 0;
425 484
426 rds_conn_info_set(cinfo->flags, 485 rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
427 rds_conn_is_sending(conn), SENDING); 486 SENDING);
428 /* XXX Future: return the state rather than these funky bits */ 487 /* XXX Future: return the state rather than these funky bits */
429 rds_conn_info_set(cinfo->flags, 488 rds_conn_info_set(cinfo->flags,
430 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, 489 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
@@ -444,12 +503,12 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
444 sizeof(struct rds_info_connection)); 503 sizeof(struct rds_info_connection));
445} 504}
446 505
447int __init rds_conn_init(void) 506int rds_conn_init(void)
448{ 507{
449 rds_conn_slab = kmem_cache_create("rds_connection", 508 rds_conn_slab = kmem_cache_create("rds_connection",
450 sizeof(struct rds_connection), 509 sizeof(struct rds_connection),
451 0, 0, NULL); 510 0, 0, NULL);
452 if (rds_conn_slab == NULL) 511 if (!rds_conn_slab)
453 return -ENOMEM; 512 return -ENOMEM;
454 513
455 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 514 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
@@ -487,6 +546,18 @@ void rds_conn_drop(struct rds_connection *conn)
487EXPORT_SYMBOL_GPL(rds_conn_drop); 546EXPORT_SYMBOL_GPL(rds_conn_drop);
488 547
489/* 548/*
549 * If the connection is down, trigger a connect. We may have scheduled a
550 * delayed reconnect however - in this case we should not interfere.
551 */
552void rds_conn_connect_if_down(struct rds_connection *conn)
553{
554 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
555 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
556 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
557}
558EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
559
560/*
490 * An error occurred on the connection 561 * An error occurred on the connection
491 */ 562 */
492void 563void
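
The connection.c changes above move the connection hash to RCU: readers walk the buckets under rcu_read_lock() with hlist_for_each_entry_rcu(), while writers still take rds_conn_lock and publish through hlist_add_head_rcu()/hlist_del_init_rcu() followed by synchronize_rcu(). A kernel-style sketch of the read side, using the four-argument iterator of this kernel generation and a hypothetical ex_conn structure; as in the patch, the caller is assumed to hold rcu_read_lock():

    #include <linux/rculist.h>
    #include <linux/types.h>

    struct ex_conn {
        struct hlist_node hash_node;
        u32 laddr, faddr;
    };

    /* Caller must hold rcu_read_lock() (or the writers' lock); the returned
     * entry is only guaranteed to live for that read-side critical section. */
    static struct ex_conn *ex_lookup(struct hlist_head *head, u32 laddr, u32 faddr)
    {
        struct ex_conn *conn;
        struct hlist_node *pos;

        hlist_for_each_entry_rcu(conn, pos, head, hash_node) {
            if (conn->laddr == laddr && conn->faddr == faddr)
                return conn;
        }
        return NULL;
    }
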
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 8f2d6dd7700..b12a3951167 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -53,12 +53,71 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
53module_param(rds_ib_retry_count, int, 0444); 53module_param(rds_ib_retry_count, int, 0444);
54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
55 55
56/*
57 * we have a clumsy combination of RCU and a rwsem protecting this list
58 * because it is used both in the get_mr fast path and while blocking in
59 * the FMR flushing path.
60 */
61DECLARE_RWSEM(rds_ib_devices_lock);
56struct list_head rds_ib_devices; 62struct list_head rds_ib_devices;
57 63
58/* NOTE: if also grabbing ibdev lock, grab this first */ 64/* NOTE: if also grabbing ibdev lock, grab this first */
59DEFINE_SPINLOCK(ib_nodev_conns_lock); 65DEFINE_SPINLOCK(ib_nodev_conns_lock);
60LIST_HEAD(ib_nodev_conns); 66LIST_HEAD(ib_nodev_conns);
61 67
68void rds_ib_nodev_connect(void)
69{
70 struct rds_ib_connection *ic;
71
72 spin_lock(&ib_nodev_conns_lock);
73 list_for_each_entry(ic, &ib_nodev_conns, ib_node)
74 rds_conn_connect_if_down(ic->conn);
75 spin_unlock(&ib_nodev_conns_lock);
76}
77
78void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev)
79{
80 struct rds_ib_connection *ic;
81 unsigned long flags;
82
83 spin_lock_irqsave(&rds_ibdev->spinlock, flags);
84 list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
85 rds_conn_drop(ic->conn);
86 spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
87}
88
89/*
90 * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references
91 * from interrupt context so we push freeing off into a work struct in krdsd.
92 */
93static void rds_ib_dev_free(struct work_struct *work)
94{
95 struct rds_ib_ipaddr *i_ipaddr, *i_next;
96 struct rds_ib_device *rds_ibdev = container_of(work,
97 struct rds_ib_device, free_work);
98
99 if (rds_ibdev->mr_pool)
100 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
101 if (rds_ibdev->mr)
102 ib_dereg_mr(rds_ibdev->mr);
103 if (rds_ibdev->pd)
104 ib_dealloc_pd(rds_ibdev->pd);
105
106 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
107 list_del(&i_ipaddr->list);
108 kfree(i_ipaddr);
109 }
110
111 kfree(rds_ibdev);
112}
113
114void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
115{
116 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0);
117 if (atomic_dec_and_test(&rds_ibdev->refcount))
118 queue_work(rds_wq, &rds_ibdev->free_work);
119}
120
62void rds_ib_add_one(struct ib_device *device) 121void rds_ib_add_one(struct ib_device *device)
63{ 122{
64 struct rds_ib_device *rds_ibdev; 123 struct rds_ib_device *rds_ibdev;
@@ -77,11 +136,14 @@ void rds_ib_add_one(struct ib_device *device)
77 goto free_attr; 136 goto free_attr;
78 } 137 }
79 138
80 rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL); 139 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
140 ibdev_to_node(device));
81 if (!rds_ibdev) 141 if (!rds_ibdev)
82 goto free_attr; 142 goto free_attr;
83 143
84 spin_lock_init(&rds_ibdev->spinlock); 144 spin_lock_init(&rds_ibdev->spinlock);
145 atomic_set(&rds_ibdev->refcount, 1);
146 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
85 147
86 rds_ibdev->max_wrs = dev_attr->max_qp_wr; 148 rds_ibdev->max_wrs = dev_attr->max_qp_wr;
87 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); 149 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
@@ -91,68 +153,107 @@ void rds_ib_add_one(struct ib_device *device)
91 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : 153 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) :
92 fmr_pool_size; 154 fmr_pool_size;
93 155
156 rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
157 rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
158
94 rds_ibdev->dev = device; 159 rds_ibdev->dev = device;
95 rds_ibdev->pd = ib_alloc_pd(device); 160 rds_ibdev->pd = ib_alloc_pd(device);
96 if (IS_ERR(rds_ibdev->pd)) 161 if (IS_ERR(rds_ibdev->pd)) {
97 goto free_dev; 162 rds_ibdev->pd = NULL;
163 goto put_dev;
164 }
98 165
99 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, 166 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE);
100 IB_ACCESS_LOCAL_WRITE); 167 if (IS_ERR(rds_ibdev->mr)) {
101 if (IS_ERR(rds_ibdev->mr)) 168 rds_ibdev->mr = NULL;
102 goto err_pd; 169 goto put_dev;
170 }
103 171
104 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); 172 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev);
105 if (IS_ERR(rds_ibdev->mr_pool)) { 173 if (IS_ERR(rds_ibdev->mr_pool)) {
106 rds_ibdev->mr_pool = NULL; 174 rds_ibdev->mr_pool = NULL;
107 goto err_mr; 175 goto put_dev;
108 } 176 }
109 177
110 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 178 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
111 INIT_LIST_HEAD(&rds_ibdev->conn_list); 179 INIT_LIST_HEAD(&rds_ibdev->conn_list);
112 list_add_tail(&rds_ibdev->list, &rds_ib_devices); 180
181 down_write(&rds_ib_devices_lock);
182 list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
183 up_write(&rds_ib_devices_lock);
184 atomic_inc(&rds_ibdev->refcount);
113 185
114 ib_set_client_data(device, &rds_ib_client, rds_ibdev); 186 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
187 atomic_inc(&rds_ibdev->refcount);
115 188
116 goto free_attr; 189 rds_ib_nodev_connect();
117 190
118err_mr: 191put_dev:
119 ib_dereg_mr(rds_ibdev->mr); 192 rds_ib_dev_put(rds_ibdev);
120err_pd:
121 ib_dealloc_pd(rds_ibdev->pd);
122free_dev:
123 kfree(rds_ibdev);
124free_attr: 193free_attr:
125 kfree(dev_attr); 194 kfree(dev_attr);
126} 195}
127 196
197/*
198 * New connections use this to find the device to associate with the
199 * connection. It's not in the fast path so we're not concerned about the
200 * performance of the IB call. (As of this writing, it uses an interrupt
201 * blocking spinlock to serialize walking a per-device list of all registered
202 * clients.)
203 *
204 * RCU is used to handle incoming connections racing with device teardown.
205 * Rather than use a lock to serialize removal from the client_data and
206 * getting a new reference, we use an RCU grace period. The destruction
207 * path removes the device from client_data and then waits for all RCU
208 * readers to finish.
209 *
210 * A new connection can get NULL from this if its arriving on a
211 * device that is in the process of being removed.
212 */
213struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
214{
215 struct rds_ib_device *rds_ibdev;
216
217 rcu_read_lock();
218 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
219 if (rds_ibdev)
220 atomic_inc(&rds_ibdev->refcount);
221 rcu_read_unlock();
222 return rds_ibdev;
223}
224
225/*
226 * The IB stack is letting us know that a device is going away. This can
227 * happen if the underlying HCA driver is removed or if PCI hotplug is removing
228 * the pci function, for example.
229 *
230 * This can be called at any time and can be racing with any other RDS path.
231 */
128void rds_ib_remove_one(struct ib_device *device) 232void rds_ib_remove_one(struct ib_device *device)
129{ 233{
130 struct rds_ib_device *rds_ibdev; 234 struct rds_ib_device *rds_ibdev;
131 struct rds_ib_ipaddr *i_ipaddr, *i_next;
132 235
133 rds_ibdev = ib_get_client_data(device, &rds_ib_client); 236 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
134 if (!rds_ibdev) 237 if (!rds_ibdev)
135 return; 238 return;
136 239
137 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { 240 rds_ib_dev_shutdown(rds_ibdev);
138 list_del(&i_ipaddr->list);
139 kfree(i_ipaddr);
140 }
141 241
142 rds_ib_destroy_conns(rds_ibdev); 242 /* stop connection attempts from getting a reference to this device. */
243 ib_set_client_data(device, &rds_ib_client, NULL);
143 244
144 if (rds_ibdev->mr_pool) 245 down_write(&rds_ib_devices_lock);
145 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); 246 list_del_rcu(&rds_ibdev->list);
146 247 up_write(&rds_ib_devices_lock);
147 ib_dereg_mr(rds_ibdev->mr);
148
149 while (ib_dealloc_pd(rds_ibdev->pd)) {
150 rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd);
151 msleep(1);
152 }
153 248
154 list_del(&rds_ibdev->list); 249 /*
155 kfree(rds_ibdev); 250 * This synchronize rcu is waiting for readers of both the ib
251 * client data and the devices list to finish before we drop
252 * both of those references.
253 */
254 synchronize_rcu();
255 rds_ib_dev_put(rds_ibdev);
256 rds_ib_dev_put(rds_ibdev);
156} 257}
157 258
158struct ib_client rds_ib_client = { 259struct ib_client rds_ib_client = {
@@ -186,7 +287,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
186 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 287 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
187 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 288 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
188 289
189 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 290 rds_ibdev = ic->rds_ibdev;
190 iinfo->max_send_wr = ic->i_send_ring.w_nr; 291 iinfo->max_send_wr = ic->i_send_ring.w_nr;
191 iinfo->max_recv_wr = ic->i_recv_ring.w_nr; 292 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
192 iinfo->max_send_sge = rds_ibdev->max_sge; 293 iinfo->max_send_sge = rds_ibdev->max_sge;
@@ -248,29 +349,36 @@ static int rds_ib_laddr_check(__be32 addr)
248 return ret; 349 return ret;
249} 350}
250 351
352static void rds_ib_unregister_client(void)
353{
354 ib_unregister_client(&rds_ib_client);
355 /* wait for rds_ib_dev_free() to complete */
356 flush_workqueue(rds_wq);
357}
358
251void rds_ib_exit(void) 359void rds_ib_exit(void)
252{ 360{
253 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 361 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
362 rds_ib_unregister_client();
254 rds_ib_destroy_nodev_conns(); 363 rds_ib_destroy_nodev_conns();
255 ib_unregister_client(&rds_ib_client);
256 rds_ib_sysctl_exit(); 364 rds_ib_sysctl_exit();
257 rds_ib_recv_exit(); 365 rds_ib_recv_exit();
258 rds_trans_unregister(&rds_ib_transport); 366 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit();
259} 368}
260 369
261struct rds_transport rds_ib_transport = { 370struct rds_transport rds_ib_transport = {
262 .laddr_check = rds_ib_laddr_check, 371 .laddr_check = rds_ib_laddr_check,
263 .xmit_complete = rds_ib_xmit_complete, 372 .xmit_complete = rds_ib_xmit_complete,
264 .xmit = rds_ib_xmit, 373 .xmit = rds_ib_xmit,
265 .xmit_cong_map = NULL,
266 .xmit_rdma = rds_ib_xmit_rdma, 374 .xmit_rdma = rds_ib_xmit_rdma,
375 .xmit_atomic = rds_ib_xmit_atomic,
267 .recv = rds_ib_recv, 376 .recv = rds_ib_recv,
268 .conn_alloc = rds_ib_conn_alloc, 377 .conn_alloc = rds_ib_conn_alloc,
269 .conn_free = rds_ib_conn_free, 378 .conn_free = rds_ib_conn_free,
270 .conn_connect = rds_ib_conn_connect, 379 .conn_connect = rds_ib_conn_connect,
271 .conn_shutdown = rds_ib_conn_shutdown, 380 .conn_shutdown = rds_ib_conn_shutdown,
272 .inc_copy_to_user = rds_ib_inc_copy_to_user, 381 .inc_copy_to_user = rds_ib_inc_copy_to_user,
273 .inc_purge = rds_ib_inc_purge,
274 .inc_free = rds_ib_inc_free, 382 .inc_free = rds_ib_inc_free,
275 .cm_initiate_connect = rds_ib_cm_initiate_connect, 383 .cm_initiate_connect = rds_ib_cm_initiate_connect,
276 .cm_handle_connect = rds_ib_cm_handle_connect, 384 .cm_handle_connect = rds_ib_cm_handle_connect,
@@ -286,16 +394,20 @@ struct rds_transport rds_ib_transport = {
286 .t_type = RDS_TRANS_IB 394 .t_type = RDS_TRANS_IB
287}; 395};
288 396
289int __init rds_ib_init(void) 397int rds_ib_init(void)
290{ 398{
291 int ret; 399 int ret;
292 400
293 INIT_LIST_HEAD(&rds_ib_devices); 401 INIT_LIST_HEAD(&rds_ib_devices);
294 402
295 ret = ib_register_client(&rds_ib_client); 403 ret = rds_ib_fmr_init();
296 if (ret) 404 if (ret)
297 goto out; 405 goto out;
298 406
407 ret = ib_register_client(&rds_ib_client);
408 if (ret)
409 goto out_fmr_exit;
410
299 ret = rds_ib_sysctl_init(); 411 ret = rds_ib_sysctl_init();
300 if (ret) 412 if (ret)
301 goto out_ibreg; 413 goto out_ibreg;
@@ -317,7 +429,9 @@ out_recv:
317out_sysctl: 429out_sysctl:
318 rds_ib_sysctl_exit(); 430 rds_ib_sysctl_exit();
319out_ibreg: 431out_ibreg:
320 ib_unregister_client(&rds_ib_client); 432 rds_ib_unregister_client();
433out_fmr_exit:
434 rds_ib_fmr_exit();
321out: 435out:
322 return ret; 436 return ret;
323} 437}
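
In ib.c above, rds_ib_dev_put() can be called from interrupt context while the actual teardown must block (destroying the FMR pool, deregistering the MR, deallocating the PD), so the final put only queues rds_ib_dev_free() on rds_wq. A kernel-style sketch of that last-put-queues-work pattern, with a hypothetical ex_dev structure and the generic schedule_work() standing in for queue_work(rds_wq, ...):

    #include <linux/atomic.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct ex_dev {
        atomic_t refcount;
        struct work_struct free_work;
    };

    static void ex_dev_free(struct work_struct *work)
    {
        struct ex_dev *dev = container_of(work, struct ex_dev, free_work);

        /* process context: blocking cleanup is allowed here */
        kfree(dev);
    }

    static void ex_dev_put(struct ex_dev *dev)
    {
        /* whoever drops the last reference schedules the deferred free */
        if (atomic_dec_and_test(&dev->refcount))
            schedule_work(&dev->free_work);
    }
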
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 64df4e79b29..7ad3d57e06a 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -3,11 +3,13 @@
3 3
4#include <rdma/ib_verbs.h> 4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h> 5#include <rdma/rdma_cm.h>
6#include <linux/pci.h>
7#include <linux/slab.h>
6#include "rds.h" 8#include "rds.h"
7#include "rdma_transport.h" 9#include "rdma_transport.h"
8 10
9#define RDS_FMR_SIZE 256 11#define RDS_FMR_SIZE 256
10#define RDS_FMR_POOL_SIZE 4096 12#define RDS_FMR_POOL_SIZE 8192
11 13
12#define RDS_IB_MAX_SGE 8 14#define RDS_IB_MAX_SGE 8
13#define RDS_IB_RECV_SGE 2 15#define RDS_IB_RECV_SGE 2
@@ -19,6 +21,9 @@
19 21
20#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ 22#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
21 23
24#define RDS_IB_RECYCLE_BATCH_COUNT 32
25
26extern struct rw_semaphore rds_ib_devices_lock;
22extern struct list_head rds_ib_devices; 27extern struct list_head rds_ib_devices;
23 28
24/* 29/*
@@ -26,20 +31,29 @@ extern struct list_head rds_ib_devices;
26 * try and minimize the amount of memory tied up both the device and 31 * try and minimize the amount of memory tied up both the device and
27 * socket receive queues. 32 * socket receive queues.
28 */ 33 */
29/* page offset of the final full frag that fits in the page */
30#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
31struct rds_page_frag { 34struct rds_page_frag {
32 struct list_head f_item; 35 struct list_head f_item;
33 struct page *f_page; 36 struct list_head f_cache_entry;
34 unsigned long f_offset; 37 struct scatterlist f_sg;
35 dma_addr_t f_mapped;
36}; 38};
37 39
38struct rds_ib_incoming { 40struct rds_ib_incoming {
39 struct list_head ii_frags; 41 struct list_head ii_frags;
42 struct list_head ii_cache_entry;
40 struct rds_incoming ii_inc; 43 struct rds_incoming ii_inc;
41}; 44};
42 45
46struct rds_ib_cache_head {
47 struct list_head *first;
48 unsigned long count;
49};
50
51struct rds_ib_refill_cache {
52 struct rds_ib_cache_head *percpu;
53 struct list_head *xfer;
54 struct list_head *ready;
55};
56
43struct rds_ib_connect_private { 57struct rds_ib_connect_private {
44 /* Add new fields at the end, and don't permute existing fields. */ 58 /* Add new fields at the end, and don't permute existing fields. */
45 __be32 dp_saddr; 59 __be32 dp_saddr;
@@ -53,8 +67,7 @@ struct rds_ib_connect_private {
53}; 67};
54 68
55struct rds_ib_send_work { 69struct rds_ib_send_work {
56 struct rds_message *s_rm; 70 void *s_op;
57 struct rds_rdma_op *s_op;
58 struct ib_send_wr s_wr; 71 struct ib_send_wr s_wr;
59 struct ib_sge s_sge[RDS_IB_MAX_SGE]; 72 struct ib_sge s_sge[RDS_IB_MAX_SGE];
60 unsigned long s_queued; 73 unsigned long s_queued;
@@ -92,10 +105,11 @@ struct rds_ib_connection {
92 105
93 /* tx */ 106 /* tx */
94 struct rds_ib_work_ring i_send_ring; 107 struct rds_ib_work_ring i_send_ring;
95 struct rds_message *i_rm; 108 struct rm_data_op *i_data_op;
96 struct rds_header *i_send_hdrs; 109 struct rds_header *i_send_hdrs;
97 u64 i_send_hdrs_dma; 110 u64 i_send_hdrs_dma;
98 struct rds_ib_send_work *i_sends; 111 struct rds_ib_send_work *i_sends;
112 atomic_t i_signaled_sends;
99 113
100 /* rx */ 114 /* rx */
101 struct tasklet_struct i_recv_tasklet; 115 struct tasklet_struct i_recv_tasklet;
@@ -106,8 +120,9 @@ struct rds_ib_connection {
106 struct rds_header *i_recv_hdrs; 120 struct rds_header *i_recv_hdrs;
107 u64 i_recv_hdrs_dma; 121 u64 i_recv_hdrs_dma;
108 struct rds_ib_recv_work *i_recvs; 122 struct rds_ib_recv_work *i_recvs;
109 struct rds_page_frag i_frag;
110 u64 i_ack_recv; /* last ACK received */ 123 u64 i_ack_recv; /* last ACK received */
124 struct rds_ib_refill_cache i_cache_incs;
125 struct rds_ib_refill_cache i_cache_frags;
111 126
112 /* sending acks */ 127 /* sending acks */
113 unsigned long i_ack_flags; 128 unsigned long i_ack_flags;
@@ -138,7 +153,6 @@ struct rds_ib_connection {
138 153
139 /* Batched completions */ 154 /* Batched completions */
140 unsigned int i_unsignaled_wrs; 155 unsigned int i_unsignaled_wrs;
141 long i_unsignaled_bytes;
142}; 156};
143 157
144/* This assumes that atomic_t is at least 32 bits */ 158/* This assumes that atomic_t is at least 32 bits */
@@ -164,9 +178,17 @@ struct rds_ib_device {
164 unsigned int max_fmrs; 178 unsigned int max_fmrs;
165 int max_sge; 179 int max_sge;
166 unsigned int max_wrs; 180 unsigned int max_wrs;
181 unsigned int max_initiator_depth;
182 unsigned int max_responder_resources;
167 spinlock_t spinlock; /* protect the above */ 183 spinlock_t spinlock; /* protect the above */
184 atomic_t refcount;
185 struct work_struct free_work;
168}; 186};
169 187
188#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
189#define ibdev_to_node(ibdev) pcidev_to_node(to_pci_dev(ibdev->dma_device))
190#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
191
170/* bits for i_ack_flags */ 192/* bits for i_ack_flags */
171#define IB_ACK_IN_FLIGHT 0 193#define IB_ACK_IN_FLIGHT 0
172#define IB_ACK_REQUESTED 1 194#define IB_ACK_REQUESTED 1
@@ -202,6 +224,8 @@ struct rds_ib_statistics {
202 uint64_t s_ib_rdma_mr_pool_flush; 224 uint64_t s_ib_rdma_mr_pool_flush;
203 uint64_t s_ib_rdma_mr_pool_wait; 225 uint64_t s_ib_rdma_mr_pool_wait;
204 uint64_t s_ib_rdma_mr_pool_depleted; 226 uint64_t s_ib_rdma_mr_pool_depleted;
227 uint64_t s_ib_atomic_cswp;
228 uint64_t s_ib_atomic_fadd;
205}; 229};
206 230
207extern struct workqueue_struct *rds_ib_wq; 231extern struct workqueue_struct *rds_ib_wq;
@@ -243,6 +267,8 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
243extern struct rds_transport rds_ib_transport; 267extern struct rds_transport rds_ib_transport;
244extern void rds_ib_add_one(struct ib_device *device); 268extern void rds_ib_add_one(struct ib_device *device);
245extern void rds_ib_remove_one(struct ib_device *device); 269extern void rds_ib_remove_one(struct ib_device *device);
270struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
271void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
246extern struct ib_client rds_ib_client; 272extern struct ib_client rds_ib_client;
247 273
248extern unsigned int fmr_pool_size; 274extern unsigned int fmr_pool_size;
@@ -258,7 +284,7 @@ void rds_ib_conn_free(void *arg);
258int rds_ib_conn_connect(struct rds_connection *conn); 284int rds_ib_conn_connect(struct rds_connection *conn);
259void rds_ib_conn_shutdown(struct rds_connection *conn); 285void rds_ib_conn_shutdown(struct rds_connection *conn);
260void rds_ib_state_change(struct sock *sk); 286void rds_ib_state_change(struct sock *sk);
261int __init rds_ib_listen_init(void); 287int rds_ib_listen_init(void);
262void rds_ib_listen_stop(void); 288void rds_ib_listen_stop(void);
263void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); 289void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
264int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, 290int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -275,15 +301,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
275int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 301int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
276void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 302void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
277void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 303void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
278void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock); 304void rds_ib_destroy_nodev_conns(void);
279static inline void rds_ib_destroy_nodev_conns(void)
280{
281 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
282}
283static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
284{
285 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
286}
287struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 305struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
288void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 306void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
289void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 307void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -292,14 +310,16 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
292void rds_ib_sync_mr(void *trans_private, int dir); 310void rds_ib_sync_mr(void *trans_private, int dir);
293void rds_ib_free_mr(void *trans_private, int invalidate); 311void rds_ib_free_mr(void *trans_private, int invalidate);
294void rds_ib_flush_mrs(void); 312void rds_ib_flush_mrs(void);
313int rds_ib_fmr_init(void);
314void rds_ib_fmr_exit(void);
295 315
296/* ib_recv.c */ 316/* ib_recv.c */
297int __init rds_ib_recv_init(void); 317int rds_ib_recv_init(void);
298void rds_ib_recv_exit(void); 318void rds_ib_recv_exit(void);
299int rds_ib_recv(struct rds_connection *conn); 319int rds_ib_recv(struct rds_connection *conn);
300int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 320int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
301 gfp_t page_gfp, int prefill); 321void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
302void rds_ib_inc_purge(struct rds_incoming *inc); 322void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
303void rds_ib_inc_free(struct rds_incoming *inc); 323void rds_ib_inc_free(struct rds_incoming *inc);
304int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 324int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
305 size_t size); 325 size_t size);
@@ -325,17 +345,19 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
325extern wait_queue_head_t rds_ib_ring_empty_wait; 345extern wait_queue_head_t rds_ib_ring_empty_wait;
326 346
327/* ib_send.c */ 347/* ib_send.c */
348char *rds_ib_wc_status_str(enum ib_wc_status status);
328void rds_ib_xmit_complete(struct rds_connection *conn); 349void rds_ib_xmit_complete(struct rds_connection *conn);
329int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, 350int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
330 unsigned int hdr_off, unsigned int sg, unsigned int off); 351 unsigned int hdr_off, unsigned int sg, unsigned int off);
331void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); 352void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
332void rds_ib_send_init_ring(struct rds_ib_connection *ic); 353void rds_ib_send_init_ring(struct rds_ib_connection *ic);
333void rds_ib_send_clear_ring(struct rds_ib_connection *ic); 354void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
334int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 355int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
335void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); 356void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
336void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); 357void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
337int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, 358int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
338 u32 *adv_credits, int need_posted, int max_posted); 359 u32 *adv_credits, int need_posted, int max_posted);
360int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);
339 361
340/* ib_stats.c */ 362/* ib_stats.c */
341DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); 363DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
@@ -344,7 +366,7 @@ unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
344 unsigned int avail); 366 unsigned int avail);
345 367
346/* ib_sysctl.c */ 368/* ib_sysctl.c */
347int __init rds_ib_sysctl_init(void); 369int rds_ib_sysctl_init(void);
348void rds_ib_sysctl_exit(void); 370void rds_ib_sysctl_exit(void);
349extern unsigned long rds_ib_sysctl_max_send_wr; 371extern unsigned long rds_ib_sysctl_max_send_wr;
350extern unsigned long rds_ib_sysctl_max_recv_wr; 372extern unsigned long rds_ib_sysctl_max_recv_wr;
@@ -354,28 +376,4 @@ extern unsigned long rds_ib_sysctl_max_recv_allocation;
354extern unsigned int rds_ib_sysctl_flow_control; 376extern unsigned int rds_ib_sysctl_flow_control;
355extern ctl_table rds_ib_sysctl_table[]; 377extern ctl_table rds_ib_sysctl_table[];
356 378
357/*
358 * Helper functions for getting/setting the header and data SGEs in
359 * RDS packets (not RDMA)
360 *
361 * From version 3.1 onwards, header is in front of data in the sge.
362 */
363static inline struct ib_sge *
364rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
365{
366 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
367 return &sge[0];
368 else
369 return &sge[1];
370}
371
372static inline struct ib_sge *
373rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
374{
375 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
376 return &sge[1];
377 else
378 return &sge[0];
379}
380
381#endif 379#endif
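
The new ibdev_to_node()/rdsibdev_to_node() macros in ib.h above map an IB device to the NUMA node of its parent PCI device, so allocations in ib_cm.c (kzalloc_node(), vmalloc_node()) can be placed close to the HCA. A brief sketch of that node-aware allocation, with a hypothetical ring structure; node would come from ibdev_to_node():

    #include <linux/types.h>
    #include <linux/vmalloc.h>

    struct ex_work {
        u64 wr_id;
    };

    /* Place a potentially large per-connection ring on the device's node. */
    static struct ex_work *ex_alloc_ring(unsigned int nr, int node)
    {
        return vmalloc_node(nr * sizeof(struct ex_work), node);
    }
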
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index f68832798db..ee369d201a6 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -38,6 +38,36 @@
38#include "rds.h" 38#include "rds.h"
39#include "ib.h" 39#include "ib.h"
40 40
41static char *rds_ib_event_type_strings[] = {
42#define RDS_IB_EVENT_STRING(foo) \
43 [IB_EVENT_##foo] = __stringify(IB_EVENT_##foo)
44 RDS_IB_EVENT_STRING(CQ_ERR),
45 RDS_IB_EVENT_STRING(QP_FATAL),
46 RDS_IB_EVENT_STRING(QP_REQ_ERR),
47 RDS_IB_EVENT_STRING(QP_ACCESS_ERR),
48 RDS_IB_EVENT_STRING(COMM_EST),
49 RDS_IB_EVENT_STRING(SQ_DRAINED),
50 RDS_IB_EVENT_STRING(PATH_MIG),
51 RDS_IB_EVENT_STRING(PATH_MIG_ERR),
52 RDS_IB_EVENT_STRING(DEVICE_FATAL),
53 RDS_IB_EVENT_STRING(PORT_ACTIVE),
54 RDS_IB_EVENT_STRING(PORT_ERR),
55 RDS_IB_EVENT_STRING(LID_CHANGE),
56 RDS_IB_EVENT_STRING(PKEY_CHANGE),
57 RDS_IB_EVENT_STRING(SM_CHANGE),
58 RDS_IB_EVENT_STRING(SRQ_ERR),
59 RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED),
60 RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED),
61 RDS_IB_EVENT_STRING(CLIENT_REREGISTER),
62#undef RDS_IB_EVENT_STRING
63};
64
65static char *rds_ib_event_str(enum ib_event_type type)
66{
67 return rds_str_array(rds_ib_event_type_strings,
68 ARRAY_SIZE(rds_ib_event_type_strings), type);
69};
70
41/* 71/*
42 * Set the selected protocol version 72 * Set the selected protocol version
43 */ 73 */
@@ -95,7 +125,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
95{ 125{
96 const struct rds_ib_connect_private *dp = NULL; 126 const struct rds_ib_connect_private *dp = NULL;
97 struct rds_ib_connection *ic = conn->c_transport_data; 127 struct rds_ib_connection *ic = conn->c_transport_data;
98 struct rds_ib_device *rds_ibdev;
99 struct ib_qp_attr qp_attr; 128 struct ib_qp_attr qp_attr;
100 int err; 129 int err;
101 130
@@ -111,11 +140,21 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
111 } 140 }
112 } 141 }
113 142
114 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", 143 if (conn->c_version < RDS_PROTOCOL(3,1)) {
115 &conn->c_faddr, 144 printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
116 RDS_PROTOCOL_MAJOR(conn->c_version), 145 " no longer supported\n",
117 RDS_PROTOCOL_MINOR(conn->c_version), 146 &conn->c_faddr,
118 ic->i_flowctl ? ", flow control" : ""); 147 RDS_PROTOCOL_MAJOR(conn->c_version),
148 RDS_PROTOCOL_MINOR(conn->c_version));
149 rds_conn_destroy(conn);
150 return;
151 } else {
152 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
153 &conn->c_faddr,
154 RDS_PROTOCOL_MAJOR(conn->c_version),
155 RDS_PROTOCOL_MINOR(conn->c_version),
156 ic->i_flowctl ? ", flow control" : "");
157 }
119 158
120 /* 159 /*
121 * Init rings and fill recv. this needs to wait until protocol negotiation 160 * Init rings and fill recv. this needs to wait until protocol negotiation
@@ -125,7 +164,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
125 rds_ib_recv_init_ring(ic); 164 rds_ib_recv_init_ring(ic);
126 /* Post receive buffers - as a side effect, this will update 165 /* Post receive buffers - as a side effect, this will update
127 * the posted credit count. */ 166 * the posted credit count. */
128 rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); 167 rds_ib_recv_refill(conn, 1);
129 168
130 /* Tune RNR behavior */ 169 /* Tune RNR behavior */
131 rds_ib_tune_rnr(ic, &qp_attr); 170 rds_ib_tune_rnr(ic, &qp_attr);
@@ -135,12 +174,11 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
135 if (err) 174 if (err)
136 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); 175 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
137 176
138 /* update ib_device with this local ipaddr & conn */ 177 /* update ib_device with this local ipaddr */
139 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 178 err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
140 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
141 if (err) 179 if (err)
142 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 180 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
143 rds_ib_add_conn(rds_ibdev, conn); 181 err);
144 182
145 /* If the peer gave us the last packet it saw, process this as if 183 /* If the peer gave us the last packet it saw, process this as if
146 * we had received a regular ACK. */ 184 * we had received a regular ACK. */
@@ -153,18 +191,23 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
153static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, 191static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
154 struct rdma_conn_param *conn_param, 192 struct rdma_conn_param *conn_param,
155 struct rds_ib_connect_private *dp, 193 struct rds_ib_connect_private *dp,
156 u32 protocol_version) 194 u32 protocol_version,
195 u32 max_responder_resources,
196 u32 max_initiator_depth)
157{ 197{
198 struct rds_ib_connection *ic = conn->c_transport_data;
199 struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
200
158 memset(conn_param, 0, sizeof(struct rdma_conn_param)); 201 memset(conn_param, 0, sizeof(struct rdma_conn_param));
159 /* XXX tune these? */ 202
160 conn_param->responder_resources = 1; 203 conn_param->responder_resources =
161 conn_param->initiator_depth = 1; 204 min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
205 conn_param->initiator_depth =
206 min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
162 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7); 207 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
163 conn_param->rnr_retry_count = 7; 208 conn_param->rnr_retry_count = 7;
164 209
165 if (dp) { 210 if (dp) {
166 struct rds_ib_connection *ic = conn->c_transport_data;
167
168 memset(dp, 0, sizeof(*dp)); 211 memset(dp, 0, sizeof(*dp));
169 dp->dp_saddr = conn->c_laddr; 212 dp->dp_saddr = conn->c_laddr;
170 dp->dp_daddr = conn->c_faddr; 213 dp->dp_daddr = conn->c_faddr;
@@ -189,7 +232,8 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
189 232
190static void rds_ib_cq_event_handler(struct ib_event *event, void *data) 233static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
191{ 234{
192 rdsdebug("event %u data %p\n", event->event, data); 235 rdsdebug("event %u (%s) data %p\n",
236 event->event, rds_ib_event_str(event->event), data);
193} 237}
194 238
195static void rds_ib_qp_event_handler(struct ib_event *event, void *data) 239static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
@@ -197,16 +241,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
197 struct rds_connection *conn = data; 241 struct rds_connection *conn = data;
198 struct rds_ib_connection *ic = conn->c_transport_data; 242 struct rds_ib_connection *ic = conn->c_transport_data;
199 243
200 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); 244 rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
245 rds_ib_event_str(event->event));
201 246
202 switch (event->event) { 247 switch (event->event) {
203 case IB_EVENT_COMM_EST: 248 case IB_EVENT_COMM_EST:
204 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); 249 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
205 break; 250 break;
206 default: 251 default:
207 rdsdebug("Fatal QP Event %u " 252 rdsdebug("Fatal QP Event %u (%s) "
208 "- connection %pI4->%pI4, reconnecting\n", 253 "- connection %pI4->%pI4, reconnecting\n",
209 event->event, &conn->c_laddr, &conn->c_faddr); 254 event->event, rds_ib_event_str(event->event),
255 &conn->c_laddr, &conn->c_faddr);
210 rds_conn_drop(conn); 256 rds_conn_drop(conn);
211 break; 257 break;
212 } 258 }
@@ -224,18 +270,16 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
224 struct rds_ib_device *rds_ibdev; 270 struct rds_ib_device *rds_ibdev;
225 int ret; 271 int ret;
226 272
227 /* rds_ib_add_one creates a rds_ib_device object per IB device, 273 /*
228 * and allocates a protection domain, memory range and FMR pool 274 * It's normal to see a null device if an incoming connection races
229 * for each. If that fails for any reason, it will not register 275 * with device removal, so we don't print a warning.
230 * the rds_ibdev at all.
231 */ 276 */
232 rds_ibdev = ib_get_client_data(dev, &rds_ib_client); 277 rds_ibdev = rds_ib_get_client_data(dev);
233 if (rds_ibdev == NULL) { 278 if (!rds_ibdev)
234 if (printk_ratelimit())
235 printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
236 dev->name);
237 return -EOPNOTSUPP; 279 return -EOPNOTSUPP;
238 } 280
281 /* add the conn now so that connection establishment has the dev */
282 rds_ib_add_conn(rds_ibdev, conn);
239 283
240 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) 284 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
241 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); 285 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
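rds_ib_get_client_data() and rds_ib_dev_put(), introduced in this hunk, are defined outside the excerpt. Judging from their call sites they wrap a plain reference count on struct rds_ib_device; the sketch below is inferred from those call sites only (the refcount and free_work fields are assumptions, and the real helpers in net/rds/ib.c may differ):

/* sketch only -- inferred from the call sites in this diff */
struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
{
	struct rds_ib_device *rds_ibdev;

	rds_ibdev = ib_get_client_data(device, &rds_ib_client);
	if (rds_ibdev)
		atomic_inc(&rds_ibdev->refcount);	/* caller must rds_ib_dev_put() */
	return rds_ibdev;
}

void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
{
	/* dropping the last reference defers the actual teardown to a work item */
	if (atomic_dec_and_test(&rds_ibdev->refcount))
		queue_work(rds_wq, &rds_ibdev->free_work);
}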
@@ -306,7 +350,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
306 ic->i_send_ring.w_nr * 350 ic->i_send_ring.w_nr *
307 sizeof(struct rds_header), 351 sizeof(struct rds_header),
308 &ic->i_send_hdrs_dma, GFP_KERNEL); 352 &ic->i_send_hdrs_dma, GFP_KERNEL);
309 if (ic->i_send_hdrs == NULL) { 353 if (!ic->i_send_hdrs) {
310 ret = -ENOMEM; 354 ret = -ENOMEM;
311 rdsdebug("ib_dma_alloc_coherent send failed\n"); 355 rdsdebug("ib_dma_alloc_coherent send failed\n");
312 goto out; 356 goto out;
@@ -316,7 +360,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
316 ic->i_recv_ring.w_nr * 360 ic->i_recv_ring.w_nr *
317 sizeof(struct rds_header), 361 sizeof(struct rds_header),
318 &ic->i_recv_hdrs_dma, GFP_KERNEL); 362 &ic->i_recv_hdrs_dma, GFP_KERNEL);
319 if (ic->i_recv_hdrs == NULL) { 363 if (!ic->i_recv_hdrs) {
320 ret = -ENOMEM; 364 ret = -ENOMEM;
321 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 365 rdsdebug("ib_dma_alloc_coherent recv failed\n");
322 goto out; 366 goto out;
@@ -324,22 +368,24 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
324 368
325 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 369 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
326 &ic->i_ack_dma, GFP_KERNEL); 370 &ic->i_ack_dma, GFP_KERNEL);
327 if (ic->i_ack == NULL) { 371 if (!ic->i_ack) {
328 ret = -ENOMEM; 372 ret = -ENOMEM;
329 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 373 rdsdebug("ib_dma_alloc_coherent ack failed\n");
330 goto out; 374 goto out;
331 } 375 }
332 376
333 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 377 ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
334 if (ic->i_sends == NULL) { 378 ibdev_to_node(dev));
379 if (!ic->i_sends) {
335 ret = -ENOMEM; 380 ret = -ENOMEM;
336 rdsdebug("send allocation failed\n"); 381 rdsdebug("send allocation failed\n");
337 goto out; 382 goto out;
338 } 383 }
339 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 384 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
340 385
341 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); 386 ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
342 if (ic->i_recvs == NULL) { 387 ibdev_to_node(dev));
388 if (!ic->i_recvs) {
343 ret = -ENOMEM; 389 ret = -ENOMEM;
344 rdsdebug("recv allocation failed\n"); 390 rdsdebug("recv allocation failed\n");
345 goto out; 391 goto out;
@@ -352,6 +398,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
352 ic->i_send_cq, ic->i_recv_cq); 398 ic->i_send_cq, ic->i_recv_cq);
353 399
354out: 400out:
401 rds_ib_dev_put(rds_ibdev);
355 return ret; 402 return ret;
356} 403}
357 404
@@ -409,7 +456,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
409 struct rds_ib_connection *ic = NULL; 456 struct rds_ib_connection *ic = NULL;
410 struct rdma_conn_param conn_param; 457 struct rdma_conn_param conn_param;
411 u32 version; 458 u32 version;
412 int err, destroy = 1; 459 int err = 1, destroy = 1;
413 460
414 /* Check whether the remote protocol version matches ours. */ 461 /* Check whether the remote protocol version matches ours. */
415 version = rds_ib_protocol_compatible(event); 462 version = rds_ib_protocol_compatible(event);
@@ -448,7 +495,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
448 /* Wait and see - our connect may still be succeeding */ 495 /* Wait and see - our connect may still be succeeding */
449 rds_ib_stats_inc(s_ib_connect_raced); 496 rds_ib_stats_inc(s_ib_connect_raced);
450 } 497 }
451 mutex_unlock(&conn->c_cm_lock);
452 goto out; 498 goto out;
453 } 499 }
454 500
@@ -475,24 +521,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
475 err = rds_ib_setup_qp(conn); 521 err = rds_ib_setup_qp(conn);
476 if (err) { 522 if (err) {
477 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); 523 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
478 mutex_unlock(&conn->c_cm_lock);
479 goto out; 524 goto out;
480 } 525 }
481 526
482 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); 527 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
528 event->param.conn.responder_resources,
529 event->param.conn.initiator_depth);
483 530
484 /* rdma_accept() calls rdma_reject() internally if it fails */ 531 /* rdma_accept() calls rdma_reject() internally if it fails */
485 err = rdma_accept(cm_id, &conn_param); 532 err = rdma_accept(cm_id, &conn_param);
486 mutex_unlock(&conn->c_cm_lock); 533 if (err)
487 if (err) {
488 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); 534 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
489 goto out;
490 }
491
492 return 0;
493 535
494out: 536out:
495 rdma_reject(cm_id, NULL, 0); 537 if (conn)
538 mutex_unlock(&conn->c_cm_lock);
539 if (err)
540 rdma_reject(cm_id, NULL, 0);
496 return destroy; 541 return destroy;
497} 542}
498 543
@@ -516,8 +561,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
516 goto out; 561 goto out;
517 } 562 }
518 563
519 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); 564 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
520 565 UINT_MAX, UINT_MAX);
521 ret = rdma_connect(cm_id, &conn_param); 566 ret = rdma_connect(cm_id, &conn_param);
522 if (ret) 567 if (ret)
523 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); 568 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
@@ -601,9 +646,19 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
601 ic->i_cm_id, err); 646 ic->i_cm_id, err);
602 } 647 }
603 648
649 /*
650 * We want to wait for tx and rx completion to finish
651 * before we tear down the connection, but we have to be
652 * careful not to get stuck waiting on a send ring that
653 * only has unsignaled sends in it. We've shut down new
654 * sends before getting here, so by waiting for signaled
655 * sends to complete we can be sure that there will be no
656 * more tx processing.
657 */
604 wait_event(rds_ib_ring_empty_wait, 658 wait_event(rds_ib_ring_empty_wait,
605 rds_ib_ring_empty(&ic->i_send_ring) && 659 rds_ib_ring_empty(&ic->i_recv_ring) &&
606 rds_ib_ring_empty(&ic->i_recv_ring)); 660 (atomic_read(&ic->i_signaled_sends) == 0));
661 tasklet_kill(&ic->i_recv_tasklet);
607 662
608 if (ic->i_send_hdrs) 663 if (ic->i_send_hdrs)
609 ib_dma_free_coherent(dev, 664 ib_dma_free_coherent(dev,
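The reworked wait above only terminates if the send-completion path keeps i_signaled_sends in step. That path is not part of this excerpt; it is assumed to end in something like the fragment below as each signaled send completion is reaped:

/* sketch only -- the real accounting sits in the send CQ handler */
if (atomic_dec_and_test(&ic->i_signaled_sends))
	wake_up(&rds_ib_ring_empty_wait);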
@@ -654,9 +709,12 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
654 BUG_ON(ic->rds_ibdev); 709 BUG_ON(ic->rds_ibdev);
655 710
656 /* Clear pending transmit */ 711 /* Clear pending transmit */
657 if (ic->i_rm) { 712 if (ic->i_data_op) {
658 rds_message_put(ic->i_rm); 713 struct rds_message *rm;
659 ic->i_rm = NULL; 714
715 rm = container_of(ic->i_data_op, struct rds_message, data);
716 rds_message_put(rm);
717 ic->i_data_op = NULL;
660 } 718 }
661 719
662 /* Clear the ACK state */ 720 /* Clear the ACK state */
@@ -690,12 +748,19 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
690{ 748{
691 struct rds_ib_connection *ic; 749 struct rds_ib_connection *ic;
692 unsigned long flags; 750 unsigned long flags;
751 int ret;
693 752
694 /* XXX too lazy? */ 753 /* XXX too lazy? */
695 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); 754 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
696 if (ic == NULL) 755 if (!ic)
697 return -ENOMEM; 756 return -ENOMEM;
698 757
758 ret = rds_ib_recv_alloc_caches(ic);
759 if (ret) {
760 kfree(ic);
761 return ret;
762 }
763
699 INIT_LIST_HEAD(&ic->ib_node); 764 INIT_LIST_HEAD(&ic->ib_node);
700 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, 765 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
701 (unsigned long) ic); 766 (unsigned long) ic);
@@ -703,6 +768,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
703#ifndef KERNEL_HAS_ATOMIC64 768#ifndef KERNEL_HAS_ATOMIC64
704 spin_lock_init(&ic->i_ack_lock); 769 spin_lock_init(&ic->i_ack_lock);
705#endif 770#endif
771 atomic_set(&ic->i_signaled_sends, 0);
706 772
707 /* 773 /*
708 * rds_ib_conn_shutdown() waits for these to be emptied so they 774 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -744,6 +810,8 @@ void rds_ib_conn_free(void *arg)
744 list_del(&ic->ib_node); 810 list_del(&ic->ib_node);
745 spin_unlock_irq(lock_ptr); 811 spin_unlock_irq(lock_ptr);
746 812
813 rds_ib_recv_free_caches(ic);
814
747 kfree(ic); 815 kfree(ic);
748} 816}
749 817
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index a54cd63f9e3..b5a88415a18 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -32,11 +32,16 @@
32 */ 32 */
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/rculist.h>
35 36
36#include "rds.h" 37#include "rds.h"
37#include "rdma.h"
38#include "ib.h" 38#include "ib.h"
39#include "xlist.h"
39 40
41struct workqueue_struct *rds_ib_fmr_wq;
42
43static DEFINE_PER_CPU(unsigned long, clean_list_grace);
44#define CLEAN_LIST_BUSY_BIT 0
40 45
41/* 46/*
42 * This is stored as mr->r_trans_private. 47 * This is stored as mr->r_trans_private.
@@ -45,7 +50,11 @@ struct rds_ib_mr {
45 struct rds_ib_device *device; 50 struct rds_ib_device *device;
46 struct rds_ib_mr_pool *pool; 51 struct rds_ib_mr_pool *pool;
47 struct ib_fmr *fmr; 52 struct ib_fmr *fmr;
48 struct list_head list; 53
54 struct xlist_head xlist;
55
56 /* unmap_list is for freeing */
57 struct list_head unmap_list;
49 unsigned int remap_count; 58 unsigned int remap_count;
50 59
51 struct scatterlist *sg; 60 struct scatterlist *sg;
@@ -59,14 +68,16 @@ struct rds_ib_mr {
59 */ 68 */
60struct rds_ib_mr_pool { 69struct rds_ib_mr_pool {
61 struct mutex flush_lock; /* serialize fmr invalidate */ 70 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */ 71 struct delayed_work flush_worker; /* flush worker */
63 72
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */ 73 atomic_t item_count; /* total # of MRs */
66 atomic_t dirty_count; /* # dirty of MRs */ 74 atomic_t dirty_count; /* # dirty of MRs */
67 struct list_head drop_list; /* MRs that have reached their max_maps limit */ 75
68 struct list_head free_list; /* unused MRs */ 76 struct xlist_head drop_list; /* MRs that have reached their max_maps limit */
69 struct list_head clean_list; /* unused & unmapped MRs */ 77 struct xlist_head free_list; /* unused MRs */
78 struct xlist_head clean_list; /* global unused & unmapped MRs */
79 wait_queue_head_t flush_wait;
80
70 atomic_t free_pinned; /* memory pinned by free MRs */ 81 atomic_t free_pinned; /* memory pinned by free MRs */
71 unsigned long max_items; 82 unsigned long max_items;
72 unsigned long max_items_soft; 83 unsigned long max_items_soft;
@@ -74,7 +85,7 @@ struct rds_ib_mr_pool {
74 struct ib_fmr_attr fmr_attr; 85 struct ib_fmr_attr fmr_attr;
75}; 86};
76 87
77static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all); 88static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
78static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr); 89static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
79static void rds_ib_mr_pool_flush_worker(struct work_struct *work); 90static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
80 91
@@ -83,16 +94,17 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
83 struct rds_ib_device *rds_ibdev; 94 struct rds_ib_device *rds_ibdev;
84 struct rds_ib_ipaddr *i_ipaddr; 95 struct rds_ib_ipaddr *i_ipaddr;
85 96
86 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 97 rcu_read_lock();
87 spin_lock_irq(&rds_ibdev->spinlock); 98 list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
88 list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 99 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
89 if (i_ipaddr->ipaddr == ipaddr) { 100 if (i_ipaddr->ipaddr == ipaddr) {
90 spin_unlock_irq(&rds_ibdev->spinlock); 101 atomic_inc(&rds_ibdev->refcount);
102 rcu_read_unlock();
91 return rds_ibdev; 103 return rds_ibdev;
92 } 104 }
93 } 105 }
94 spin_unlock_irq(&rds_ibdev->spinlock);
95 } 106 }
107 rcu_read_unlock();
96 108
97 return NULL; 109 return NULL;
98} 110}
@@ -108,7 +120,7 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
108 i_ipaddr->ipaddr = ipaddr; 120 i_ipaddr->ipaddr = ipaddr;
109 121
110 spin_lock_irq(&rds_ibdev->spinlock); 122 spin_lock_irq(&rds_ibdev->spinlock);
111 list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list); 123 list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
112 spin_unlock_irq(&rds_ibdev->spinlock); 124 spin_unlock_irq(&rds_ibdev->spinlock);
113 125
114 return 0; 126 return 0;
@@ -116,17 +128,24 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
116 128
117static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 129static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
118{ 130{
119 struct rds_ib_ipaddr *i_ipaddr, *next; 131 struct rds_ib_ipaddr *i_ipaddr;
132 struct rds_ib_ipaddr *to_free = NULL;
133
120 134
121 spin_lock_irq(&rds_ibdev->spinlock); 135 spin_lock_irq(&rds_ibdev->spinlock);
122 list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) { 136 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
123 if (i_ipaddr->ipaddr == ipaddr) { 137 if (i_ipaddr->ipaddr == ipaddr) {
124 list_del(&i_ipaddr->list); 138 list_del_rcu(&i_ipaddr->list);
125 kfree(i_ipaddr); 139 to_free = i_ipaddr;
126 break; 140 break;
127 } 141 }
128 } 142 }
129 spin_unlock_irq(&rds_ibdev->spinlock); 143 spin_unlock_irq(&rds_ibdev->spinlock);
144
145 if (to_free) {
146 synchronize_rcu();
147 kfree(to_free);
148 }
130} 149}
131 150
132int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 151int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
@@ -134,8 +153,10 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
134 struct rds_ib_device *rds_ibdev_old; 153 struct rds_ib_device *rds_ibdev_old;
135 154
136 rds_ibdev_old = rds_ib_get_device(ipaddr); 155 rds_ibdev_old = rds_ib_get_device(ipaddr);
137 if (rds_ibdev_old) 156 if (rds_ibdev_old) {
138 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); 157 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
158 rds_ib_dev_put(rds_ibdev_old);
159 }
139 160
140 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 161 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
141} 162}
@@ -150,12 +171,13 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
150 BUG_ON(list_empty(&ic->ib_node)); 171 BUG_ON(list_empty(&ic->ib_node));
151 list_del(&ic->ib_node); 172 list_del(&ic->ib_node);
152 173
153 spin_lock_irq(&rds_ibdev->spinlock); 174 spin_lock(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 175 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock); 176 spin_unlock(&rds_ibdev->spinlock);
156 spin_unlock_irq(&ib_nodev_conns_lock); 177 spin_unlock_irq(&ib_nodev_conns_lock);
157 178
158 ic->rds_ibdev = rds_ibdev; 179 ic->rds_ibdev = rds_ibdev;
180 atomic_inc(&rds_ibdev->refcount);
159} 181}
160 182
161void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 183void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
@@ -175,18 +197,18 @@ void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *
175 spin_unlock(&ib_nodev_conns_lock); 197 spin_unlock(&ib_nodev_conns_lock);
176 198
177 ic->rds_ibdev = NULL; 199 ic->rds_ibdev = NULL;
200 rds_ib_dev_put(rds_ibdev);
178} 201}
179 202
180void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) 203void rds_ib_destroy_nodev_conns(void)
181{ 204{
182 struct rds_ib_connection *ic, *_ic; 205 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 206 LIST_HEAD(tmp_list);
184 207
185 /* avoid calling conn_destroy with irqs off */ 208 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(list_lock); 209 spin_lock_irq(&ib_nodev_conns_lock);
187 list_splice(list, &tmp_list); 210 list_splice(&ib_nodev_conns, &tmp_list);
188 INIT_LIST_HEAD(list); 211 spin_unlock_irq(&ib_nodev_conns_lock);
189 spin_unlock_irq(list_lock);
190 212
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) 213 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
192 rds_conn_destroy(ic->conn); 214 rds_conn_destroy(ic->conn);
@@ -200,12 +222,12 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
200 if (!pool) 222 if (!pool)
201 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
202 224
203 INIT_LIST_HEAD(&pool->free_list); 225 INIT_XLIST_HEAD(&pool->free_list);
204 INIT_LIST_HEAD(&pool->drop_list); 226 INIT_XLIST_HEAD(&pool->drop_list);
205 INIT_LIST_HEAD(&pool->clean_list); 227 INIT_XLIST_HEAD(&pool->clean_list);
206 mutex_init(&pool->flush_lock); 228 mutex_init(&pool->flush_lock);
207 spin_lock_init(&pool->list_lock); 229 init_waitqueue_head(&pool->flush_wait);
208 INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 230 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
209 231
210 pool->fmr_attr.max_pages = fmr_message_size; 232 pool->fmr_attr.max_pages = fmr_message_size;
211 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; 233 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
@@ -233,34 +255,60 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
233 255
234void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 256void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
235{ 257{
236 flush_workqueue(rds_wq); 258 cancel_delayed_work_sync(&pool->flush_worker);
237 rds_ib_flush_mr_pool(pool, 1); 259 rds_ib_flush_mr_pool(pool, 1, NULL);
238 WARN_ON(atomic_read(&pool->item_count)); 260 WARN_ON(atomic_read(&pool->item_count));
239 WARN_ON(atomic_read(&pool->free_pinned)); 261 WARN_ON(atomic_read(&pool->free_pinned));
240 kfree(pool); 262 kfree(pool);
241} 263}
242 264
265static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
266 struct rds_ib_mr **ibmr_ret)
267{
268 struct xlist_head *ibmr_xl;
269 ibmr_xl = xlist_del_head_fast(xl);
270 *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
271}
272
243static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) 273static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
244{ 274{
245 struct rds_ib_mr *ibmr = NULL; 275 struct rds_ib_mr *ibmr = NULL;
246 unsigned long flags; 276 struct xlist_head *ret;
277 unsigned long *flag;
247 278
248 spin_lock_irqsave(&pool->list_lock, flags); 279 preempt_disable();
249 if (!list_empty(&pool->clean_list)) { 280 flag = &__get_cpu_var(clean_list_grace);
250 ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list); 281 set_bit(CLEAN_LIST_BUSY_BIT, flag);
251 list_del_init(&ibmr->list); 282 ret = xlist_del_head(&pool->clean_list);
252 } 283 if (ret)
253 spin_unlock_irqrestore(&pool->list_lock, flags); 284 ibmr = list_entry(ret, struct rds_ib_mr, xlist);
254 285
286 clear_bit(CLEAN_LIST_BUSY_BIT, flag);
287 preempt_enable();
255 return ibmr; 288 return ibmr;
256} 289}
257 290
291static inline void wait_clean_list_grace(void)
292{
293 int cpu;
294 unsigned long *flag;
295
296 for_each_online_cpu(cpu) {
297 flag = &per_cpu(clean_list_grace, cpu);
298 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
299 cpu_relax();
300 }
301}
302
258static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) 303static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
259{ 304{
260 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 305 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
261 struct rds_ib_mr *ibmr = NULL; 306 struct rds_ib_mr *ibmr = NULL;
262 int err = 0, iter = 0; 307 int err = 0, iter = 0;
263 308
309 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
310 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
311
264 while (1) { 312 while (1) {
265 ibmr = rds_ib_reuse_fmr(pool); 313 ibmr = rds_ib_reuse_fmr(pool);
266 if (ibmr) 314 if (ibmr)
@@ -287,19 +335,24 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
287 335
288 /* We do have some empty MRs. Flush them out. */ 336 /* We do have some empty MRs. Flush them out. */
289 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); 337 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
290 rds_ib_flush_mr_pool(pool, 0); 338 rds_ib_flush_mr_pool(pool, 0, &ibmr);
339 if (ibmr)
340 return ibmr;
291 } 341 }
292 342
293 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); 343 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
294 if (!ibmr) { 344 if (!ibmr) {
295 err = -ENOMEM; 345 err = -ENOMEM;
296 goto out_no_cigar; 346 goto out_no_cigar;
297 } 347 }
298 348
349 memset(ibmr, 0, sizeof(*ibmr));
350
299 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, 351 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
300 (IB_ACCESS_LOCAL_WRITE | 352 (IB_ACCESS_LOCAL_WRITE |
301 IB_ACCESS_REMOTE_READ | 353 IB_ACCESS_REMOTE_READ |
302 IB_ACCESS_REMOTE_WRITE), 354 IB_ACCESS_REMOTE_WRITE|
355 IB_ACCESS_REMOTE_ATOMIC),
303 &pool->fmr_attr); 356 &pool->fmr_attr);
304 if (IS_ERR(ibmr->fmr)) { 357 if (IS_ERR(ibmr->fmr)) {
305 err = PTR_ERR(ibmr->fmr); 358 err = PTR_ERR(ibmr->fmr);
@@ -367,7 +420,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
367 if (page_cnt > fmr_message_size) 420 if (page_cnt > fmr_message_size)
368 return -EINVAL; 421 return -EINVAL;
369 422
370 dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC); 423 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
424 rdsibdev_to_node(rds_ibdev));
371 if (!dma_pages) 425 if (!dma_pages)
372 return -ENOMEM; 426 return -ENOMEM;
373 427
@@ -441,7 +495,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
441 495
442 /* FIXME we need a way to tell a r/w MR 496 /* FIXME we need a way to tell a r/w MR
443 * from a r/o MR */ 497 * from a r/o MR */
444 BUG_ON(in_interrupt()); 498 BUG_ON(irqs_disabled());
445 set_page_dirty(page); 499 set_page_dirty(page);
446 put_page(page); 500 put_page(page);
447 } 501 }
@@ -477,33 +531,109 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
477} 531}
478 532
479/* 533/*
534 * given an xlist of mrs, put them all into the list_head for more processing
535 */
536static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list)
537{
538 struct rds_ib_mr *ibmr;
539 struct xlist_head splice;
540 struct xlist_head *cur;
541 struct xlist_head *next;
542
543 splice.next = NULL;
544 xlist_splice(xlist, &splice);
545 cur = splice.next;
546 while (cur) {
547 next = cur->next;
548 ibmr = list_entry(cur, struct rds_ib_mr, xlist);
549 list_add_tail(&ibmr->unmap_list, list);
550 cur = next;
551 }
552}
553
554/*
555 * this takes a list head of mrs and turns it into an xlist of clusters.
556 * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for
557 * reuse.
558 */
559static void list_append_to_xlist(struct rds_ib_mr_pool *pool,
560 struct list_head *list, struct xlist_head *xlist,
561 struct xlist_head **tail_ret)
562{
563 struct rds_ib_mr *ibmr;
564 struct xlist_head *cur_mr = xlist;
565 struct xlist_head *tail_mr = NULL;
566
567 list_for_each_entry(ibmr, list, unmap_list) {
568 tail_mr = &ibmr->xlist;
569 tail_mr->next = NULL;
570 cur_mr->next = tail_mr;
571 cur_mr = tail_mr;
572 }
573 *tail_ret = tail_mr;
574}
575
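The two helpers above bridge ordinary list_heads and the lock-free xlists the pool now keeps its MRs on. net/rds/xlist.h is not part of this diff; the sketch below is a rough guess at the primitives assumed here, a cmpxchg-based singly linked list (the real header may differ in detail):

struct xlist_head {
	struct xlist_head *next;
};

/* push the chain [new_head .. new_tail] onto the front of the list */
static inline void xlist_add(struct xlist_head *new_head,
			     struct xlist_head *new_tail,
			     struct xlist_head *head)
{
	struct xlist_head *cur, *check;

	while (1) {
		cur = head->next;
		new_tail->next = cur;
		check = cmpxchg(&head->next, cur, new_head);
		if (check == cur)
			break;
	}
}

static inline int xlist_empty(struct xlist_head *head)
{
	return head->next == NULL;
}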
576/*
480 * Flush our pool of MRs. 577 * Flush our pool of MRs.
481 * At a minimum, all currently unused MRs are unmapped. 578 * At a minimum, all currently unused MRs are unmapped.
482 * If the number of MRs allocated exceeds the limit, we also try 579 * If the number of MRs allocated exceeds the limit, we also try
483 * to free as many MRs as needed to get back to this limit. 580 * to free as many MRs as needed to get back to this limit.
484 */ 581 */
485static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all) 582static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
583 int free_all, struct rds_ib_mr **ibmr_ret)
486{ 584{
487 struct rds_ib_mr *ibmr, *next; 585 struct rds_ib_mr *ibmr, *next;
586 struct xlist_head clean_xlist;
587 struct xlist_head *clean_tail;
488 LIST_HEAD(unmap_list); 588 LIST_HEAD(unmap_list);
489 LIST_HEAD(fmr_list); 589 LIST_HEAD(fmr_list);
490 unsigned long unpinned = 0; 590 unsigned long unpinned = 0;
491 unsigned long flags;
492 unsigned int nfreed = 0, ncleaned = 0, free_goal; 591 unsigned int nfreed = 0, ncleaned = 0, free_goal;
493 int ret = 0; 592 int ret = 0;
494 593
495 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); 594 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
496 595
497 mutex_lock(&pool->flush_lock); 596 if (ibmr_ret) {
597 DEFINE_WAIT(wait);
598 while(!mutex_trylock(&pool->flush_lock)) {
599 ibmr = rds_ib_reuse_fmr(pool);
600 if (ibmr) {
601 *ibmr_ret = ibmr;
602 finish_wait(&pool->flush_wait, &wait);
603 goto out_nolock;
604 }
605
606 prepare_to_wait(&pool->flush_wait, &wait,
607 TASK_UNINTERRUPTIBLE);
608 if (xlist_empty(&pool->clean_list))
609 schedule();
610
611 ibmr = rds_ib_reuse_fmr(pool);
612 if (ibmr) {
613 *ibmr_ret = ibmr;
614 finish_wait(&pool->flush_wait, &wait);
615 goto out_nolock;
616 }
617 }
618 finish_wait(&pool->flush_wait, &wait);
619 } else
620 mutex_lock(&pool->flush_lock);
621
622 if (ibmr_ret) {
623 ibmr = rds_ib_reuse_fmr(pool);
624 if (ibmr) {
625 *ibmr_ret = ibmr;
626 goto out;
627 }
628 }
498 629
499 spin_lock_irqsave(&pool->list_lock, flags);
500 /* Get the list of all MRs to be dropped. Ordering matters - 630 /* Get the list of all MRs to be dropped. Ordering matters -
501 * we want to put drop_list ahead of free_list. */ 631 * we want to put drop_list ahead of free_list.
502 list_splice_init(&pool->free_list, &unmap_list); 632 */
503 list_splice_init(&pool->drop_list, &unmap_list); 633 xlist_append_to_list(&pool->drop_list, &unmap_list);
634 xlist_append_to_list(&pool->free_list, &unmap_list);
504 if (free_all) 635 if (free_all)
505 list_splice_init(&pool->clean_list, &unmap_list); 636 xlist_append_to_list(&pool->clean_list, &unmap_list);
506 spin_unlock_irqrestore(&pool->list_lock, flags);
507 637
508 free_goal = rds_ib_flush_goal(pool, free_all); 638 free_goal = rds_ib_flush_goal(pool, free_all);
509 639
@@ -511,19 +641,20 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
511 goto out; 641 goto out;
512 642
513 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 643 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
514 list_for_each_entry(ibmr, &unmap_list, list) 644 list_for_each_entry(ibmr, &unmap_list, unmap_list)
515 list_add(&ibmr->fmr->list, &fmr_list); 645 list_add(&ibmr->fmr->list, &fmr_list);
646
516 ret = ib_unmap_fmr(&fmr_list); 647 ret = ib_unmap_fmr(&fmr_list);
517 if (ret) 648 if (ret)
518 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret); 649 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
519 650
520 /* Now we can destroy the DMA mapping and unpin any pages */ 651 /* Now we can destroy the DMA mapping and unpin any pages */
521 list_for_each_entry_safe(ibmr, next, &unmap_list, list) { 652 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
522 unpinned += ibmr->sg_len; 653 unpinned += ibmr->sg_len;
523 __rds_ib_teardown_mr(ibmr); 654 __rds_ib_teardown_mr(ibmr);
524 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { 655 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
525 rds_ib_stats_inc(s_ib_rdma_mr_free); 656 rds_ib_stats_inc(s_ib_rdma_mr_free);
526 list_del(&ibmr->list); 657 list_del(&ibmr->unmap_list);
527 ib_dealloc_fmr(ibmr->fmr); 658 ib_dealloc_fmr(ibmr->fmr);
528 kfree(ibmr); 659 kfree(ibmr);
529 nfreed++; 660 nfreed++;
@@ -531,9 +662,27 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
531 ncleaned++; 662 ncleaned++;
532 } 663 }
533 664
534 spin_lock_irqsave(&pool->list_lock, flags); 665 if (!list_empty(&unmap_list)) {
535 list_splice(&unmap_list, &pool->clean_list); 666 /* we have to make sure that none of the things we're about
536 spin_unlock_irqrestore(&pool->list_lock, flags); 667 * to put on the clean list would race with other cpus trying
668 * to pull items off. The xlist would explode if we managed to
669 * remove something from the clean list and then add it back again
670 * while another CPU was spinning on that same item in xlist_del_head.
671 *
672 * This is pretty unlikely, but just in case wait for an xlist grace period
673 * here before adding anything back into the clean list.
674 */
675 wait_clean_list_grace();
676
677 list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail);
678 if (ibmr_ret)
679 refill_local(pool, &clean_xlist, ibmr_ret);
680
681 /* refill_local may have emptied our list */
682 if (!xlist_empty(&clean_xlist))
683 xlist_add(clean_xlist.next, clean_tail, &pool->clean_list);
684
685 }
537 686
538 atomic_sub(unpinned, &pool->free_pinned); 687 atomic_sub(unpinned, &pool->free_pinned);
539 atomic_sub(ncleaned, &pool->dirty_count); 688 atomic_sub(ncleaned, &pool->dirty_count);
@@ -541,14 +690,35 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
541 690
542out: 691out:
543 mutex_unlock(&pool->flush_lock); 692 mutex_unlock(&pool->flush_lock);
693 if (waitqueue_active(&pool->flush_wait))
694 wake_up(&pool->flush_wait);
695out_nolock:
544 return ret; 696 return ret;
545} 697}
546 698
699int rds_ib_fmr_init(void)
700{
701 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
702 if (!rds_ib_fmr_wq)
703 return -ENOMEM;
704 return 0;
705}
706
707/*
708 * By the time this is called all the IB devices should have been torn down and
709 * had their pools freed. As each pool is freed its work struct is waited on,
710 * so the pool flushing work queue should be idle by the time we get here.
711 */
712void rds_ib_fmr_exit(void)
713{
714 destroy_workqueue(rds_ib_fmr_wq);
715}
716
547static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 717static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
548{ 718{
549 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker); 719 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
550 720
551 rds_ib_flush_mr_pool(pool, 0); 721 rds_ib_flush_mr_pool(pool, 0, NULL);
552} 722}
553 723
554void rds_ib_free_mr(void *trans_private, int invalidate) 724void rds_ib_free_mr(void *trans_private, int invalidate)
@@ -556,47 +726,49 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
556 struct rds_ib_mr *ibmr = trans_private; 726 struct rds_ib_mr *ibmr = trans_private;
557 struct rds_ib_device *rds_ibdev = ibmr->device; 727 struct rds_ib_device *rds_ibdev = ibmr->device;
558 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 728 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
559 unsigned long flags;
560 729
561 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 730 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
562 731
563 /* Return it to the pool's free list */ 732 /* Return it to the pool's free list */
564 spin_lock_irqsave(&pool->list_lock, flags);
565 if (ibmr->remap_count >= pool->fmr_attr.max_maps) 733 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
566 list_add(&ibmr->list, &pool->drop_list); 734 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list);
567 else 735 else
568 list_add(&ibmr->list, &pool->free_list); 736 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list);
569 737
570 atomic_add(ibmr->sg_len, &pool->free_pinned); 738 atomic_add(ibmr->sg_len, &pool->free_pinned);
571 atomic_inc(&pool->dirty_count); 739 atomic_inc(&pool->dirty_count);
572 spin_unlock_irqrestore(&pool->list_lock, flags);
573 740
574 /* If we've pinned too many pages, request a flush */ 741 /* If we've pinned too many pages, request a flush */
575 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 742 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
576 atomic_read(&pool->dirty_count) >= pool->max_items / 10) 743 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
577 queue_work(rds_wq, &pool->flush_worker); 744 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
578 745
579 if (invalidate) { 746 if (invalidate) {
580 if (likely(!in_interrupt())) { 747 if (likely(!in_interrupt())) {
581 rds_ib_flush_mr_pool(pool, 0); 748 rds_ib_flush_mr_pool(pool, 0, NULL);
582 } else { 749 } else {
583 /* We get here if the user created a MR marked 750 /* We get here if the user created a MR marked
584 * as use_once and invalidate at the same time. */ 751 * as use_once and invalidate at the same time. */
585 queue_work(rds_wq, &pool->flush_worker); 752 queue_delayed_work(rds_ib_fmr_wq,
753 &pool->flush_worker, 10);
586 } 754 }
587 } 755 }
756
757 rds_ib_dev_put(rds_ibdev);
588} 758}
589 759
590void rds_ib_flush_mrs(void) 760void rds_ib_flush_mrs(void)
591{ 761{
592 struct rds_ib_device *rds_ibdev; 762 struct rds_ib_device *rds_ibdev;
593 763
764 down_read(&rds_ib_devices_lock);
594 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 765 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
595 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 766 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
596 767
597 if (pool) 768 if (pool)
598 rds_ib_flush_mr_pool(pool, 0); 769 rds_ib_flush_mr_pool(pool, 0, NULL);
599 } 770 }
771 up_read(&rds_ib_devices_lock);
600} 772}
601 773
602void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 774void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -628,6 +800,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
628 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); 800 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
629 801
630 ibmr->device = rds_ibdev; 802 ibmr->device = rds_ibdev;
803 rds_ibdev = NULL;
631 804
632 out: 805 out:
633 if (ret) { 806 if (ret) {
@@ -635,5 +808,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
635 rds_ib_free_mr(ibmr, 0); 808 rds_ib_free_mr(ibmr, 0);
636 ibmr = ERR_PTR(ret); 809 ibmr = ERR_PTR(ret);
637 } 810 }
811 if (rds_ibdev)
812 rds_ib_dev_put(rds_ibdev);
638 return ibmr; 813 return ibmr;
639} 814}
815
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index c74e9904a6b..e29e0ca32f7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -43,42 +43,6 @@ static struct kmem_cache *rds_ib_incoming_slab;
43static struct kmem_cache *rds_ib_frag_slab; 43static struct kmem_cache *rds_ib_frag_slab;
44static atomic_t rds_ib_allocation = ATOMIC_INIT(0); 44static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
45 45
46static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
47{
48 rdsdebug("frag %p page %p\n", frag, frag->f_page);
49 __free_page(frag->f_page);
50 frag->f_page = NULL;
51}
52
53static void rds_ib_frag_free(struct rds_page_frag *frag)
54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL);
57 kmem_cache_free(rds_ib_frag_slab, frag);
58}
59
60/*
61 * We map a page at a time. Its fragments are posted in order. This
62 * is called in fragment order as the fragments get send completion events.
63 * Only the last frag in the page performs the unmapping.
64 *
65 * It's OK for ring cleanup to call this in whatever order it likes because
66 * DMA is not in flight and so we can unmap while other ring entries still
67 * hold page references in their frags.
68 */
69static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
70 struct rds_ib_recv_work *recv)
71{
72 struct rds_page_frag *frag = recv->r_frag;
73
74 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
75 if (frag->f_mapped)
76 ib_dma_unmap_page(ic->i_cm_id->device,
77 frag->f_mapped,
78 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
79 frag->f_mapped = 0;
80}
81
82void rds_ib_recv_init_ring(struct rds_ib_connection *ic) 46void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
83{ 47{
84 struct rds_ib_recv_work *recv; 48 struct rds_ib_recv_work *recv;
@@ -95,16 +59,161 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
95 recv->r_wr.sg_list = recv->r_sge; 59 recv->r_wr.sg_list = recv->r_sge;
96 recv->r_wr.num_sge = RDS_IB_RECV_SGE; 60 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
97 61
98 sge = rds_ib_data_sge(ic, recv->r_sge); 62 sge = &recv->r_sge[0];
63 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
64 sge->length = sizeof(struct rds_header);
65 sge->lkey = ic->i_mr->lkey;
66
67 sge = &recv->r_sge[1];
99 sge->addr = 0; 68 sge->addr = 0;
100 sge->length = RDS_FRAG_SIZE; 69 sge->length = RDS_FRAG_SIZE;
101 sge->lkey = ic->i_mr->lkey; 70 sge->lkey = ic->i_mr->lkey;
71 }
72}
102 73
103 sge = rds_ib_header_sge(ic, recv->r_sge); 74/*
104 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); 75 * The entire 'from' list, including the from element itself, is put on
105 sge->length = sizeof(struct rds_header); 76 * to the tail of the 'to' list.
106 sge->lkey = ic->i_mr->lkey; 77 */
78static void list_splice_entire_tail(struct list_head *from,
79 struct list_head *to)
80{
81 struct list_head *from_last = from->prev;
82
83 list_splice_tail(from_last, to);
84 list_add_tail(from_last, to);
85}
86
87static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
88{
89 struct list_head *tmp;
90
91 tmp = xchg(&cache->xfer, NULL);
92 if (tmp) {
93 if (cache->ready)
94 list_splice_entire_tail(tmp, cache->ready);
95 else
96 cache->ready = tmp;
97 }
98}
99
100static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
101{
102 struct rds_ib_cache_head *head;
103 int cpu;
104
105 cache->percpu = alloc_percpu(struct rds_ib_cache_head);
106 if (!cache->percpu)
107 return -ENOMEM;
108
109 for_each_possible_cpu(cpu) {
110 head = per_cpu_ptr(cache->percpu, cpu);
111 head->first = NULL;
112 head->count = 0;
113 }
114 cache->xfer = NULL;
115 cache->ready = NULL;
116
117 return 0;
118}
119
120int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
121{
122 int ret;
123
124 ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
125 if (!ret) {
126 ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
127 if (ret)
128 free_percpu(ic->i_cache_incs.percpu);
107 } 129 }
130
131 return ret;
132}
133
134static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
135 struct list_head *caller_list)
136{
137 struct rds_ib_cache_head *head;
138 int cpu;
139
140 for_each_possible_cpu(cpu) {
141 head = per_cpu_ptr(cache->percpu, cpu);
142 if (head->first) {
143 list_splice_entire_tail(head->first, caller_list);
144 head->first = NULL;
145 }
146 }
147
148 if (cache->ready) {
149 list_splice_entire_tail(cache->ready, caller_list);
150 cache->ready = NULL;
151 }
152}
153
154void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
155{
156 struct rds_ib_incoming *inc;
157 struct rds_ib_incoming *inc_tmp;
158 struct rds_page_frag *frag;
159 struct rds_page_frag *frag_tmp;
160 LIST_HEAD(list);
161
162 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
163 rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
164 free_percpu(ic->i_cache_incs.percpu);
165
166 list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
167 list_del(&inc->ii_cache_entry);
168 WARN_ON(!list_empty(&inc->ii_frags));
169 kmem_cache_free(rds_ib_incoming_slab, inc);
170 }
171
172 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
173 rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
174 free_percpu(ic->i_cache_frags.percpu);
175
176 list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
177 list_del(&frag->f_cache_entry);
178 WARN_ON(!list_empty(&frag->f_item));
179 kmem_cache_free(rds_ib_frag_slab, frag);
180 }
181}
182
183/* fwd decl */
184static void rds_ib_recv_cache_put(struct list_head *new_item,
185 struct rds_ib_refill_cache *cache);
186static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
187
188
189/* Recycle frag and attached recv buffer f_sg */
190static void rds_ib_frag_free(struct rds_ib_connection *ic,
191 struct rds_page_frag *frag)
192{
193 rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
194
195 rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
196}
197
198/* Recycle inc after freeing attached frags */
199void rds_ib_inc_free(struct rds_incoming *inc)
200{
201 struct rds_ib_incoming *ibinc;
202 struct rds_page_frag *frag;
203 struct rds_page_frag *pos;
204 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
205
206 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
207
208 /* Free attached frags */
209 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
210 list_del_init(&frag->f_item);
211 rds_ib_frag_free(ic, frag);
212 }
213 BUG_ON(!list_empty(&ibinc->ii_frags));
214
215 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
216 rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
108} 217}
109 218
110static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, 219static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
@@ -115,10 +224,8 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
115 recv->r_ibinc = NULL; 224 recv->r_ibinc = NULL;
116 } 225 }
117 if (recv->r_frag) { 226 if (recv->r_frag) {
118 rds_ib_recv_unmap_page(ic, recv); 227 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
119 if (recv->r_frag->f_page) 228 rds_ib_frag_free(ic, recv->r_frag);
120 rds_ib_frag_drop_page(recv->r_frag);
121 rds_ib_frag_free(recv->r_frag);
122 recv->r_frag = NULL; 229 recv->r_frag = NULL;
123 } 230 }
124} 231}
@@ -129,84 +236,111 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
129 236
130 for (i = 0; i < ic->i_recv_ring.w_nr; i++) 237 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
131 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); 238 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
132
133 if (ic->i_frag.f_page)
134 rds_ib_frag_drop_page(&ic->i_frag);
135} 239}
136 240
137static int rds_ib_recv_refill_one(struct rds_connection *conn, 241static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
138 struct rds_ib_recv_work *recv, 242 gfp_t slab_mask)
139 gfp_t kptr_gfp, gfp_t page_gfp)
140{ 243{
141 struct rds_ib_connection *ic = conn->c_transport_data; 244 struct rds_ib_incoming *ibinc;
142 dma_addr_t dma_addr; 245 struct list_head *cache_item;
143 struct ib_sge *sge; 246 int avail_allocs;
144 int ret = -ENOMEM;
145 247
146 if (recv->r_ibinc == NULL) { 248 cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
147 if (!atomic_add_unless(&rds_ib_allocation, 1, rds_ib_sysctl_max_recv_allocation)) { 249 if (cache_item) {
250 ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
251 } else {
252 avail_allocs = atomic_add_unless(&rds_ib_allocation,
253 1, rds_ib_sysctl_max_recv_allocation);
254 if (!avail_allocs) {
148 rds_ib_stats_inc(s_ib_rx_alloc_limit); 255 rds_ib_stats_inc(s_ib_rx_alloc_limit);
149 goto out; 256 return NULL;
150 } 257 }
151 recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, 258 ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
152 kptr_gfp); 259 if (!ibinc) {
153 if (recv->r_ibinc == NULL) {
154 atomic_dec(&rds_ib_allocation); 260 atomic_dec(&rds_ib_allocation);
155 goto out; 261 return NULL;
156 } 262 }
157 INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
158 rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
159 } 263 }
264 INIT_LIST_HEAD(&ibinc->ii_frags);
265 rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
160 266
161 if (recv->r_frag == NULL) { 267 return ibinc;
162 recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp); 268}
163 if (recv->r_frag == NULL) 269
164 goto out; 270static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 271 gfp_t slab_mask, gfp_t page_mask)
166 recv->r_frag->f_page = NULL; 272{
273 struct rds_page_frag *frag;
274 struct list_head *cache_item;
275 int ret;
276
277 cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
278 if (cache_item) {
279 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
280 } else {
281 frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
282 if (!frag)
283 return NULL;
284
285 sg_init_table(&frag->f_sg, 1);
286 ret = rds_page_remainder_alloc(&frag->f_sg,
287 RDS_FRAG_SIZE, page_mask);
288 if (ret) {
289 kmem_cache_free(rds_ib_frag_slab, frag);
290 return NULL;
291 }
167 } 292 }
168 293
169 if (ic->i_frag.f_page == NULL) { 294 INIT_LIST_HEAD(&frag->f_item);
170 ic->i_frag.f_page = alloc_page(page_gfp); 295
171 if (ic->i_frag.f_page == NULL) 296 return frag;
172 goto out; 297}
173 ic->i_frag.f_offset = 0; 298
299static int rds_ib_recv_refill_one(struct rds_connection *conn,
300 struct rds_ib_recv_work *recv, int prefill)
301{
302 struct rds_ib_connection *ic = conn->c_transport_data;
303 struct ib_sge *sge;
304 int ret = -ENOMEM;
305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT;
307
308 if (prefill) {
309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER;
174 } 311 }
175 312
176 dma_addr = ib_dma_map_page(ic->i_cm_id->device, 313 if (!ic->i_cache_incs.ready)
177 ic->i_frag.f_page, 314 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
178 ic->i_frag.f_offset, 315 if (!ic->i_cache_frags.ready)
179 RDS_FRAG_SIZE, 316 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
180 DMA_FROM_DEVICE);
181 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
182 goto out;
183 317
184 /* 318 /*
185 * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap() 319 * ibinc was taken from recv if recv contained the start of a message.
186 * must be called on this recv. This happens as completions hit 320 * recvs that were continuations will still have this allocated.
187 * in order or on connection shutdown.
188 */ 321 */
189 recv->r_frag->f_page = ic->i_frag.f_page; 322 if (!recv->r_ibinc) {
190 recv->r_frag->f_offset = ic->i_frag.f_offset; 323 recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
191 recv->r_frag->f_mapped = dma_addr; 324 if (!recv->r_ibinc)
325 goto out;
326 }
192 327
193 sge = rds_ib_data_sge(ic, recv->r_sge); 328 WARN_ON(recv->r_frag); /* leak! */
194 sge->addr = dma_addr; 329 recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
195 sge->length = RDS_FRAG_SIZE; 330 if (!recv->r_frag)
331 goto out;
332
333 ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
334 1, DMA_FROM_DEVICE);
335 WARN_ON(ret != 1);
196 336
197 sge = rds_ib_header_sge(ic, recv->r_sge); 337 sge = &recv->r_sge[0];
198 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); 338 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
199 sge->length = sizeof(struct rds_header); 339 sge->length = sizeof(struct rds_header);
200 340
201 get_page(recv->r_frag->f_page); 341 sge = &recv->r_sge[1];
202 342 sge->addr = sg_dma_address(&recv->r_frag->f_sg);
203 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { 343 sge->length = sg_dma_len(&recv->r_frag->f_sg);
204 ic->i_frag.f_offset += RDS_FRAG_SIZE;
205 } else {
206 put_page(ic->i_frag.f_page);
207 ic->i_frag.f_page = NULL;
208 ic->i_frag.f_offset = 0;
209 }
210 344
211 ret = 0; 345 ret = 0;
212out: 346out:
@@ -216,13 +350,11 @@ out:
216/* 350/*
217 * This tries to allocate and post unused work requests after making sure that 351 * This tries to allocate and post unused work requests after making sure that
218 * they have all the allocations they need to queue received fragments into 352 * they have all the allocations they need to queue received fragments into
219 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc 353 * sockets.
220 * pairs don't go unmatched.
221 * 354 *
222 * -1 is returned if posting fails due to temporary resource exhaustion. 355 * -1 is returned if posting fails due to temporary resource exhaustion.
223 */ 356 */
224int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 357void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
225 gfp_t page_gfp, int prefill)
226{ 358{
227 struct rds_ib_connection *ic = conn->c_transport_data; 359 struct rds_ib_connection *ic = conn->c_transport_data;
228 struct rds_ib_recv_work *recv; 360 struct rds_ib_recv_work *recv;
@@ -236,28 +368,25 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
236 if (pos >= ic->i_recv_ring.w_nr) { 368 if (pos >= ic->i_recv_ring.w_nr) {
237 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", 369 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
238 pos); 370 pos);
239 ret = -EINVAL;
240 break; 371 break;
241 } 372 }
242 373
243 recv = &ic->i_recvs[pos]; 374 recv = &ic->i_recvs[pos];
244 ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp); 375 ret = rds_ib_recv_refill_one(conn, recv, prefill);
245 if (ret) { 376 if (ret) {
246 ret = -1;
247 break; 377 break;
248 } 378 }
249 379
250 /* XXX when can this fail? */ 380 /* XXX when can this fail? */
251 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); 381 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
252 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, 382 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
253 recv->r_ibinc, recv->r_frag->f_page, 383 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
254 (long) recv->r_frag->f_mapped, ret); 384 (long) sg_dma_address(&recv->r_frag->f_sg), ret);
255 if (ret) { 385 if (ret) {
256 rds_ib_conn_error(conn, "recv post on " 386 rds_ib_conn_error(conn, "recv post on "
257 "%pI4 returned %d, disconnecting and " 387 "%pI4 returned %d, disconnecting and "
258 "reconnecting\n", &conn->c_faddr, 388 "reconnecting\n", &conn->c_faddr,
259 ret); 389 ret);
260 ret = -1;
261 break; 390 break;
262 } 391 }
263 392
@@ -270,37 +399,73 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
270 399
271 if (ret) 400 if (ret)
272 rds_ib_ring_unalloc(&ic->i_recv_ring, 1); 401 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
273 return ret;
274} 402}
275 403
276void rds_ib_inc_purge(struct rds_incoming *inc) 404/*
405 * We want to recycle several types of recv allocations, like incs and frags.
406 * To use this, the *_free() function passes in the ptr to a list_head within
407 * the recyclee, as well as the cache to put it on.
408 *
409 * First, we put the memory on a percpu list. When this reaches a certain size,
410 * we move it to an intermediate non-percpu list in a lockless manner, with some
411 * xchg/cmpxchg wizardry.
412 *
413 * N.B. Instead of a list_head as the anchor, we use a single pointer, which can
414 * be NULL and xchg'd. The list is actually empty when the pointer is NULL, and
415 * list_empty() will return true even when one element is actually present.
416 */
417static void rds_ib_recv_cache_put(struct list_head *new_item,
418 struct rds_ib_refill_cache *cache)
277{ 419{
278 struct rds_ib_incoming *ibinc; 420 unsigned long flags;
279 struct rds_page_frag *frag; 421 struct rds_ib_cache_head *chp;
280 struct rds_page_frag *pos; 422 struct list_head *old;
281 423
282 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 424 local_irq_save(flags);
283 rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);
284 425
285 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { 426 chp = per_cpu_ptr(cache->percpu, smp_processor_id());
286 list_del_init(&frag->f_item); 427 if (!chp->first)
287 rds_ib_frag_drop_page(frag); 428 INIT_LIST_HEAD(new_item);
288 rds_ib_frag_free(frag); 429 else /* put on front */
289 } 430 list_add_tail(new_item, chp->first);
431 chp->first = new_item;
432 chp->count++;
433
434 if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
435 goto end;
436
437 /*
438 * Return our per-cpu first list to the cache's xfer by atomically
439 * grabbing the current xfer list, appending it to our per-cpu list,
440 * and then atomically returning that entire list back to the
441 * cache's xfer list as long as it's still empty.
442 */
443 do {
444 old = xchg(&cache->xfer, NULL);
445 if (old)
446 list_splice_entire_tail(old, chp->first);
447 old = cmpxchg(&cache->xfer, NULL, chp->first);
448 } while (old);
449
450 chp->first = NULL;
451 chp->count = 0;
452end:
453 local_irq_restore(flags);
290} 454}
291 455
292void rds_ib_inc_free(struct rds_incoming *inc) 456static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
293{ 457{
294 struct rds_ib_incoming *ibinc; 458 struct list_head *head = cache->ready;
295 459
296 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 460 if (head) {
461 if (!list_empty(head)) {
462 cache->ready = head->next;
463 list_del_init(head);
464 } else
465 cache->ready = NULL;
466 }
297 467
298 rds_ib_inc_purge(inc); 468 return head;
299 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
300 BUG_ON(!list_empty(&ibinc->ii_frags));
301 kmem_cache_free(rds_ib_incoming_slab, ibinc);
302 atomic_dec(&rds_ib_allocation);
303 BUG_ON(atomic_read(&rds_ib_allocation) < 0);
304} 469}
305 470
306int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, 471int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
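The xchg/cmpxchg publish loop in rds_ib_recv_cache_put() above can be modelled outside the kernel. The stand-alone C11 analogue below uses bare pointers in place of the list batches; the names are illustrative only:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic(int *) xfer;		/* plays the role of cache->xfer */

static void publish(int *mine)
{
	int *old;

	do {
		/* claim whatever batch is currently published */
		old = atomic_exchange(&xfer, NULL);
		if (old)
			printf("absorbing batch %d into ours\n", *old);

		/* install our (merged) batch; retry if another producer won the race */
		int *expected = NULL;
		if (atomic_compare_exchange_strong(&xfer, &expected, mine))
			old = NULL;
		else
			old = expected;
	} while (old);
}

int main(void)
{
	int a = 1, b = 2;

	publish(&a);
	publish(&b);
	printf("xfer now holds batch %d\n", *atomic_load(&xfer));
	return 0;
}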
@@ -336,13 +501,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
336 to_copy = min_t(unsigned long, to_copy, len - copied); 501 to_copy = min_t(unsigned long, to_copy, len - copied);
337 502
338 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " 503 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
339 "[%p, %lu] + %lu\n", 504 "[%p, %u] + %lu\n",
340 to_copy, iov->iov_base, iov->iov_len, iov_off, 505 to_copy, iov->iov_base, iov->iov_len, iov_off,
341 frag->f_page, frag->f_offset, frag_off); 506 sg_page(&frag->f_sg), frag->f_sg.offset, frag_off);
342 507
343 /* XXX needs + offset for multiple recvs per page */ 508 /* XXX needs + offset for multiple recvs per page */
344 ret = rds_page_copy_to_user(frag->f_page, 509 ret = rds_page_copy_to_user(sg_page(&frag->f_sg),
345 frag->f_offset + frag_off, 510 frag->f_sg.offset + frag_off,
346 iov->iov_base + iov_off, 511 iov->iov_base + iov_off,
347 to_copy); 512 to_copy);
348 if (ret) { 513 if (ret) {
@@ -557,47 +722,6 @@ u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
557 return rds_ib_get_ack(ic); 722 return rds_ib_get_ack(ic);
558} 723}
559 724
560static struct rds_header *rds_ib_get_header(struct rds_connection *conn,
561 struct rds_ib_recv_work *recv,
562 u32 data_len)
563{
564 struct rds_ib_connection *ic = conn->c_transport_data;
565 void *hdr_buff = &ic->i_recv_hdrs[recv - ic->i_recvs];
566 void *addr;
567 u32 misplaced_hdr_bytes;
568
569 /*
570 * Support header at the front (RDS 3.1+) as well as header-at-end.
571 *
572 * Cases:
573 * 1) header all in header buff (great!)
574 * 2) header all in data page (copy all to header buff)
575 * 3) header split across hdr buf + data page
576 * (move bit in hdr buff to end before copying other bit from data page)
577 */
578 if (conn->c_version > RDS_PROTOCOL_3_0 || data_len == RDS_FRAG_SIZE)
579 return hdr_buff;
580
581 if (data_len <= (RDS_FRAG_SIZE - sizeof(struct rds_header))) {
582 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
583 memcpy(hdr_buff,
584 addr + recv->r_frag->f_offset + data_len,
585 sizeof(struct rds_header));
586 kunmap_atomic(addr, KM_SOFTIRQ0);
587 return hdr_buff;
588 }
589
590 misplaced_hdr_bytes = (sizeof(struct rds_header) - (RDS_FRAG_SIZE - data_len));
591
592 memmove(hdr_buff + misplaced_hdr_bytes, hdr_buff, misplaced_hdr_bytes);
593
594 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
595 memcpy(hdr_buff, addr + recv->r_frag->f_offset + data_len,
596 sizeof(struct rds_header) - misplaced_hdr_bytes);
597 kunmap_atomic(addr, KM_SOFTIRQ0);
598 return hdr_buff;
599}
600
601/* 725/*
602 * It's kind of lame that we're copying from the posted receive pages into 726 * It's kind of lame that we're copying from the posted receive pages into
603 * long-lived bitmaps. We could have posted the bitmaps and rdma written into 727 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
@@ -639,7 +763,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
639 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); 763 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
640 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ 764 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
641 765
642 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); 766 addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0);
643 767
644 src = addr + frag_off; 768 src = addr + frag_off;
645 dst = (void *)map->m_page_addrs[map_page] + map_off; 769 dst = (void *)map->m_page_addrs[map_page] + map_off;
@@ -710,7 +834,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
710 } 834 }
711 data_len -= sizeof(struct rds_header); 835 data_len -= sizeof(struct rds_header);
712 836
713 ihdr = rds_ib_get_header(conn, recv, data_len); 837 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
714 838
715 /* Validate the checksum. */ 839 /* Validate the checksum. */
716 if (!rds_message_verify_checksum(ihdr)) { 840 if (!rds_message_verify_checksum(ihdr)) {
@@ -742,12 +866,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
742 * the inc is freed. We don't go that route, so we have to drop the 866 * the inc is freed. We don't go that route, so we have to drop the
743 * page ref ourselves. We can't just leave the page on the recv 867 * page ref ourselves. We can't just leave the page on the recv
744 * because that confuses the dma mapping of pages and each recv's use 868 * because that confuses the dma mapping of pages and each recv's use
745 * of a partial page. We can leave the frag, though, it will be 869 * of a partial page.
746 * reused.
747 * 870 *
748 * FIXME: Fold this into the code path below. 871 * FIXME: Fold this into the code path below.
749 */ 872 */
750 rds_ib_frag_drop_page(recv->r_frag); 873 rds_ib_frag_free(ic, recv->r_frag);
874 recv->r_frag = NULL;
751 return; 875 return;
752 } 876 }
753 877
@@ -757,7 +881,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
757 * into the inc and save the inc so we can hang upcoming fragments 881 * into the inc and save the inc so we can hang upcoming fragments
758 * off its list. 882 * off its list.
759 */ 883 */
760 if (ibinc == NULL) { 884 if (!ibinc) {
761 ibinc = recv->r_ibinc; 885 ibinc = recv->r_ibinc;
762 recv->r_ibinc = NULL; 886 recv->r_ibinc = NULL;
763 ic->i_ibinc = ibinc; 887 ic->i_ibinc = ibinc;
@@ -842,32 +966,38 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
842 struct rds_ib_recv_work *recv; 966 struct rds_ib_recv_work *recv;
843 967
844 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { 968 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
845 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 969 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
846 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 970 (unsigned long long)wc.wr_id, wc.status,
971 rds_ib_wc_status_str(wc.status), wc.byte_len,
847 be32_to_cpu(wc.ex.imm_data)); 972 be32_to_cpu(wc.ex.imm_data));
848 rds_ib_stats_inc(s_ib_rx_cq_event); 973 rds_ib_stats_inc(s_ib_rx_cq_event);
849 974
850 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; 975 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
851 976
852 rds_ib_recv_unmap_page(ic, recv); 977 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
853 978
854 /* 979 /*
855 * Also process recvs in connecting state because it is possible 980 * Also process recvs in connecting state because it is possible
856 * to get a recv completion _before_ the rdmacm ESTABLISHED 981 * to get a recv completion _before_ the rdmacm ESTABLISHED
857 * event is processed. 982 * event is processed.
858 */ 983 */
859 if (rds_conn_up(conn) || rds_conn_connecting(conn)) { 984 if (wc.status == IB_WC_SUCCESS) {
985 rds_ib_process_recv(conn, recv, wc.byte_len, state);
986 } else {
860 /* We expect errors as the qp is drained during shutdown */ 987 /* We expect errors as the qp is drained during shutdown */
861 if (wc.status == IB_WC_SUCCESS) { 988 if (rds_conn_up(conn) || rds_conn_connecting(conn))
862 rds_ib_process_recv(conn, recv, wc.byte_len, state); 989 rds_ib_conn_error(conn, "recv completion on %pI4 had "
863 } else { 990 "status %u (%s), disconnecting and "
864 rds_ib_conn_error(conn, "recv completion on " 991 "reconnecting\n", &conn->c_faddr,
865 "%pI4 had status %u, disconnecting and " 992 wc.status,
866 "reconnecting\n", &conn->c_faddr, 993 rds_ib_wc_status_str(wc.status));
867 wc.status);
868 }
869 } 994 }
870 995
996 /*
997 * It's very important that we only free this ring entry if we've truly
998 * freed the resources allocated to the entry. The refilling path can
999 * leak if we don't.
1000 */
871 rds_ib_ring_free(&ic->i_recv_ring, 1); 1001 rds_ib_ring_free(&ic->i_recv_ring, 1);
872 } 1002 }
873} 1003}
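
The reworked poll loop above keys processing off wc.status rather than connection state and, per the new comment, returns the ring slot only after the entry's resources are gone. A small stand-in model of that ordering follows; all types and helpers here are invented.

#include <stdbool.h>
#include <stdio.h>

struct recv_slot { void *frag; };

static void unmap_frag(struct recv_slot *s)  { (void)s; /* ib_dma_unmap_sg() */ }
static void deliver(struct recv_slot *s)     { printf("delivered %p\n", s->frag); }
static void free_ring_entry(void)            { /* rds_ib_ring_free(..., 1) */ }

static void poll_one(struct recv_slot *s, int wc_status, bool conn_usable)
{
	unmap_frag(s);			/* always undo the mapping first */

	if (wc_status == 0)
		deliver(s);
	else if (conn_usable)
		fprintf(stderr, "recv completion error %d\n", wc_status);
	/* else: flush errors while the qp drains during shutdown are expected */

	/* nothing allocated to the slot may survive past this point */
	free_ring_entry();
}

int main(void)
{
	struct recv_slot s = { .frag = &s };

	poll_one(&s, 0, true);		/* normal delivery */
	poll_one(&s, 5, false);		/* error during shutdown: silent */
	return 0;
}
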
@@ -897,11 +1027,8 @@ void rds_ib_recv_tasklet_fn(unsigned long data)
897 if (rds_ib_ring_empty(&ic->i_recv_ring)) 1027 if (rds_ib_ring_empty(&ic->i_recv_ring))
898 rds_ib_stats_inc(s_ib_rx_ring_empty); 1028 rds_ib_stats_inc(s_ib_rx_ring_empty);
899 1029
900 /*
901 * If the ring is running low, then schedule the thread to refill.
902 */
903 if (rds_ib_ring_low(&ic->i_recv_ring)) 1030 if (rds_ib_ring_low(&ic->i_recv_ring))
904 queue_delayed_work(rds_wq, &conn->c_recv_w, 0); 1031 rds_ib_recv_refill(conn, 0);
905} 1032}
906 1033
907int rds_ib_recv(struct rds_connection *conn) 1034int rds_ib_recv(struct rds_connection *conn)
@@ -910,25 +1037,13 @@ int rds_ib_recv(struct rds_connection *conn)
910 int ret = 0; 1037 int ret = 0;
911 1038
912 rdsdebug("conn %p\n", conn); 1039 rdsdebug("conn %p\n", conn);
913
914 /*
915 * If we get a temporary posting failure in this context then
916 * we're really low and we want the caller to back off for a bit.
917 */
918 mutex_lock(&ic->i_recv_mutex);
919 if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
920 ret = -ENOMEM;
921 else
922 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
923 mutex_unlock(&ic->i_recv_mutex);
924
925 if (rds_conn_up(conn)) 1040 if (rds_conn_up(conn))
926 rds_ib_attempt_ack(ic); 1041 rds_ib_attempt_ack(ic);
927 1042
928 return ret; 1043 return ret;
929} 1044}
930 1045
931int __init rds_ib_recv_init(void) 1046int rds_ib_recv_init(void)
932{ 1047{
933 struct sysinfo si; 1048 struct sysinfo si;
934 int ret = -ENOMEM; 1049 int ret = -ENOMEM;
@@ -939,14 +1054,14 @@ int __init rds_ib_recv_init(void)
939 1054
940 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", 1055 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
941 sizeof(struct rds_ib_incoming), 1056 sizeof(struct rds_ib_incoming),
942 0, 0, NULL); 1057 0, SLAB_HWCACHE_ALIGN, NULL);
943 if (rds_ib_incoming_slab == NULL) 1058 if (!rds_ib_incoming_slab)
944 goto out; 1059 goto out;
945 1060
946 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", 1061 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
947 sizeof(struct rds_page_frag), 1062 sizeof(struct rds_page_frag),
948 0, 0, NULL); 1063 0, SLAB_HWCACHE_ALIGN, NULL);
949 if (rds_ib_frag_slab == NULL) 1064 if (!rds_ib_frag_slab)
950 kmem_cache_destroy(rds_ib_incoming_slab); 1065 kmem_cache_destroy(rds_ib_incoming_slab);
951 else 1066 else
952 ret = 0; 1067 ret = 0;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 17fa80803ab..71f373c421b 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -36,11 +36,49 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "ib.h" 39#include "ib.h"
41 40
42static void rds_ib_send_rdma_complete(struct rds_message *rm, 41static char *rds_ib_wc_status_strings[] = {
43 int wc_status) 42#define RDS_IB_WC_STATUS_STR(foo) \
43 [IB_WC_##foo] = __stringify(IB_WC_##foo)
44 RDS_IB_WC_STATUS_STR(SUCCESS),
45 RDS_IB_WC_STATUS_STR(LOC_LEN_ERR),
46 RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR),
47 RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR),
48 RDS_IB_WC_STATUS_STR(LOC_PROT_ERR),
49 RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR),
50 RDS_IB_WC_STATUS_STR(MW_BIND_ERR),
51 RDS_IB_WC_STATUS_STR(BAD_RESP_ERR),
52 RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR),
53 RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR),
54 RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR),
55 RDS_IB_WC_STATUS_STR(REM_OP_ERR),
56 RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR),
57 RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR),
58 RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR),
59 RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR),
60 RDS_IB_WC_STATUS_STR(REM_ABORT_ERR),
61 RDS_IB_WC_STATUS_STR(INV_EECN_ERR),
62 RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR),
63 RDS_IB_WC_STATUS_STR(FATAL_ERR),
64 RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR),
65 RDS_IB_WC_STATUS_STR(GENERAL_ERR),
66#undef RDS_IB_WC_STATUS_STR
67};
68
69char *rds_ib_wc_status_str(enum ib_wc_status status)
70{
71 return rds_str_array(rds_ib_wc_status_strings,
72 ARRAY_SIZE(rds_ib_wc_status_strings), status);
73}
74
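
rds_ib_wc_status_str() builds its table with designated initializers and __stringify so each enum value names itself. A userspace model of the same idiom is below; the out-of-range fallback string is an assumption about what rds_str_array() does, not a quote of it.

#include <stdio.h>

enum wc_status { WC_SUCCESS, WC_LOC_LEN_ERR, WC_WR_FLUSH_ERR, WC_NR };

#define STR(x) #x
#define WC_STATUS_STR(s) [WC_##s] = STR(WC_##s)

static const char *wc_status_strings[] = {
	WC_STATUS_STR(SUCCESS),
	WC_STATUS_STR(LOC_LEN_ERR),
	WC_STATUS_STR(WR_FLUSH_ERR),
};
#undef WC_STATUS_STR

static const char *wc_status_str(unsigned int status)
{
	if (status >= sizeof(wc_status_strings) / sizeof(wc_status_strings[0]) ||
	    !wc_status_strings[status])
		return "UNKNOWN";	/* assumed fallback */
	return wc_status_strings[status];
}

int main(void)
{
	printf("%s\n", wc_status_str(WC_WR_FLUSH_ERR));	/* WC_WR_FLUSH_ERR */
	printf("%s\n", wc_status_str(42));			/* UNKNOWN */
	return 0;
}
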
75/*
76 * Convert IB-specific error message to RDS error message and call core
77 * completion handler.
78 */
79static void rds_ib_send_complete(struct rds_message *rm,
80 int wc_status,
81 void (*complete)(struct rds_message *rm, int status))
44{ 82{
45 int notify_status; 83 int notify_status;
46 84
@@ -60,69 +98,125 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm,
60 notify_status = RDS_RDMA_OTHER_ERROR; 98 notify_status = RDS_RDMA_OTHER_ERROR;
61 break; 99 break;
62 } 100 }
63 rds_rdma_send_complete(rm, notify_status); 101 complete(rm, notify_status);
102}
103
104static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
105 struct rm_data_op *op,
106 int wc_status)
107{
108 if (op->op_nents)
109 ib_dma_unmap_sg(ic->i_cm_id->device,
110 op->op_sg, op->op_nents,
111 DMA_TO_DEVICE);
64} 112}
65 113
66static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, 114static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
67 struct rds_rdma_op *op) 115 struct rm_rdma_op *op,
116 int wc_status)
68{ 117{
69 if (op->r_mapped) { 118 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 119 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 120 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 121 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 122 op->op_mapped = 0;
74 } 123 }
124
125 /* If the user asked for a completion notification on this
126 * message, we can implement three different semantics:
127 * 1. Notify when we received the ACK on the RDS message
128 * that was queued with the RDMA. This provides reliable
129 * notification of RDMA status at the expense of a one-way
130 * packet delay.
131 * 2. Notify when the IB stack gives us the completion event for
132 * the RDMA operation.
133 * 3. Notify when the IB stack gives us the completion event for
134 * the accompanying RDS messages.
135 * Here, we implement approach #3. To implement approach #2,
136 * we would need to take an event for the rdma WR. To implement #1,
137 * don't call rds_rdma_send_complete at all, and fall back to the notify
138 * handling in the ACK processing code.
139 *
140 * Note: There's no need to explicitly sync any RDMA buffers using
141 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
142 * operation itself unmapped the RDMA buffers, which takes care
143 * of synching.
144 */
145 rds_ib_send_complete(container_of(op, struct rds_message, rdma),
146 wc_status, rds_rdma_send_complete);
147
148 if (op->op_write)
149 rds_stats_add(s_send_rdma_bytes, op->op_bytes);
150 else
151 rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
75} 152}
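
The comment above explains the three possible notification semantics and why the code picks #3; the refactor makes that choice reusable by routing every op type through rds_ib_send_complete() with an op-specific completion callback. A hedged userspace sketch of that dispatch, with made-up status values and handlers:

#include <stdio.h>

enum { WC_SUCCESS = 0, WC_WR_FLUSH_ERR = 5, WC_REM_ACCESS_ERR = 10 };
enum { NOTIFY_SUCCESS, NOTIFY_REMOTE_ERROR, NOTIFY_OTHER_ERROR };

struct msg { const char *name; };

static void rdma_send_complete(struct msg *m, int status)
{
	printf("%s: rdma notify %d\n", m->name, status);
}

static void atomic_send_complete(struct msg *m, int status)
{
	printf("%s: atomic notify %d\n", m->name, status);
}

/* shared translation from work-completion status to a notify status */
static void send_complete(struct msg *m, int wc_status,
			  void (*complete)(struct msg *, int))
{
	int notify;

	switch (wc_status) {
	case WC_SUCCESS:
		notify = NOTIFY_SUCCESS;
		break;
	case WC_REM_ACCESS_ERR:
		notify = NOTIFY_REMOTE_ERROR;
		break;
	default:
		notify = NOTIFY_OTHER_ERROR;
		break;
	}
	complete(m, notify);	/* op-specific delivery, common translation */
}

int main(void)
{
	struct msg m = { "rm" };

	send_complete(&m, WC_SUCCESS, rdma_send_complete);
	send_complete(&m, WC_WR_FLUSH_ERR, atomic_send_complete);
	return 0;
}
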
76 153
77static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, 154static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
78 struct rds_ib_send_work *send, 155 struct rm_atomic_op *op,
79 int wc_status) 156 int wc_status)
80{ 157{
81 struct rds_message *rm = send->s_rm; 158 /* unmap atomic recvbuf */
82 159 if (op->op_mapped) {
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 160 ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
84 161 DMA_FROM_DEVICE);
85 ib_dma_unmap_sg(ic->i_cm_id->device, 162 op->op_mapped = 0;
86 rm->m_sg, rm->m_nents, 163 }
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_ib_send_rdma_complete(rm, wc_status);
113 164
114 if (rm->m_rdma_op->r_write) 165 rds_ib_send_complete(container_of(op, struct rds_message, atomic),
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 166 wc_status, rds_atomic_send_complete);
116 else 167
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 168 if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
169 rds_ib_stats_inc(s_ib_atomic_cswp);
170 else
171 rds_ib_stats_inc(s_ib_atomic_fadd);
172}
173
174/*
 175 * Unmap the resources associated with a struct rds_ib_send_work.
 176 *
 177 * Returns the rm only because the caller (the event handler) needs it
 178 * and, at present, it cannot be obtained any other way than by
 179 * switching on wr.opcode as we do here.
180 */
181static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
182 struct rds_ib_send_work *send,
183 int wc_status)
184{
185 struct rds_message *rm = NULL;
186
187 /* In the error case, wc.opcode sometimes contains garbage */
188 switch (send->s_wr.opcode) {
189 case IB_WR_SEND:
190 if (send->s_op) {
191 rm = container_of(send->s_op, struct rds_message, data);
192 rds_ib_send_unmap_data(ic, send->s_op, wc_status);
193 }
194 break;
195 case IB_WR_RDMA_WRITE:
196 case IB_WR_RDMA_READ:
197 if (send->s_op) {
198 rm = container_of(send->s_op, struct rds_message, rdma);
199 rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
200 }
201 break;
202 case IB_WR_ATOMIC_FETCH_AND_ADD:
203 case IB_WR_ATOMIC_CMP_AND_SWP:
204 if (send->s_op) {
205 rm = container_of(send->s_op, struct rds_message, atomic);
206 rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
207 }
208 break;
209 default:
210 if (printk_ratelimit())
211 printk(KERN_NOTICE
212 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
213 __func__, send->s_wr.opcode);
214 break;
118 } 215 }
119 216
120 /* If anyone waited for this message to get flushed out, wake 217 send->s_wr.opcode = 0xdead;
121 * them up now */
122 rds_message_unmapped(rm);
123 218
124 rds_message_put(rm); 219 return rm;
125 send->s_rm = NULL;
126} 220}
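
rds_ib_send_unmap_op() recovers the owning message from the embedded op with container_of(), picking the member by wr.opcode. The pointer arithmetic behind that is easy to show in isolation; the struct layout below is invented for the example.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct data_op { int nents; };
struct rdma_op { int write; };

struct message {
	int seq;
	struct data_op data;
	struct rdma_op rdma;
};

int main(void)
{
	struct message m = { .seq = 7 };
	struct rdma_op *op = &m.rdma;	/* what the send ring entry stores */

	/* recover the enclosing message from the embedded op */
	struct message *rm = container_of(op, struct message, rdma);

	printf("recovered message seq %d\n", rm->seq);	/* prints 7 */
	return 0;
}
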
127 221
128void rds_ib_send_init_ring(struct rds_ib_connection *ic) 222void rds_ib_send_init_ring(struct rds_ib_connection *ic)
@@ -133,23 +227,18 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 227 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge; 228 struct ib_sge *sge;
135 229
136 send->s_rm = NULL;
137 send->s_op = NULL; 230 send->s_op = NULL;
138 231
139 send->s_wr.wr_id = i; 232 send->s_wr.wr_id = i;
140 send->s_wr.sg_list = send->s_sge; 233 send->s_wr.sg_list = send->s_sge;
141 send->s_wr.num_sge = 1;
142 send->s_wr.opcode = IB_WR_SEND;
143 send->s_wr.send_flags = 0;
144 send->s_wr.ex.imm_data = 0; 234 send->s_wr.ex.imm_data = 0;
145 235
146 sge = rds_ib_data_sge(ic, send->s_sge); 236 sge = &send->s_sge[0];
147 sge->lkey = ic->i_mr->lkey;
148
149 sge = rds_ib_header_sge(ic, send->s_sge);
150 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); 237 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
151 sge->length = sizeof(struct rds_header); 238 sge->length = sizeof(struct rds_header);
152 sge->lkey = ic->i_mr->lkey; 239 sge->lkey = ic->i_mr->lkey;
240
241 send->s_sge[1].lkey = ic->i_mr->lkey;
153 } 242 }
154} 243}
155 244
@@ -159,16 +248,24 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
159 u32 i; 248 u32 i;
160 249
161 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 250 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
162 if (send->s_wr.opcode == 0xdead) 251 if (send->s_op && send->s_wr.opcode != 0xdead)
163 continue; 252 rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
164 if (send->s_rm)
165 rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
166 if (send->s_op)
167 rds_ib_send_unmap_rdma(ic, send->s_op);
168 } 253 }
169} 254}
170 255
171/* 256/*
257 * The only fast path caller always has a non-zero nr, so we don't
258 * bother testing nr before performing the atomic sub.
259 */
260static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
261{
262 if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
263 waitqueue_active(&rds_ib_ring_empty_wait))
264 wake_up(&rds_ib_ring_empty_wait);
265 BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
266}
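
rds_ib_sub_signaled() pairs with the atomic_add() calls in the post paths: signaled work requests are counted up when posted and down when reaped, and whoever waits for the ring to drain is woken when the count hits zero. A userspace model using C11 atomics; the wait side is a spin loop standing in for the kernel waitqueue.

#include <assert.h>
#include <stdatomic.h>

static atomic_int signaled_sends;

static void add_signaled(int nr)
{
	atomic_fetch_add(&signaled_sends, nr);
}

static void sub_signaled(int nr)
{
	int left = atomic_fetch_sub(&signaled_sends, nr) - nr;

	assert(left >= 0);	/* mirrors the BUG_ON() in the kernel */
	if (left == 0) {
		/* kernel: wake_up(&rds_ib_ring_empty_wait) */
	}
}

int main(void)
{
	add_signaled(3);	/* e.g. three IB_SEND_SIGNALED WRs posted */
	sub_signaled(2);	/* two completions reaped */
	sub_signaled(1);	/* last one: a waiter would be woken here */
	while (atomic_load(&signaled_sends))	/* shutdown-style wait */
		;
	return 0;
}
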
267
268/*
172 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc 269 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
173 * operations performed in the send path. As the sender allocs and potentially 270 * operations performed in the send path. As the sender allocs and potentially
174 * unallocs the next free entry in the ring it doesn't alter which is 271 * unallocs the next free entry in the ring it doesn't alter which is
@@ -178,12 +275,14 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
178{ 275{
179 struct rds_connection *conn = context; 276 struct rds_connection *conn = context;
180 struct rds_ib_connection *ic = conn->c_transport_data; 277 struct rds_ib_connection *ic = conn->c_transport_data;
278 struct rds_message *rm = NULL;
181 struct ib_wc wc; 279 struct ib_wc wc;
182 struct rds_ib_send_work *send; 280 struct rds_ib_send_work *send;
183 u32 completed; 281 u32 completed;
184 u32 oldest; 282 u32 oldest;
185 u32 i = 0; 283 u32 i = 0;
186 int ret; 284 int ret;
285 int nr_sig = 0;
187 286
188 rdsdebug("cq %p conn %p\n", cq, conn); 287 rdsdebug("cq %p conn %p\n", cq, conn);
189 rds_ib_stats_inc(s_ib_tx_cq_call); 288 rds_ib_stats_inc(s_ib_tx_cq_call);
@@ -192,8 +291,9 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
192 rdsdebug("ib_req_notify_cq send failed: %d\n", ret); 291 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
193 292
194 while (ib_poll_cq(cq, 1, &wc) > 0) { 293 while (ib_poll_cq(cq, 1, &wc) > 0) {
195 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 294 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
196 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 295 (unsigned long long)wc.wr_id, wc.status,
296 rds_ib_wc_status_str(wc.status), wc.byte_len,
197 be32_to_cpu(wc.ex.imm_data)); 297 be32_to_cpu(wc.ex.imm_data));
198 rds_ib_stats_inc(s_ib_tx_cq_event); 298 rds_ib_stats_inc(s_ib_tx_cq_event);
199 299
@@ -210,51 +310,30 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
210 310
211 for (i = 0; i < completed; i++) { 311 for (i = 0; i < completed; i++) {
212 send = &ic->i_sends[oldest]; 312 send = &ic->i_sends[oldest];
313 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
314 nr_sig++;
213 315
214 /* In the error case, wc.opcode sometimes contains garbage */ 316 rm = rds_ib_send_unmap_op(ic, send, wc.status);
215 switch (send->s_wr.opcode) {
216 case IB_WR_SEND:
217 if (send->s_rm)
218 rds_ib_send_unmap_rm(ic, send, wc.status);
219 break;
220 case IB_WR_RDMA_WRITE:
221 case IB_WR_RDMA_READ:
222 /* Nothing to be done - the SG list will be unmapped
223 * when the SEND completes. */
224 break;
225 default:
226 if (printk_ratelimit())
227 printk(KERN_NOTICE
228 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
229 __func__, send->s_wr.opcode);
230 break;
231 }
232 317
233 send->s_wr.opcode = 0xdead;
234 send->s_wr.num_sge = 1;
235 if (send->s_queued + HZ/2 < jiffies) 318 if (send->s_queued + HZ/2 < jiffies)
236 rds_ib_stats_inc(s_ib_tx_stalled); 319 rds_ib_stats_inc(s_ib_tx_stalled);
237 320
238 /* If a RDMA operation produced an error, signal this right 321 if (send->s_op) {
239 * away. If we don't, the subsequent SEND that goes with this 322 if (send->s_op == rm->m_final_op) {
240 * RDMA will be canceled with ERR_WFLUSH, and the application 323 /* If anyone waited for this message to get flushed out, wake
241 * never learn that the RDMA failed. */ 324 * them up now */
242 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { 325 rds_message_unmapped(rm);
243 struct rds_message *rm;
244
245 rm = rds_send_get_message(conn, send->s_op);
246 if (rm) {
247 if (rm->m_rdma_op)
248 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
249 rds_ib_send_rdma_complete(rm, wc.status);
250 rds_message_put(rm);
251 } 326 }
327 rds_message_put(rm);
328 send->s_op = NULL;
252 } 329 }
253 330
254 oldest = (oldest + 1) % ic->i_send_ring.w_nr; 331 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
255 } 332 }
256 333
257 rds_ib_ring_free(&ic->i_send_ring, completed); 334 rds_ib_ring_free(&ic->i_send_ring, completed);
335 rds_ib_sub_signaled(ic, nr_sig);
336 nr_sig = 0;
258 337
259 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 338 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
260 test_bit(0, &conn->c_map_queued)) 339 test_bit(0, &conn->c_map_queued))
@@ -262,10 +341,10 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
262 341
263 /* We expect errors as the qp is drained during shutdown */ 342 /* We expect errors as the qp is drained during shutdown */
264 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { 343 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
265 rds_ib_conn_error(conn, 344 rds_ib_conn_error(conn, "send completion on %pI4 had status "
266 "send completion on %pI4 " 345 "%u (%s), disconnecting and reconnecting\n",
267 "had status %u, disconnecting and reconnecting\n", 346 &conn->c_faddr, wc.status,
268 &conn->c_faddr, wc.status); 347 rds_ib_wc_status_str(wc.status));
269 } 348 }
270 } 349 }
271} 350}
@@ -294,7 +373,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
294 * credits (see rds_ib_send_add_credits below). 373 * credits (see rds_ib_send_add_credits below).
295 * 374 *
296 * The RDS send code is essentially single-threaded; rds_send_xmit 375 * The RDS send code is essentially single-threaded; rds_send_xmit
297 * grabs c_send_lock to ensure exclusive access to the send ring. 376 * sets RDS_IN_XMIT to ensure exclusive access to the send ring.
298 * However, the ACK sending code is independent and can race with 377 * However, the ACK sending code is independent and can race with
299 * message SENDs. 378 * message SENDs.
300 * 379 *
@@ -413,40 +492,21 @@ void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
413 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 492 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
414} 493}
415 494
416static inline void 495static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
417rds_ib_xmit_populate_wr(struct rds_ib_connection *ic, 496 struct rds_ib_send_work *send,
418 struct rds_ib_send_work *send, unsigned int pos, 497 bool notify)
419 unsigned long buffer, unsigned int length,
420 int send_flags)
421{ 498{
422 struct ib_sge *sge; 499 /*
423 500 * We want to delay signaling completions just enough to get
424 WARN_ON(pos != send - ic->i_sends); 501 * the batching benefits but not so much that we create dead time
425 502 * on the wire.
426 send->s_wr.send_flags = send_flags; 503 */
427 send->s_wr.opcode = IB_WR_SEND; 504 if (ic->i_unsignaled_wrs-- == 0 || notify) {
428 send->s_wr.num_sge = 2; 505 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
429 send->s_wr.next = NULL; 506 send->s_wr.send_flags |= IB_SEND_SIGNALED;
430 send->s_queued = jiffies; 507 return 1;
431 send->s_op = NULL;
432
433 if (length != 0) {
434 sge = rds_ib_data_sge(ic, send->s_sge);
435 sge->addr = buffer;
436 sge->length = length;
437 sge->lkey = ic->i_mr->lkey;
438
439 sge = rds_ib_header_sge(ic, send->s_sge);
440 } else {
441 /* We're sending a packet with no payload. There is only
442 * one SGE */
443 send->s_wr.num_sge = 1;
444 sge = &send->s_sge[0];
445 } 508 }
446 509 return 0;
447 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
448 sge->length = sizeof(struct rds_header);
449 sge->lkey = ic->i_mr->lkey;
450} 510}
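
rds_ib_set_wr_signal_state() is the batching rule the old inline comment described: request a signaled completion only every Nth work request, or when the caller explicitly asks for notification. A sketch with an illustrative N:

#include <stdbool.h>
#include <stdio.h>

#define MAX_UNSIGNALED_WRS 16	/* stand-in for rds_ib_sysctl_max_unsig_wrs */

static unsigned int unsignaled_wrs = MAX_UNSIGNALED_WRS;

/* returns 1 when this WR should set IB_SEND_SIGNALED */
static int set_wr_signal_state(bool notify)
{
	if (unsignaled_wrs-- == 0 || notify) {
		unsignaled_wrs = MAX_UNSIGNALED_WRS;
		return 1;
	}
	return 0;
}

int main(void)
{
	int i, signaled = 0;

	for (i = 0; i < 64; i++)
		signaled += set_wr_signal_state(false);
	printf("%d of 64 WRs signaled\n", signaled);	/* every 17th -> 3 */
	return 0;
}
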
451 511
452/* 512/*
@@ -475,13 +535,14 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
475 u32 pos; 535 u32 pos;
476 u32 i; 536 u32 i;
477 u32 work_alloc; 537 u32 work_alloc;
478 u32 credit_alloc; 538 u32 credit_alloc = 0;
479 u32 posted; 539 u32 posted;
480 u32 adv_credits = 0; 540 u32 adv_credits = 0;
481 int send_flags = 0; 541 int send_flags = 0;
482 int sent; 542 int bytes_sent = 0;
483 int ret; 543 int ret;
484 int flow_controlled = 0; 544 int flow_controlled = 0;
545 int nr_sig = 0;
485 546
486 BUG_ON(off % RDS_FRAG_SIZE); 547 BUG_ON(off % RDS_FRAG_SIZE);
487 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); 548 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
@@ -507,14 +568,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
507 goto out; 568 goto out;
508 } 569 }
509 570
510 credit_alloc = work_alloc;
511 if (ic->i_flowctl) { 571 if (ic->i_flowctl) {
512 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); 572 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
513 adv_credits += posted; 573 adv_credits += posted;
514 if (credit_alloc < work_alloc) { 574 if (credit_alloc < work_alloc) {
515 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); 575 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
516 work_alloc = credit_alloc; 576 work_alloc = credit_alloc;
517 flow_controlled++; 577 flow_controlled = 1;
518 } 578 }
519 if (work_alloc == 0) { 579 if (work_alloc == 0) {
520 set_bit(RDS_LL_SEND_FULL, &conn->c_flags); 580 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
@@ -525,31 +585,25 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
525 } 585 }
526 586
527 /* map the message the first time we see it */ 587 /* map the message the first time we see it */
528 if (ic->i_rm == NULL) { 588 if (!ic->i_data_op) {
529 /* 589 if (rm->data.op_nents) {
530 printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n", 590 rm->data.op_count = ib_dma_map_sg(dev,
531 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 591 rm->data.op_sg,
532 rm->m_inc.i_hdr.h_flags, 592 rm->data.op_nents,
533 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 593 DMA_TO_DEVICE);
534 */ 594 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
535 if (rm->m_nents) { 595 if (rm->data.op_count == 0) {
536 rm->m_count = ib_dma_map_sg(dev,
537 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
538 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
539 if (rm->m_count == 0) {
540 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 596 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
541 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 597 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
542 ret = -ENOMEM; /* XXX ? */ 598 ret = -ENOMEM; /* XXX ? */
543 goto out; 599 goto out;
544 } 600 }
545 } else { 601 } else {
546 rm->m_count = 0; 602 rm->data.op_count = 0;
547 } 603 }
548 604
549 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
550 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
551 rds_message_addref(rm); 605 rds_message_addref(rm);
552 ic->i_rm = rm; 606 ic->i_data_op = &rm->data;
553 607
554 /* Finalize the header */ 608 /* Finalize the header */
555 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) 609 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
@@ -559,10 +613,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
559 613
560 /* If it has a RDMA op, tell the peer we did it. This is 614 /* If it has a RDMA op, tell the peer we did it. This is
561 * used by the peer to release use-once RDMA MRs. */ 615 * used by the peer to release use-once RDMA MRs. */
562 if (rm->m_rdma_op) { 616 if (rm->rdma.op_active) {
563 struct rds_ext_header_rdma ext_hdr; 617 struct rds_ext_header_rdma ext_hdr;
564 618
565 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 619 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
566 rds_message_add_extension(&rm->m_inc.i_hdr, 620 rds_message_add_extension(&rm->m_inc.i_hdr,
567 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 621 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
568 } 622 }
@@ -582,99 +636,77 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
582 /* 636 /*
583 * Update adv_credits since we reset the ACK_REQUIRED bit. 637 * Update adv_credits since we reset the ACK_REQUIRED bit.
584 */ 638 */
585 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); 639 if (ic->i_flowctl) {
586 adv_credits += posted; 640 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
587 BUG_ON(adv_credits > 255); 641 adv_credits += posted;
642 BUG_ON(adv_credits > 255);
643 }
588 } 644 }
589 645
590 send = &ic->i_sends[pos];
591 first = send;
592 prev = NULL;
593 scat = &rm->m_sg[sg];
594 sent = 0;
595 i = 0;
596
597 /* Sometimes you want to put a fence between an RDMA 646 /* Sometimes you want to put a fence between an RDMA
598 * READ and the following SEND. 647 * READ and the following SEND.
599 * We could either do this all the time 648 * We could either do this all the time
600 * or when requested by the user. Right now, we let 649 * or when requested by the user. Right now, we let
601 * the application choose. 650 * the application choose.
602 */ 651 */
603 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 652 if (rm->rdma.op_active && rm->rdma.op_fence)
604 send_flags = IB_SEND_FENCE; 653 send_flags = IB_SEND_FENCE;
605 654
606 /* 655 /* Each frag gets a header. Msgs may be 0 bytes */
607 * We could be copying the header into the unused tail of the page. 656 send = &ic->i_sends[pos];
608 * That would need to be changed in the future when those pages might 657 first = send;
609 * be mapped userspace pages or page cache pages. So instead we always 658 prev = NULL;
610 * use a second sge and our long-lived ring of mapped headers. We send 659 scat = &ic->i_data_op->op_sg[sg];
611 * the header after the data so that the data payload can be aligned on 660 i = 0;
612 * the receiver. 661 do {
613 */ 662 unsigned int len = 0;
614 663
615 /* handle a 0-len message */ 664 /* Set up the header */
616 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { 665 send->s_wr.send_flags = send_flags;
617 rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); 666 send->s_wr.opcode = IB_WR_SEND;
618 goto add_header; 667 send->s_wr.num_sge = 1;
619 } 668 send->s_wr.next = NULL;
669 send->s_queued = jiffies;
670 send->s_op = NULL;
620 671
621 /* if there's data reference it with a chain of work reqs */ 672 send->s_sge[0].addr = ic->i_send_hdrs_dma
622 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 673 + (pos * sizeof(struct rds_header));
623 unsigned int len; 674 send->s_sge[0].length = sizeof(struct rds_header);
624 675
625 send = &ic->i_sends[pos]; 676 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
626 677
627 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); 678 /* Set up the data, if present */
628 rds_ib_xmit_populate_wr(ic, send, pos, 679 if (i < work_alloc
629 ib_sg_dma_address(dev, scat) + off, len, 680 && scat != &rm->data.op_sg[rm->data.op_count]) {
630 send_flags); 681 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
682 send->s_wr.num_sge = 2;
631 683
632 /* 684 send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off;
633 * We want to delay signaling completions just enough to get 685 send->s_sge[1].length = len;
634 * the batching benefits but not so much that we create dead time
635 * on the wire.
636 */
637 if (ic->i_unsignaled_wrs-- == 0) {
638 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
639 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
640 }
641 686
642 ic->i_unsignaled_bytes -= len; 687 bytes_sent += len;
643 if (ic->i_unsignaled_bytes <= 0) { 688 off += len;
644 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; 689 if (off == ib_sg_dma_len(dev, scat)) {
645 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 690 scat++;
691 off = 0;
692 }
646 } 693 }
647 694
695 rds_ib_set_wr_signal_state(ic, send, 0);
696
648 /* 697 /*
649 * Always signal the last one if we're stopping due to flow control. 698 * Always signal the last one if we're stopping due to flow control.
650 */ 699 */
651 if (flow_controlled && i == (work_alloc-1)) 700 if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
652 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 701 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
653 702
703 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
704 nr_sig++;
705
654 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 706 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
655 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 707 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
656 708
657 sent += len; 709 if (ic->i_flowctl && adv_credits) {
658 off += len;
659 if (off == ib_sg_dma_len(dev, scat)) {
660 scat++;
661 off = 0;
662 }
663
664add_header:
665 /* Tack on the header after the data. The header SGE should already
666 * have been set up to point to the right header buffer. */
667 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
668
669 if (0) {
670 struct rds_header *hdr = &ic->i_send_hdrs[pos];
671
672 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
673 be16_to_cpu(hdr->h_dport),
674 hdr->h_flags,
675 be32_to_cpu(hdr->h_len));
676 }
677 if (adv_credits) {
678 struct rds_header *hdr = &ic->i_send_hdrs[pos]; 710 struct rds_header *hdr = &ic->i_send_hdrs[pos];
679 711
680 /* add credit and redo the header checksum */ 712 /* add credit and redo the header checksum */
@@ -689,20 +721,25 @@ add_header:
689 prev = send; 721 prev = send;
690 722
691 pos = (pos + 1) % ic->i_send_ring.w_nr; 723 pos = (pos + 1) % ic->i_send_ring.w_nr;
692 } 724 send = &ic->i_sends[pos];
725 i++;
726
727 } while (i < work_alloc
728 && scat != &rm->data.op_sg[rm->data.op_count]);
693 729
694 /* Account the RDS header in the number of bytes we sent, but just once. 730 /* Account the RDS header in the number of bytes we sent, but just once.
695 * The caller has no concept of fragmentation. */ 731 * The caller has no concept of fragmentation. */
696 if (hdr_off == 0) 732 if (hdr_off == 0)
697 sent += sizeof(struct rds_header); 733 bytes_sent += sizeof(struct rds_header);
698 734
699 /* if we finished the message then send completion owns it */ 735 /* if we finished the message then send completion owns it */
700 if (scat == &rm->m_sg[rm->m_count]) { 736 if (scat == &rm->data.op_sg[rm->data.op_count]) {
701 prev->s_rm = ic->i_rm; 737 prev->s_op = ic->i_data_op;
702 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 738 prev->s_wr.send_flags |= IB_SEND_SOLICITED;
703 ic->i_rm = NULL; 739 ic->i_data_op = NULL;
704 } 740 }
705 741
742 /* Put back wrs & credits we didn't use */
706 if (i < work_alloc) { 743 if (i < work_alloc) {
707 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 744 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
708 work_alloc = i; 745 work_alloc = i;
@@ -710,6 +747,9 @@ add_header:
710 if (ic->i_flowctl && i < credit_alloc) 747 if (ic->i_flowctl && i < credit_alloc)
711 rds_ib_send_add_credits(conn, credit_alloc - i); 748 rds_ib_send_add_credits(conn, credit_alloc - i);
712 749
750 if (nr_sig)
751 atomic_add(nr_sig, &ic->i_signaled_sends);
752
713 /* XXX need to worry about failed_wr and partial sends. */ 753 /* XXX need to worry about failed_wr and partial sends. */
714 failed_wr = &first->s_wr; 754 failed_wr = &first->s_wr;
715 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 755 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
@@ -720,32 +760,127 @@ add_header:
720 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 " 760 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
721 "returned %d\n", &conn->c_faddr, ret); 761 "returned %d\n", &conn->c_faddr, ret);
722 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 762 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
723 if (prev->s_rm) { 763 rds_ib_sub_signaled(ic, nr_sig);
724 ic->i_rm = prev->s_rm; 764 if (prev->s_op) {
725 prev->s_rm = NULL; 765 ic->i_data_op = prev->s_op;
766 prev->s_op = NULL;
726 } 767 }
727 768
728 rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); 769 rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
729 goto out; 770 goto out;
730 } 771 }
731 772
732 ret = sent; 773 ret = bytes_sent;
733out: 774out:
734 BUG_ON(adv_credits); 775 BUG_ON(adv_credits);
735 return ret; 776 return ret;
736} 777}
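
The rewritten send loop gives every fragment a header SGE plus up to RDS_FRAG_SIZE of payload, and bytes_sent counts the header once per message (hdr_off == 0). A worked example of that accounting with made-up message sizes:

#include <stdio.h>

#define FRAG_SIZE 4096u   /* stand-in for RDS_FRAG_SIZE */
#define HDR_SIZE    48u   /* stand-in for sizeof(struct rds_header) */

/* models the value returned for len payload bytes sent in one call */
static unsigned int xmit_bytes(unsigned int len, unsigned int hdr_off,
			       unsigned int *wrs)
{
	unsigned int sent = 0, off = 0;

	*wrs = 0;
	do {				/* 0-byte messages still take one WR */
		unsigned int chunk = len - off;

		if (chunk > FRAG_SIZE)
			chunk = FRAG_SIZE;
		sent += chunk;
		off += chunk;
		(*wrs)++;
	} while (off < len);

	if (hdr_off == 0)		/* header accounted once per message */
		sent += HDR_SIZE;
	return sent;
}

int main(void)
{
	unsigned int wrs;
	unsigned int sent = xmit_bytes(10000, 0, &wrs);

	printf("%u WRs, %u bytes reported\n", wrs, sent);	/* 3 WRs, 10048 bytes */
	return 0;
}
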
737 778
738int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 779/*
780 * Issue atomic operation.
781 * A simplified version of the rdma case, we always map 1 SG, and
782 * only 8 bytes, for the return value from the atomic operation.
783 */
784int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
785{
786 struct rds_ib_connection *ic = conn->c_transport_data;
787 struct rds_ib_send_work *send = NULL;
788 struct ib_send_wr *failed_wr;
789 struct rds_ib_device *rds_ibdev;
790 u32 pos;
791 u32 work_alloc;
792 int ret;
793 int nr_sig = 0;
794
795 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
796
797 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
798 if (work_alloc != 1) {
799 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
800 rds_ib_stats_inc(s_ib_tx_ring_full);
801 ret = -ENOMEM;
802 goto out;
803 }
804
805 /* address of send request in ring */
806 send = &ic->i_sends[pos];
807 send->s_queued = jiffies;
808
809 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
810 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
811 send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
812 send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
813 send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
814 send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
815 } else { /* FADD */
816 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
817 send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
818 send->s_wr.wr.atomic.swap = 0;
819 send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
820 send->s_wr.wr.atomic.swap_mask = 0;
821 }
822 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
823 send->s_wr.num_sge = 1;
824 send->s_wr.next = NULL;
825 send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
826 send->s_wr.wr.atomic.rkey = op->op_rkey;
827 send->s_op = op;
828 rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
829
830 /* map 8 byte retval buffer to the device */
831 ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
832 rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
833 if (ret != 1) {
834 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
835 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
836 ret = -ENOMEM; /* XXX ? */
837 goto out;
838 }
839
840 /* Convert our struct scatterlist to struct ib_sge */
841 send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
842 send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
843 send->s_sge[0].lkey = ic->i_mr->lkey;
844
845 rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
846 send->s_sge[0].addr, send->s_sge[0].length);
847
848 if (nr_sig)
849 atomic_add(nr_sig, &ic->i_signaled_sends);
850
851 failed_wr = &send->s_wr;
852 ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
853 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
854 send, &send->s_wr, ret, failed_wr);
855 BUG_ON(failed_wr != &send->s_wr);
856 if (ret) {
857 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
858 "returned %d\n", &conn->c_faddr, ret);
859 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
860 rds_ib_sub_signaled(ic, nr_sig);
861 goto out;
862 }
863
864 if (unlikely(failed_wr != &send->s_wr)) {
865 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
866 BUG_ON(failed_wr != &send->s_wr);
867 }
868
869out:
870 return ret;
871}
872
873int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
739{ 874{
740 struct rds_ib_connection *ic = conn->c_transport_data; 875 struct rds_ib_connection *ic = conn->c_transport_data;
741 struct rds_ib_send_work *send = NULL; 876 struct rds_ib_send_work *send = NULL;
742 struct rds_ib_send_work *first; 877 struct rds_ib_send_work *first;
743 struct rds_ib_send_work *prev; 878 struct rds_ib_send_work *prev;
744 struct ib_send_wr *failed_wr; 879 struct ib_send_wr *failed_wr;
745 struct rds_ib_device *rds_ibdev;
746 struct scatterlist *scat; 880 struct scatterlist *scat;
747 unsigned long len; 881 unsigned long len;
748 u64 remote_addr = op->r_remote_addr; 882 u64 remote_addr = op->op_remote_addr;
883 u32 max_sge = ic->rds_ibdev->max_sge;
749 u32 pos; 884 u32 pos;
750 u32 work_alloc; 885 u32 work_alloc;
751 u32 i; 886 u32 i;
@@ -753,29 +888,28 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
753 int sent; 888 int sent;
754 int ret; 889 int ret;
755 int num_sge; 890 int num_sge;
756 891 int nr_sig = 0;
757 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 892
758 893 /* map the op the first time we see it */
759 /* map the message the first time we see it */ 894 if (!op->op_mapped) {
760 if (!op->r_mapped) { 895 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
761 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 896 op->op_sg, op->op_nents, (op->op_write) ?
762 op->r_sg, op->r_nents, (op->r_write) ? 897 DMA_TO_DEVICE : DMA_FROM_DEVICE);
763 DMA_TO_DEVICE : DMA_FROM_DEVICE); 898 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
764 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 899 if (op->op_count == 0) {
765 if (op->r_count == 0) {
766 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 900 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
767 ret = -ENOMEM; /* XXX ? */ 901 ret = -ENOMEM; /* XXX ? */
768 goto out; 902 goto out;
769 } 903 }
770 904
771 op->r_mapped = 1; 905 op->op_mapped = 1;
772 } 906 }
773 907
774 /* 908 /*
775 * Instead of knowing how to return a partial rdma read/write we insist that there 909 * Instead of knowing how to return a partial rdma read/write we insist that there
776 * be enough work requests to send the entire message. 910 * be enough work requests to send the entire message.
777 */ 911 */
778 i = ceil(op->r_count, rds_ibdev->max_sge); 912 i = ceil(op->op_count, max_sge);
779 913
780 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); 914 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
781 if (work_alloc != i) { 915 if (work_alloc != i) {
@@ -788,30 +922,24 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
788 send = &ic->i_sends[pos]; 922 send = &ic->i_sends[pos];
789 first = send; 923 first = send;
790 prev = NULL; 924 prev = NULL;
791 scat = &op->r_sg[0]; 925 scat = &op->op_sg[0];
792 sent = 0; 926 sent = 0;
793 num_sge = op->r_count; 927 num_sge = op->op_count;
794 928
795 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 929 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
796 send->s_wr.send_flags = 0; 930 send->s_wr.send_flags = 0;
797 send->s_queued = jiffies; 931 send->s_queued = jiffies;
798 /* 932 send->s_op = NULL;
799 * We want to delay signaling completions just enough to get 933
800 * the batching benefits but not so much that we create dead time on the wire. 934 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
801 */
802 if (ic->i_unsignaled_wrs-- == 0) {
803 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
804 send->s_wr.send_flags = IB_SEND_SIGNALED;
805 }
806 935
807 send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; 936 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
808 send->s_wr.wr.rdma.remote_addr = remote_addr; 937 send->s_wr.wr.rdma.remote_addr = remote_addr;
809 send->s_wr.wr.rdma.rkey = op->r_key; 938 send->s_wr.wr.rdma.rkey = op->op_rkey;
810 send->s_op = op;
811 939
812 if (num_sge > rds_ibdev->max_sge) { 940 if (num_sge > max_sge) {
813 send->s_wr.num_sge = rds_ibdev->max_sge; 941 send->s_wr.num_sge = max_sge;
814 num_sge -= rds_ibdev->max_sge; 942 num_sge -= max_sge;
815 } else { 943 } else {
816 send->s_wr.num_sge = num_sge; 944 send->s_wr.num_sge = num_sge;
817 } 945 }
@@ -821,7 +949,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
821 if (prev) 949 if (prev)
822 prev->s_wr.next = &send->s_wr; 950 prev->s_wr.next = &send->s_wr;
823 951
824 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 952 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
825 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 953 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
826 send->s_sge[j].addr = 954 send->s_sge[j].addr =
827 ib_sg_dma_address(ic->i_cm_id->device, scat); 955 ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -843,15 +971,20 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
843 send = ic->i_sends; 971 send = ic->i_sends;
844 } 972 }
845 973
846 /* if we finished the message then send completion owns it */ 974 /* give a reference to the last op */
847 if (scat == &op->r_sg[op->r_count]) 975 if (scat == &op->op_sg[op->op_count]) {
848 prev->s_wr.send_flags = IB_SEND_SIGNALED; 976 prev->s_op = op;
977 rds_message_addref(container_of(op, struct rds_message, rdma));
978 }
849 979
850 if (i < work_alloc) { 980 if (i < work_alloc) {
851 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 981 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
852 work_alloc = i; 982 work_alloc = i;
853 } 983 }
854 984
985 if (nr_sig)
986 atomic_add(nr_sig, &ic->i_signaled_sends);
987
855 failed_wr = &first->s_wr; 988 failed_wr = &first->s_wr;
856 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 989 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
857 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 990 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
@@ -861,6 +994,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
861 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " 994 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
862 "returned %d\n", &conn->c_faddr, ret); 995 "returned %d\n", &conn->c_faddr, ret);
863 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 996 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
997 rds_ib_sub_signaled(ic, nr_sig);
864 goto out; 998 goto out;
865 } 999 }
866 1000
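
rds_ib_xmit_rdma() above refuses partial sends: it sizes the allocation as ceil(op_count / max_sge) work requests so the whole scatterlist fits or nothing is posted. The arithmetic, with invented numbers:

#include <stdio.h>

/* mirrors the kernel's ceil() helper used for the WR count */
static unsigned int ceil_div(unsigned int x, unsigned int y)
{
	return (x + y - 1) / y;
}

int main(void)
{
	unsigned int op_count = 70;	/* mapped scatterlist entries */
	unsigned int max_sge  = 32;	/* device SGE limit (illustrative) */

	/* 70 entries at 32 SGEs per WR -> 3 work requests */
	printf("%u work requests needed\n", ceil_div(op_count, max_sge));
	return 0;
}
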
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index d2c904dd6fb..2d5965d6e97 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -67,6 +67,8 @@ static const char *const rds_ib_stat_names[] = {
67 "ib_rdma_mr_pool_flush", 67 "ib_rdma_mr_pool_flush",
68 "ib_rdma_mr_pool_wait", 68 "ib_rdma_mr_pool_wait",
69 "ib_rdma_mr_pool_depleted", 69 "ib_rdma_mr_pool_depleted",
70 "ib_atomic_cswp",
71 "ib_atomic_fadd",
70}; 72};
71 73
72unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, 74unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index 03f01cb4e0f..fc3da37220f 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -49,10 +49,6 @@ unsigned long rds_ib_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1; 49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; 50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
51 51
52unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
55
56/* 52/*
57 * This sysctl does nothing. 53 * This sysctl does nothing.
58 * 54 *
@@ -94,15 +90,6 @@ ctl_table rds_ib_sysctl_table[] = {
94 .extra2 = &rds_ib_sysctl_max_unsig_wr_max, 90 .extra2 = &rds_ib_sysctl_max_unsig_wr_max,
95 }, 91 },
96 { 92 {
97 .procname = "max_unsignaled_bytes",
98 .data = &rds_ib_sysctl_max_unsig_bytes,
99 .maxlen = sizeof(unsigned long),
100 .mode = 0644,
101 .proc_handler = proc_doulongvec_minmax,
102 .extra1 = &rds_ib_sysctl_max_unsig_bytes_min,
103 .extra2 = &rds_ib_sysctl_max_unsig_bytes_max,
104 },
105 {
106 .procname = "max_recv_allocation", 93 .procname = "max_recv_allocation",
107 .data = &rds_ib_sysctl_max_recv_allocation, 94 .data = &rds_ib_sysctl_max_recv_allocation,
108 .maxlen = sizeof(unsigned long), 95 .maxlen = sizeof(unsigned long),
@@ -132,10 +119,10 @@ void rds_ib_sysctl_exit(void)
132 unregister_sysctl_table(rds_ib_sysctl_hdr); 119 unregister_sysctl_table(rds_ib_sysctl_hdr);
133} 120}
134 121
135int __init rds_ib_sysctl_init(void) 122int rds_ib_sysctl_init(void)
136{ 123{
137 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table); 124 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table);
138 if (rds_ib_sysctl_hdr == NULL) 125 if (!rds_ib_sysctl_hdr)
139 return -ENOMEM; 126 return -ENOMEM;
140 return 0; 127 return 0;
141} 128}
diff --git a/net/rds/info.c b/net/rds/info.c
index c45c4173a44..4fdf1b6e84f 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -76,7 +76,7 @@ void rds_info_register_func(int optname, rds_info_func func)
76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
77 77
78 spin_lock(&rds_info_lock); 78 spin_lock(&rds_info_lock);
79 BUG_ON(rds_info_funcs[offset] != NULL); 79 BUG_ON(rds_info_funcs[offset]);
80 rds_info_funcs[offset] = func; 80 rds_info_funcs[offset] = func;
81 spin_unlock(&rds_info_lock); 81 spin_unlock(&rds_info_lock);
82} 82}
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(rds_info_deregister_func);
102 */ 102 */
103void rds_info_iter_unmap(struct rds_info_iterator *iter) 103void rds_info_iter_unmap(struct rds_info_iterator *iter)
104{ 104{
105 if (iter->addr != NULL) { 105 if (iter->addr) {
106 kunmap_atomic(iter->addr, KM_USER0); 106 kunmap_atomic(iter->addr, KM_USER0);
107 iter->addr = NULL; 107 iter->addr = NULL;
108 } 108 }
@@ -117,7 +117,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data,
117 unsigned long this; 117 unsigned long this;
118 118
119 while (bytes) { 119 while (bytes) {
120 if (iter->addr == NULL) 120 if (!iter->addr)
121 iter->addr = kmap_atomic(*iter->pages, KM_USER0); 121 iter->addr = kmap_atomic(*iter->pages, KM_USER0);
122 122
123 this = min(bytes, PAGE_SIZE - iter->offset); 123 this = min(bytes, PAGE_SIZE - iter->offset);
@@ -188,7 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
188 >> PAGE_SHIFT; 188 >> PAGE_SHIFT;
189 189
190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); 190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
191 if (pages == NULL) { 191 if (!pages) {
192 ret = -ENOMEM; 192 ret = -ENOMEM;
193 goto out; 193 goto out;
194 } 194 }
@@ -206,7 +206,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
206 206
207call_func: 207call_func:
208 func = rds_info_funcs[optname - RDS_INFO_FIRST]; 208 func = rds_info_funcs[optname - RDS_INFO_FIRST];
209 if (func == NULL) { 209 if (!func) {
210 ret = -ENOPROTOOPT; 210 ret = -ENOPROTOOPT;
211 goto out; 211 goto out;
212 } 212 }
@@ -234,7 +234,7 @@ call_func:
234 ret = -EFAULT; 234 ret = -EFAULT;
235 235
236out: 236out:
237 for (i = 0; pages != NULL && i < nr_pages; i++) 237 for (i = 0; pages && i < nr_pages; i++)
238 put_page(pages[i]); 238 put_page(pages[i]);
239 kfree(pages); 239 kfree(pages);
240 240
diff --git a/net/rds/iw.c b/net/rds/iw.c
index c8f3d3525cb..56808cac0fc 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -264,7 +264,6 @@ struct rds_transport rds_iw_transport = {
264 .laddr_check = rds_iw_laddr_check, 264 .laddr_check = rds_iw_laddr_check,
265 .xmit_complete = rds_iw_xmit_complete, 265 .xmit_complete = rds_iw_xmit_complete,
266 .xmit = rds_iw_xmit, 266 .xmit = rds_iw_xmit,
267 .xmit_cong_map = NULL,
268 .xmit_rdma = rds_iw_xmit_rdma, 267 .xmit_rdma = rds_iw_xmit_rdma,
269 .recv = rds_iw_recv, 268 .recv = rds_iw_recv,
270 .conn_alloc = rds_iw_conn_alloc, 269 .conn_alloc = rds_iw_conn_alloc,
@@ -272,7 +271,6 @@ struct rds_transport rds_iw_transport = {
272 .conn_connect = rds_iw_conn_connect, 271 .conn_connect = rds_iw_conn_connect,
273 .conn_shutdown = rds_iw_conn_shutdown, 272 .conn_shutdown = rds_iw_conn_shutdown,
274 .inc_copy_to_user = rds_iw_inc_copy_to_user, 273 .inc_copy_to_user = rds_iw_inc_copy_to_user,
275 .inc_purge = rds_iw_inc_purge,
276 .inc_free = rds_iw_inc_free, 274 .inc_free = rds_iw_inc_free,
277 .cm_initiate_connect = rds_iw_cm_initiate_connect, 275 .cm_initiate_connect = rds_iw_cm_initiate_connect,
278 .cm_handle_connect = rds_iw_cm_handle_connect, 276 .cm_handle_connect = rds_iw_cm_handle_connect,
@@ -289,7 +287,7 @@ struct rds_transport rds_iw_transport = {
289 .t_prefer_loopback = 1, 287 .t_prefer_loopback = 1,
290}; 288};
291 289
292int __init rds_iw_init(void) 290int rds_iw_init(void)
293{ 291{
294 int ret; 292 int ret;
295 293
diff --git a/net/rds/iw.h b/net/rds/iw.h
index eef2f0c2847..543e665fafe 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -70,7 +70,7 @@ struct rds_iw_send_work {
70 struct rds_message *s_rm; 70 struct rds_message *s_rm;
71 71
72 /* We should really put these into a union: */ 72 /* We should really put these into a union: */
73 struct rds_rdma_op *s_op; 73 struct rm_rdma_op *s_op;
74 struct rds_iw_mapping *s_mapping; 74 struct rds_iw_mapping *s_mapping;
75 struct ib_mr *s_mr; 75 struct ib_mr *s_mr;
76 struct ib_fast_reg_page_list *s_page_list; 76 struct ib_fast_reg_page_list *s_page_list;
@@ -284,7 +284,7 @@ void rds_iw_conn_free(void *arg);
284int rds_iw_conn_connect(struct rds_connection *conn); 284int rds_iw_conn_connect(struct rds_connection *conn);
285void rds_iw_conn_shutdown(struct rds_connection *conn); 285void rds_iw_conn_shutdown(struct rds_connection *conn);
286void rds_iw_state_change(struct sock *sk); 286void rds_iw_state_change(struct sock *sk);
287int __init rds_iw_listen_init(void); 287int rds_iw_listen_init(void);
288void rds_iw_listen_stop(void); 288void rds_iw_listen_stop(void);
289void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...); 289void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
290int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, 290int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -321,12 +321,11 @@ void rds_iw_flush_mrs(void);
321void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); 321void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
322 322
323/* ib_recv.c */ 323/* ib_recv.c */
324int __init rds_iw_recv_init(void); 324int rds_iw_recv_init(void);
325void rds_iw_recv_exit(void); 325void rds_iw_recv_exit(void);
326int rds_iw_recv(struct rds_connection *conn); 326int rds_iw_recv(struct rds_connection *conn);
327int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 327int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
328 gfp_t page_gfp, int prefill); 328 gfp_t page_gfp, int prefill);
329void rds_iw_inc_purge(struct rds_incoming *inc);
330void rds_iw_inc_free(struct rds_incoming *inc); 329void rds_iw_inc_free(struct rds_incoming *inc);
331int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 330int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
332 size_t size); 331 size_t size);
@@ -358,7 +357,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
358void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context); 357void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
359void rds_iw_send_init_ring(struct rds_iw_connection *ic); 358void rds_iw_send_init_ring(struct rds_iw_connection *ic);
360void rds_iw_send_clear_ring(struct rds_iw_connection *ic); 359void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
361int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 360int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
362void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); 361void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
363void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); 362void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
364int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, 363int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
@@ -371,7 +370,7 @@ unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
371 unsigned int avail); 370 unsigned int avail);
372 371
373/* ib_sysctl.c */ 372/* ib_sysctl.c */
374int __init rds_iw_sysctl_init(void); 373int rds_iw_sysctl_init(void);
375void rds_iw_sysctl_exit(void); 374void rds_iw_sysctl_exit(void);
376extern unsigned long rds_iw_sysctl_max_send_wr; 375extern unsigned long rds_iw_sysctl_max_send_wr;
377extern unsigned long rds_iw_sysctl_max_recv_wr; 376extern unsigned long rds_iw_sysctl_max_recv_wr;
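A note on the dropped __init markers above: code placed in .init.text is discarded once boot completes for built-in RDS, so it may only be referenced from other init-time code; keeping the annotation on functions that are reachable later would at minimum trip modpost section-mismatch warnings. A hedged illustration, with a hypothetical late caller that is not part of this patch:

	int __init rds_iw_recv_init(void);	/* lives in .init.text, freed after boot */

	static int rds_iw_late_setup(void)	/* hypothetical non-__init caller */
	{
		return rds_iw_recv_init();	/* built-in case: call into freed memory */
	}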
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index b5dd6ac39be..712cf2d1f28 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -257,7 +257,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
257 * the rds_iwdev at all. 257 * the rds_iwdev at all.
258 */ 258 */
259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client); 259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
260 if (rds_iwdev == NULL) { 260 if (!rds_iwdev) {
261 if (printk_ratelimit()) 261 if (printk_ratelimit())
262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", 262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
263 dev->name); 263 dev->name);
@@ -292,7 +292,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
292 ic->i_send_ring.w_nr * 292 ic->i_send_ring.w_nr *
293 sizeof(struct rds_header), 293 sizeof(struct rds_header),
294 &ic->i_send_hdrs_dma, GFP_KERNEL); 294 &ic->i_send_hdrs_dma, GFP_KERNEL);
295 if (ic->i_send_hdrs == NULL) { 295 if (!ic->i_send_hdrs) {
296 ret = -ENOMEM; 296 ret = -ENOMEM;
297 rdsdebug("ib_dma_alloc_coherent send failed\n"); 297 rdsdebug("ib_dma_alloc_coherent send failed\n");
298 goto out; 298 goto out;
@@ -302,7 +302,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
302 ic->i_recv_ring.w_nr * 302 ic->i_recv_ring.w_nr *
303 sizeof(struct rds_header), 303 sizeof(struct rds_header),
304 &ic->i_recv_hdrs_dma, GFP_KERNEL); 304 &ic->i_recv_hdrs_dma, GFP_KERNEL);
305 if (ic->i_recv_hdrs == NULL) { 305 if (!ic->i_recv_hdrs) {
306 ret = -ENOMEM; 306 ret = -ENOMEM;
307 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 307 rdsdebug("ib_dma_alloc_coherent recv failed\n");
308 goto out; 308 goto out;
@@ -310,14 +310,14 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
310 310
311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
312 &ic->i_ack_dma, GFP_KERNEL); 312 &ic->i_ack_dma, GFP_KERNEL);
313 if (ic->i_ack == NULL) { 313 if (!ic->i_ack) {
314 ret = -ENOMEM; 314 ret = -ENOMEM;
315 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 315 rdsdebug("ib_dma_alloc_coherent ack failed\n");
316 goto out; 316 goto out;
317 } 317 }
318 318
319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); 319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
320 if (ic->i_sends == NULL) { 320 if (!ic->i_sends) {
321 ret = -ENOMEM; 321 ret = -ENOMEM;
322 rdsdebug("send allocation failed\n"); 322 rdsdebug("send allocation failed\n");
323 goto out; 323 goto out;
@@ -325,7 +325,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
325 rds_iw_send_init_ring(ic); 325 rds_iw_send_init_ring(ic);
326 326
327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); 327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
328 if (ic->i_recvs == NULL) { 328 if (!ic->i_recvs) {
329 ret = -ENOMEM; 329 ret = -ENOMEM;
330 rdsdebug("recv allocation failed\n"); 330 rdsdebug("recv allocation failed\n");
331 goto out; 331 goto out;
@@ -696,7 +696,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
696 696
697 /* XXX too lazy? */ 697 /* XXX too lazy? */
698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); 698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
699 if (ic == NULL) 699 if (!ic)
700 return -ENOMEM; 700 return -ENOMEM;
701 701
702 INIT_LIST_HEAD(&ic->iw_node); 702 INIT_LIST_HEAD(&ic->iw_node);
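The iw_cm.c hunks are mechanical cleanups: explicit comparisons against NULL become the kernel's preferred pointer-truth form, with no change in behaviour. A representative before/after using one of the checks from this file:

	if (ic->i_send_hdrs == NULL)	/* old style */
		ret = -ENOMEM;

	if (!ic->i_send_hdrs)		/* preferred kernel idiom, same semantics */
		ret = -ENOMEM;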
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 13dc1862d86..0e7accc23ee 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -34,7 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38#include "iw.h" 37#include "iw.h"
39 38
40 39
@@ -207,9 +206,9 @@ void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *con
207 BUG_ON(list_empty(&ic->iw_node)); 206 BUG_ON(list_empty(&ic->iw_node));
208 list_del(&ic->iw_node); 207 list_del(&ic->iw_node);
209 208
210 spin_lock_irq(&rds_iwdev->spinlock); 209 spin_lock(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); 210 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock); 211 spin_unlock(&rds_iwdev->spinlock);
213 spin_unlock_irq(&iw_nodev_conns_lock); 212 spin_unlock_irq(&iw_nodev_conns_lock);
214 213
215 ic->rds_iwdev = rds_iwdev; 214 ic->rds_iwdev = rds_iwdev;
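The spin_lock_irq() -> spin_lock() change in iw_rdma.c is a fix rather than a cleanup: the device spinlock nests inside iw_nodev_conns_lock, which is already held with interrupts disabled (its spin_unlock_irq() is visible at the end of the hunk), so the old inner spin_unlock_irq() re-enabled IRQs while the outer lock was still held. The corrected nesting, sketched together with the outer acquisition that sits just above the hunk:

	spin_lock_irq(&iw_nodev_conns_lock);	/* IRQs disabled from here */
	list_del(&ic->iw_node);

	spin_lock(&rds_iwdev->spinlock);	/* plain lock: IRQs are already off */
	list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
	spin_unlock(&rds_iwdev->spinlock);	/* must not re-enable IRQs here */

	spin_unlock_irq(&iw_nodev_conns_lock);	/* IRQs come back on only now */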
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 3d479067d54..5e57347f49f 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -53,7 +53,7 @@ static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
53static void rds_iw_frag_free(struct rds_page_frag *frag) 53static void rds_iw_frag_free(struct rds_page_frag *frag)
54{ 54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page); 55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL); 56 BUG_ON(frag->f_page);
57 kmem_cache_free(rds_iw_frag_slab, frag); 57 kmem_cache_free(rds_iw_frag_slab, frag);
58} 58}
59 59
@@ -143,14 +143,14 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
143 struct ib_sge *sge; 143 struct ib_sge *sge;
144 int ret = -ENOMEM; 144 int ret = -ENOMEM;
145 145
146 if (recv->r_iwinc == NULL) { 146 if (!recv->r_iwinc) {
147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) { 147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) {
148 rds_iw_stats_inc(s_iw_rx_alloc_limit); 148 rds_iw_stats_inc(s_iw_rx_alloc_limit);
149 goto out; 149 goto out;
150 } 150 }
151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, 151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
152 kptr_gfp); 152 kptr_gfp);
153 if (recv->r_iwinc == NULL) { 153 if (!recv->r_iwinc) {
154 atomic_dec(&rds_iw_allocation); 154 atomic_dec(&rds_iw_allocation);
155 goto out; 155 goto out;
156 } 156 }
@@ -158,17 +158,17 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); 158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
159 } 159 }
160 160
161 if (recv->r_frag == NULL) { 161 if (!recv->r_frag) {
162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp); 162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
163 if (recv->r_frag == NULL) 163 if (!recv->r_frag)
164 goto out; 164 goto out;
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 165 INIT_LIST_HEAD(&recv->r_frag->f_item);
166 recv->r_frag->f_page = NULL; 166 recv->r_frag->f_page = NULL;
167 } 167 }
168 168
169 if (ic->i_frag.f_page == NULL) { 169 if (!ic->i_frag.f_page) {
170 ic->i_frag.f_page = alloc_page(page_gfp); 170 ic->i_frag.f_page = alloc_page(page_gfp);
171 if (ic->i_frag.f_page == NULL) 171 if (!ic->i_frag.f_page)
172 goto out; 172 goto out;
173 ic->i_frag.f_offset = 0; 173 ic->i_frag.f_offset = 0;
174 } 174 }
@@ -273,7 +273,7 @@ int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
273 return ret; 273 return ret;
274} 274}
275 275
276void rds_iw_inc_purge(struct rds_incoming *inc) 276static void rds_iw_inc_purge(struct rds_incoming *inc)
277{ 277{
278 struct rds_iw_incoming *iwinc; 278 struct rds_iw_incoming *iwinc;
279 struct rds_page_frag *frag; 279 struct rds_page_frag *frag;
@@ -716,7 +716,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
716 * into the inc and save the inc so we can hang upcoming fragments 716 * into the inc and save the inc so we can hang upcoming fragments
717 * off its list. 717 * off its list.
718 */ 718 */
719 if (iwinc == NULL) { 719 if (!iwinc) {
720 iwinc = recv->r_iwinc; 720 iwinc = recv->r_iwinc;
721 recv->r_iwinc = NULL; 721 recv->r_iwinc = NULL;
722 ic->i_iwinc = iwinc; 722 ic->i_iwinc = iwinc;
@@ -887,7 +887,7 @@ int rds_iw_recv(struct rds_connection *conn)
887 return ret; 887 return ret;
888} 888}
889 889
890int __init rds_iw_recv_init(void) 890int rds_iw_recv_init(void)
891{ 891{
892 struct sysinfo si; 892 struct sysinfo si;
893 int ret = -ENOMEM; 893 int ret = -ENOMEM;
@@ -899,13 +899,13 @@ int __init rds_iw_recv_init(void)
899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming", 899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
900 sizeof(struct rds_iw_incoming), 900 sizeof(struct rds_iw_incoming),
901 0, 0, NULL); 901 0, 0, NULL);
902 if (rds_iw_incoming_slab == NULL) 902 if (!rds_iw_incoming_slab)
903 goto out; 903 goto out;
904 904
905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag", 905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
906 sizeof(struct rds_page_frag), 906 sizeof(struct rds_page_frag),
907 0, 0, NULL); 907 0, 0, NULL);
908 if (rds_iw_frag_slab == NULL) 908 if (!rds_iw_frag_slab)
909 kmem_cache_destroy(rds_iw_incoming_slab); 909 kmem_cache_destroy(rds_iw_incoming_slab);
910 else 910 else
911 ret = 0; 911 ret = 0;
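With rds_iw_inc_purge() now static and the .inc_purge hook dropped from rds_iw_transport (see the iw.c hunk earlier), purging the receive fragments is assumed to become an internal step of the transport's own inc_free path rather than a separately exported callback. Roughly:

	/* hedged sketch -- body abridged, not copied from the patch */
	void rds_iw_inc_free(struct rds_incoming *inc)
	{
		rds_iw_inc_purge(inc);	/* drop the page frags first */
		/* ...then return the rds_iw_incoming to its slab cache */
	}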
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 52182ff7519..6280ea020d4 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -36,7 +36,6 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "iw.h" 39#include "iw.h"
41 40
42static void rds_iw_send_rdma_complete(struct rds_message *rm, 41static void rds_iw_send_rdma_complete(struct rds_message *rm,
@@ -64,13 +63,13 @@ static void rds_iw_send_rdma_complete(struct rds_message *rm,
64} 63}
65 64
66static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic, 65static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
67 struct rds_rdma_op *op) 66 struct rm_rdma_op *op)
68{ 67{
69 if (op->r_mapped) { 68 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 69 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 70 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 71 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 72 op->op_mapped = 0;
74 } 73 }
75} 74}
76 75
@@ -83,11 +82,11 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 82 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84 83
85 ib_dma_unmap_sg(ic->i_cm_id->device, 84 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->m_sg, rm->m_nents, 85 rm->data.op_sg, rm->data.op_nents,
87 DMA_TO_DEVICE); 86 DMA_TO_DEVICE);
88 87
89 if (rm->m_rdma_op != NULL) { 88 if (rm->rdma.op_active) {
90 rds_iw_send_unmap_rdma(ic, rm->m_rdma_op); 89 rds_iw_send_unmap_rdma(ic, &rm->rdma);
91 90
92 /* If the user asked for a completion notification on this 91 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics: 92 * message, we can implement three different semantics:
@@ -111,10 +110,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
111 */ 110 */
112 rds_iw_send_rdma_complete(rm, wc_status); 111 rds_iw_send_rdma_complete(rm, wc_status);
113 112
114 if (rm->m_rdma_op->r_write) 113 if (rm->rdma.op_write)
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 114 rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
116 else 115 else
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 116 rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
118 } 117 }
119 118
120 /* If anyone waited for this message to get flushed out, wake 119 /* If anyone waited for this message to get flushed out, wake
@@ -556,25 +555,27 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
556 } 555 }
557 556
558 /* map the message the first time we see it */ 557 /* map the message the first time we see it */
559 if (ic->i_rm == NULL) { 558 if (!ic->i_rm) {
560 /* 559 /*
561 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n", 560 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
562 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 561 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
563 rm->m_inc.i_hdr.h_flags, 562 rm->m_inc.i_hdr.h_flags,
564 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 563 be32_to_cpu(rm->m_inc.i_hdr.h_len));
565 */ 564 */
566 if (rm->m_nents) { 565 if (rm->data.op_nents) {
567 rm->m_count = ib_dma_map_sg(dev, 566 rm->data.op_count = ib_dma_map_sg(dev,
568 rm->m_sg, rm->m_nents, DMA_TO_DEVICE); 567 rm->data.op_sg,
569 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); 568 rm->data.op_nents,
570 if (rm->m_count == 0) { 569 DMA_TO_DEVICE);
570 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
571 if (rm->data.op_count == 0) {
571 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); 572 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
572 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); 573 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
573 ret = -ENOMEM; /* XXX ? */ 574 ret = -ENOMEM; /* XXX ? */
574 goto out; 575 goto out;
575 } 576 }
576 } else { 577 } else {
577 rm->m_count = 0; 578 rm->data.op_count = 0;
578 } 579 }
579 580
580 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; 581 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
@@ -590,10 +591,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
590 591
591 /* If it has a RDMA op, tell the peer we did it. This is 592 /* If it has a RDMA op, tell the peer we did it. This is
592 * used by the peer to release use-once RDMA MRs. */ 593 * used by the peer to release use-once RDMA MRs. */
593 if (rm->m_rdma_op) { 594 if (rm->rdma.op_active) {
594 struct rds_ext_header_rdma ext_hdr; 595 struct rds_ext_header_rdma ext_hdr;
595 596
596 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 597 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
597 rds_message_add_extension(&rm->m_inc.i_hdr, 598 rds_message_add_extension(&rm->m_inc.i_hdr,
598 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 599 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
599 } 600 }
@@ -621,7 +622,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
621 send = &ic->i_sends[pos]; 622 send = &ic->i_sends[pos];
622 first = send; 623 first = send;
623 prev = NULL; 624 prev = NULL;
624 scat = &rm->m_sg[sg]; 625 scat = &rm->data.op_sg[sg];
625 sent = 0; 626 sent = 0;
626 i = 0; 627 i = 0;
627 628
@@ -631,7 +632,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
631 * or when requested by the user. Right now, we let 632 * or when requested by the user. Right now, we let
632 * the application choose. 633 * the application choose.
633 */ 634 */
634 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 635 if (rm->rdma.op_active && rm->rdma.op_fence)
635 send_flags = IB_SEND_FENCE; 636 send_flags = IB_SEND_FENCE;
636 637
637 /* 638 /*
@@ -650,7 +651,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
650 } 651 }
651 652
652 /* if there's data reference it with a chain of work reqs */ 653 /* if there's data reference it with a chain of work reqs */
653 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 654 for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
654 unsigned int len; 655 unsigned int len;
655 656
656 send = &ic->i_sends[pos]; 657 send = &ic->i_sends[pos];
@@ -728,7 +729,7 @@ add_header:
728 sent += sizeof(struct rds_header); 729 sent += sizeof(struct rds_header);
729 730
730 /* if we finished the message then send completion owns it */ 731 /* if we finished the message then send completion owns it */
731 if (scat == &rm->m_sg[rm->m_count]) { 732 if (scat == &rm->data.op_sg[rm->data.op_count]) {
732 prev->s_rm = ic->i_rm; 733 prev->s_rm = ic->i_rm;
733 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 734 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
734 ic->i_rm = NULL; 735 ic->i_rm = NULL;
@@ -784,7 +785,7 @@ static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rd
784 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); 785 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
785} 786}
786 787
787int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 788int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
788{ 789{
789 struct rds_iw_connection *ic = conn->c_transport_data; 790 struct rds_iw_connection *ic = conn->c_transport_data;
790 struct rds_iw_send_work *send = NULL; 791 struct rds_iw_send_work *send = NULL;
@@ -794,7 +795,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
794 struct rds_iw_device *rds_iwdev; 795 struct rds_iw_device *rds_iwdev;
795 struct scatterlist *scat; 796 struct scatterlist *scat;
796 unsigned long len; 797 unsigned long len;
797 u64 remote_addr = op->r_remote_addr; 798 u64 remote_addr = op->op_remote_addr;
798 u32 pos, fr_pos; 799 u32 pos, fr_pos;
799 u32 work_alloc; 800 u32 work_alloc;
800 u32 i; 801 u32 i;
@@ -806,21 +807,21 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
806 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); 807 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
807 808
808 /* map the message the first time we see it */ 809 /* map the message the first time we see it */
809 if (!op->r_mapped) { 810 if (!op->op_mapped) {
810 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 811 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
811 op->r_sg, op->r_nents, (op->r_write) ? 812 op->op_sg, op->op_nents, (op->op_write) ?
812 DMA_TO_DEVICE : DMA_FROM_DEVICE); 813 DMA_TO_DEVICE : DMA_FROM_DEVICE);
813 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 814 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
814 if (op->r_count == 0) { 815 if (op->op_count == 0) {
815 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); 816 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
816 ret = -ENOMEM; /* XXX ? */ 817 ret = -ENOMEM; /* XXX ? */
817 goto out; 818 goto out;
818 } 819 }
819 820
820 op->r_mapped = 1; 821 op->op_mapped = 1;
821 } 822 }
822 823
823 if (!op->r_write) { 824 if (!op->op_write) {
824 /* Alloc space on the send queue for the fastreg */ 825 /* Alloc space on the send queue for the fastreg */
825 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos); 826 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
826 if (work_alloc != 1) { 827 if (work_alloc != 1) {
@@ -835,7 +836,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
835 * Instead of knowing how to return a partial rdma read/write we insist that there 836 * Instead of knowing how to return a partial rdma read/write we insist that there
836 * be enough work requests to send the entire message. 837 * be enough work requests to send the entire message.
837 */ 838 */
838 i = ceil(op->r_count, rds_iwdev->max_sge); 839 i = ceil(op->op_count, rds_iwdev->max_sge);
839 840
840 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos); 841 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
841 if (work_alloc != i) { 842 if (work_alloc != i) {
@@ -846,17 +847,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
846 } 847 }
847 848
848 send = &ic->i_sends[pos]; 849 send = &ic->i_sends[pos];
849 if (!op->r_write) { 850 if (!op->op_write) {
850 first = prev = &ic->i_sends[fr_pos]; 851 first = prev = &ic->i_sends[fr_pos];
851 } else { 852 } else {
852 first = send; 853 first = send;
853 prev = NULL; 854 prev = NULL;
854 } 855 }
855 scat = &op->r_sg[0]; 856 scat = &op->op_sg[0];
856 sent = 0; 857 sent = 0;
857 num_sge = op->r_count; 858 num_sge = op->op_count;
858 859
859 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 860 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
860 send->s_wr.send_flags = 0; 861 send->s_wr.send_flags = 0;
861 send->s_queued = jiffies; 862 send->s_queued = jiffies;
862 863
@@ -873,13 +874,13 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
873 * for local access after RDS is finished with it, using 874 * for local access after RDS is finished with it, using
874 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. 875 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
875 */ 876 */
876 if (op->r_write) 877 if (op->op_write)
877 send->s_wr.opcode = IB_WR_RDMA_WRITE; 878 send->s_wr.opcode = IB_WR_RDMA_WRITE;
878 else 879 else
879 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; 880 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
880 881
881 send->s_wr.wr.rdma.remote_addr = remote_addr; 882 send->s_wr.wr.rdma.remote_addr = remote_addr;
882 send->s_wr.wr.rdma.rkey = op->r_key; 883 send->s_wr.wr.rdma.rkey = op->op_rkey;
883 send->s_op = op; 884 send->s_op = op;
884 885
885 if (num_sge > rds_iwdev->max_sge) { 886 if (num_sge > rds_iwdev->max_sge) {
@@ -893,7 +894,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
893 if (prev) 894 if (prev)
894 prev->s_wr.next = &send->s_wr; 895 prev->s_wr.next = &send->s_wr;
895 896
896 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 897 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
897 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 898 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
898 899
899 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) 900 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
@@ -927,7 +928,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
927 } 928 }
928 929
929 /* if we finished the message then send completion owns it */ 930 /* if we finished the message then send completion owns it */
930 if (scat == &op->r_sg[op->r_count]) 931 if (scat == &op->op_sg[op->op_count])
931 first->s_wr.send_flags = IB_SEND_SIGNALED; 932 first->s_wr.send_flags = IB_SEND_SIGNALED;
932 933
933 if (i < work_alloc) { 934 if (i < work_alloc) {
@@ -941,9 +942,9 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
941 * adapters do not allow using the lkey for this at all. To bypass this use a 942 * adapters do not allow using the lkey for this at all. To bypass this use a
942 * fastreg_mr (or possibly a dma_mr) 943 * fastreg_mr (or possibly a dma_mr)
943 */ 944 */
944 if (!op->r_write) { 945 if (!op->op_write) {
945 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], 946 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
946 op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); 947 op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
947 work_alloc++; 948 work_alloc++;
948 } 949 }
949 950
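The renames running through iw_send.c (rm->m_sg/m_nents/m_count becoming rm->data.op_*, and the standalone rds_rdma_op's r_* fields becoming op_* fields on the embedded rm->rdma) follow the restructured struct rds_message, in which each sub-operation carries its own active flag and its own slice of the message's scatterlist pool. An abridged reconstruction of the RDMA op, listing only the fields this diff actually touches (ordering and any omitted members are guesses):

	struct rm_rdma_op {
		u32			op_rkey;	/* remote MR key */
		u64			op_remote_addr;	/* offset into the remote MR */
		unsigned int		op_write:1;
		unsigned int		op_fence:1;
		unsigned int		op_notify:1;
		unsigned int		op_recverr:1;
		unsigned int		op_silent:1;
		unsigned int		op_mapped:1;	/* DMA-mapped for this send? */
		unsigned int		op_active:1;	/* replaces rm->m_rdma_op != NULL */
		unsigned int		op_bytes;
		unsigned int		op_nents;
		unsigned int		op_count;	/* entries returned by ib_dma_map_sg() */
		struct scatterlist	*op_sg;		/* carved from the rm's sg pool */
		struct rds_notifier	*op_notifier;
	};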
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 1c4428a61a0..23e3a9a26aa 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c
@@ -122,10 +122,10 @@ void rds_iw_sysctl_exit(void)
122 unregister_sysctl_table(rds_iw_sysctl_hdr); 122 unregister_sysctl_table(rds_iw_sysctl_hdr);
123} 123}
124 124
125int __init rds_iw_sysctl_init(void) 125int rds_iw_sysctl_init(void)
126{ 126{
127 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table); 127 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table);
128 if (rds_iw_sysctl_hdr == NULL) 128 if (!rds_iw_sysctl_hdr)
129 return -ENOMEM; 129 return -ENOMEM;
130 return 0; 130 return 0;
131} 131}
diff --git a/net/rds/loop.c b/net/rds/loop.c
index dd987937945..c390156b426 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,17 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
68 }
69
64 BUG_ON(hdr_off || sg || off); 70 BUG_ON(hdr_off || sg || off);
65 71
66 rds_inc_init(&rm->m_inc, conn, conn->c_laddr); 72 rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
67 rds_message_addref(rm); /* for the inc */ 73 /* For the embedded inc. Matching put is in loop_inc_free() */
74 rds_message_addref(rm);
68 75
69 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc, 76 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
70 GFP_KERNEL, KM_USER0); 77 GFP_KERNEL, KM_USER0);
@@ -77,16 +84,14 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
77 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
78} 85}
79 86
80static int rds_loop_xmit_cong_map(struct rds_connection *conn, 87/*
81 struct rds_cong_map *map, 88 * See rds_loop_xmit(). Since our inc is embedded in the rm, we
82 unsigned long offset) 89 * make sure the rm lives at least until the inc is done.
90 */
91static void rds_loop_inc_free(struct rds_incoming *inc)
83{ 92{
84 BUG_ON(offset); 93 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
85 BUG_ON(map != conn->c_lcong); 94 rds_message_put(rm);
86
87 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
88
89 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
90} 95}
91 96
92/* we need to at least give the thread something to succeed */ 97/* we need to at least give the thread something to succeed */
@@ -112,7 +117,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
112 unsigned long flags; 117 unsigned long flags;
113 118
114 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); 119 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
115 if (lc == NULL) 120 if (!lc)
116 return -ENOMEM; 121 return -ENOMEM;
117 122
118 INIT_LIST_HEAD(&lc->loop_node); 123 INIT_LIST_HEAD(&lc->loop_node);
@@ -169,14 +174,12 @@ void rds_loop_exit(void)
169 */ 174 */
170struct rds_transport rds_loop_transport = { 175struct rds_transport rds_loop_transport = {
171 .xmit = rds_loop_xmit, 176 .xmit = rds_loop_xmit,
172 .xmit_cong_map = rds_loop_xmit_cong_map,
173 .recv = rds_loop_recv, 177 .recv = rds_loop_recv,
174 .conn_alloc = rds_loop_conn_alloc, 178 .conn_alloc = rds_loop_conn_alloc,
175 .conn_free = rds_loop_conn_free, 179 .conn_free = rds_loop_conn_free,
176 .conn_connect = rds_loop_conn_connect, 180 .conn_connect = rds_loop_conn_connect,
177 .conn_shutdown = rds_loop_conn_shutdown, 181 .conn_shutdown = rds_loop_conn_shutdown,
178 .inc_copy_to_user = rds_message_inc_copy_to_user, 182 .inc_copy_to_user = rds_message_inc_copy_to_user,
179 .inc_purge = rds_message_inc_purge, 183 .inc_free = rds_loop_inc_free,
180 .inc_free = rds_message_inc_free,
181 .t_name = "loopback", 184 .t_name = "loopback",
182}; 185};
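Two things change in the loopback transport: congestion-bitmap messages are short-circuited inside rds_loop_xmit() itself, which lets the separate xmit_cong_map hook disappear, and because the rds_incoming handed to the receive path is embedded in the rds_message, the message's lifetime is now tied to the inc explicitly. The reference pairing looks like this (illustrative extract):

	/* xmit side: hold the rm for as long as its embedded inc is in flight */
	rds_message_addref(rm);			/* for rm->m_inc */
	rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
			  GFP_KERNEL, KM_USER0);

	/* receive side: the transport's inc_free hook drops that reference */
	static void rds_loop_inc_free(struct rds_incoming *inc)
	{
		struct rds_message *rm = container_of(inc, struct rds_message, m_inc);

		rds_message_put(rm);		/* pairs with the addref above */
	}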
diff --git a/net/rds/message.c b/net/rds/message.c
index 9a1d67e001b..84f937f11d4 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -34,9 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38
39static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq);
40 37
41static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { 38static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
42[RDS_EXTHDR_NONE] = 0, 39[RDS_EXTHDR_NONE] = 0,
@@ -63,29 +60,31 @@ static void rds_message_purge(struct rds_message *rm)
63 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) 60 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
64 return; 61 return;
65 62
66 for (i = 0; i < rm->m_nents; i++) { 63 for (i = 0; i < rm->data.op_nents; i++) {
67 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i])); 64 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
68 /* XXX will have to put_page for page refs */ 65 /* XXX will have to put_page for page refs */
69 __free_page(sg_page(&rm->m_sg[i])); 66 __free_page(sg_page(&rm->data.op_sg[i]));
70 } 67 }
71 rm->m_nents = 0; 68 rm->data.op_nents = 0;
72 69
73 if (rm->m_rdma_op) 70 if (rm->rdma.op_active)
74 rds_rdma_free_op(rm->m_rdma_op); 71 rds_rdma_free_op(&rm->rdma);
75 if (rm->m_rdma_mr) 72 if (rm->rdma.op_rdma_mr)
76 rds_mr_put(rm->m_rdma_mr); 73 rds_mr_put(rm->rdma.op_rdma_mr);
77}
78 74
79void rds_message_inc_purge(struct rds_incoming *inc) 75 if (rm->atomic.op_active)
80{ 76 rds_atomic_free_op(&rm->atomic);
81 struct rds_message *rm = container_of(inc, struct rds_message, m_inc); 77 if (rm->atomic.op_rdma_mr)
82 rds_message_purge(rm); 78 rds_mr_put(rm->atomic.op_rdma_mr);
83} 79}
84 80
85void rds_message_put(struct rds_message *rm) 81void rds_message_put(struct rds_message *rm)
86{ 82{
87 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); 83 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
88 84 if (atomic_read(&rm->m_refcount) == 0) {
85printk(KERN_CRIT "danger refcount zero on %p\n", rm);
86WARN_ON(1);
87 }
89 if (atomic_dec_and_test(&rm->m_refcount)) { 88 if (atomic_dec_and_test(&rm->m_refcount)) {
90 BUG_ON(!list_empty(&rm->m_sock_item)); 89 BUG_ON(!list_empty(&rm->m_sock_item));
91 BUG_ON(!list_empty(&rm->m_conn_item)); 90 BUG_ON(!list_empty(&rm->m_conn_item));
@@ -96,12 +95,6 @@ void rds_message_put(struct rds_message *rm)
96} 95}
97EXPORT_SYMBOL_GPL(rds_message_put); 96EXPORT_SYMBOL_GPL(rds_message_put);
98 97
99void rds_message_inc_free(struct rds_incoming *inc)
100{
101 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
102 rds_message_put(rm);
103}
104
105void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 98void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
106 __be16 dport, u64 seq) 99 __be16 dport, u64 seq)
107{ 100{
@@ -214,41 +207,68 @@ int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 o
214} 207}
215EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension); 208EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);
216 209
217struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp) 210/*
211 * Each rds_message is allocated with extra space for the scatterlist entries
212 * rds ops will need. This is to minimize memory allocation count. Then, each rds op
213 * can grab SGs when initializing its part of the rds_message.
214 */
215struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
218{ 216{
219 struct rds_message *rm; 217 struct rds_message *rm;
220 218
221 rm = kzalloc(sizeof(struct rds_message) + 219 rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp);
222 (nents * sizeof(struct scatterlist)), gfp);
223 if (!rm) 220 if (!rm)
224 goto out; 221 goto out;
225 222
226 if (nents) 223 rm->m_used_sgs = 0;
227 sg_init_table(rm->m_sg, nents); 224 rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
225
228 atomic_set(&rm->m_refcount, 1); 226 atomic_set(&rm->m_refcount, 1);
229 INIT_LIST_HEAD(&rm->m_sock_item); 227 INIT_LIST_HEAD(&rm->m_sock_item);
230 INIT_LIST_HEAD(&rm->m_conn_item); 228 INIT_LIST_HEAD(&rm->m_conn_item);
231 spin_lock_init(&rm->m_rs_lock); 229 spin_lock_init(&rm->m_rs_lock);
230 init_waitqueue_head(&rm->m_flush_wait);
232 231
233out: 232out:
234 return rm; 233 return rm;
235} 234}
236 235
236/*
237 * RDS ops use this to grab SG entries from the rm's sg pool.
238 */
239struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
240{
241 struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
242 struct scatterlist *sg_ret;
243
244 WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
245 WARN_ON(!nents);
246
247 sg_ret = &sg_first[rm->m_used_sgs];
248 sg_init_table(sg_ret, nents);
249 rm->m_used_sgs += nents;
250
251 return sg_ret;
252}
253
237struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) 254struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
238{ 255{
239 struct rds_message *rm; 256 struct rds_message *rm;
240 unsigned int i; 257 unsigned int i;
258 int num_sgs = ceil(total_len, PAGE_SIZE);
259 int extra_bytes = num_sgs * sizeof(struct scatterlist);
241 260
242 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL); 261 rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
243 if (rm == NULL) 262 if (!rm)
244 return ERR_PTR(-ENOMEM); 263 return ERR_PTR(-ENOMEM);
245 264
246 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); 265 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
247 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 266 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
248 rm->m_nents = ceil(total_len, PAGE_SIZE); 267 rm->data.op_nents = ceil(total_len, PAGE_SIZE);
268 rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
249 269
250 for (i = 0; i < rm->m_nents; ++i) { 270 for (i = 0; i < rm->data.op_nents; ++i) {
251 sg_set_page(&rm->m_sg[i], 271 sg_set_page(&rm->data.op_sg[i],
252 virt_to_page(page_addrs[i]), 272 virt_to_page(page_addrs[i]),
253 PAGE_SIZE, 0); 273 PAGE_SIZE, 0);
254 } 274 }
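rds_message_alloc() now takes a byte count of trailing scatterlist space rather than an entry count, and rds_message_alloc_sgs() hands slices of that pool to the individual ops. rds_message_map_pages() above already shows the pattern; a minimal caller-side sketch (GFP flag and sizing illustrative, ceil() being the RDS helper used above):

	int num_sgs = ceil(total_len, PAGE_SIZE);
	struct rds_message *rm;

	rm = rds_message_alloc(num_sgs * sizeof(struct scatterlist), GFP_KERNEL);
	if (!rm)
		return -ENOMEM;

	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);	/* carve from the pool */
	rm->data.op_nents = num_sgs;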
@@ -256,40 +276,33 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
256 return rm; 276 return rm;
257} 277}
258 278
259struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 279int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
260 size_t total_len) 280 size_t total_len)
261{ 281{
262 unsigned long to_copy; 282 unsigned long to_copy;
263 unsigned long iov_off; 283 unsigned long iov_off;
264 unsigned long sg_off; 284 unsigned long sg_off;
265 struct rds_message *rm;
266 struct iovec *iov; 285 struct iovec *iov;
267 struct scatterlist *sg; 286 struct scatterlist *sg;
268 int ret; 287 int ret = 0;
269
270 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
271 if (rm == NULL) {
272 ret = -ENOMEM;
273 goto out;
274 }
275 288
276 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 289 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
277 290
278 /* 291 /*
279 * now allocate and copy in the data payload. 292 * now allocate and copy in the data payload.
280 */ 293 */
281 sg = rm->m_sg; 294 sg = rm->data.op_sg;
282 iov = first_iov; 295 iov = first_iov;
283 iov_off = 0; 296 iov_off = 0;
284 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ 297 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
285 298
286 while (total_len) { 299 while (total_len) {
287 if (sg_page(sg) == NULL) { 300 if (!sg_page(sg)) {
288 ret = rds_page_remainder_alloc(sg, total_len, 301 ret = rds_page_remainder_alloc(sg, total_len,
289 GFP_HIGHUSER); 302 GFP_HIGHUSER);
290 if (ret) 303 if (ret)
291 goto out; 304 goto out;
292 rm->m_nents++; 305 rm->data.op_nents++;
293 sg_off = 0; 306 sg_off = 0;
294 } 307 }
295 308
@@ -320,14 +333,8 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
320 sg++; 333 sg++;
321 } 334 }
322 335
323 ret = 0;
324out: 336out:
325 if (ret) { 337 return ret;
326 if (rm)
327 rds_message_put(rm);
328 rm = ERR_PTR(ret);
329 }
330 return rm;
331} 338}
332 339
333int rds_message_inc_copy_to_user(struct rds_incoming *inc, 340int rds_message_inc_copy_to_user(struct rds_incoming *inc,
@@ -348,7 +355,7 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
348 355
349 iov = first_iov; 356 iov = first_iov;
350 iov_off = 0; 357 iov_off = 0;
351 sg = rm->m_sg; 358 sg = rm->data.op_sg;
352 vec_off = 0; 359 vec_off = 0;
353 copied = 0; 360 copied = 0;
354 361
@@ -394,15 +401,14 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
394 */ 401 */
395void rds_message_wait(struct rds_message *rm) 402void rds_message_wait(struct rds_message *rm)
396{ 403{
397 wait_event(rds_message_flush_waitq, 404 wait_event_interruptible(rm->m_flush_wait,
398 !test_bit(RDS_MSG_MAPPED, &rm->m_flags)); 405 !test_bit(RDS_MSG_MAPPED, &rm->m_flags));
399} 406}
400 407
401void rds_message_unmapped(struct rds_message *rm) 408void rds_message_unmapped(struct rds_message *rm)
402{ 409{
403 clear_bit(RDS_MSG_MAPPED, &rm->m_flags); 410 clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
404 if (waitqueue_active(&rds_message_flush_waitq)) 411 wake_up_interruptible(&rm->m_flush_wait);
405 wake_up(&rds_message_flush_waitq);
406} 412}
407EXPORT_SYMBOL_GPL(rds_message_unmapped); 413EXPORT_SYMBOL_GPL(rds_message_unmapped);
408 414
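The global rds_message_flush_waitq is gone as well: each message now carries its own m_flush_wait (initialised in rds_message_alloc()), so a wakeup from rds_message_unmapped() only disturbs waiters on that particular message. The resulting wait/wake pairing:

	/* sender waiting for the transport to finish with the mapped message */
	wait_event_interruptible(rm->m_flush_wait,
				 !test_bit(RDS_MSG_MAPPED, &rm->m_flags));

	/* transport completion path */
	clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
	wake_up_interruptible(&rm->m_flush_wait);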
diff --git a/net/rds/page.c b/net/rds/page.c
index 595a952d4b1..5e44f5ae789 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -116,7 +116,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
116 /* jump straight to allocation if we're trying for a huge page */ 116 /* jump straight to allocation if we're trying for a huge page */
117 if (bytes >= PAGE_SIZE) { 117 if (bytes >= PAGE_SIZE) {
118 page = alloc_page(gfp); 118 page = alloc_page(gfp);
119 if (page == NULL) { 119 if (!page) {
120 ret = -ENOMEM; 120 ret = -ENOMEM;
121 } else { 121 } else {
122 sg_set_page(scat, page, PAGE_SIZE, 0); 122 sg_set_page(scat, page, PAGE_SIZE, 0);
@@ -162,7 +162,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
162 rem = &per_cpu(rds_page_remainders, get_cpu()); 162 rem = &per_cpu(rds_page_remainders, get_cpu());
163 local_irq_save(flags); 163 local_irq_save(flags);
164 164
165 if (page == NULL) { 165 if (!page) {
166 ret = -ENOMEM; 166 ret = -ENOMEM;
167 break; 167 break;
168 } 168 }
@@ -186,6 +186,7 @@ out:
186 ret ? 0 : scat->length); 186 ret ? 0 : scat->length);
187 return ret; 187 return ret;
188} 188}
189EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
189 190
190static int rds_page_remainder_cpu_notify(struct notifier_block *self, 191static int rds_page_remainder_cpu_notify(struct notifier_block *self,
191 unsigned long action, void *hcpu) 192 unsigned long action, void *hcpu)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 75fd13bb631..1a41debca1c 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -35,7 +35,7 @@
35#include <linux/rbtree.h> 35#include <linux/rbtree.h>
36#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */ 36#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
37 37
38#include "rdma.h" 38#include "rds.h"
39 39
40/* 40/*
41 * XXX 41 * XXX
@@ -130,14 +130,22 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
130{ 130{
131 struct rds_mr *mr; 131 struct rds_mr *mr;
132 struct rb_node *node; 132 struct rb_node *node;
133 unsigned long flags;
133 134
134 /* Release any MRs associated with this socket */ 135 /* Release any MRs associated with this socket */
136 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
135 while ((node = rb_first(&rs->rs_rdma_keys))) { 137 while ((node = rb_first(&rs->rs_rdma_keys))) {
136 mr = container_of(node, struct rds_mr, r_rb_node); 138 mr = container_of(node, struct rds_mr, r_rb_node);
137 if (mr->r_trans == rs->rs_transport) 139 if (mr->r_trans == rs->rs_transport)
138 mr->r_invalidate = 0; 140 mr->r_invalidate = 0;
141 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
142 RB_CLEAR_NODE(&mr->r_rb_node);
143 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
144 rds_destroy_mr(mr);
139 rds_mr_put(mr); 145 rds_mr_put(mr);
146 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
140 } 147 }
148 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
141 149
142 if (rs->rs_transport && rs->rs_transport->flush_mrs) 150 if (rs->rs_transport && rs->rs_transport->flush_mrs)
143 rs->rs_transport->flush_mrs(); 151 rs->rs_transport->flush_mrs();
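rds_rdma_drop_keys() previously walked rs_rdma_keys with no lock at all; it now unlinks each MR under rs_rdma_lock and drops the lock around rds_destroy_mr(), presumably because that call can reach into the transport and block. The shape of the loop:

	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
	while ((node = rb_first(&rs->rs_rdma_keys))) {
		mr = container_of(node, struct rds_mr, r_rb_node);
		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);	/* unlink under the lock */
		RB_CLEAR_NODE(&mr->r_rb_node);

		spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
		rds_destroy_mr(mr);				/* may block */
		rds_mr_put(mr);
		spin_lock_irqsave(&rs->rs_rdma_lock, flags);	/* re-take and rescan */
	}
	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);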
@@ -181,7 +189,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
181 goto out; 189 goto out;
182 } 190 }
183 191
184 if (rs->rs_transport->get_mr == NULL) { 192 if (!rs->rs_transport->get_mr) {
185 ret = -EOPNOTSUPP; 193 ret = -EOPNOTSUPP;
186 goto out; 194 goto out;
187 } 195 }
@@ -197,13 +205,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
197 205
198 /* XXX clamp nr_pages to limit the size of this alloc? */ 206 /* XXX clamp nr_pages to limit the size of this alloc? */
199 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 207 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
200 if (pages == NULL) { 208 if (!pages) {
201 ret = -ENOMEM; 209 ret = -ENOMEM;
202 goto out; 210 goto out;
203 } 211 }
204 212
205 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL); 213 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
206 if (mr == NULL) { 214 if (!mr) {
207 ret = -ENOMEM; 215 ret = -ENOMEM;
208 goto out; 216 goto out;
209 } 217 }
@@ -230,13 +238,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
230 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to 238 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
231 * the zero page. 239 * the zero page.
232 */ 240 */
233 ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1); 241 ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
234 if (ret < 0) 242 if (ret < 0)
235 goto out; 243 goto out;
236 244
237 nents = ret; 245 nents = ret;
238 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL); 246 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
239 if (sg == NULL) { 247 if (!sg) {
240 ret = -ENOMEM; 248 ret = -ENOMEM;
241 goto out; 249 goto out;
242 } 250 }
@@ -406,68 +414,127 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
406 414
407 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 415 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
408 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 416 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
409 if (mr && (mr->r_use_once || force)) { 417 if (!mr) {
418 printk(KERN_ERR "rds: trying to unuse MR with unknown r_key %u!\n", r_key);
419 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
420 return;
421 }
422
423 if (mr->r_use_once || force) {
410 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); 424 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
411 RB_CLEAR_NODE(&mr->r_rb_node); 425 RB_CLEAR_NODE(&mr->r_rb_node);
412 zot_me = 1; 426 zot_me = 1;
413 } else if (mr) 427 }
414 atomic_inc(&mr->r_refcount);
415 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); 428 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
416 429
417 /* May have to issue a dma_sync on this memory region. 430 /* May have to issue a dma_sync on this memory region.
418 * Note we could avoid this if the operation was a RDMA READ, 431 * Note we could avoid this if the operation was a RDMA READ,
419 * but at this point we can't tell. */ 432 * but at this point we can't tell. */
420 if (mr != NULL) { 433 if (mr->r_trans->sync_mr)
421 if (mr->r_trans->sync_mr) 434 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
422 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); 435
423 436 /* If the MR was marked as invalidate, this will
424 /* If the MR was marked as invalidate, this will 437 * trigger an async flush. */
425 * trigger an async flush. */ 438 if (zot_me)
426 if (zot_me) 439 rds_destroy_mr(mr);
427 rds_destroy_mr(mr); 440 rds_mr_put(mr);
428 rds_mr_put(mr);
429 }
430} 441}
431 442
432void rds_rdma_free_op(struct rds_rdma_op *ro) 443void rds_rdma_free_op(struct rm_rdma_op *ro)
433{ 444{
434 unsigned int i; 445 unsigned int i;
435 446
436 for (i = 0; i < ro->r_nents; i++) { 447 for (i = 0; i < ro->op_nents; i++) {
437 struct page *page = sg_page(&ro->r_sg[i]); 448 struct page *page = sg_page(&ro->op_sg[i]);
438 449
439 /* Mark page dirty if it was possibly modified, which 450 /* Mark page dirty if it was possibly modified, which
440 * is the case for a RDMA_READ which copies from remote 451 * is the case for a RDMA_READ which copies from remote
441 * to local memory */ 452 * to local memory */
442 if (!ro->r_write) { 453 if (!ro->op_write) {
443 BUG_ON(in_interrupt()); 454 BUG_ON(irqs_disabled());
444 set_page_dirty(page); 455 set_page_dirty(page);
445 } 456 }
446 put_page(page); 457 put_page(page);
447 } 458 }
448 459
449 kfree(ro->r_notifier); 460 kfree(ro->op_notifier);
450 kfree(ro); 461 ro->op_notifier = NULL;
462 ro->op_active = 0;
463}
464
465void rds_atomic_free_op(struct rm_atomic_op *ao)
466{
467 struct page *page = sg_page(ao->op_sg);
468
469 /* Mark page dirty if it was possibly modified, which
470 * is the case for a RDMA_READ which copies from remote
471 * to local memory */
472 set_page_dirty(page);
473 put_page(page);
474
475 kfree(ao->op_notifier);
476 ao->op_notifier = NULL;
477 ao->op_active = 0;
451} 478}
452 479
480
453/* 481/*
454 * args is a pointer to an in-kernel copy in the sendmsg cmsg. 482 * Count the number of pages needed to describe an incoming iovec.
455 */ 483 */
456static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, 484static int rds_rdma_pages(struct rds_rdma_args *args)
457 struct rds_rdma_args *args)
458{ 485{
459 struct rds_iovec vec; 486 struct rds_iovec vec;
460 struct rds_rdma_op *op = NULL; 487 struct rds_iovec __user *local_vec;
488 unsigned int tot_pages = 0;
461 unsigned int nr_pages; 489 unsigned int nr_pages;
462 unsigned int max_pages; 490 unsigned int i;
491
492 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
493
494 /* figure out the number of pages in the vector */
495 for (i = 0; i < args->nr_local; i++) {
496 if (copy_from_user(&vec, &local_vec[i],
497 sizeof(struct rds_iovec)))
498 return -EFAULT;
499
500 nr_pages = rds_pages_in_vec(&vec);
501 if (nr_pages == 0)
502 return -EINVAL;
503
504 tot_pages += nr_pages;
505 }
506
507 return tot_pages;
508}
509
510int rds_rdma_extra_size(struct rds_rdma_args *args)
511{
512 return rds_rdma_pages(args) * sizeof(struct scatterlist);
513}
514
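rds_rdma_pages() and rds_rdma_extra_size() exist so the sendmsg path can size the rds_message's trailing scatterlist pool before any cmsg is processed; rds_cmsg_rdma_args() below then simply grabs its entries from that pool. A hedged sketch of the expected caller (the caller and data_sgs are illustrative, not from this file):

	/* sendmsg path, before allocating the rds_message */
	int extra = rds_rdma_extra_size(CMSG_DATA(cmsg));	/* pages * sizeof(struct scatterlist) */
	struct rds_message *rm;

	rm = rds_message_alloc(data_sgs * sizeof(struct scatterlist) + extra,
			       GFP_KERNEL);
	/* rds_cmsg_rdma_args() later carves its entries out of that pool:
	 *	op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
	 */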
515/*
516 * The application asks for a RDMA transfer.
517 * Extract all arguments and set up the rdma_op
518 */
519int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
520 struct cmsghdr *cmsg)
521{
522 struct rds_rdma_args *args;
523 struct rds_iovec vec;
524 struct rm_rdma_op *op = &rm->rdma;
525 int nr_pages;
463 unsigned int nr_bytes; 526 unsigned int nr_bytes;
464 struct page **pages = NULL; 527 struct page **pages = NULL;
465 struct rds_iovec __user *local_vec; 528 struct rds_iovec __user *local_vec;
466 struct scatterlist *sg;
467 unsigned int nr; 529 unsigned int nr;
468 unsigned int i, j; 530 unsigned int i, j;
469 int ret; 531 int ret = 0;
470 532
533 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
534 || rm->rdma.op_active)
535 return -EINVAL;
536
537 args = CMSG_DATA(cmsg);
471 538
472 if (rs->rs_bound_addr == 0) { 539 if (rs->rs_bound_addr == 0) {
473 ret = -ENOTCONN; /* XXX not a great errno */ 540 ret = -ENOTCONN; /* XXX not a great errno */
@@ -479,61 +546,38 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
479 goto out; 546 goto out;
480 } 547 }
481 548
482 nr_pages = 0; 549 nr_pages = rds_rdma_pages(args);
483 max_pages = 0; 550 if (nr_pages < 0)
484
485 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
486
487 /* figure out the number of pages in the vector */
488 for (i = 0; i < args->nr_local; i++) {
489 if (copy_from_user(&vec, &local_vec[i],
490 sizeof(struct rds_iovec))) {
491 ret = -EFAULT;
492 goto out;
493 }
494
495 nr = rds_pages_in_vec(&vec);
496 if (nr == 0) {
497 ret = -EINVAL;
498 goto out;
499 }
500
501 max_pages = max(nr, max_pages);
502 nr_pages += nr;
503 }
504
505 pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
506 if (pages == NULL) {
507 ret = -ENOMEM;
508 goto out; 551 goto out;
509 }
510 552
511 op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL); 553 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
512 if (op == NULL) { 554 if (!pages) {
513 ret = -ENOMEM; 555 ret = -ENOMEM;
514 goto out; 556 goto out;
515 } 557 }
516 558
517 op->r_write = !!(args->flags & RDS_RDMA_READWRITE); 559 op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
518 op->r_fence = !!(args->flags & RDS_RDMA_FENCE); 560 op->op_fence = !!(args->flags & RDS_RDMA_FENCE);
519 op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); 561 op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
520 op->r_recverr = rs->rs_recverr; 562 op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
563 op->op_active = 1;
564 op->op_recverr = rs->rs_recverr;
521 WARN_ON(!nr_pages); 565 WARN_ON(!nr_pages);
522 sg_init_table(op->r_sg, nr_pages); 566 op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
523 567
524 if (op->r_notify || op->r_recverr) { 568 if (op->op_notify || op->op_recverr) {
525 /* We allocate an uninitialized notifier here, because 569 /* We allocate an uninitialized notifier here, because
526 * we don't want to do that in the completion handler. We 570 * we don't want to do that in the completion handler. We
527 * would have to use GFP_ATOMIC there, and don't want to deal 571 * would have to use GFP_ATOMIC there, and don't want to deal
528 * with failed allocations. 572 * with failed allocations.
529 */ 573 */
530 op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL); 574 op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
531 if (!op->r_notifier) { 575 if (!op->op_notifier) {
532 ret = -ENOMEM; 576 ret = -ENOMEM;
533 goto out; 577 goto out;
534 } 578 }
535 op->r_notifier->n_user_token = args->user_token; 579 op->op_notifier->n_user_token = args->user_token;
536 op->r_notifier->n_status = RDS_RDMA_SUCCESS; 580 op->op_notifier->n_status = RDS_RDMA_SUCCESS;
537 } 581 }
538 582
539 /* The cookie contains the R_Key of the remote memory region, and 583 /* The cookie contains the R_Key of the remote memory region, and
@@ -543,15 +587,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
543 * destination address (which is really an offset into the MR) 587 * destination address (which is really an offset into the MR)
544 * FIXME: We may want to move this into ib_rdma.c 588 * FIXME: We may want to move this into ib_rdma.c
545 */ 589 */
546 op->r_key = rds_rdma_cookie_key(args->cookie); 590 op->op_rkey = rds_rdma_cookie_key(args->cookie);
547 op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie); 591 op->op_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
548 592
549 nr_bytes = 0; 593 nr_bytes = 0;
550 594
551 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n", 595 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
552 (unsigned long long)args->nr_local, 596 (unsigned long long)args->nr_local,
553 (unsigned long long)args->remote_vec.addr, 597 (unsigned long long)args->remote_vec.addr,
554 op->r_key); 598 op->op_rkey);
599
600 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
555 601
556 for (i = 0; i < args->nr_local; i++) { 602 for (i = 0; i < args->nr_local; i++) {
557 if (copy_from_user(&vec, &local_vec[i], 603 if (copy_from_user(&vec, &local_vec[i],
@@ -569,15 +615,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
569 rs->rs_user_addr = vec.addr; 615 rs->rs_user_addr = vec.addr;
570 rs->rs_user_bytes = vec.bytes; 616 rs->rs_user_bytes = vec.bytes;
571 617
572 /* did the user change the vec under us? */
573 if (nr > max_pages || op->r_nents + nr > nr_pages) {
574 ret = -EINVAL;
575 goto out;
576 }
577 /* If it's a WRITE operation, we want to pin the pages for reading. 618 /* If it's a WRITE operation, we want to pin the pages for reading.
578 * If it's a READ operation, we need to pin the pages for writing. 619 * If it's a READ operation, we need to pin the pages for writing.
579 */ 620 */
580 ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write); 621 ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
581 if (ret < 0) 622 if (ret < 0)
582 goto out; 623 goto out;
583 624
@@ -588,8 +629,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
588 629
589 for (j = 0; j < nr; j++) { 630 for (j = 0; j < nr; j++) {
590 unsigned int offset = vec.addr & ~PAGE_MASK; 631 unsigned int offset = vec.addr & ~PAGE_MASK;
632 struct scatterlist *sg;
591 633
592 sg = &op->r_sg[op->r_nents + j]; 634 sg = &op->op_sg[op->op_nents + j];
593 sg_set_page(sg, pages[j], 635 sg_set_page(sg, pages[j],
594 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), 636 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
595 offset); 637 offset);
@@ -601,10 +643,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
601 vec.bytes -= sg->length; 643 vec.bytes -= sg->length;
602 } 644 }
603 645
604 op->r_nents += nr; 646 op->op_nents += nr;
605 } 647 }
606 648
607
608 if (nr_bytes > args->remote_vec.bytes) { 649 if (nr_bytes > args->remote_vec.bytes) {
609 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n", 650 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
610 nr_bytes, 651 nr_bytes,
@@ -612,38 +653,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
612 ret = -EINVAL; 653 ret = -EINVAL;
613 goto out; 654 goto out;
614 } 655 }
615 op->r_bytes = nr_bytes; 656 op->op_bytes = nr_bytes;
616 657
617 ret = 0; 658 ret = 0;
618out: 659out:
619 kfree(pages); 660 kfree(pages);
620 if (ret) { 661 if (ret)
621 if (op) 662 rds_rdma_free_op(op);
622 rds_rdma_free_op(op);
623 op = ERR_PTR(ret);
624 }
625 return op;
626}
627
628/*
629 * The application asks for a RDMA transfer.
630 * Extract all arguments and set up the rdma_op
631 */
632int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
633 struct cmsghdr *cmsg)
634{
635 struct rds_rdma_op *op;
636
637 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
638 rm->m_rdma_op != NULL)
639 return -EINVAL;
640 663
641 op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
642 if (IS_ERR(op))
643 return PTR_ERR(op);
644 rds_stats_inc(s_send_rdma); 664 rds_stats_inc(s_send_rdma);
645 rm->m_rdma_op = op; 665
646 return 0; 666 return ret;
647} 667}
648 668
649/* 669/*
@@ -673,7 +693,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
673 693
674 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 694 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
675 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 695 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
676 if (mr == NULL) 696 if (!mr)
677 err = -EINVAL; /* invalid r_key */ 697 err = -EINVAL; /* invalid r_key */
678 else 698 else
679 atomic_inc(&mr->r_refcount); 699 atomic_inc(&mr->r_refcount);
@@ -681,7 +701,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
681 701
682 if (mr) { 702 if (mr) {
683 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); 703 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
684 rm->m_rdma_mr = mr; 704 rm->rdma.op_rdma_mr = mr;
685 } 705 }
686 return err; 706 return err;
687} 707}
@@ -699,5 +719,98 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
699 rm->m_rdma_cookie != 0) 719 rm->m_rdma_cookie != 0)
700 return -EINVAL; 720 return -EINVAL;
701 721
702 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr); 722 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
723}
724
725/*
726 * Fill in rds_message for an atomic request.
727 */
728int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
729 struct cmsghdr *cmsg)
730{
731 struct page *page = NULL;
732 struct rds_atomic_args *args;
733 int ret = 0;
734
735 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
736 || rm->atomic.op_active)
737 return -EINVAL;
738
739 args = CMSG_DATA(cmsg);
740
741 /* Nonmasked & masked cmsg ops converted to masked hw ops */
742 switch (cmsg->cmsg_type) {
743 case RDS_CMSG_ATOMIC_FADD:
744 rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
745 rm->atomic.op_m_fadd.add = args->fadd.add;
746 rm->atomic.op_m_fadd.nocarry_mask = 0;
747 break;
748 case RDS_CMSG_MASKED_ATOMIC_FADD:
749 rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
750 rm->atomic.op_m_fadd.add = args->m_fadd.add;
751 rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask;
752 break;
753 case RDS_CMSG_ATOMIC_CSWP:
754 rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
755 rm->atomic.op_m_cswp.compare = args->cswp.compare;
756 rm->atomic.op_m_cswp.swap = args->cswp.swap;
757 rm->atomic.op_m_cswp.compare_mask = ~0;
758 rm->atomic.op_m_cswp.swap_mask = ~0;
759 break;
760 case RDS_CMSG_MASKED_ATOMIC_CSWP:
761 rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
762 rm->atomic.op_m_cswp.compare = args->m_cswp.compare;
763 rm->atomic.op_m_cswp.swap = args->m_cswp.swap;
764 rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask;
765 rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask;
766 break;
767 default:
768 BUG(); /* should never happen */
769 }
770
771 rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
772 rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
773 rm->atomic.op_active = 1;
774 rm->atomic.op_recverr = rs->rs_recverr;
775 rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
776
777 /* verify 8 byte-aligned */
778 if (args->local_addr & 0x7) {
779 ret = -EFAULT;
780 goto err;
781 }
782
783 ret = rds_pin_pages(args->local_addr, 1, &page, 1);
784 if (ret != 1)
785 goto err;
786 ret = 0;
787
788 sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
789
790 if (rm->atomic.op_notify || rm->atomic.op_recverr) {
791 /* We allocate an uninitialized notifier here, because
792 * we don't want to do that in the completion handler. We
793 * would have to use GFP_ATOMIC there, and don't want to deal
794 * with failed allocations.
795 */
796 rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
797 if (!rm->atomic.op_notifier) {
798 ret = -ENOMEM;
799 goto err;
800 }
801
802 rm->atomic.op_notifier->n_user_token = args->user_token;
803 rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
804 }
805
806 rm->atomic.op_rkey = rds_rdma_cookie_key(args->cookie);
807 rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
808
809 return ret;
810err:
811 if (page)
812 put_page(page);
813 kfree(rm->atomic.op_notifier);
814
815 return ret;
703} 816}
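
For reference, the handler above converts both the plain and the masked cmsg variants into masked hardware ops: a plain fetch-and-add becomes the masked form with nocarry_mask = 0, and a plain compare-and-swap becomes the masked form with both masks set to ~0. A minimal userspace model of the masked compare-and-swap semantics that mapping assumes (illustrative sketch only; the real operation is executed by the RDMA hardware):

#include <stdint.h>
#include <stdio.h>

/* Model of a masked compare-and-swap: only bits under compare_mask are
 * compared, and only bits under swap_mask are replaced.  A plain cswp is
 * the special case compare_mask == swap_mask == ~0, which is exactly what
 * rds_cmsg_atomic() fills in for RDS_CMSG_ATOMIC_CSWP. */
static uint64_t masked_cswp(uint64_t *target, uint64_t compare, uint64_t swap,
			    uint64_t compare_mask, uint64_t swap_mask)
{
	uint64_t old = *target;

	if ((old & compare_mask) == (compare & compare_mask))
		*target = (old & ~swap_mask) | (swap & swap_mask);
	return old;
}

int main(void)
{
	uint64_t word = 0x1122334455667788ULL;

	/* swap only the low 16 bits, and only if they currently hold 0x7788 */
	masked_cswp(&word, 0x7788, 0xaaaa, 0xffff, 0xffff);
	printf("%#llx\n", (unsigned long long)word); /* 0x112233445566aaaa */
	return 0;
}
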
diff --git a/net/rds/rdma.h b/net/rds/rdma.h
deleted file mode 100644
index 909c39835a5..00000000000
--- a/net/rds/rdma.h
+++ /dev/null
@@ -1,85 +0,0 @@
1#ifndef _RDS_RDMA_H
2#define _RDS_RDMA_H
3
4#include <linux/rbtree.h>
5#include <linux/spinlock.h>
6#include <linux/scatterlist.h>
7
8#include "rds.h"
9
10struct rds_mr {
11 struct rb_node r_rb_node;
12 atomic_t r_refcount;
13 u32 r_key;
14
15 /* A copy of the creation flags */
16 unsigned int r_use_once:1;
17 unsigned int r_invalidate:1;
18 unsigned int r_write:1;
19
20 /* This is for RDS_MR_DEAD.
21 * It would be nice & consistent to make this part of the above
22 * bit field here, but we need to use test_and_set_bit.
23 */
24 unsigned long r_state;
25 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
26 struct rds_transport *r_trans;
27 void *r_trans_private;
28};
29
30/* Flags for mr->r_state */
31#define RDS_MR_DEAD 0
32
33struct rds_rdma_op {
34 u32 r_key;
35 u64 r_remote_addr;
36 unsigned int r_write:1;
37 unsigned int r_fence:1;
38 unsigned int r_notify:1;
39 unsigned int r_recverr:1;
40 unsigned int r_mapped:1;
41 struct rds_notifier *r_notifier;
42 unsigned int r_bytes;
43 unsigned int r_nents;
44 unsigned int r_count;
45 struct scatterlist r_sg[0];
46};
47
48static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
49{
50 return r_key | (((u64) offset) << 32);
51}
52
53static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
54{
55 return cookie;
56}
57
58static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
59{
60 return cookie >> 32;
61}
62
63int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
64int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
65int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
66void rds_rdma_drop_keys(struct rds_sock *rs);
67int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
68 struct cmsghdr *cmsg);
69int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
70 struct cmsghdr *cmsg);
71int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
72 struct cmsghdr *cmsg);
73int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
74 struct cmsghdr *cmsg);
75void rds_rdma_free_op(struct rds_rdma_op *ro);
76void rds_rdma_send_complete(struct rds_message *rm, int);
77
78extern void __rds_put_mr_final(struct rds_mr *mr);
79static inline void rds_mr_put(struct rds_mr *mr)
80{
81 if (atomic_dec_and_test(&mr->r_refcount))
82 __rds_put_mr_final(mr);
83}
84
85#endif
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index e599ba2f950..e6ed10aee19 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -36,6 +36,34 @@
36 36
37static struct rdma_cm_id *rds_rdma_listen_id; 37static struct rdma_cm_id *rds_rdma_listen_id;
38 38
39static char *rds_cm_event_strings[] = {
40#define RDS_CM_EVENT_STRING(foo) \
41 [RDMA_CM_EVENT_##foo] = __stringify(RDMA_CM_EVENT_##foo)
42 RDS_CM_EVENT_STRING(ADDR_RESOLVED),
43 RDS_CM_EVENT_STRING(ADDR_ERROR),
44 RDS_CM_EVENT_STRING(ROUTE_RESOLVED),
45 RDS_CM_EVENT_STRING(ROUTE_ERROR),
46 RDS_CM_EVENT_STRING(CONNECT_REQUEST),
47 RDS_CM_EVENT_STRING(CONNECT_RESPONSE),
48 RDS_CM_EVENT_STRING(CONNECT_ERROR),
49 RDS_CM_EVENT_STRING(UNREACHABLE),
50 RDS_CM_EVENT_STRING(REJECTED),
51 RDS_CM_EVENT_STRING(ESTABLISHED),
52 RDS_CM_EVENT_STRING(DISCONNECTED),
53 RDS_CM_EVENT_STRING(DEVICE_REMOVAL),
54 RDS_CM_EVENT_STRING(MULTICAST_JOIN),
55 RDS_CM_EVENT_STRING(MULTICAST_ERROR),
56 RDS_CM_EVENT_STRING(ADDR_CHANGE),
57 RDS_CM_EVENT_STRING(TIMEWAIT_EXIT),
58#undef RDS_CM_EVENT_STRING
59};
60
61static char *rds_cm_event_str(enum rdma_cm_event_type type)
62{
63 return rds_str_array(rds_cm_event_strings,
64 ARRAY_SIZE(rds_cm_event_strings), type);
65};
66
39int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, 67int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
40 struct rdma_cm_event *event) 68 struct rdma_cm_event *event)
41{ 69{
@@ -44,8 +72,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
44 struct rds_transport *trans; 72 struct rds_transport *trans;
45 int ret = 0; 73 int ret = 0;
46 74
47 rdsdebug("conn %p id %p handling event %u\n", conn, cm_id, 75 rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
48 event->event); 76 event->event, rds_cm_event_str(event->event));
49 77
50 if (cm_id->device->node_type == RDMA_NODE_RNIC) 78 if (cm_id->device->node_type == RDMA_NODE_RNIC)
51 trans = &rds_iw_transport; 79 trans = &rds_iw_transport;
@@ -109,7 +137,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
109 137
110 default: 138 default:
111 /* things like device disconnect? */ 139 /* things like device disconnect? */
112 printk(KERN_ERR "RDS: unknown event %u!\n", event->event); 140 printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
141 event->event, rds_cm_event_str(event->event));
113 break; 142 break;
114 } 143 }
115 144
@@ -117,12 +146,13 @@ out:
117 if (conn) 146 if (conn)
118 mutex_unlock(&conn->c_cm_lock); 147 mutex_unlock(&conn->c_cm_lock);
119 148
120 rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret); 149 rdsdebug("id %p event %u (%s) handling ret %d\n", cm_id, event->event,
150 rds_cm_event_str(event->event), ret);
121 151
122 return ret; 152 return ret;
123} 153}
124 154
125static int __init rds_rdma_listen_init(void) 155static int rds_rdma_listen_init(void)
126{ 156{
127 struct sockaddr_in sin; 157 struct sockaddr_in sin;
128 struct rdma_cm_id *cm_id; 158 struct rdma_cm_id *cm_id;
@@ -177,7 +207,7 @@ static void rds_rdma_listen_stop(void)
177 } 207 }
178} 208}
179 209
180int __init rds_rdma_init(void) 210int rds_rdma_init(void)
181{ 211{
182 int ret; 212 int ret;
183 213
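
The rds_cm_event_strings[] table added above pairs designated initializers with __stringify() so each RDMA CM event can be printed by name in the debug output. A self-contained sketch of the same pattern, with a bounds-checked lookup standing in for what rds_str_array() is assumed to do (names here are illustrative):

#include <stdio.h>

#define STR(x) #x

enum demo_event { EV_CONNECTED, EV_DISCONNECTED, EV_ERROR };

static const char *demo_event_strings[] = {
#define DEMO_EVENT_STRING(foo) [EV_##foo] = STR(EV_##foo)
	DEMO_EVENT_STRING(CONNECTED),
	DEMO_EVENT_STRING(DISCONNECTED),
	DEMO_EVENT_STRING(ERROR),
#undef DEMO_EVENT_STRING
};

/* Bounds-checked lookup, mirroring the assumed behaviour of rds_str_array(). */
static const char *demo_event_str(unsigned int type)
{
	if (type < sizeof(demo_event_strings) / sizeof(demo_event_strings[0]) &&
	    demo_event_strings[type])
		return demo_event_strings[type];
	return "unknown";
}

int main(void)
{
	printf("%u -> %s\n", 1u, demo_event_str(1));   /* EV_DISCONNECTED */
	printf("%u -> %s\n", 42u, demo_event_str(42)); /* unknown */
	return 0;
}
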
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c224b5bb3ba..8103dcf8b97 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -80,6 +80,7 @@ enum {
80/* Bits for c_flags */ 80/* Bits for c_flags */
81#define RDS_LL_SEND_FULL 0 81#define RDS_LL_SEND_FULL 0
82#define RDS_RECONNECT_PENDING 1 82#define RDS_RECONNECT_PENDING 1
83#define RDS_IN_XMIT 2
83 84
84struct rds_connection { 85struct rds_connection {
85 struct hlist_node c_hash_node; 86 struct hlist_node c_hash_node;
@@ -91,12 +92,13 @@ struct rds_connection {
91 struct rds_cong_map *c_lcong; 92 struct rds_cong_map *c_lcong;
92 struct rds_cong_map *c_fcong; 93 struct rds_cong_map *c_fcong;
93 94
94 struct mutex c_send_lock; /* protect send ring */
95 struct rds_message *c_xmit_rm; 95 struct rds_message *c_xmit_rm;
96 unsigned long c_xmit_sg; 96 unsigned long c_xmit_sg;
97 unsigned int c_xmit_hdr_off; 97 unsigned int c_xmit_hdr_off;
98 unsigned int c_xmit_data_off; 98 unsigned int c_xmit_data_off;
99 unsigned int c_xmit_atomic_sent;
99 unsigned int c_xmit_rdma_sent; 100 unsigned int c_xmit_rdma_sent;
101 unsigned int c_xmit_data_sent;
100 102
101 spinlock_t c_lock; /* protect msg queues */ 103 spinlock_t c_lock; /* protect msg queues */
102 u64 c_next_tx_seq; 104 u64 c_next_tx_seq;
@@ -116,11 +118,10 @@ struct rds_connection {
116 struct delayed_work c_conn_w; 118 struct delayed_work c_conn_w;
117 struct work_struct c_down_w; 119 struct work_struct c_down_w;
118 struct mutex c_cm_lock; /* protect conn state & cm */ 120 struct mutex c_cm_lock; /* protect conn state & cm */
121 wait_queue_head_t c_waitq;
119 122
120 struct list_head c_map_item; 123 struct list_head c_map_item;
121 unsigned long c_map_queued; 124 unsigned long c_map_queued;
122 unsigned long c_map_offset;
123 unsigned long c_map_bytes;
124 125
125 unsigned int c_unacked_packets; 126 unsigned int c_unacked_packets;
126 unsigned int c_unacked_bytes; 127 unsigned int c_unacked_bytes;
@@ -206,6 +207,48 @@ struct rds_incoming {
206 rds_rdma_cookie_t i_rdma_cookie; 207 rds_rdma_cookie_t i_rdma_cookie;
207}; 208};
208 209
210struct rds_mr {
211 struct rb_node r_rb_node;
212 atomic_t r_refcount;
213 u32 r_key;
214
215 /* A copy of the creation flags */
216 unsigned int r_use_once:1;
217 unsigned int r_invalidate:1;
218 unsigned int r_write:1;
219
220 /* This is for RDS_MR_DEAD.
221 * It would be nice & consistent to make this part of the above
222 * bit field here, but we need to use test_and_set_bit.
223 */
224 unsigned long r_state;
225 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
226 struct rds_transport *r_trans;
227 void *r_trans_private;
228};
229
230/* Flags for mr->r_state */
231#define RDS_MR_DEAD 0
232
233static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
234{
235 return r_key | (((u64) offset) << 32);
236}
237
238static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
239{
240 return cookie;
241}
242
243static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
244{
245 return cookie >> 32;
246}
247
248/* atomic operation types */
249#define RDS_ATOMIC_TYPE_CSWP 0
250#define RDS_ATOMIC_TYPE_FADD 1
251
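
The cookie helpers that moved here from rdma.h pack the R_Key into the low 32 bits and the byte offset into the high 32 bits of a single u64 carried in the RDS header. A quick standalone check of that packing:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t rds_rdma_cookie_t;

static rds_rdma_cookie_t make_cookie(uint32_t r_key, uint32_t offset)
{
	return r_key | ((uint64_t)offset << 32);
}

int main(void)
{
	rds_rdma_cookie_t c = make_cookie(0xdeadbeef, 4096);

	assert((uint32_t)c == 0xdeadbeef);    /* key lives in the low 32 bits    */
	assert((uint32_t)(c >> 32) == 4096);  /* offset lives in the high 32 bits */
	printf("cookie=%#llx\n", (unsigned long long)c);
	return 0;
}
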
209/* 252/*
210 * m_sock_item and m_conn_item are on lists that are serialized under 253 * m_sock_item and m_conn_item are on lists that are serialized under
211 * conn->c_lock. m_sock_item has additional meaning in that once it is empty 254 * conn->c_lock. m_sock_item has additional meaning in that once it is empty
@@ -258,13 +301,71 @@ struct rds_message {
258 * -> rs->rs_lock 301 * -> rs->rs_lock
259 */ 302 */
260 spinlock_t m_rs_lock; 303 spinlock_t m_rs_lock;
304 wait_queue_head_t m_flush_wait;
305
261 struct rds_sock *m_rs; 306 struct rds_sock *m_rs;
262 struct rds_rdma_op *m_rdma_op; 307
308 /* cookie to send to remote, in rds header */
263 rds_rdma_cookie_t m_rdma_cookie; 309 rds_rdma_cookie_t m_rdma_cookie;
264 struct rds_mr *m_rdma_mr; 310
265 unsigned int m_nents; 311 unsigned int m_used_sgs;
266 unsigned int m_count; 312 unsigned int m_total_sgs;
267 struct scatterlist m_sg[0]; 313
314 void *m_final_op;
315
316 struct {
317 struct rm_atomic_op {
318 int op_type;
319 union {
320 struct {
321 uint64_t compare;
322 uint64_t swap;
323 uint64_t compare_mask;
324 uint64_t swap_mask;
325 } op_m_cswp;
326 struct {
327 uint64_t add;
328 uint64_t nocarry_mask;
329 } op_m_fadd;
330 };
331
332 u32 op_rkey;
333 u64 op_remote_addr;
334 unsigned int op_notify:1;
335 unsigned int op_recverr:1;
336 unsigned int op_mapped:1;
337 unsigned int op_silent:1;
338 unsigned int op_active:1;
339 struct scatterlist *op_sg;
340 struct rds_notifier *op_notifier;
341
342 struct rds_mr *op_rdma_mr;
343 } atomic;
344 struct rm_rdma_op {
345 u32 op_rkey;
346 u64 op_remote_addr;
347 unsigned int op_write:1;
348 unsigned int op_fence:1;
349 unsigned int op_notify:1;
350 unsigned int op_recverr:1;
351 unsigned int op_mapped:1;
352 unsigned int op_silent:1;
353 unsigned int op_active:1;
354 unsigned int op_bytes;
355 unsigned int op_nents;
356 unsigned int op_count;
357 struct scatterlist *op_sg;
358 struct rds_notifier *op_notifier;
359
360 struct rds_mr *op_rdma_mr;
361 } rdma;
362 struct rm_data_op {
363 unsigned int op_active:1;
364 unsigned int op_nents;
365 unsigned int op_count;
366 struct scatterlist *op_sg;
367 } data;
368 };
268}; 369};
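
With the atomic, rdma and data ops embedded directly in rds_message, their scatterlist entries are expected to come out of one allocation sized up front (m_total_sgs), with m_used_sgs tracking how many have been handed out, which is presumably what rds_message_alloc_sgs() does. A userspace analogue of that carve-out, with made-up names:

#include <stdio.h>
#include <stdlib.h>

struct demo_sg { void *page; unsigned int len, off; };

struct demo_message {
	unsigned int m_used_sgs;
	unsigned int m_total_sgs;
	struct demo_sg m_sgs[];   /* single allocation holds every sg entry */
};

static struct demo_message *demo_message_alloc(unsigned int total_sgs)
{
	struct demo_message *rm;

	rm = calloc(1, sizeof(*rm) + total_sgs * sizeof(struct demo_sg));
	if (rm)
		rm->m_total_sgs = total_sgs;
	return rm;
}

/* Hand out 'nents' entries from the preallocated array, or NULL if the
 * up-front sizing (compare rds_rm_size() in send.c) was too small. */
static struct demo_sg *demo_message_alloc_sgs(struct demo_message *rm,
					      unsigned int nents)
{
	struct demo_sg *sg;

	if (rm->m_used_sgs + nents > rm->m_total_sgs)
		return NULL;
	sg = &rm->m_sgs[rm->m_used_sgs];
	rm->m_used_sgs += nents;
	return sg;
}

int main(void)
{
	struct demo_message *rm = demo_message_alloc(4);

	if (!rm)
		return 1;
	printf("data sgs start at idx %td\n", demo_message_alloc_sgs(rm, 3) - rm->m_sgs);
	printf("atomic sg at idx %td\n", demo_message_alloc_sgs(rm, 1) - rm->m_sgs);
	free(rm);
	return 0;
}
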
269 370
270/* 371/*
@@ -305,10 +406,6 @@ struct rds_notifier {
305 * transport is responsible for other serialization, including 406 * transport is responsible for other serialization, including
306 * rds_recv_incoming(). This is called in process context but 407 * rds_recv_incoming(). This is called in process context but
307 * should try hard not to block. 408 * should try hard not to block.
308 *
309 * @xmit_cong_map: This asks the transport to send the local bitmap down the
310 * given connection. XXX get a better story about the bitmap
311 * flag and header.
312 */ 409 */
313 410
314#define RDS_TRANS_IB 0 411#define RDS_TRANS_IB 0
@@ -332,13 +429,11 @@ struct rds_transport {
332 void (*xmit_complete)(struct rds_connection *conn); 429 void (*xmit_complete)(struct rds_connection *conn);
333 int (*xmit)(struct rds_connection *conn, struct rds_message *rm, 430 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
334 unsigned int hdr_off, unsigned int sg, unsigned int off); 431 unsigned int hdr_off, unsigned int sg, unsigned int off);
335 int (*xmit_cong_map)(struct rds_connection *conn, 432 int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
336 struct rds_cong_map *map, unsigned long offset); 433 int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
337 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
338 int (*recv)(struct rds_connection *conn); 434 int (*recv)(struct rds_connection *conn);
339 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, 435 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
340 size_t size); 436 size_t size);
341 void (*inc_purge)(struct rds_incoming *inc);
342 void (*inc_free)(struct rds_incoming *inc); 437 void (*inc_free)(struct rds_incoming *inc);
343 438
344 int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 439 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
@@ -367,17 +462,11 @@ struct rds_sock {
367 * bound_addr used for both incoming and outgoing, no INADDR_ANY 462 * bound_addr used for both incoming and outgoing, no INADDR_ANY
368 * support. 463 * support.
369 */ 464 */
370 struct rb_node rs_bound_node; 465 struct hlist_node rs_bound_node;
371 __be32 rs_bound_addr; 466 __be32 rs_bound_addr;
372 __be32 rs_conn_addr; 467 __be32 rs_conn_addr;
373 __be16 rs_bound_port; 468 __be16 rs_bound_port;
374 __be16 rs_conn_port; 469 __be16 rs_conn_port;
375
376 /*
377 * This is only used to communicate the transport between bind and
378 * initiating connections. All other trans use is referenced through
379 * the connection.
380 */
381 struct rds_transport *rs_transport; 470 struct rds_transport *rs_transport;
382 471
383 /* 472 /*
@@ -466,8 +555,8 @@ struct rds_statistics {
466 uint64_t s_recv_ping; 555 uint64_t s_recv_ping;
467 uint64_t s_send_queue_empty; 556 uint64_t s_send_queue_empty;
468 uint64_t s_send_queue_full; 557 uint64_t s_send_queue_full;
469 uint64_t s_send_sem_contention; 558 uint64_t s_send_lock_contention;
470 uint64_t s_send_sem_queue_raced; 559 uint64_t s_send_lock_queue_raced;
471 uint64_t s_send_immediate_retry; 560 uint64_t s_send_immediate_retry;
472 uint64_t s_send_delayed_retry; 561 uint64_t s_send_delayed_retry;
473 uint64_t s_send_drop_acked; 562 uint64_t s_send_drop_acked;
@@ -487,6 +576,7 @@ struct rds_statistics {
487}; 576};
488 577
489/* af_rds.c */ 578/* af_rds.c */
579char *rds_str_array(char **array, size_t elements, size_t index);
490void rds_sock_addref(struct rds_sock *rs); 580void rds_sock_addref(struct rds_sock *rs);
491void rds_sock_put(struct rds_sock *rs); 581void rds_sock_put(struct rds_sock *rs);
492void rds_wake_sk_sleep(struct rds_sock *rs); 582void rds_wake_sk_sleep(struct rds_sock *rs);
@@ -521,15 +611,17 @@ void rds_cong_exit(void);
521struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); 611struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
522 612
523/* conn.c */ 613/* conn.c */
524int __init rds_conn_init(void); 614int rds_conn_init(void);
525void rds_conn_exit(void); 615void rds_conn_exit(void);
526struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, 616struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
527 struct rds_transport *trans, gfp_t gfp); 617 struct rds_transport *trans, gfp_t gfp);
528struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, 618struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
529 struct rds_transport *trans, gfp_t gfp); 619 struct rds_transport *trans, gfp_t gfp);
620void rds_conn_shutdown(struct rds_connection *conn);
530void rds_conn_destroy(struct rds_connection *conn); 621void rds_conn_destroy(struct rds_connection *conn);
531void rds_conn_reset(struct rds_connection *conn); 622void rds_conn_reset(struct rds_connection *conn);
532void rds_conn_drop(struct rds_connection *conn); 623void rds_conn_drop(struct rds_connection *conn);
624void rds_conn_connect_if_down(struct rds_connection *conn);
533void rds_for_each_conn_info(struct socket *sock, unsigned int len, 625void rds_for_each_conn_info(struct socket *sock, unsigned int len,
534 struct rds_info_iterator *iter, 626 struct rds_info_iterator *iter,
535 struct rds_info_lengths *lens, 627 struct rds_info_lengths *lens,
@@ -566,7 +658,8 @@ rds_conn_connecting(struct rds_connection *conn)
566 658
567/* message.c */ 659/* message.c */
568struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); 660struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
569struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 661struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
662int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
570 size_t total_len); 663 size_t total_len);
571struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); 664struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
572void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 665void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
@@ -580,7 +673,6 @@ int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *vers
580int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); 673int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
581int rds_message_inc_copy_to_user(struct rds_incoming *inc, 674int rds_message_inc_copy_to_user(struct rds_incoming *inc,
582 struct iovec *first_iov, size_t size); 675 struct iovec *first_iov, size_t size);
583void rds_message_inc_purge(struct rds_incoming *inc);
584void rds_message_inc_free(struct rds_incoming *inc); 676void rds_message_inc_free(struct rds_incoming *inc);
585void rds_message_addref(struct rds_message *rm); 677void rds_message_addref(struct rds_message *rm);
586void rds_message_put(struct rds_message *rm); 678void rds_message_put(struct rds_message *rm);
@@ -636,14 +728,39 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
636typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); 728typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
637void rds_send_drop_acked(struct rds_connection *conn, u64 ack, 729void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
638 is_acked_func is_acked); 730 is_acked_func is_acked);
639int rds_send_acked_before(struct rds_connection *conn, u64 seq);
640void rds_send_remove_from_sock(struct list_head *messages, int status); 731void rds_send_remove_from_sock(struct list_head *messages, int status);
641int rds_send_pong(struct rds_connection *conn, __be16 dport); 732int rds_send_pong(struct rds_connection *conn, __be16 dport);
642struct rds_message *rds_send_get_message(struct rds_connection *, 733struct rds_message *rds_send_get_message(struct rds_connection *,
643 struct rds_rdma_op *); 734 struct rm_rdma_op *);
644 735
645/* rdma.c */ 736/* rdma.c */
646void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); 737void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
738int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
739int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
740int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
741void rds_rdma_drop_keys(struct rds_sock *rs);
742int rds_rdma_extra_size(struct rds_rdma_args *args);
743int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
744 struct cmsghdr *cmsg);
745int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
746 struct cmsghdr *cmsg);
747int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
748 struct cmsghdr *cmsg);
749int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
750 struct cmsghdr *cmsg);
751void rds_rdma_free_op(struct rm_rdma_op *ro);
752void rds_atomic_free_op(struct rm_atomic_op *ao);
753void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
754void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
755int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
756 struct cmsghdr *cmsg);
757
758extern void __rds_put_mr_final(struct rds_mr *mr);
759static inline void rds_mr_put(struct rds_mr *mr)
760{
761 if (atomic_dec_and_test(&mr->r_refcount))
762 __rds_put_mr_final(mr);
763}
647 764
648/* stats.c */ 765/* stats.c */
649DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); 766DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
@@ -657,14 +774,14 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
657 put_cpu(); \ 774 put_cpu(); \
658} while (0) 775} while (0)
659#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) 776#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
660int __init rds_stats_init(void); 777int rds_stats_init(void);
661void rds_stats_exit(void); 778void rds_stats_exit(void);
662void rds_stats_info_copy(struct rds_info_iterator *iter, 779void rds_stats_info_copy(struct rds_info_iterator *iter,
663 uint64_t *values, const char *const *names, 780 uint64_t *values, const char *const *names,
664 size_t nr); 781 size_t nr);
665 782
666/* sysctl.c */ 783/* sysctl.c */
667int __init rds_sysctl_init(void); 784int rds_sysctl_init(void);
668void rds_sysctl_exit(void); 785void rds_sysctl_exit(void);
669extern unsigned long rds_sysctl_sndbuf_min; 786extern unsigned long rds_sysctl_sndbuf_min;
670extern unsigned long rds_sysctl_sndbuf_default; 787extern unsigned long rds_sysctl_sndbuf_default;
@@ -678,9 +795,10 @@ extern unsigned long rds_sysctl_trace_flags;
678extern unsigned int rds_sysctl_trace_level; 795extern unsigned int rds_sysctl_trace_level;
679 796
680/* threads.c */ 797/* threads.c */
681int __init rds_threads_init(void); 798int rds_threads_init(void);
682void rds_threads_exit(void); 799void rds_threads_exit(void);
683extern struct workqueue_struct *rds_wq; 800extern struct workqueue_struct *rds_wq;
801void rds_queue_reconnect(struct rds_connection *conn);
684void rds_connect_worker(struct work_struct *); 802void rds_connect_worker(struct work_struct *);
685void rds_shutdown_worker(struct work_struct *); 803void rds_shutdown_worker(struct work_struct *);
686void rds_send_worker(struct work_struct *); 804void rds_send_worker(struct work_struct *);
@@ -691,9 +809,10 @@ void rds_connect_complete(struct rds_connection *conn);
691int rds_trans_register(struct rds_transport *trans); 809int rds_trans_register(struct rds_transport *trans);
692void rds_trans_unregister(struct rds_transport *trans); 810void rds_trans_unregister(struct rds_transport *trans);
693struct rds_transport *rds_trans_get_preferred(__be32 addr); 811struct rds_transport *rds_trans_get_preferred(__be32 addr);
812void rds_trans_put(struct rds_transport *trans);
694unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, 813unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
695 unsigned int avail); 814 unsigned int avail);
696int __init rds_trans_init(void); 815int rds_trans_init(void);
697void rds_trans_exit(void); 816void rds_trans_exit(void);
698 817
699#endif 818#endif
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c93588c2d55..68800f02aa3 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -36,7 +36,6 @@
36#include <linux/in.h> 36#include <linux/in.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40 39
41void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 40void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
42 __be32 saddr) 41 __be32 saddr)
@@ -210,7 +209,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
210 } 209 }
211 210
212 rs = rds_find_bound(daddr, inc->i_hdr.h_dport); 211 rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
213 if (rs == NULL) { 212 if (!rs) {
214 rds_stats_inc(s_recv_drop_no_sock); 213 rds_stats_inc(s_recv_drop_no_sock);
215 goto out; 214 goto out;
216 } 215 }
@@ -251,7 +250,7 @@ static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc)
251{ 250{
252 unsigned long flags; 251 unsigned long flags;
253 252
254 if (*inc == NULL) { 253 if (!*inc) {
255 read_lock_irqsave(&rs->rs_recv_lock, flags); 254 read_lock_irqsave(&rs->rs_recv_lock, flags);
256 if (!list_empty(&rs->rs_recv_queue)) { 255 if (!list_empty(&rs->rs_recv_queue)) {
257 *inc = list_entry(rs->rs_recv_queue.next, 256 *inc = list_entry(rs->rs_recv_queue.next,
@@ -334,10 +333,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
334 333
335 if (msghdr) { 334 if (msghdr) {
336 cmsg.user_token = notifier->n_user_token; 335 cmsg.user_token = notifier->n_user_token;
337 cmsg.status = notifier->n_status; 336 cmsg.status = notifier->n_status;
338 337
339 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, 338 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS,
340 sizeof(cmsg), &cmsg); 339 sizeof(cmsg), &cmsg);
341 if (err) 340 if (err)
342 break; 341 break;
343 } 342 }
diff --git a/net/rds/send.c b/net/rds/send.c
index 9c1c6bcaa6c..9b951a0ab6b 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -37,7 +37,6 @@
37#include <linux/list.h> 37#include <linux/list.h>
38 38
39#include "rds.h" 39#include "rds.h"
40#include "rdma.h"
41 40
42/* When transmitting messages in rds_send_xmit, we need to emerge from 41/* When transmitting messages in rds_send_xmit, we need to emerge from
43 * time to time and briefly release the CPU. Otherwise the softlock watchdog 42 * time to time and briefly release the CPU. Otherwise the softlock watchdog
@@ -54,7 +53,8 @@ module_param(send_batch_count, int, 0444);
54MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); 53MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
55 54
56/* 55/*
57 * Reset the send state. Caller must hold c_send_lock when calling here. 56 * Reset the send state. Callers must ensure that this doesn't race with
57 * rds_send_xmit().
58 */ 58 */
59void rds_send_reset(struct rds_connection *conn) 59void rds_send_reset(struct rds_connection *conn)
60{ 60{
@@ -62,18 +62,22 @@ void rds_send_reset(struct rds_connection *conn)
62 unsigned long flags; 62 unsigned long flags;
63 63
64 if (conn->c_xmit_rm) { 64 if (conn->c_xmit_rm) {
65 rm = conn->c_xmit_rm;
66 conn->c_xmit_rm = NULL;
65 /* Tell the user the RDMA op is no longer mapped by the 67 /* Tell the user the RDMA op is no longer mapped by the
66 * transport. This isn't entirely true (it's flushed out 68 * transport. This isn't entirely true (it's flushed out
67 * independently) but as the connection is down, there's 69 * independently) but as the connection is down, there's
68 * no ongoing RDMA to/from that memory */ 70 * no ongoing RDMA to/from that memory */
69 rds_message_unmapped(conn->c_xmit_rm); 71 rds_message_unmapped(rm);
70 rds_message_put(conn->c_xmit_rm); 72 rds_message_put(rm);
71 conn->c_xmit_rm = NULL;
72 } 73 }
74
73 conn->c_xmit_sg = 0; 75 conn->c_xmit_sg = 0;
74 conn->c_xmit_hdr_off = 0; 76 conn->c_xmit_hdr_off = 0;
75 conn->c_xmit_data_off = 0; 77 conn->c_xmit_data_off = 0;
78 conn->c_xmit_atomic_sent = 0;
76 conn->c_xmit_rdma_sent = 0; 79 conn->c_xmit_rdma_sent = 0;
80 conn->c_xmit_data_sent = 0;
77 81
78 conn->c_map_queued = 0; 82 conn->c_map_queued = 0;
79 83
@@ -90,6 +94,25 @@ void rds_send_reset(struct rds_connection *conn)
90 spin_unlock_irqrestore(&conn->c_lock, flags); 94 spin_unlock_irqrestore(&conn->c_lock, flags);
91} 95}
92 96
97static int acquire_in_xmit(struct rds_connection *conn)
98{
99 return test_and_set_bit(RDS_IN_XMIT, &conn->c_flags) == 0;
100}
101
102static void release_in_xmit(struct rds_connection *conn)
103{
104 clear_bit(RDS_IN_XMIT, &conn->c_flags);
105 smp_mb__after_clear_bit();
106 /*
107 * We don't use wait_on_bit()/wake_up_bit() because our waking is in a
108 * hot path and finding waiters is very rare. We don't want to walk
109 * the system-wide hashed waitqueue buckets in the fast path only to
110 * almost never find waiters.
111 */
112 if (waitqueue_active(&conn->c_waitq))
113 wake_up_all(&conn->c_waitq);
114}
115
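
acquire_in_xmit()/release_in_xmit() above replace the old c_send_lock mutex with a single RDS_IN_XMIT bit in c_flags: the thread that flips the bit owns the send path, and everyone else backs off rather than blocking. A userspace analogue of the gate using C11 atomics (the c_waitq wakeup side is kernel-specific and only noted in a comment):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint conn_flags = 0;
#define IN_XMIT 0x4u   /* bit 2, mirroring RDS_IN_XMIT == 2 */

/* Returns true only for the caller that flipped the bit from 0 to 1. */
static bool acquire_in_xmit(void)
{
	return (atomic_fetch_or(&conn_flags, IN_XMIT) & IN_XMIT) == 0;
}

static void release_in_xmit(void)
{
	atomic_fetch_and(&conn_flags, ~IN_XMIT);
	/* the kernel code additionally wakes c_waitq here, but only when
	 * waitqueue_active() says someone is actually waiting */
}

int main(void)
{
	printf("first acquire:  %d\n", acquire_in_xmit());  /* 1 */
	printf("second acquire: %d\n", acquire_in_xmit());  /* 0: another sender owns it */
	release_in_xmit();
	printf("after release:  %d\n", acquire_in_xmit());  /* 1 */
	return 0;
}
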
93/* 116/*
94 * We're making the conscious trade-off here to only send one message 117
95 * down the connection at a time. 118 * down the connection at a time.
@@ -109,102 +132,69 @@ int rds_send_xmit(struct rds_connection *conn)
109 struct rds_message *rm; 132 struct rds_message *rm;
110 unsigned long flags; 133 unsigned long flags;
111 unsigned int tmp; 134 unsigned int tmp;
112 unsigned int send_quota = send_batch_count;
113 struct scatterlist *sg; 135 struct scatterlist *sg;
114 int ret = 0; 136 int ret = 0;
115 int was_empty = 0;
116 LIST_HEAD(to_be_dropped); 137 LIST_HEAD(to_be_dropped);
117 138
139restart:
140
118 /* 141 /*
119 * sendmsg calls here after having queued its message on the send 142 * sendmsg calls here after having queued its message on the send
120 * queue. We only have one task feeding the connection at a time. If 143 * queue. We only have one task feeding the connection at a time. If
121 * another thread is already feeding the queue then we back off. This 144 * another thread is already feeding the queue then we back off. This
122 * avoids blocking the caller and trading per-connection data between 145 * avoids blocking the caller and trading per-connection data between
123 * caches per message. 146 * caches per message.
124 *
125 * The sem holder will issue a retry if they notice that someone queued
126 * a message after they stopped walking the send queue but before they
127 * dropped the sem.
128 */ 147 */
129 if (!mutex_trylock(&conn->c_send_lock)) { 148 if (!acquire_in_xmit(conn)) {
130 rds_stats_inc(s_send_sem_contention); 149 rds_stats_inc(s_send_lock_contention);
131 ret = -ENOMEM; 150 ret = -ENOMEM;
132 goto out; 151 goto out;
133 } 152 }
134 153
154 /*
155 * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
156 * we do the opposite to avoid races.
157 */
158 if (!rds_conn_up(conn)) {
159 release_in_xmit(conn);
160 ret = 0;
161 goto out;
162 }
163
135 if (conn->c_trans->xmit_prepare) 164 if (conn->c_trans->xmit_prepare)
136 conn->c_trans->xmit_prepare(conn); 165 conn->c_trans->xmit_prepare(conn);
137 166
138 /* 167 /*
139 * spin trying to push headers and data down the connection until 168 * spin trying to push headers and data down the connection until
140 * the connection doens't make forward progress. 169 * the connection doesn't make forward progress.
141 */ 170 */
142 while (--send_quota) { 171 while (1) {
143 /*
144 * See if need to send a congestion map update if we're
145 * between sending messages. The send_sem protects our sole
146 * use of c_map_offset and _bytes.
147 * Note this is used only by transports that define a special
148 * xmit_cong_map function. For all others, we create allocate
149 * a cong_map message and treat it just like any other send.
150 */
151 if (conn->c_map_bytes) {
152 ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
153 conn->c_map_offset);
154 if (ret <= 0)
155 break;
156 172
157 conn->c_map_offset += ret;
158 conn->c_map_bytes -= ret;
159 if (conn->c_map_bytes)
160 continue;
161 }
162
163 /* If we're done sending the current message, clear the
164 * offset and S/G temporaries.
165 */
166 rm = conn->c_xmit_rm; 173 rm = conn->c_xmit_rm;
167 if (rm != NULL &&
168 conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
169 conn->c_xmit_sg == rm->m_nents) {
170 conn->c_xmit_rm = NULL;
171 conn->c_xmit_sg = 0;
172 conn->c_xmit_hdr_off = 0;
173 conn->c_xmit_data_off = 0;
174 conn->c_xmit_rdma_sent = 0;
175 174
176 /* Release the reference to the previous message. */ 175 /*
177 rds_message_put(rm); 176 * If between sending messages, we can send a pending congestion
178 rm = NULL; 177 * map update.
179 }
180
181 /* If we're asked to send a cong map update, do so.
182 */ 178 */
183 if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) { 179 if (!rm && test_and_clear_bit(0, &conn->c_map_queued)) {
184 if (conn->c_trans->xmit_cong_map != NULL) {
185 conn->c_map_offset = 0;
186 conn->c_map_bytes = sizeof(struct rds_header) +
187 RDS_CONG_MAP_BYTES;
188 continue;
189 }
190
191 rm = rds_cong_update_alloc(conn); 180 rm = rds_cong_update_alloc(conn);
192 if (IS_ERR(rm)) { 181 if (IS_ERR(rm)) {
193 ret = PTR_ERR(rm); 182 ret = PTR_ERR(rm);
194 break; 183 break;
195 } 184 }
185 rm->data.op_active = 1;
196 186
197 conn->c_xmit_rm = rm; 187 conn->c_xmit_rm = rm;
198 } 188 }
199 189
200 /* 190 /*
201 * Grab the next message from the send queue, if there is one. 191 * If not already working on one, grab the next message.
202 * 192 *
203 * c_xmit_rm holds a ref while we're sending this message down 193 * c_xmit_rm holds a ref while we're sending this message down
204 * the connection. We can use this ref while holding the 194 * the connection. We can use this ref while holding the
205 * send_sem.. rds_send_reset() is serialized with it. 195 * send_sem.. rds_send_reset() is serialized with it.
206 */ 196 */
207 if (rm == NULL) { 197 if (!rm) {
208 unsigned int len; 198 unsigned int len;
209 199
210 spin_lock_irqsave(&conn->c_lock, flags); 200 spin_lock_irqsave(&conn->c_lock, flags);
@@ -224,10 +214,8 @@ int rds_send_xmit(struct rds_connection *conn)
224 214
225 spin_unlock_irqrestore(&conn->c_lock, flags); 215 spin_unlock_irqrestore(&conn->c_lock, flags);
226 216
227 if (rm == NULL) { 217 if (!rm)
228 was_empty = 1;
229 break; 218 break;
230 }
231 219
232 /* Unfortunately, the way Infiniband deals with 220 /* Unfortunately, the way Infiniband deals with
233 * RDMA to a bad MR key is by moving the entire 221 * RDMA to a bad MR key is by moving the entire
@@ -236,13 +224,12 @@ int rds_send_xmit(struct rds_connection *conn)
236 * connection. 224 * connection.
237 * Therefore, we never retransmit messages with RDMA ops. 225 * Therefore, we never retransmit messages with RDMA ops.
238 */ 226 */
239 if (rm->m_rdma_op && 227 if (rm->rdma.op_active &&
240 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { 228 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
241 spin_lock_irqsave(&conn->c_lock, flags); 229 spin_lock_irqsave(&conn->c_lock, flags);
242 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 230 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
243 list_move(&rm->m_conn_item, &to_be_dropped); 231 list_move(&rm->m_conn_item, &to_be_dropped);
244 spin_unlock_irqrestore(&conn->c_lock, flags); 232 spin_unlock_irqrestore(&conn->c_lock, flags);
245 rds_message_put(rm);
246 continue; 233 continue;
247 } 234 }
248 235
@@ -263,23 +250,55 @@ int rds_send_xmit(struct rds_connection *conn)
263 conn->c_xmit_rm = rm; 250 conn->c_xmit_rm = rm;
264 } 251 }
265 252
266 /* 253 /* The transport either sends the whole rdma or none of it */
267 * Try and send an rdma message. Let's see if we can 254 if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) {
268 * keep this simple and require that the transport either 255 rm->m_final_op = &rm->rdma;
269 * send the whole rdma or none of it. 256 ret = conn->c_trans->xmit_rdma(conn, &rm->rdma);
270 */
271 if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
272 ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
273 if (ret) 257 if (ret)
274 break; 258 break;
275 conn->c_xmit_rdma_sent = 1; 259 conn->c_xmit_rdma_sent = 1;
260
276 /* The transport owns the mapped memory for now. 261 /* The transport owns the mapped memory for now.
277 * You can't unmap it while it's on the send queue */ 262 * You can't unmap it while it's on the send queue */
278 set_bit(RDS_MSG_MAPPED, &rm->m_flags); 263 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
279 } 264 }
280 265
281 if (conn->c_xmit_hdr_off < sizeof(struct rds_header) || 266 if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
282 conn->c_xmit_sg < rm->m_nents) { 267 rm->m_final_op = &rm->atomic;
268 ret = conn->c_trans->xmit_atomic(conn, &rm->atomic);
269 if (ret)
270 break;
271 conn->c_xmit_atomic_sent = 1;
272
273 /* The transport owns the mapped memory for now.
274 * You can't unmap it while it's on the send queue */
275 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
276 }
277
278 /*
279 * A number of cases require an RDS header to be sent
280 * even if there is no data.
281 * We permit 0-byte sends; rds-ping depends on this.
282 * However, if there are exclusively attached silent ops,
283 * we skip the hdr/data send, to enable silent operation.
284 */
285 if (rm->data.op_nents == 0) {
286 int ops_present;
287 int all_ops_are_silent = 1;
288
289 ops_present = (rm->atomic.op_active || rm->rdma.op_active);
290 if (rm->atomic.op_active && !rm->atomic.op_silent)
291 all_ops_are_silent = 0;
292 if (rm->rdma.op_active && !rm->rdma.op_silent)
293 all_ops_are_silent = 0;
294
295 if (ops_present && all_ops_are_silent
296 && !rm->m_rdma_cookie)
297 rm->data.op_active = 0;
298 }
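
The block just above is what lets silent ops suppress the header/data send: a message with no data sgs still gets a header (0-byte rds-ping relies on this) unless every attached op is silent and there is no rdma cookie to carry. The decision, distilled into a standalone predicate for clarity (names illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Returns whether the header/data send still happens for a message whose
 * data.op_nents is zero, following the logic above. */
static bool send_hdr_and_data(bool atomic_active, bool atomic_silent,
			      bool rdma_active, bool rdma_silent,
			      bool has_rdma_cookie)
{
	bool ops_present = atomic_active || rdma_active;
	bool all_silent = !(atomic_active && !atomic_silent) &&
			  !(rdma_active && !rdma_silent);

	return !(ops_present && all_silent && !has_rdma_cookie);
}

int main(void)
{
	printf("%d\n", send_hdr_and_data(false, false, false, false, false)); /* 1: rds-ping case */
	printf("%d\n", send_hdr_and_data(true,  true,  false, false, false)); /* 0: lone silent atomic */
	printf("%d\n", send_hdr_and_data(false, false, true,  false, false)); /* 1: non-silent rdma */
	return 0;
}
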
299
300 if (rm->data.op_active && !conn->c_xmit_data_sent) {
301 rm->m_final_op = &rm->data;
283 ret = conn->c_trans->xmit(conn, rm, 302 ret = conn->c_trans->xmit(conn, rm,
284 conn->c_xmit_hdr_off, 303 conn->c_xmit_hdr_off,
285 conn->c_xmit_sg, 304 conn->c_xmit_sg,
@@ -295,7 +314,7 @@ int rds_send_xmit(struct rds_connection *conn)
295 ret -= tmp; 314 ret -= tmp;
296 } 315 }
297 316
298 sg = &rm->m_sg[conn->c_xmit_sg]; 317 sg = &rm->data.op_sg[conn->c_xmit_sg];
299 while (ret) { 318 while (ret) {
300 tmp = min_t(int, ret, sg->length - 319 tmp = min_t(int, ret, sg->length -
301 conn->c_xmit_data_off); 320 conn->c_xmit_data_off);
@@ -306,49 +325,63 @@ int rds_send_xmit(struct rds_connection *conn)
306 sg++; 325 sg++;
307 conn->c_xmit_sg++; 326 conn->c_xmit_sg++;
308 BUG_ON(ret != 0 && 327 BUG_ON(ret != 0 &&
309 conn->c_xmit_sg == rm->m_nents); 328 conn->c_xmit_sg == rm->data.op_nents);
310 } 329 }
311 } 330 }
331
332 if (conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
333 (conn->c_xmit_sg == rm->data.op_nents))
334 conn->c_xmit_data_sent = 1;
312 } 335 }
313 }
314 336
315 /* Nuke any messages we decided not to retransmit. */ 337 /*
316 if (!list_empty(&to_be_dropped)) 338 * A rm will only take multiple times through this loop
317 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); 339 * if there is a data op. Thus, if the data is sent (or there was
340 * none), then we're done with the rm.
341 */
342 if (!rm->data.op_active || conn->c_xmit_data_sent) {
343 conn->c_xmit_rm = NULL;
344 conn->c_xmit_sg = 0;
345 conn->c_xmit_hdr_off = 0;
346 conn->c_xmit_data_off = 0;
347 conn->c_xmit_rdma_sent = 0;
348 conn->c_xmit_atomic_sent = 0;
349 conn->c_xmit_data_sent = 0;
350
351 rds_message_put(rm);
352 }
353 }
318 354
319 if (conn->c_trans->xmit_complete) 355 if (conn->c_trans->xmit_complete)
320 conn->c_trans->xmit_complete(conn); 356 conn->c_trans->xmit_complete(conn);
321 357
322 /* 358 release_in_xmit(conn);
323 * We might be racing with another sender who queued a message but
324 * backed off on noticing that we held the c_send_lock. If we check
325 * for queued messages after dropping the sem then either we'll
326 * see the queued message or the queuer will get the sem. If we
327 * notice the queued message then we trigger an immediate retry.
328 *
329 * We need to be careful only to do this when we stopped processing
330 * the send queue because it was empty. It's the only way we
331 * stop processing the loop when the transport hasn't taken
332 * responsibility for forward progress.
333 */
334 mutex_unlock(&conn->c_send_lock);
335 359
336 if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) { 360 /* Nuke any messages we decided not to retransmit. */
337 /* We exhausted the send quota, but there's work left to 361 if (!list_empty(&to_be_dropped)) {
338 * do. Return and (re-)schedule the send worker. 362 /* irqs on here, so we can put(), unlike above */
339 */ 363 list_for_each_entry(rm, &to_be_dropped, m_conn_item)
340 ret = -EAGAIN; 364 rds_message_put(rm);
365 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
341 } 366 }
342 367
343 if (ret == 0 && was_empty) { 368 /*
344 /* A simple bit test would be way faster than taking the 369 * Other senders can queue a message after we last test the send queue
345 * spin lock */ 370 * but before we clear RDS_IN_XMIT. In that case they'd back off and
346 spin_lock_irqsave(&conn->c_lock, flags); 371 * not try and send their newly queued message. We need to check the
372 * send queue after having cleared RDS_IN_XMIT so that their message
373 * doesn't get stuck on the send queue.
374 *
375 * If the transport cannot continue (i.e ret != 0), then it must
376 * call us when more room is available, such as from the tx
377 * completion handler.
378 */
379 if (ret == 0) {
380 smp_mb();
347 if (!list_empty(&conn->c_send_queue)) { 381 if (!list_empty(&conn->c_send_queue)) {
348 rds_stats_inc(s_send_sem_queue_raced); 382 rds_stats_inc(s_send_lock_queue_raced);
349 ret = -EAGAIN; 383 goto restart;
350 } 384 }
351 spin_unlock_irqrestore(&conn->c_lock, flags);
352 } 385 }
353out: 386out:
354 return ret; 387 return ret;
@@ -376,52 +409,60 @@ static inline int rds_send_is_acked(struct rds_message *rm, u64 ack,
376} 409}
377 410
378/* 411/*
379 * Returns true if there are no messages on the send and retransmit queues 412 * This is pretty similar to what happens below in the ACK
380 * which have a sequence number greater than or equal to the given sequence 413 * handling code - except that we call here as soon as we get
381 * number. 414 * the IB send completion on the RDMA op and the accompanying
415 * message.
382 */ 416 */
383int rds_send_acked_before(struct rds_connection *conn, u64 seq) 417void rds_rdma_send_complete(struct rds_message *rm, int status)
384{ 418{
385 struct rds_message *rm, *tmp; 419 struct rds_sock *rs = NULL;
386 int ret = 1; 420 struct rm_rdma_op *ro;
421 struct rds_notifier *notifier;
422 unsigned long flags;
387 423
388 spin_lock(&conn->c_lock); 424 spin_lock_irqsave(&rm->m_rs_lock, flags);
389 425
390 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 426 ro = &rm->rdma;
391 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 427 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
392 ret = 0; 428 ro->op_active && ro->op_notify && ro->op_notifier) {
393 break; 429 notifier = ro->op_notifier;
394 } 430 rs = rm->m_rs;
431 sock_hold(rds_rs_to_sk(rs));
395 432
396 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 433 notifier->n_status = status;
397 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 434 spin_lock(&rs->rs_lock);
398 ret = 0; 435 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
399 break; 436 spin_unlock(&rs->rs_lock);
437
438 ro->op_notifier = NULL;
400 } 439 }
401 440
402 spin_unlock(&conn->c_lock); 441 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
403 442
404 return ret; 443 if (rs) {
444 rds_wake_sk_sleep(rs);
445 sock_put(rds_rs_to_sk(rs));
446 }
405} 447}
448EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
406 449
407/* 450/*
408 * This is pretty similar to what happens below in the ACK 451 * Just like above, except looks at atomic op
409 * handling code - except that we call here as soon as we get
410 * the IB send completion on the RDMA op and the accompanying
411 * message.
412 */ 452 */
413void rds_rdma_send_complete(struct rds_message *rm, int status) 453void rds_atomic_send_complete(struct rds_message *rm, int status)
414{ 454{
415 struct rds_sock *rs = NULL; 455 struct rds_sock *rs = NULL;
416 struct rds_rdma_op *ro; 456 struct rm_atomic_op *ao;
417 struct rds_notifier *notifier; 457 struct rds_notifier *notifier;
458 unsigned long flags;
418 459
419 spin_lock(&rm->m_rs_lock); 460 spin_lock_irqsave(&rm->m_rs_lock, flags);
420 461
421 ro = rm->m_rdma_op; 462 ao = &rm->atomic;
422 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && 463 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
423 ro && ro->r_notify && ro->r_notifier) { 464 && ao->op_active && ao->op_notify && ao->op_notifier) {
424 notifier = ro->r_notifier; 465 notifier = ao->op_notifier;
425 rs = rm->m_rs; 466 rs = rm->m_rs;
426 sock_hold(rds_rs_to_sk(rs)); 467 sock_hold(rds_rs_to_sk(rs));
427 468
@@ -430,17 +471,17 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
430 list_add_tail(&notifier->n_list, &rs->rs_notify_queue); 471 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
431 spin_unlock(&rs->rs_lock); 472 spin_unlock(&rs->rs_lock);
432 473
433 ro->r_notifier = NULL; 474 ao->op_notifier = NULL;
434 } 475 }
435 476
436 spin_unlock(&rm->m_rs_lock); 477 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
437 478
438 if (rs) { 479 if (rs) {
439 rds_wake_sk_sleep(rs); 480 rds_wake_sk_sleep(rs);
440 sock_put(rds_rs_to_sk(rs)); 481 sock_put(rds_rs_to_sk(rs));
441 } 482 }
442} 483}
443EXPORT_SYMBOL_GPL(rds_rdma_send_complete); 484EXPORT_SYMBOL_GPL(rds_atomic_send_complete);
444 485
445/* 486/*
446 * This is the same as rds_rdma_send_complete except we 487 * This is the same as rds_rdma_send_complete except we
@@ -448,15 +489,23 @@ EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
448 * socket, socket lock) and can just move the notifier. 489 * socket, socket lock) and can just move the notifier.
449 */ 490 */
450static inline void 491static inline void
451__rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status) 492__rds_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
452{ 493{
453 struct rds_rdma_op *ro; 494 struct rm_rdma_op *ro;
495 struct rm_atomic_op *ao;
496
497 ro = &rm->rdma;
498 if (ro->op_active && ro->op_notify && ro->op_notifier) {
499 ro->op_notifier->n_status = status;
500 list_add_tail(&ro->op_notifier->n_list, &rs->rs_notify_queue);
501 ro->op_notifier = NULL;
502 }
454 503
455 ro = rm->m_rdma_op; 504 ao = &rm->atomic;
456 if (ro && ro->r_notify && ro->r_notifier) { 505 if (ao->op_active && ao->op_notify && ao->op_notifier) {
457 ro->r_notifier->n_status = status; 506 ao->op_notifier->n_status = status;
458 list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue); 507 list_add_tail(&ao->op_notifier->n_list, &rs->rs_notify_queue);
459 ro->r_notifier = NULL; 508 ao->op_notifier = NULL;
460 } 509 }
461 510
462 /* No need to wake the app - caller does this */ 511 /* No need to wake the app - caller does this */
@@ -468,7 +517,7 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
468 * So speed is not an issue here. 517 * So speed is not an issue here.
469 */ 518 */
470struct rds_message *rds_send_get_message(struct rds_connection *conn, 519struct rds_message *rds_send_get_message(struct rds_connection *conn,
471 struct rds_rdma_op *op) 520 struct rm_rdma_op *op)
472{ 521{
473 struct rds_message *rm, *tmp, *found = NULL; 522 struct rds_message *rm, *tmp, *found = NULL;
474 unsigned long flags; 523 unsigned long flags;
@@ -476,7 +525,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
476 spin_lock_irqsave(&conn->c_lock, flags); 525 spin_lock_irqsave(&conn->c_lock, flags);
477 526
478 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 527 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
479 if (rm->m_rdma_op == op) { 528 if (&rm->rdma == op) {
480 atomic_inc(&rm->m_refcount); 529 atomic_inc(&rm->m_refcount);
481 found = rm; 530 found = rm;
482 goto out; 531 goto out;
@@ -484,7 +533,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
484 } 533 }
485 534
486 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 535 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
487 if (rm->m_rdma_op == op) { 536 if (&rm->rdma == op) {
488 atomic_inc(&rm->m_refcount); 537 atomic_inc(&rm->m_refcount);
489 found = rm; 538 found = rm;
490 break; 539 break;
@@ -544,19 +593,20 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
544 spin_lock(&rs->rs_lock); 593 spin_lock(&rs->rs_lock);
545 594
546 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { 595 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
547 struct rds_rdma_op *ro = rm->m_rdma_op; 596 struct rm_rdma_op *ro = &rm->rdma;
548 struct rds_notifier *notifier; 597 struct rds_notifier *notifier;
549 598
550 list_del_init(&rm->m_sock_item); 599 list_del_init(&rm->m_sock_item);
551 rds_send_sndbuf_remove(rs, rm); 600 rds_send_sndbuf_remove(rs, rm);
552 601
553 if (ro && ro->r_notifier && (status || ro->r_notify)) { 602 if (ro->op_active && ro->op_notifier &&
554 notifier = ro->r_notifier; 603 (ro->op_notify || (ro->op_recverr && status))) {
604 notifier = ro->op_notifier;
555 list_add_tail(&notifier->n_list, 605 list_add_tail(&notifier->n_list,
556 &rs->rs_notify_queue); 606 &rs->rs_notify_queue);
557 if (!notifier->n_status) 607 if (!notifier->n_status)
558 notifier->n_status = status; 608 notifier->n_status = status;
559 rm->m_rdma_op->r_notifier = NULL; 609 rm->rdma.op_notifier = NULL;
560 } 610 }
561 was_on_sock = 1; 611 was_on_sock = 1;
562 rm->m_rs = NULL; 612 rm->m_rs = NULL;
@@ -619,9 +669,8 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
619{ 669{
620 struct rds_message *rm, *tmp; 670 struct rds_message *rm, *tmp;
621 struct rds_connection *conn; 671 struct rds_connection *conn;
622 unsigned long flags, flags2; 672 unsigned long flags;
623 LIST_HEAD(list); 673 LIST_HEAD(list);
624 int wake = 0;
625 674
626 /* get all the messages we're dropping under the rs lock */ 675 /* get all the messages we're dropping under the rs lock */
627 spin_lock_irqsave(&rs->rs_lock, flags); 676 spin_lock_irqsave(&rs->rs_lock, flags);
@@ -631,59 +680,54 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
631 dest->sin_port != rm->m_inc.i_hdr.h_dport)) 680 dest->sin_port != rm->m_inc.i_hdr.h_dport))
632 continue; 681 continue;
633 682
634 wake = 1;
635 list_move(&rm->m_sock_item, &list); 683 list_move(&rm->m_sock_item, &list);
636 rds_send_sndbuf_remove(rs, rm); 684 rds_send_sndbuf_remove(rs, rm);
637 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); 685 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
638 } 686 }
639 687
640 /* order flag updates with the rs lock */ 688 /* order flag updates with the rs lock */
641 if (wake) 689 smp_mb__after_clear_bit();
642 smp_mb__after_clear_bit();
643 690
644 spin_unlock_irqrestore(&rs->rs_lock, flags); 691 spin_unlock_irqrestore(&rs->rs_lock, flags);
645 692
646 conn = NULL; 693 if (list_empty(&list))
694 return;
647 695
648 /* now remove the messages from the conn list as needed */ 696 /* Remove the messages from the conn */
649 list_for_each_entry(rm, &list, m_sock_item) { 697 list_for_each_entry(rm, &list, m_sock_item) {
650 /* We do this here rather than in the loop above, so that
651 * we don't have to nest m_rs_lock under rs->rs_lock */
652 spin_lock_irqsave(&rm->m_rs_lock, flags2);
653 /* If this is a RDMA operation, notify the app. */
654 spin_lock(&rs->rs_lock);
655 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
656 spin_unlock(&rs->rs_lock);
657 rm->m_rs = NULL;
658 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
659 698
699 conn = rm->m_inc.i_conn;
700
701 spin_lock_irqsave(&conn->c_lock, flags);
660 /* 702 /*
661 * If we see this flag cleared then we're *sure* that someone 703 * Maybe someone else beat us to removing rm from the conn.
662 * else beat us to removing it from the conn. If we race 704 * If we race with their flag update we'll get the lock and
663 * with their flag update we'll get the lock and then really 705 * then really see that the flag has been cleared.
664 * see that the flag has been cleared.
665 */ 706 */
666 if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 707 if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
708 spin_unlock_irqrestore(&conn->c_lock, flags);
667 continue; 709 continue;
668
669 if (conn != rm->m_inc.i_conn) {
670 if (conn)
671 spin_unlock_irqrestore(&conn->c_lock, flags);
672 conn = rm->m_inc.i_conn;
673 spin_lock_irqsave(&conn->c_lock, flags);
674 } 710 }
711 list_del_init(&rm->m_conn_item);
712 spin_unlock_irqrestore(&conn->c_lock, flags);
675 713
676 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { 714 /*
677 list_del_init(&rm->m_conn_item); 715 * Couldn't grab m_rs_lock in top loop (lock ordering),
678 rds_message_put(rm); 716 * but we can now.
679 } 717 */
680 } 718 spin_lock_irqsave(&rm->m_rs_lock, flags);
681 719
682 if (conn) 720 spin_lock(&rs->rs_lock);
683 spin_unlock_irqrestore(&conn->c_lock, flags); 721 __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
722 spin_unlock(&rs->rs_lock);
684 723
685 if (wake) 724 rm->m_rs = NULL;
686 rds_wake_sk_sleep(rs); 725 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
726
727 rds_message_put(rm);
728 }
729
730 rds_wake_sk_sleep(rs);
687 731
688 while (!list_empty(&list)) { 732 while (!list_empty(&list)) {
689 rm = list_entry(list.next, struct rds_message, m_sock_item); 733 rm = list_entry(list.next, struct rds_message, m_sock_item);
@@ -763,6 +807,63 @@ out:
763 return *queued; 807 return *queued;
764} 808}
765 809
810/*
811 * rds_message is getting to be quite complicated, and we'd like to allocate
812 * it all in one go. This figures out how big it needs to be up front.
813 */
814static int rds_rm_size(struct msghdr *msg, int data_len)
815{
816 struct cmsghdr *cmsg;
817 int size = 0;
818 int cmsg_groups = 0;
819 int retval;
820
821 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
822 if (!CMSG_OK(msg, cmsg))
823 return -EINVAL;
824
825 if (cmsg->cmsg_level != SOL_RDS)
826 continue;
827
828 switch (cmsg->cmsg_type) {
829 case RDS_CMSG_RDMA_ARGS:
830 cmsg_groups |= 1;
831 retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
832 if (retval < 0)
833 return retval;
834 size += retval;
835
836 break;
837
838 case RDS_CMSG_RDMA_DEST:
839 case RDS_CMSG_RDMA_MAP:
840 cmsg_groups |= 2;
841 /* these are valid but do not add any size */
842 break;
843
844 case RDS_CMSG_ATOMIC_CSWP:
845 case RDS_CMSG_ATOMIC_FADD:
846 case RDS_CMSG_MASKED_ATOMIC_CSWP:
847 case RDS_CMSG_MASKED_ATOMIC_FADD:
848 cmsg_groups |= 1;
849 size += sizeof(struct scatterlist);
850 break;
851
852 default:
853 return -EINVAL;
854 }
855
856 }
857
858 size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
859
860 /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
861 if (cmsg_groups == 3)
862 return -EINVAL;
863
864 return size;
865}
866
766static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, 867static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
767 struct msghdr *msg, int *allocated_mr) 868 struct msghdr *msg, int *allocated_mr)
768{ 869{
@@ -777,7 +878,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
777 continue; 878 continue;
778 879
779 /* As a side effect, RDMA_DEST and RDMA_MAP will set 880 /* As a side effect, RDMA_DEST and RDMA_MAP will set
780 * rm->m_rdma_cookie and rm->m_rdma_mr. 881 * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr.
781 */ 882 */
782 switch (cmsg->cmsg_type) { 883 switch (cmsg->cmsg_type) {
783 case RDS_CMSG_RDMA_ARGS: 884 case RDS_CMSG_RDMA_ARGS:
@@ -793,6 +894,12 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
793 if (!ret) 894 if (!ret)
794 *allocated_mr = 1; 895 *allocated_mr = 1;
795 break; 896 break;
897 case RDS_CMSG_ATOMIC_CSWP:
898 case RDS_CMSG_ATOMIC_FADD:
899 case RDS_CMSG_MASKED_ATOMIC_CSWP:
900 case RDS_CMSG_MASKED_ATOMIC_FADD:
901 ret = rds_cmsg_atomic(rs, rm, cmsg);
902 break;
796 903
797 default: 904 default:
798 return -EINVAL; 905 return -EINVAL;
@@ -850,13 +957,26 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
850 goto out; 957 goto out;
851 } 958 }
852 959
853 rm = rds_message_copy_from_user(msg->msg_iov, payload_len); 960 /* size of rm including all sgs */
854 if (IS_ERR(rm)) { 961 ret = rds_rm_size(msg, payload_len);
855 ret = PTR_ERR(rm); 962 if (ret < 0)
856 rm = NULL; 963 goto out;
964
965 rm = rds_message_alloc(ret, GFP_KERNEL);
966 if (!rm) {
967 ret = -ENOMEM;
857 goto out; 968 goto out;
858 } 969 }
859 970
971 /* Attach data to the rm */
972 if (payload_len) {
973 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
974 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
975 if (ret)
976 goto out;
977 }
978 rm->data.op_active = 1;
979
860 rm->m_daddr = daddr; 980 rm->m_daddr = daddr;
861 981
862 /* rds_conn_create has a spinlock that runs with IRQ off. 982 /* rds_conn_create has a spinlock that runs with IRQ off.
@@ -879,22 +999,23 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
879 if (ret) 999 if (ret)
880 goto out; 1000 goto out;
881 1001
882 if ((rm->m_rdma_cookie || rm->m_rdma_op) && 1002 if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
883 conn->c_trans->xmit_rdma == NULL) {
884 if (printk_ratelimit()) 1003 if (printk_ratelimit())
885 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", 1004 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
886 rm->m_rdma_op, conn->c_trans->xmit_rdma); 1005 &rm->rdma, conn->c_trans->xmit_rdma);
887 ret = -EOPNOTSUPP; 1006 ret = -EOPNOTSUPP;
888 goto out; 1007 goto out;
889 } 1008 }
890 1009
891 /* If the connection is down, trigger a connect. We may 1010 if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) {
892 * have scheduled a delayed reconnect however - in this case 1011 if (printk_ratelimit())
893 * we should not interfere. 1012 printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n",
894 */ 1013 &rm->atomic, conn->c_trans->xmit_atomic);
895 if (rds_conn_state(conn) == RDS_CONN_DOWN && 1014 ret = -EOPNOTSUPP;
896 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) 1015 goto out;
897 queue_delayed_work(rds_wq, &conn->c_conn_w, 0); 1016 }
1017
1018 rds_conn_connect_if_down(conn);
898 1019
899 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); 1020 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
900 if (ret) { 1021 if (ret) {
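rds_conn_connect_if_down() replaces the open-coded check deleted in this hunk (and again in the rds_send_pong() hunk below). A sketch of what the helper is assumed to do, reconstructed directly from the removed lines; the real definition is expected in net/rds/connection.c:

	void rds_conn_connect_if_down(struct rds_connection *conn)
	{
		/* Trigger a connect if the connection is down, but do not
		 * interfere with an already-scheduled delayed reconnect. */
		if (rds_conn_state(conn) == RDS_CONN_DOWN &&
		    !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
			queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
	}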
@@ -938,7 +1059,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
938 rds_stats_inc(s_send_queued); 1059 rds_stats_inc(s_send_queued);
939 1060
940 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) 1061 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
941 rds_send_worker(&conn->c_send_w.work); 1062 rds_send_xmit(conn);
942 1063
943 rds_message_put(rm); 1064 rds_message_put(rm);
944 return payload_len; 1065 return payload_len;
@@ -966,20 +1087,15 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
966 int ret = 0; 1087 int ret = 0;
967 1088
968 rm = rds_message_alloc(0, GFP_ATOMIC); 1089 rm = rds_message_alloc(0, GFP_ATOMIC);
969 if (rm == NULL) { 1090 if (!rm) {
970 ret = -ENOMEM; 1091 ret = -ENOMEM;
971 goto out; 1092 goto out;
972 } 1093 }
973 1094
974 rm->m_daddr = conn->c_faddr; 1095 rm->m_daddr = conn->c_faddr;
1096 rm->data.op_active = 1;
975 1097
976 /* If the connection is down, trigger a connect. We may 1098 rds_conn_connect_if_down(conn);
977 * have scheduled a delayed reconnect however - in this case
978 * we should not interfere.
979 */
980 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
981 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
982 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
983 1099
984 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); 1100 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL);
985 if (ret) 1101 if (ret)
@@ -999,7 +1115,9 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
999 rds_stats_inc(s_send_queued); 1115 rds_stats_inc(s_send_queued);
1000 rds_stats_inc(s_send_pong); 1116 rds_stats_inc(s_send_pong);
1001 1117
1002 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 1118 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
1119 rds_send_xmit(conn);
1120
1003 rds_message_put(rm); 1121 rds_message_put(rm);
1004 return 0; 1122 return 0;
1005 1123
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 7598eb07cfb..10c759ccac0 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -57,8 +57,8 @@ static const char *const rds_stat_names[] = {
57 "recv_ping", 57 "recv_ping",
58 "send_queue_empty", 58 "send_queue_empty",
59 "send_queue_full", 59 "send_queue_full",
60 "send_sem_contention", 60 "send_lock_contention",
61 "send_sem_queue_raced", 61 "send_lock_queue_raced",
62 "send_immediate_retry", 62 "send_immediate_retry",
63 "send_delayed_retry", 63 "send_delayed_retry",
64 "send_drop_acked", 64 "send_drop_acked",
@@ -143,7 +143,7 @@ void rds_stats_exit(void)
143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info); 143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info);
144} 144}
145 145
146int __init rds_stats_init(void) 146int rds_stats_init(void)
147{ 147{
148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info); 148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info);
149 return 0; 149 return 0;
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index 7829a20325d..25ad0c77a26 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -105,13 +105,13 @@ void rds_sysctl_exit(void)
105 unregister_sysctl_table(rds_sysctl_reg_table); 105 unregister_sysctl_table(rds_sysctl_reg_table);
106} 106}
107 107
108int __init rds_sysctl_init(void) 108int rds_sysctl_init(void)
109{ 109{
110 rds_sysctl_reconnect_min = msecs_to_jiffies(1); 110 rds_sysctl_reconnect_min = msecs_to_jiffies(1);
111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; 111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
112 112
113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table); 113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table);
114 if (rds_sysctl_reg_table == NULL) 114 if (!rds_sysctl_reg_table)
115 return -ENOMEM; 115 return -ENOMEM;
116 return 0; 116 return 0;
117} 117}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index babf4577ff7..eeb08e6ab96 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -200,7 +200,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
200 struct rds_tcp_connection *tc; 200 struct rds_tcp_connection *tc;
201 201
202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); 202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
203 if (tc == NULL) 203 if (!tc)
204 return -ENOMEM; 204 return -ENOMEM;
205 205
206 tc->t_sock = NULL; 206 tc->t_sock = NULL;
@@ -258,7 +258,6 @@ struct rds_transport rds_tcp_transport = {
258 .laddr_check = rds_tcp_laddr_check, 258 .laddr_check = rds_tcp_laddr_check,
259 .xmit_prepare = rds_tcp_xmit_prepare, 259 .xmit_prepare = rds_tcp_xmit_prepare,
260 .xmit_complete = rds_tcp_xmit_complete, 260 .xmit_complete = rds_tcp_xmit_complete,
261 .xmit_cong_map = rds_tcp_xmit_cong_map,
262 .xmit = rds_tcp_xmit, 261 .xmit = rds_tcp_xmit,
263 .recv = rds_tcp_recv, 262 .recv = rds_tcp_recv,
264 .conn_alloc = rds_tcp_conn_alloc, 263 .conn_alloc = rds_tcp_conn_alloc,
@@ -266,7 +265,6 @@ struct rds_transport rds_tcp_transport = {
266 .conn_connect = rds_tcp_conn_connect, 265 .conn_connect = rds_tcp_conn_connect,
267 .conn_shutdown = rds_tcp_conn_shutdown, 266 .conn_shutdown = rds_tcp_conn_shutdown,
268 .inc_copy_to_user = rds_tcp_inc_copy_to_user, 267 .inc_copy_to_user = rds_tcp_inc_copy_to_user,
269 .inc_purge = rds_tcp_inc_purge,
270 .inc_free = rds_tcp_inc_free, 268 .inc_free = rds_tcp_inc_free,
271 .stats_info_copy = rds_tcp_stats_info_copy, 269 .stats_info_copy = rds_tcp_stats_info_copy,
272 .exit = rds_tcp_exit, 270 .exit = rds_tcp_exit,
@@ -276,14 +274,14 @@ struct rds_transport rds_tcp_transport = {
276 .t_prefer_loopback = 1, 274 .t_prefer_loopback = 1,
277}; 275};
278 276
279int __init rds_tcp_init(void) 277int rds_tcp_init(void)
280{ 278{
281 int ret; 279 int ret;
282 280
283 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", 281 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection",
284 sizeof(struct rds_tcp_connection), 282 sizeof(struct rds_tcp_connection),
285 0, 0, NULL); 283 0, 0, NULL);
286 if (rds_tcp_conn_slab == NULL) { 284 if (!rds_tcp_conn_slab) {
287 ret = -ENOMEM; 285 ret = -ENOMEM;
288 goto out; 286 goto out;
289 } 287 }
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 844fa6b9cf5..f5e6f7bebb5 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -43,7 +43,7 @@ struct rds_tcp_statistics {
43}; 43};
44 44
45/* tcp.c */ 45/* tcp.c */
46int __init rds_tcp_init(void); 46int rds_tcp_init(void);
47void rds_tcp_exit(void); 47void rds_tcp_exit(void);
48void rds_tcp_tune(struct socket *sock); 48void rds_tcp_tune(struct socket *sock);
49void rds_tcp_nonagle(struct socket *sock); 49void rds_tcp_nonagle(struct socket *sock);
@@ -61,16 +61,15 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn);
61void rds_tcp_state_change(struct sock *sk); 61void rds_tcp_state_change(struct sock *sk);
62 62
63/* tcp_listen.c */ 63/* tcp_listen.c */
64int __init rds_tcp_listen_init(void); 64int rds_tcp_listen_init(void);
65void rds_tcp_listen_stop(void); 65void rds_tcp_listen_stop(void);
66void rds_tcp_listen_data_ready(struct sock *sk, int bytes); 66void rds_tcp_listen_data_ready(struct sock *sk, int bytes);
67 67
68/* tcp_recv.c */ 68/* tcp_recv.c */
69int __init rds_tcp_recv_init(void); 69int rds_tcp_recv_init(void);
70void rds_tcp_recv_exit(void); 70void rds_tcp_recv_exit(void);
71void rds_tcp_data_ready(struct sock *sk, int bytes); 71void rds_tcp_data_ready(struct sock *sk, int bytes);
72int rds_tcp_recv(struct rds_connection *conn); 72int rds_tcp_recv(struct rds_connection *conn);
73void rds_tcp_inc_purge(struct rds_incoming *inc);
74void rds_tcp_inc_free(struct rds_incoming *inc); 73void rds_tcp_inc_free(struct rds_incoming *inc);
75int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 74int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
76 size_t size); 75 size_t size);
@@ -81,8 +80,6 @@ void rds_tcp_xmit_complete(struct rds_connection *conn);
81int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 80int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
82 unsigned int hdr_off, unsigned int sg, unsigned int off); 81 unsigned int hdr_off, unsigned int sg, unsigned int off);
83void rds_tcp_write_space(struct sock *sk); 82void rds_tcp_write_space(struct sock *sk);
84int rds_tcp_xmit_cong_map(struct rds_connection *conn,
85 struct rds_cong_map *map, unsigned long offset);
86 83
87/* tcp_stats.c */ 84/* tcp_stats.c */
88DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats); 85DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index c519939e8da..af95c8e058f 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -45,7 +45,7 @@ void rds_tcp_state_change(struct sock *sk)
45 45
46 read_lock_bh(&sk->sk_callback_lock); 46 read_lock_bh(&sk->sk_callback_lock);
47 conn = sk->sk_user_data; 47 conn = sk->sk_user_data;
48 if (conn == NULL) { 48 if (!conn) {
49 state_change = sk->sk_state_change; 49 state_change = sk->sk_state_change;
50 goto out; 50 goto out;
51 } 51 }
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 27844f231d1..8b5cc4aa886 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -116,7 +116,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
116 116
117 read_lock_bh(&sk->sk_callback_lock); 117 read_lock_bh(&sk->sk_callback_lock);
118 ready = sk->sk_user_data; 118 ready = sk->sk_user_data;
119 if (ready == NULL) { /* check for teardown race */ 119 if (!ready) { /* check for teardown race */
120 ready = sk->sk_data_ready; 120 ready = sk->sk_data_ready;
121 goto out; 121 goto out;
122 } 122 }
@@ -135,7 +135,7 @@ out:
135 ready(sk, bytes); 135 ready(sk, bytes);
136} 136}
137 137
138int __init rds_tcp_listen_init(void) 138int rds_tcp_listen_init(void)
139{ 139{
140 struct sockaddr_in sin; 140 struct sockaddr_in sin;
141 struct socket *sock = NULL; 141 struct socket *sock = NULL;
@@ -178,7 +178,7 @@ void rds_tcp_listen_stop(void)
178 struct socket *sock = rds_tcp_listen_sock; 178 struct socket *sock = rds_tcp_listen_sock;
179 struct sock *sk; 179 struct sock *sk;
180 180
181 if (sock == NULL) 181 if (!sock)
182 return; 182 return;
183 183
184 sk = sock->sk; 184 sk = sock->sk;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e4379740410..67263fbee62 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -39,7 +39,7 @@
39 39
40static struct kmem_cache *rds_tcp_incoming_slab; 40static struct kmem_cache *rds_tcp_incoming_slab;
41 41
42void rds_tcp_inc_purge(struct rds_incoming *inc) 42static void rds_tcp_inc_purge(struct rds_incoming *inc)
43{ 43{
44 struct rds_tcp_incoming *tinc; 44 struct rds_tcp_incoming *tinc;
45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); 45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc);
@@ -190,10 +190,10 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
190 * processing. 190 * processing.
191 */ 191 */
192 while (left) { 192 while (left) {
193 if (tinc == NULL) { 193 if (!tinc) {
194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab, 194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab,
195 arg->gfp); 195 arg->gfp);
196 if (tinc == NULL) { 196 if (!tinc) {
197 desc->error = -ENOMEM; 197 desc->error = -ENOMEM;
198 goto out; 198 goto out;
199 } 199 }
@@ -229,7 +229,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
229 229
230 if (left && tc->t_tinc_data_rem) { 230 if (left && tc->t_tinc_data_rem) {
231 clone = skb_clone(skb, arg->gfp); 231 clone = skb_clone(skb, arg->gfp);
232 if (clone == NULL) { 232 if (!clone) {
233 desc->error = -ENOMEM; 233 desc->error = -ENOMEM;
234 goto out; 234 goto out;
235 } 235 }
@@ -326,7 +326,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
326 326
327 read_lock_bh(&sk->sk_callback_lock); 327 read_lock_bh(&sk->sk_callback_lock);
328 conn = sk->sk_user_data; 328 conn = sk->sk_user_data;
329 if (conn == NULL) { /* check for teardown race */ 329 if (!conn) { /* check for teardown race */
330 ready = sk->sk_data_ready; 330 ready = sk->sk_data_ready;
331 goto out; 331 goto out;
332 } 332 }
@@ -342,12 +342,12 @@ out:
342 ready(sk, bytes); 342 ready(sk, bytes);
343} 343}
344 344
345int __init rds_tcp_recv_init(void) 345int rds_tcp_recv_init(void)
346{ 346{
347 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", 347 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming",
348 sizeof(struct rds_tcp_incoming), 348 sizeof(struct rds_tcp_incoming),
349 0, 0, NULL); 349 0, 0, NULL);
350 if (rds_tcp_incoming_slab == NULL) 350 if (!rds_tcp_incoming_slab)
351 return -ENOMEM; 351 return -ENOMEM;
352 return 0; 352 return 0;
353} 353}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 2f012a07d94..aa16841afbd 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -77,56 +77,6 @@ int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
77} 77}
78 78
79/* the core send_sem serializes this with other xmit and shutdown */ 79/* the core send_sem serializes this with other xmit and shutdown */
80int rds_tcp_xmit_cong_map(struct rds_connection *conn,
81 struct rds_cong_map *map, unsigned long offset)
82{
83 static struct rds_header rds_tcp_map_header = {
84 .h_flags = RDS_FLAG_CONG_BITMAP,
85 };
86 struct rds_tcp_connection *tc = conn->c_transport_data;
87 unsigned long i;
88 int ret;
89 int copied = 0;
90
91 /* Some problem claims cpu_to_be32(constant) isn't a constant. */
92 rds_tcp_map_header.h_len = cpu_to_be32(RDS_CONG_MAP_BYTES);
93
94 if (offset < sizeof(struct rds_header)) {
95 ret = rds_tcp_sendmsg(tc->t_sock,
96 (void *)&rds_tcp_map_header + offset,
97 sizeof(struct rds_header) - offset);
98 if (ret <= 0)
99 return ret;
100 offset += ret;
101 copied = ret;
102 if (offset < sizeof(struct rds_header))
103 return ret;
104 }
105
106 offset -= sizeof(struct rds_header);
107 i = offset / PAGE_SIZE;
108 offset = offset % PAGE_SIZE;
109 BUG_ON(i >= RDS_CONG_MAP_PAGES);
110
111 do {
112 ret = tc->t_sock->ops->sendpage(tc->t_sock,
113 virt_to_page(map->m_page_addrs[i]),
114 offset, PAGE_SIZE - offset,
115 MSG_DONTWAIT);
116 if (ret <= 0)
117 break;
118 copied += ret;
119 offset += ret;
120 if (offset == PAGE_SIZE) {
121 offset = 0;
122 i++;
123 }
124 } while (i < RDS_CONG_MAP_PAGES);
125
126 return copied ? copied : ret;
127}
128
129/* the core send_sem serializes this with other xmit and shutdown */
130int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 80int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
131 unsigned int hdr_off, unsigned int sg, unsigned int off) 81 unsigned int hdr_off, unsigned int sg, unsigned int off)
132{ 82{
@@ -166,21 +116,21 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
166 goto out; 116 goto out;
167 } 117 }
168 118
169 while (sg < rm->m_nents) { 119 while (sg < rm->data.op_nents) {
170 ret = tc->t_sock->ops->sendpage(tc->t_sock, 120 ret = tc->t_sock->ops->sendpage(tc->t_sock,
171 sg_page(&rm->m_sg[sg]), 121 sg_page(&rm->data.op_sg[sg]),
172 rm->m_sg[sg].offset + off, 122 rm->data.op_sg[sg].offset + off,
173 rm->m_sg[sg].length - off, 123 rm->data.op_sg[sg].length - off,
174 MSG_DONTWAIT|MSG_NOSIGNAL); 124 MSG_DONTWAIT|MSG_NOSIGNAL);
175 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]), 125 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
176 rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off, 126 rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
177 ret); 127 ret);
178 if (ret <= 0) 128 if (ret <= 0)
179 break; 129 break;
180 130
181 off += ret; 131 off += ret;
182 done += ret; 132 done += ret;
183 if (off == rm->m_sg[sg].length) { 133 if (off == rm->data.op_sg[sg].length) {
184 off = 0; 134 off = 0;
185 sg++; 135 sg++;
186 } 136 }
@@ -226,7 +176,7 @@ void rds_tcp_write_space(struct sock *sk)
226 176
227 read_lock_bh(&sk->sk_callback_lock); 177 read_lock_bh(&sk->sk_callback_lock);
228 conn = sk->sk_user_data; 178 conn = sk->sk_user_data;
229 if (conn == NULL) { 179 if (!conn) {
230 write_space = sk->sk_write_space; 180 write_space = sk->sk_write_space;
231 goto out; 181 goto out;
232 } 182 }
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 786c20eaaf5..0fd90f8c5f5 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -61,7 +61,7 @@
61 * 61 *
62 * Transition to state DISCONNECTING/DOWN: 62 * Transition to state DISCONNECTING/DOWN:
63 * - Inside the shutdown worker; synchronizes with xmit path 63 * - Inside the shutdown worker; synchronizes with xmit path
64 * through c_send_lock, and with connection management callbacks 64 * through RDS_IN_XMIT, and with connection management callbacks
65 * via c_cm_lock. 65 * via c_cm_lock.
66 * 66 *
67 * For receive callbacks, we rely on the underlying transport 67 * For receive callbacks, we rely on the underlying transport
@@ -110,7 +110,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
110 * We should *always* start with a random backoff; otherwise a broken connection 110 * We should *always* start with a random backoff; otherwise a broken connection
111 * will always take several iterations to be re-established. 111 * will always take several iterations to be re-established.
112 */ 112 */
113static void rds_queue_reconnect(struct rds_connection *conn) 113void rds_queue_reconnect(struct rds_connection *conn)
114{ 114{
115 unsigned long rand; 115 unsigned long rand;
116 116
@@ -156,58 +156,6 @@ void rds_connect_worker(struct work_struct *work)
156 } 156 }
157} 157}
158 158
159void rds_shutdown_worker(struct work_struct *work)
160{
161 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
162
163 /* shut it down unless it's down already */
164 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
165 /*
166 * Quiesce the connection mgmt handlers before we start tearing
167 * things down. We don't hold the mutex for the entire
168 * duration of the shutdown operation, else we may be
169 * deadlocking with the CM handler. Instead, the CM event
170 * handler is supposed to check for state DISCONNECTING
171 */
172 mutex_lock(&conn->c_cm_lock);
173 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) &&
174 !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
175 rds_conn_error(conn, "shutdown called in state %d\n",
176 atomic_read(&conn->c_state));
177 mutex_unlock(&conn->c_cm_lock);
178 return;
179 }
180 mutex_unlock(&conn->c_cm_lock);
181
182 mutex_lock(&conn->c_send_lock);
183 conn->c_trans->conn_shutdown(conn);
184 rds_conn_reset(conn);
185 mutex_unlock(&conn->c_send_lock);
186
187 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
188 /* This can happen - eg when we're in the middle of tearing
189 * down the connection, and someone unloads the rds module.
190 * Quite reproducible with loopback connections.
191 * Mostly harmless.
192 */
193 rds_conn_error(conn,
194 "%s: failed to transition to state DOWN, "
195 "current state is %d\n",
196 __func__,
197 atomic_read(&conn->c_state));
198 return;
199 }
200 }
201
202 /* Then reconnect if it's still live.
203 * The passive side of an IB loopback connection is never added
204 * to the conn hash, so we never trigger a reconnect on this
205 * conn - the reconnect is always triggered by the active peer. */
206 cancel_delayed_work(&conn->c_conn_w);
207 if (!hlist_unhashed(&conn->c_hash_node))
208 rds_queue_reconnect(conn);
209}
210
211void rds_send_worker(struct work_struct *work) 159void rds_send_worker(struct work_struct *work)
212{ 160{
213 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work); 161 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
@@ -252,15 +200,22 @@ void rds_recv_worker(struct work_struct *work)
252 } 200 }
253} 201}
254 202
203void rds_shutdown_worker(struct work_struct *work)
204{
205 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
206
207 rds_conn_shutdown(conn);
208}
209
255void rds_threads_exit(void) 210void rds_threads_exit(void)
256{ 211{
257 destroy_workqueue(rds_wq); 212 destroy_workqueue(rds_wq);
258} 213}
259 214
260int __init rds_threads_init(void) 215int rds_threads_init(void)
261{ 216{
262 rds_wq = create_workqueue("krdsd"); 217 rds_wq = create_singlethread_workqueue("krdsd");
263 if (rds_wq == NULL) 218 if (!rds_wq)
264 return -ENOMEM; 219 return -ENOMEM;
265 220
266 return 0; 221 return 0;
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7e106790135..7f2ac4fec36 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -71,19 +71,28 @@ void rds_trans_unregister(struct rds_transport *trans)
71} 71}
72EXPORT_SYMBOL_GPL(rds_trans_unregister); 72EXPORT_SYMBOL_GPL(rds_trans_unregister);
73 73
74void rds_trans_put(struct rds_transport *trans)
75{
76 if (trans && trans->t_owner)
77 module_put(trans->t_owner);
78}
79
74struct rds_transport *rds_trans_get_preferred(__be32 addr) 80struct rds_transport *rds_trans_get_preferred(__be32 addr)
75{ 81{
76 struct rds_transport *ret = NULL; 82 struct rds_transport *ret = NULL;
77 int i; 83 struct rds_transport *trans;
84 unsigned int i;
78 85
79 if (IN_LOOPBACK(ntohl(addr))) 86 if (IN_LOOPBACK(ntohl(addr)))
80 return &rds_loop_transport; 87 return &rds_loop_transport;
81 88
82 down_read(&rds_trans_sem); 89 down_read(&rds_trans_sem);
83 for (i = 0; i < RDS_TRANS_COUNT; i++) 90 for (i = 0; i < RDS_TRANS_COUNT; i++) {
84 { 91 trans = transports[i];
85 if (transports[i] && (transports[i]->laddr_check(addr) == 0)) { 92
86 ret = transports[i]; 93 if (trans && (trans->laddr_check(addr) == 0) &&
94 (!trans->t_owner || try_module_get(trans->t_owner))) {
95 ret = trans;
87 break; 96 break;
88 } 97 }
89 } 98 }
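rds_trans_get_preferred() now pins the transport module with try_module_get(), so every successful lookup must be balanced by rds_trans_put(). A minimal sketch of the intended pairing; the calling function below is hypothetical and not part of this diff:

	/* Sketch: hypothetical caller showing the get/put pairing. */
	static int example_use_transport(__be32 addr)
	{
		struct rds_transport *trans = rds_trans_get_preferred(addr);

		if (!trans)
			return -EADDRNOTAVAIL;

		/* ... bind the connection to trans ... */

		rds_trans_put(trans);	/* drops the try_module_get() reference */
		return 0;
	}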
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 00000000000..e6b5190dadd
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,80 @@
1#ifndef _LINUX_XLIST_H
2#define _LINUX_XLIST_H
3
4#include <linux/stddef.h>
5#include <linux/poison.h>
6#include <linux/prefetch.h>
7#include <asm/system.h>
8
9struct xlist_head {
10 struct xlist_head *next;
11};
12
13static inline void INIT_XLIST_HEAD(struct xlist_head *list)
14{
15 list->next = NULL;
16}
17
18static inline int xlist_empty(struct xlist_head *head)
19{
20 return head->next == NULL;
21}
22
23static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
24 struct xlist_head *head)
25{
26 struct xlist_head *cur;
27 struct xlist_head *check;
28
29 while (1) {
30 cur = head->next;
31 tail->next = cur;
32 check = cmpxchg(&head->next, cur, new);
33 if (check == cur)
34 break;
35 }
36}
37
38static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
39{
40 struct xlist_head *cur;
41 struct xlist_head *check;
42 struct xlist_head *next;
43
44 while (1) {
45 cur = head->next;
46 if (!cur)
47 goto out;
48
49 next = cur->next;
50 check = cmpxchg(&head->next, cur, next);
51 if (check == cur)
52 goto out;
53 }
54out:
55 return cur;
56}
57
58static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
59{
60 struct xlist_head *cur;
61
62 cur = head->next;
63 if (!cur)
64 return NULL;
65
66 head->next = cur->next;
67 return cur;
68}
69
70static inline void xlist_splice(struct xlist_head *list,
71 struct xlist_head *head)
72{
73 struct xlist_head *cur;
74
75 WARN_ON(head->next);
76 cur = xchg(&list->next, NULL);
77 head->next = cur;
78}
79
80#endif
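xlist is a minimal lock-free LIFO: xlist_add() pushes a chain with cmpxchg(), xlist_del_head() pops one entry, and xlist_splice() steals the whole list with xchg(). A hedged usage sketch follows, assuming a single consumer for the pop path; the embedding structure and helper names are hypothetical and only illustrate the single-node push/pop pattern:

	/* Sketch: hypothetical free list built on the xlist primitives above. */
	struct frag {
		struct xlist_head	f_item;
		void			*f_data;
	};

	static struct xlist_head frag_pool;	/* INIT_XLIST_HEAD() at init time */

	static void frag_free(struct frag *frag)
	{
		/* Single-node push: new and tail are the same entry. */
		xlist_add(&frag->f_item, &frag->f_item, &frag_pool);
	}

	static struct frag *frag_alloc(void)
	{
		struct xlist_head *item = xlist_del_head(&frag_pool);

		return item ? container_of(item, struct frag, f_item) : NULL;
	}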
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index 3713d7ecab9..1bca6d49ec9 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -142,7 +142,7 @@ static unsigned long rfkill_last_scheduled;
142static unsigned long rfkill_ratelimit(const unsigned long last) 142static unsigned long rfkill_ratelimit(const unsigned long last)
143{ 143{
144 const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY); 144 const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY);
145 return (time_after(jiffies, last + delay)) ? 0 : delay; 145 return time_after(jiffies, last + delay) ? 0 : delay;
146} 146}
147 147
148static void rfkill_schedule_ratelimited(void) 148static void rfkill_schedule_ratelimited(void)
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index a750a28e022..fa5f5641a2c 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -114,7 +114,7 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
114 if (ax25s) 114 if (ax25s)
115 ax25_cb_put(ax25s); 115 ax25_cb_put(ax25s);
116 116
117 return (neigh->ax25 != NULL); 117 return neigh->ax25 != NULL;
118} 118}
119 119
120/* 120/*
@@ -137,7 +137,7 @@ static int rose_link_up(struct rose_neigh *neigh)
137 if (ax25s) 137 if (ax25s)
138 ax25_cb_put(ax25s); 138 ax25_cb_put(ax25s);
139 139
140 return (neigh->ax25 != NULL); 140 return neigh->ax25 != NULL;
141} 141}
142 142
143/* 143/*
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d..a36270a994d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT
518 To compile this code as a module, choose M here: the 518 To compile this code as a module, choose M here: the
519 module will be called act_skbedit. 519 module will be called act_skbedit.
520 520
521config NET_ACT_CSUM
522 tristate "Checksum Updating"
523 depends on NET_CLS_ACT && INET
524 ---help---
525 Say Y here to update some common checksums after direct
526 packet alterations.
527
528 To compile this code as a module, choose M here: the
529 module will be called act_csum.
530
521config NET_CLS_IND 531config NET_CLS_IND
522 bool "Incoming device classification" 532 bool "Incoming device classification"
523 depends on NET_CLS_U32 || NET_CLS_FW 533 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58..960f5dba630 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o 15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o 16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o 17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
18obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 19obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
19obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 20obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
20obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 21obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 00000000000..67dc7ce9b63
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,595 @@
1/*
2 * Checksum updating actions
3 *
4 * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/spinlock.h>
18
19#include <linux/netlink.h>
20#include <net/netlink.h>
21#include <linux/rtnetlink.h>
22
23#include <linux/skbuff.h>
24
25#include <net/ip.h>
26#include <net/ipv6.h>
27#include <net/icmp.h>
28#include <linux/icmpv6.h>
29#include <linux/igmp.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/ip6_checksum.h>
33
34#include <net/act_api.h>
35
36#include <linux/tc_act/tc_csum.h>
37#include <net/tc_act/tc_csum.h>
38
39#define CSUM_TAB_MASK 15
40static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
41static u32 csum_idx_gen;
42static DEFINE_RWLOCK(csum_lock);
43
44static struct tcf_hashinfo csum_hash_info = {
45 .htab = tcf_csum_ht,
46 .hmask = CSUM_TAB_MASK,
47 .lock = &csum_lock,
48};
49
50static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
51 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
52};
53
54static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
55 struct tc_action *a, int ovr, int bind)
56{
57 struct nlattr *tb[TCA_CSUM_MAX + 1];
58 struct tc_csum *parm;
59 struct tcf_common *pc;
60 struct tcf_csum *p;
61 int ret = 0, err;
62
63 if (nla == NULL)
64 return -EINVAL;
65
66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
67 if (err < 0)
68 return err;
69
70 if (tb[TCA_CSUM_PARMS] == NULL)
71 return -EINVAL;
72 parm = nla_data(tb[TCA_CSUM_PARMS]);
73
74 pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info);
75 if (!pc) {
76 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
77 &csum_idx_gen, &csum_hash_info);
78 if (IS_ERR(pc))
79 return PTR_ERR(pc);
80 p = to_tcf_csum(pc);
81 ret = ACT_P_CREATED;
82 } else {
83 p = to_tcf_csum(pc);
84 if (!ovr) {
85 tcf_hash_release(pc, bind, &csum_hash_info);
86 return -EEXIST;
87 }
88 }
89
90 spin_lock_bh(&p->tcf_lock);
91 p->tcf_action = parm->action;
92 p->update_flags = parm->update_flags;
93 spin_unlock_bh(&p->tcf_lock);
94
95 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(pc, &csum_hash_info);
97
98 return ret;
99}
100
101static int tcf_csum_cleanup(struct tc_action *a, int bind)
102{
103 struct tcf_csum *p = a->priv;
104 return tcf_hash_release(&p->common, bind, &csum_hash_info);
105}
106
107/**
108 * tcf_csum_skb_nextlayer - Get next layer pointer
109 * @skb: sk_buff to use
110 * @ihl: previous summed headers length
111 * @ipl: complete packet length
112 * @jhl: next header length
113 *
114 * Check that the expected next layer is available in the specified sk_buff.
115 * Return a pointer to the next layer if the checks pass, NULL otherwise.
116 */
117static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
118 unsigned int ihl, unsigned int ipl,
119 unsigned int jhl)
120{
121 int ntkoff = skb_network_offset(skb);
122 int hl = ihl + jhl;
123
124 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
125 (skb_cloned(skb) &&
126 !skb_clone_writable(skb, hl + ntkoff) &&
127 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
128 return NULL;
129 else
130 return (void *)(skb_network_header(skb) + ihl);
131}
132
133static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
134 unsigned int ihl, unsigned int ipl)
135{
136 struct icmphdr *icmph;
137
138 icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
139 if (icmph == NULL)
140 return 0;
141
142 icmph->checksum = 0;
143 skb->csum = csum_partial(icmph, ipl - ihl, 0);
144 icmph->checksum = csum_fold(skb->csum);
145
146 skb->ip_summed = CHECKSUM_NONE;
147
148 return 1;
149}
150
151static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
152 unsigned int ihl, unsigned int ipl)
153{
154 struct igmphdr *igmph;
155
156 igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
157 if (igmph == NULL)
158 return 0;
159
160 igmph->csum = 0;
161 skb->csum = csum_partial(igmph, ipl - ihl, 0);
162 igmph->csum = csum_fold(skb->csum);
163
164 skb->ip_summed = CHECKSUM_NONE;
165
166 return 1;
167}
168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
170 unsigned int ihl, unsigned int ipl)
171{
172 struct icmp6hdr *icmp6h;
173
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL)
176 return 0;
177
178 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
181 ipl - ihl, IPPROTO_ICMPV6,
182 skb->csum);
183
184 skb->ip_summed = CHECKSUM_NONE;
185
186 return 1;
187}
188
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
190 unsigned int ihl, unsigned int ipl)
191{
192 struct tcphdr *tcph;
193
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL)
196 return 0;
197
198 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl,
201 iph->saddr, iph->daddr, skb->csum);
202
203 skb->ip_summed = CHECKSUM_NONE;
204
205 return 1;
206}
207
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
209 unsigned int ihl, unsigned int ipl)
210{
211 struct tcphdr *tcph;
212
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL)
215 return 0;
216
217 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
220 ipl - ihl, IPPROTO_TCP,
221 skb->csum);
222
223 skb->ip_summed = CHECKSUM_NONE;
224
225 return 1;
226}
227
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
229 unsigned int ihl, unsigned int ipl, int udplite)
230{
231 struct udphdr *udph;
232 u16 ul;
233
234 /*
235 * Support both UDP and UDPLITE checksum algorithms. Don't use
236 * udph->len to get the real length without a protocol check:
237 * UDPLITE uses udph->len for something else.
238 * Use iph->tot_len, or just ipl, instead.
239 */
240
241 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
242 if (udph == NULL)
243 return 0;
244
245 ul = ntohs(udph->len);
246
247 if (udplite || udph->check) {
248
249 udph->check = 0;
250
251 if (udplite) {
252 if (ul == 0)
253 skb->csum = csum_partial(udph, ipl - ihl, 0);
254 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
255 skb->csum = csum_partial(udph, ul, 0);
256 else
257 goto ignore_obscure_skb;
258 } else {
259 if (ul != ipl - ihl)
260 goto ignore_obscure_skb;
261
262 skb->csum = csum_partial(udph, ul, 0);
263 }
264
265 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
266 ul, iph->protocol,
267 skb->csum);
268
269 if (!udph->check)
270 udph->check = CSUM_MANGLED_0;
271 }
272
273 skb->ip_summed = CHECKSUM_NONE;
274
275ignore_obscure_skb:
276 return 1;
277}
278
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
280 unsigned int ihl, unsigned int ipl, int udplite)
281{
282 struct udphdr *udph;
283 u16 ul;
284
285 /*
286 * Support both UDP and UDPLITE checksum algorithms. Don't use
287 * udph->len to get the real length without a protocol check:
288 * UDPLITE uses udph->len for something else.
289 * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl, instead.
290 */
291
292 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
293 if (udph == NULL)
294 return 0;
295
296 ul = ntohs(udph->len);
297
298 udph->check = 0;
299
300 if (udplite) {
301 if (ul == 0)
302 skb->csum = csum_partial(udph, ipl - ihl, 0);
303
304 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
305 skb->csum = csum_partial(udph, ul, 0);
306
307 else
308 goto ignore_obscure_skb;
309 } else {
310 if (ul != ipl - ihl)
311 goto ignore_obscure_skb;
312
313 skb->csum = csum_partial(udph, ul, 0);
314 }
315
316 udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
317 udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
318 skb->csum);
319
320 if (!udph->check)
321 udph->check = CSUM_MANGLED_0;
322
323 skb->ip_summed = CHECKSUM_NONE;
324
325ignore_obscure_skb:
326 return 1;
327}
328
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{
331 struct iphdr *iph;
332 int ntkoff;
333
334 ntkoff = skb_network_offset(skb);
335
336 if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
337 goto fail;
338
339 iph = ip_hdr(skb);
340
341 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
342 case IPPROTO_ICMP:
343 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
344 if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
345 ntohs(iph->tot_len)))
346 goto fail;
347 break;
348 case IPPROTO_IGMP:
349 if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
350 if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
351 ntohs(iph->tot_len)))
352 goto fail;
353 break;
354 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
357 ntohs(iph->tot_len)))
358 goto fail;
359 break;
360 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
363 ntohs(iph->tot_len), 0))
364 goto fail;
365 break;
366 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
369 ntohs(iph->tot_len), 1))
370 goto fail;
371 break;
372 }
373
374 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
375 if (skb_cloned(skb) &&
376 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail;
379
380 ip_send_check(iph);
381 }
382
383 return 1;
384
385fail:
386 return 0;
387}
388
389static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
390 unsigned int ixhl, unsigned int *pl)
391{
392 int off, len, optlen;
393 unsigned char *xh = (void *)ip6xh;
394
395 off = sizeof(*ip6xh);
396 len = ixhl - off;
397
398 while (len > 1) {
399 switch (xh[off]) {
400 case IPV6_TLV_PAD0:
401 optlen = 1;
402 break;
403 case IPV6_TLV_JUMBO:
404 optlen = xh[off + 1] + 2;
405 if (optlen != 6 || len < 6 || (off & 3) != 2)
406 /* wrong jumbo option length/alignment */
407 return 0;
408 *pl = ntohl(*(__be32 *)(xh + off + 2));
409 goto done;
410 default:
411 optlen = xh[off + 1] + 2;
412 if (optlen > len)
413 /* ignore obscure options */
414 goto done;
415 break;
416 }
417 off += optlen;
418 len -= optlen;
419 }
420
421done:
422 return 1;
423}
424
425static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
426{
427 struct ipv6hdr *ip6h;
428 struct ipv6_opt_hdr *ip6xh;
429 unsigned int hl, ixhl;
430 unsigned int pl;
431 int ntkoff;
432 u8 nexthdr;
433
434 ntkoff = skb_network_offset(skb);
435
436 hl = sizeof(*ip6h);
437
438 if (!pskb_may_pull(skb, hl + ntkoff))
439 goto fail;
440
441 ip6h = ipv6_hdr(skb);
442
443 pl = ntohs(ip6h->payload_len);
444 nexthdr = ip6h->nexthdr;
445
446 do {
447 switch (nexthdr) {
448 case NEXTHDR_FRAGMENT:
449 goto ignore_skb;
450 case NEXTHDR_ROUTING:
451 case NEXTHDR_HOP:
452 case NEXTHDR_DEST:
453 if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
454 goto fail;
455 ip6xh = (void *)(skb_network_header(skb) + hl);
456 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail;
459 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail;
462 nexthdr = ip6xh->nexthdr;
463 hl += ixhl;
464 break;
465 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h,
468 hl, pl + sizeof(*ip6h)))
469 goto fail;
470 goto done;
471 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h,
474 hl, pl + sizeof(*ip6h)))
475 goto fail;
476 goto done;
477 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
480 pl + sizeof(*ip6h), 0))
481 goto fail;
482 goto done;
483 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
486 pl + sizeof(*ip6h), 1))
487 goto fail;
488 goto done;
489 default:
490 goto ignore_skb;
491 }
492 } while (pskb_may_pull(skb, hl + 1 + ntkoff));
493
494done:
495ignore_skb:
496 return 1;
497
498fail:
499 return 0;
500}
501
502static int tcf_csum(struct sk_buff *skb,
503 struct tc_action *a, struct tcf_result *res)
504{
505 struct tcf_csum *p = a->priv;
506 int action;
507 u32 update_flags;
508
509 spin_lock(&p->tcf_lock);
510 p->tcf_tm.lastuse = jiffies;
511 p->tcf_bstats.bytes += qdisc_pkt_len(skb);
512 p->tcf_bstats.packets++;
513 action = p->tcf_action;
514 update_flags = p->update_flags;
515 spin_unlock(&p->tcf_lock);
516
517 if (unlikely(action == TC_ACT_SHOT))
518 goto drop;
519
520 switch (skb->protocol) {
521 case cpu_to_be16(ETH_P_IP):
522 if (!tcf_csum_ipv4(skb, update_flags))
523 goto drop;
524 break;
525 case cpu_to_be16(ETH_P_IPV6):
526 if (!tcf_csum_ipv6(skb, update_flags))
527 goto drop;
528 break;
529 }
530
531 return action;
532
533drop:
534 spin_lock(&p->tcf_lock);
535 p->tcf_qstats.drops++;
536 spin_unlock(&p->tcf_lock);
537 return TC_ACT_SHOT;
538}
539
540static int tcf_csum_dump(struct sk_buff *skb,
541 struct tc_action *a, int bind, int ref)
542{
543 unsigned char *b = skb_tail_pointer(skb);
544 struct tcf_csum *p = a->priv;
545 struct tc_csum opt = {
546 .update_flags = p->update_flags,
547 .index = p->tcf_index,
548 .action = p->tcf_action,
549 .refcnt = p->tcf_refcnt - ref,
550 .bindcnt = p->tcf_bindcnt - bind,
551 };
552 struct tcf_t t;
553
554 NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
555 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
556 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
557 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
558 NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
559
560 return skb->len;
561
562nla_put_failure:
563 nlmsg_trim(skb, b);
564 return -1;
565}
566
567static struct tc_action_ops act_csum_ops = {
568 .kind = "csum",
569 .hinfo = &csum_hash_info,
570 .type = TCA_ACT_CSUM,
571 .capab = TCA_CAP_NONE,
572 .owner = THIS_MODULE,
573 .act = tcf_csum,
574 .dump = tcf_csum_dump,
575 .cleanup = tcf_csum_cleanup,
576 .lookup = tcf_hash_search,
577 .init = tcf_csum_init,
578 .walk = tcf_generic_walker
579};
580
581MODULE_DESCRIPTION("Checksum updating actions");
582MODULE_LICENSE("GPL");
583
584static int __init csum_init_module(void)
585{
586 return tcf_register_action(&act_csum_ops);
587}
588
589static void __exit csum_cleanup_module(void)
590{
591 tcf_unregister_action(&act_csum_ops);
592}
593
594module_init(csum_init_module);
595module_exit(csum_cleanup_module);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e17096e3913..5b271a18bc3 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb)
111 } 111 }
112} 112}
113 113
114static int has_ports(u8 protocol)
115{
116 switch (protocol) {
117 case IPPROTO_TCP:
118 case IPPROTO_UDP:
119 case IPPROTO_UDPLITE:
120 case IPPROTO_SCTP:
121 case IPPROTO_DCCP:
122 case IPPROTO_ESP:
123 return 1;
124 default:
125 return 0;
126 }
127}
128
129static u32 flow_get_proto_src(struct sk_buff *skb) 114static u32 flow_get_proto_src(struct sk_buff *skb)
130{ 115{
131 switch (skb->protocol) { 116 switch (skb->protocol) {
132 case htons(ETH_P_IP): { 117 case htons(ETH_P_IP): {
133 struct iphdr *iph; 118 struct iphdr *iph;
119 int poff;
134 120
135 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
136 break; 122 break;
137 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
138 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 124 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
139 has_ports(iph->protocol) && 125 break;
140 pskb_network_may_pull(skb, iph->ihl * 4 + 2)) 126 poff = proto_ports_offset(iph->protocol);
141 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); 127 if (poff >= 0 &&
128 pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
129 iph = ip_hdr(skb);
130 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
131 poff));
132 }
142 break; 133 break;
143 } 134 }
144 case htons(ETH_P_IPV6): { 135 case htons(ETH_P_IPV6): {
145 struct ipv6hdr *iph; 136 struct ipv6hdr *iph;
137 int poff;
146 138
147 if (!pskb_network_may_pull(skb, sizeof(*iph) + 2)) 139 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 break; 140 break;
149 iph = ipv6_hdr(skb); 141 iph = ipv6_hdr(skb);
150 if (has_ports(iph->nexthdr)) 142 poff = proto_ports_offset(iph->nexthdr);
151 return ntohs(*(__be16 *)&iph[1]); 143 if (poff >= 0 &&
144 pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
145 iph = ipv6_hdr(skb);
146 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
147 poff));
148 }
152 break; 149 break;
153 } 150 }
154 } 151 }
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
161 switch (skb->protocol) { 158 switch (skb->protocol) {
162 case htons(ETH_P_IP): { 159 case htons(ETH_P_IP): {
163 struct iphdr *iph; 160 struct iphdr *iph;
161 int poff;
164 162
165 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
166 break; 164 break;
167 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
168 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 166 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
169 has_ports(iph->protocol) && 167 break;
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 168 poff = proto_ports_offset(iph->protocol);
171 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); 169 if (poff >= 0 &&
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
171 iph = ip_hdr(skb);
172 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
173 2 + poff));
174 }
172 break; 175 break;
173 } 176 }
174 case htons(ETH_P_IPV6): { 177 case htons(ETH_P_IPV6): {
175 struct ipv6hdr *iph; 178 struct ipv6hdr *iph;
179 int poff;
176 180
177 if (!pskb_network_may_pull(skb, sizeof(*iph) + 4)) 181 if (!pskb_network_may_pull(skb, sizeof(*iph)))
178 break; 182 break;
179 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
180 if (has_ports(iph->nexthdr)) 184 poff = proto_ports_offset(iph->nexthdr);
181 return ntohs(*(__be16 *)((void *)&iph[1] + 2)); 185 if (poff >= 0 &&
186 pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
187 iph = ipv6_hdr(skb);
188 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
189 poff + 2));
190 }
182 break; 191 break;
183 } 192 }
184 } 193 }
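The open-coded has_ports() test is replaced by proto_ports_offset(), which reports where the 16-bit port pair (or SPI) sits inside the transport header. A sketch of the helper this hunk and the sch_sfq hunk below rely on; it is assumed to live in include/net/ip.h, and the exact protocol list and offsets here are an assumption:

	/* Sketch: assumed behaviour of proto_ports_offset(). */
	static inline int proto_ports_offset(int proto)
	{
		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_DCCP:
		case IPPROTO_ESP:	/* SPI */
		case IPPROTO_SCTP:
		case IPPROTO_UDPLITE:
			return 0;
		case IPPROTO_AH:	/* SPI */
			return 4;
		default:
			return -1;	/* no ports to hash or classify on */
		}
	}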
@@ -297,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
297 return tag & VLAN_VID_MASK; 306 return tag & VLAN_VID_MASK;
298} 307}
299 308
309static u32 flow_get_rxhash(struct sk_buff *skb)
310{
311 return skb_get_rxhash(skb);
312}
313
300static u32 flow_key_get(struct sk_buff *skb, int key) 314static u32 flow_key_get(struct sk_buff *skb, int key)
301{ 315{
302 switch (key) { 316 switch (key) {
@@ -334,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
334 return flow_get_skgid(skb); 348 return flow_get_skgid(skb);
335 case FLOW_KEY_VLAN_TAG: 349 case FLOW_KEY_VLAN_TAG:
336 return flow_get_vlan_tag(skb); 350 return flow_get_vlan_tag(skb);
351 case FLOW_KEY_RXHASH:
352 return flow_get_rxhash(skb);
337 default: 353 default:
338 WARN_ON(1); 354 WARN_ON(1);
339 return 0; 355 return 0;
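FLOW_KEY_RXHASH (and the RXHASH ematch added in em_meta.c below) simply exposes the receive hash. A sketch of skb_get_rxhash() as it is assumed to behave, computing the hash lazily when the driver has not already supplied one:

	/* Sketch: assumed definition from include/linux/skbuff.h of this period. */
	static inline __u32 skb_get_rxhash(struct sk_buff *skb)
	{
		if (!skb->rxhash)
			skb->rxhash = __skb_get_rxhash(skb);	/* software fallback */

		return skb->rxhash;
	}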
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3bcac8aa333..34da5e29ea1 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -223,6 +223,11 @@ META_COLLECTOR(int_maclen)
223 dst->value = skb->mac_len; 223 dst->value = skb->mac_len;
224} 224}
225 225
226META_COLLECTOR(int_rxhash)
227{
228 dst->value = skb_get_rxhash(skb);
229}
230
226/************************************************************************** 231/**************************************************************************
227 * Netfilter 232 * Netfilter
228 **************************************************************************/ 233 **************************************************************************/
@@ -541,6 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
541 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), 546 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
542 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), 547 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
543 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), 548 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag),
549 [META_ID(RXHASH)] = META_FUNC(int_rxhash),
544 } 550 }
545}; 551};
546 552
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 408eea7086a..6fb3d41c0e4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -360,7 +360,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
361 } 361 }
362 362
363 if (!s || tsize != s->tsize || (!tab && tsize > 0)) 363 if (tsize != s->tsize || (!tab && tsize > 0))
364 return ERR_PTR(-EINVAL); 364 return ERR_PTR(-EINVAL);
365 365
366 spin_lock(&qdisc_stab_lock); 366 spin_lock(&qdisc_stab_lock);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32..3cf478d012d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -123,40 +123,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
123 case htons(ETH_P_IP): 123 case htons(ETH_P_IP):
124 { 124 {
125 const struct iphdr *iph; 125 const struct iphdr *iph;
126 int poff;
126 127
127 if (!pskb_network_may_pull(skb, sizeof(*iph))) 128 if (!pskb_network_may_pull(skb, sizeof(*iph)))
128 goto err; 129 goto err;
129 iph = ip_hdr(skb); 130 iph = ip_hdr(skb);
130 h = (__force u32)iph->daddr; 131 h = (__force u32)iph->daddr;
131 h2 = (__force u32)iph->saddr ^ iph->protocol; 132 h2 = (__force u32)iph->saddr ^ iph->protocol;
132 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 133 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
133 (iph->protocol == IPPROTO_TCP || 134 break;
134 iph->protocol == IPPROTO_UDP || 135 poff = proto_ports_offset(iph->protocol);
135 iph->protocol == IPPROTO_UDPLITE || 136 if (poff >= 0 &&
136 iph->protocol == IPPROTO_SCTP || 137 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
137 iph->protocol == IPPROTO_DCCP || 138 iph = ip_hdr(skb);
138 iph->protocol == IPPROTO_ESP) && 139 h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
139 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 140 }
140 h2 ^= *(((u32*)iph) + iph->ihl);
141 break; 141 break;
142 } 142 }
143 case htons(ETH_P_IPV6): 143 case htons(ETH_P_IPV6):
144 { 144 {
145 struct ipv6hdr *iph; 145 struct ipv6hdr *iph;
146 int poff;
146 147
147 if (!pskb_network_may_pull(skb, sizeof(*iph))) 148 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 goto err; 149 goto err;
149 iph = ipv6_hdr(skb); 150 iph = ipv6_hdr(skb);
150 h = (__force u32)iph->daddr.s6_addr32[3]; 151 h = (__force u32)iph->daddr.s6_addr32[3];
151 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; 152 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
152 if ((iph->nexthdr == IPPROTO_TCP || 153 poff = proto_ports_offset(iph->nexthdr);
153 iph->nexthdr == IPPROTO_UDP || 154 if (poff >= 0 &&
154 iph->nexthdr == IPPROTO_UDPLITE || 155 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
155 iph->nexthdr == IPPROTO_SCTP || 156 iph = ipv6_hdr(skb);
156 iph->nexthdr == IPPROTO_DCCP || 157 h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
157 iph->nexthdr == IPPROTO_ESP) && 158 }
158 pskb_network_may_pull(skb, sizeof(*iph) + 4))
159 h2 ^= *(u32*)&iph[1];
160 break; 159 break;
161 } 160 }
162 default: 161 default:
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0b85e525643..5f1fb8bd862 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/fcntl.h> 54#include <linux/fcntl.h>
53#include <linux/poll.h> 55#include <linux/poll.h>
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 476caaf100e..6c8556459a7 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -37,6 +37,8 @@
37 * be incorporated into the next SCTP release. 37 * be incorporated into the next SCTP release.
38 */ 38 */
39 39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
40#include <linux/types.h> 42#include <linux/types.h>
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <linux/net.h> 44#include <linux/net.h>
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index ccb6dc48d15..397296fb156 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -43,6 +43,8 @@
43 * be incorporated into the next SCTP release. 43 * be incorporated into the next SCTP release.
44 */ 44 */
45 45
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
47
46#include <net/sctp/sctp.h> 48#include <net/sctp/sctp.h>
47#include <net/sctp/sm.h> 49#include <net/sctp/sm.h>
48#include <linux/interrupt.h> 50#include <linux/interrupt.h>
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 732689140fb..95e0c8eda1a 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/module.h> 52#include <linux/module.h>
51#include <linux/errno.h> 53#include <linux/errno.h>
52#include <linux/types.h> 54#include <linux/types.h>
@@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
336 memcpy(saddr, baddr, sizeof(union sctp_addr)); 338 memcpy(saddr, baddr, sizeof(union sctp_addr));
337 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); 339 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr);
338 } else { 340 } else {
339 printk(KERN_ERR "%s: asoc:%p Could not find a valid source " 341 pr_err("%s: asoc:%p Could not find a valid source "
340 "address for the dest:%pI6\n", 342 "address for the dest:%pI6\n",
341 __func__, asoc, &daddr->v6.sin6_addr); 343 __func__, asoc, &daddr->v6.sin6_addr);
342 } 344 }
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index f73ec0ea93b..8ef8e7d9eb6 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -38,6 +38,8 @@
38 * be incorporated into the next SCTP release. 38 * be incorporated into the next SCTP release.
39 */ 39 */
40 40
41#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
42
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <net/sctp/sctp.h> 44#include <net/sctp/sctp.h>
43 45
@@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void)
134 ent = proc_create("sctp_dbg_objcnt", 0, 136 ent = proc_create("sctp_dbg_objcnt", 0,
135 proc_net_sctp, &sctp_objcnt_ops); 137 proc_net_sctp, &sctp_objcnt_ops);
136 if (!ent) 138 if (!ent)
137 printk(KERN_WARNING 139 pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
138 "sctp_dbg_objcnt: Unable to create /proc entry.\n");
139} 140}
140 141
141/* Cleanup the objcount entry in the proc filesystem. */ 142/* Cleanup the objcount entry in the proc filesystem. */
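Defining pr_fmt() before the includes is what turns the plain pr_warn()/pr_err() calls in these SCTP files into prefixed messages. The composition works roughly like this (standard printk.h behaviour, shown for reference):

	/* With the per-file definition
	 *	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
	 * and the generic wrapper
	 *	#define pr_warn(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
	 * the call above expands to roughly
	 *	printk(KERN_WARNING "sctp: sctp_dbg_objcnt: Unable to create /proc entry.\n");
	 */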
diff --git a/net/sctp/output.c b/net/sctp/output.c
index bcc4590ccaf..60600d337a3 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -41,6 +41,8 @@
41 * be incorporated into the next SCTP release. 41 * be incorporated into the next SCTP release.
42 */ 42 */
43 43
44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
45
44#include <linux/types.h> 46#include <linux/types.h>
45#include <linux/kernel.h> 47#include <linux/kernel.h>
46#include <linux/wait.h> 48#include <linux/wait.h>
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c04b2eb5918..8c6d379b4bb 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/types.h> 51#include <linux/types.h>
50#include <linux/list.h> /* For struct list_head */ 52#include <linux/list.h> /* For struct list_head */
51#include <linux/socket.h> 53#include <linux/socket.h>
@@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1463 /* Display the end of the 1465 /* Display the end of the
1464 * current range. 1466 * current range.
1465 */ 1467 */
1466 SCTP_DEBUG_PRINTK("-%08x", 1468 SCTP_DEBUG_PRINTK_CONT("-%08x",
1467 dbg_last_ack_tsn); 1469 dbg_last_ack_tsn);
1468 } 1470 }
1469 1471
1470 /* Start a new range. */ 1472 /* Start a new range. */
1471 SCTP_DEBUG_PRINTK(",%08x", tsn); 1473 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1472 dbg_ack_tsn = tsn; 1474 dbg_ack_tsn = tsn;
1473 break; 1475 break;
1474 1476
1475 case 1: /* The last TSN was NOT ACKed. */ 1477 case 1: /* The last TSN was NOT ACKed. */
1476 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1478 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1477 /* Display the end of current range. */ 1479 /* Display the end of current range. */
1478 SCTP_DEBUG_PRINTK("-%08x", 1480 SCTP_DEBUG_PRINTK_CONT("-%08x",
1479 dbg_last_kept_tsn); 1481 dbg_last_kept_tsn);
1480 } 1482 }
1481 1483
1482 SCTP_DEBUG_PRINTK("\n"); 1484 SCTP_DEBUG_PRINTK_CONT("\n");
1483 1485
1484 /* FALL THROUGH... */ 1486 /* FALL THROUGH... */
1485 default: 1487 default:
@@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1526 break; 1528 break;
1527 1529
1528 if (dbg_last_kept_tsn != dbg_kept_tsn) 1530 if (dbg_last_kept_tsn != dbg_kept_tsn)
1529 SCTP_DEBUG_PRINTK("-%08x", 1531 SCTP_DEBUG_PRINTK_CONT("-%08x",
1530 dbg_last_kept_tsn); 1532 dbg_last_kept_tsn);
1531 1533
1532 SCTP_DEBUG_PRINTK(",%08x", tsn); 1534 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1533 dbg_kept_tsn = tsn; 1535 dbg_kept_tsn = tsn;
1534 break; 1536 break;
1535 1537
1536 case 0: 1538 case 0:
1537 if (dbg_last_ack_tsn != dbg_ack_tsn) 1539 if (dbg_last_ack_tsn != dbg_ack_tsn)
1538 SCTP_DEBUG_PRINTK("-%08x", 1540 SCTP_DEBUG_PRINTK_CONT("-%08x",
1539 dbg_last_ack_tsn); 1541 dbg_last_ack_tsn);
1540 SCTP_DEBUG_PRINTK("\n"); 1542 SCTP_DEBUG_PRINTK_CONT("\n");
1541 1543
1542 /* FALL THROUGH... */ 1544 /* FALL THROUGH... */
1543 default: 1545 default:
@@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1556 switch (dbg_prt_state) { 1558 switch (dbg_prt_state) {
1557 case 0: 1559 case 0:
1558 if (dbg_last_ack_tsn != dbg_ack_tsn) { 1560 if (dbg_last_ack_tsn != dbg_ack_tsn) {
1559 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); 1561 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn);
1560 } else { 1562 } else {
1561 SCTP_DEBUG_PRINTK("\n"); 1563 SCTP_DEBUG_PRINTK_CONT("\n");
1562 } 1564 }
1563 break; 1565 break;
1564 1566
1565 case 1: 1567 case 1:
1566 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1568 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1567 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); 1569 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn);
1568 } else { 1570 } else {
1569 SCTP_DEBUG_PRINTK("\n"); 1571 SCTP_DEBUG_PRINTK_CONT("\n");
1570 } 1572 }
1571 } 1573 }
1572#endif /* SCTP_DEBUG */ 1574#endif /* SCTP_DEBUG */
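
The hunks above switch the TSN-range dumps from SCTP_DEBUG_PRINTK to SCTP_DEBUG_PRINTK_CONT. The _CONT variant is defined in the SCTP headers outside this hunk, but it presumably emits a KERN_CONT/pr_cont() continuation: once each SCTP_DEBUG_PRINTK starts a new, prefixed log record, the pieces appended to a partially printed line need a continuation form so they stay on one line without repeating the prefix. A rough illustration of the idea (not the SCTP macros themselves):

#include <linux/kernel.h>
#include <linux/printk.h>

/* The first printk opens a KERN_DEBUG record; pr_cont() appends to the
 * same line without starting a new, prefixed record. */
static void print_tsn_range(u32 first, u32 last)
{
	printk(KERN_DEBUG "ACKed: %08x", first);
	if (last != first)
		pr_cont("-%08x", last);
	pr_cont("\n");
}
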
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index db3a42b8b34..2e63e9dc010 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -22,6 +22,8 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
25#include <linux/kernel.h> 27#include <linux/kernel.h>
26#include <linux/kprobes.h> 28#include <linux/kprobes.h>
27#include <linux/socket.h> 29#include <linux/socket.h>
@@ -192,7 +194,7 @@ static __init int sctpprobe_init(void)
192 if (ret) 194 if (ret)
193 goto remove_proc; 195 goto remove_proc;
194 196
195 pr_info("SCTP probe registered (port=%d)\n", port); 197 pr_info("probe registered (port=%d)\n", port);
196 198
197 return 0; 199 return 0;
198 200
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5027b83f1cc..1ef29c74d85 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/module.h> 51#include <linux/module.h>
50#include <linux/init.h> 52#include <linux/init.h>
51#include <linux/netdevice.h> 53#include <linux/netdevice.h>
@@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void)
707 &init_net); 709 &init_net);
708 710
709 if (err < 0) { 711 if (err < 0) {
710 printk(KERN_ERR 712 pr_err("Failed to create the SCTP control socket\n");
711 "SCTP: Failed to create the SCTP control socket.\n");
712 return err; 713 return err;
713 } 714 }
714 return 0; 715 return 0;
@@ -798,7 +799,7 @@ static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
798static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp) 799static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp)
799{ 800{
800 /* PF_INET only supports AF_INET addresses. */ 801 /* PF_INET only supports AF_INET addresses. */
801 return (AF_INET == family); 802 return AF_INET == family;
802} 803}
803 804
804/* Address matching with wildcards allowed. */ 805/* Address matching with wildcards allowed. */
@@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void)
1206 __get_free_pages(GFP_ATOMIC, order); 1207 __get_free_pages(GFP_ATOMIC, order);
1207 } while (!sctp_assoc_hashtable && --order > 0); 1208 } while (!sctp_assoc_hashtable && --order > 0);
1208 if (!sctp_assoc_hashtable) { 1209 if (!sctp_assoc_hashtable) {
1209 printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); 1210 pr_err("Failed association hash alloc\n");
1210 status = -ENOMEM; 1211 status = -ENOMEM;
1211 goto err_ahash_alloc; 1212 goto err_ahash_alloc;
1212 } 1213 }
@@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void)
1220 sctp_ep_hashtable = (struct sctp_hashbucket *) 1221 sctp_ep_hashtable = (struct sctp_hashbucket *)
1221 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); 1222 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
1222 if (!sctp_ep_hashtable) { 1223 if (!sctp_ep_hashtable) {
1223 printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); 1224 pr_err("Failed endpoint_hash alloc\n");
1224 status = -ENOMEM; 1225 status = -ENOMEM;
1225 goto err_ehash_alloc; 1226 goto err_ehash_alloc;
1226 } 1227 }
@@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void)
1239 __get_free_pages(GFP_ATOMIC, order); 1240 __get_free_pages(GFP_ATOMIC, order);
1240 } while (!sctp_port_hashtable && --order > 0); 1241 } while (!sctp_port_hashtable && --order > 0);
1241 if (!sctp_port_hashtable) { 1242 if (!sctp_port_hashtable) {
1242 printk(KERN_ERR "SCTP: Failed bind hash alloc."); 1243 pr_err("Failed bind hash alloc\n");
1243 status = -ENOMEM; 1244 status = -ENOMEM;
1244 goto err_bhash_alloc; 1245 goto err_bhash_alloc;
1245 } 1246 }
@@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void)
1248 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); 1249 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
1249 } 1250 }
1250 1251
1251 printk(KERN_INFO "SCTP: Hash tables configured " 1252 pr_info("Hash tables configured (established %d bind %d)\n",
1252 "(established %d bind %d)\n",
1253 sctp_assoc_hashsize, sctp_port_hashsize); 1253 sctp_assoc_hashsize, sctp_port_hashsize);
1254 1254
1255 /* Disable ADDIP by default. */ 1255 /* Disable ADDIP by default. */
@@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void)
1290 1290
1291 /* Initialize the control inode/socket for handling OOTB packets. */ 1291 /* Initialize the control inode/socket for handling OOTB packets. */
1292 if ((status = sctp_ctl_sock_init())) { 1292 if ((status = sctp_ctl_sock_init())) {
1293 printk (KERN_ERR 1293 pr_err("Failed to initialize the SCTP control sock\n");
1294 "SCTP: Failed to initialize the SCTP control sock.\n");
1295 goto err_ctl_sock_init; 1294 goto err_ctl_sock_init;
1296 } 1295 }
1297 1296
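
The surrounding context in sctp_init() shows the allocation pattern the new pr_err() messages report on: each hash table is requested at the largest page order that fits, and the order is decremented until __get_free_pages() succeeds or runs out of room. A standalone sketch of that fallback loop (names and GFP flags illustrative only):

#include <linux/gfp.h>
#include <linux/mm.h>

/* Allocate roughly 'goal' bytes for a hash table, retrying at smaller
 * page orders when a large contiguous block is not available.
 * Returns 0 on failure, which the caller reports via pr_err(). */
static unsigned long alloc_hash_table(size_t goal, int *order_out)
{
	unsigned long table;
	int order = get_order(goal);

	do {
		table = __get_free_pages(GFP_KERNEL, order);
	} while (!table && --order > 0);

	*order_out = order;
	return table;
}
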
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 246f9292465..2cc46f0962c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index f5e5e27cac5..b21b218d564 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/skbuff.h> 52#include <linux/skbuff.h>
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/socket.h> 54#include <linux/socket.h>
@@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
1146 1148
1147 case SCTP_DISPOSITION_VIOLATION: 1149 case SCTP_DISPOSITION_VIOLATION:
1148 if (net_ratelimit()) 1150 if (net_ratelimit())
1149 printk(KERN_ERR "sctp protocol violation state %d " 1151 pr_err("protocol violation state %d chunkid %d\n",
1150 "chunkid %d\n", state, subtype.chunk); 1152 state, subtype.chunk);
1151 break; 1153 break;
1152 1154
1153 case SCTP_DISPOSITION_NOT_IMPL: 1155 case SCTP_DISPOSITION_NOT_IMPL:
1154 printk(KERN_WARNING "sctp unimplemented feature in state %d, " 1156 pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n",
1155 "event_type %d, event_id %d\n", 1157 state, event_type, subtype.chunk);
1156 state, event_type, subtype.chunk);
1157 break; 1158 break;
1158 1159
1159 case SCTP_DISPOSITION_BUG: 1160 case SCTP_DISPOSITION_BUG:
1160 printk(KERN_ERR "sctp bug in state %d, " 1161 pr_err("bug in state %d, event_type %d, event_id %d\n",
1161 "event_type %d, event_id %d\n",
1162 state, event_type, subtype.chunk); 1162 state, event_type, subtype.chunk);
1163 BUG(); 1163 BUG();
1164 break; 1164 break;
1165 1165
1166 default: 1166 default:
1167 printk(KERN_ERR "sctp impossible disposition %d " 1167 pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
1168 "in state %d, event_type %d, event_id %d\n",
1169 status, state, event_type, subtype.chunk); 1168 status, state, event_type, subtype.chunk);
1170 BUG(); 1169 BUG();
1171 break; 1170 break;
@@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1679 sctp_cmd_send_asconf(asoc); 1678 sctp_cmd_send_asconf(asoc);
1680 break; 1679 break;
1681 default: 1680 default:
1682 printk(KERN_WARNING "Impossible command: %u, %p\n", 1681 pr_warn("Impossible command: %u, %p\n",
1683 cmd->verb, cmd->obj.ptr); 1682 cmd->verb, cmd->obj.ptr);
1684 break; 1683 break;
1685 } 1684 }
1686 1685
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d344dc481cc..4b4eb7c96bb 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
@@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1138 if (unlikely(!link)) { 1140 if (unlikely(!link)) {
1139 if (from_addr.sa.sa_family == AF_INET6) { 1141 if (from_addr.sa.sa_family == AF_INET6) {
1140 if (net_ratelimit()) 1142 if (net_ratelimit())
1141 printk(KERN_WARNING 1143 pr_warn("%s association %p could not find address %pI6\n",
1142 "%s association %p could not find address %pI6\n", 1144 __func__,
1143 __func__, 1145 asoc,
1144 asoc, 1146 &from_addr.v6.sin6_addr);
1145 &from_addr.v6.sin6_addr);
1146 } else { 1147 } else {
1147 if (net_ratelimit()) 1148 if (net_ratelimit())
1148 printk(KERN_WARNING 1149 pr_warn("%s association %p could not find address %pI4\n",
1149 "%s association %p could not find address %pI4\n", 1150 __func__,
1150 __func__, 1151 asoc,
1151 asoc, 1152 &from_addr.v4.sin_addr.s_addr);
1152 &from_addr.v4.sin_addr.s_addr);
1153 } 1153 }
1154 return SCTP_DISPOSITION_DISCARD; 1154 return SCTP_DISPOSITION_DISCARD;
1155 } 1155 }
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 6d9b3aafcc5..546d4387fb3 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/skbuff.h> 51#include <linux/skbuff.h>
50#include <net/sctp/sctp.h> 52#include <net/sctp/sctp.h>
51#include <net/sctp/sm.h> 53#include <net/sctp/sm.h>
@@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = {
66 .name = "sctp_sf_bug" 68 .name = "sctp_sf_bug"
67}; 69};
68 70
69#define DO_LOOKUP(_max, _type, _table) \ 71#define DO_LOOKUP(_max, _type, _table) \
70 if ((event_subtype._type > (_max))) { \ 72({ \
71 printk(KERN_WARNING \ 73 const sctp_sm_table_entry_t *rtn; \
72 "sctp table %p possible attack:" \ 74 \
73 " event %d exceeds max %d\n", \ 75 if ((event_subtype._type > (_max))) { \
74 _table, event_subtype._type, _max); \ 76 pr_warn("table %p possible attack: event %d exceeds max %d\n", \
75 return &bug; \ 77 _table, event_subtype._type, _max); \
76 } \ 78 rtn = &bug; \
77 return &_table[event_subtype._type][(int)state]; 79 } else \
80 rtn = &_table[event_subtype._type][(int)state]; \
81 \
82 rtn; \
83})
78 84
79const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, 85const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
80 sctp_state_t state, 86 sctp_state_t state,
@@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
83 switch (event_type) { 89 switch (event_type) {
84 case SCTP_EVENT_T_CHUNK: 90 case SCTP_EVENT_T_CHUNK:
85 return sctp_chunk_event_lookup(event_subtype.chunk, state); 91 return sctp_chunk_event_lookup(event_subtype.chunk, state);
86 break;
87 case SCTP_EVENT_T_TIMEOUT: 92 case SCTP_EVENT_T_TIMEOUT:
88 DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, 93 return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
89 timeout_event_table); 94 timeout_event_table);
90 break;
91
92 case SCTP_EVENT_T_OTHER: 95 case SCTP_EVENT_T_OTHER:
93 DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); 96 return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other,
94 break; 97 other_event_table);
95
96 case SCTP_EVENT_T_PRIMITIVE: 98 case SCTP_EVENT_T_PRIMITIVE:
97 DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, 99 return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive,
98 primitive_event_table); 100 primitive_event_table);
99 break;
100
101 default: 101 default:
102 /* Yikes! We got an illegal event type. */ 102 /* Yikes! We got an illegal event type. */
103 return &bug; 103 return &bug;
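
The rewritten DO_LOOKUP() uses a GCC statement expression, ({ ... }), so the macro evaluates to a value and the switch cases can simply return DO_LOOKUP(...) instead of relying on the macro to return on the caller's behalf. A small standalone illustration of the idiom (names are made up):

#include <stdio.h>

/* GCC statement expression: the block evaluates to its last expression,
 * so the macro can contain statements yet still be used as a value. */
#define CLAMP_OR_WARN(_val, _max)					\
({									\
	int __v = (_val);						\
	if (__v > (_max)) {						\
		fprintf(stderr, "value %d exceeds max %d\n", __v, (_max)); \
		__v = (_max);						\
	}								\
	__v;	/* result of the whole ({ ... }) expression */		\
})

int main(void)
{
	int x = CLAMP_OR_WARN(42, 10);	/* warns, x == 10 */

	printf("%d\n", x);
	return 0;
}
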
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ca44917872d..535659fdbaa 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -57,6 +57,8 @@
57 * be incorporated into the next SCTP release. 57 * be incorporated into the next SCTP release.
58 */ 58 */
59 59
60#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
61
60#include <linux/types.h> 62#include <linux/types.h>
61#include <linux/kernel.h> 63#include <linux/kernel.h>
62#include <linux/wait.h> 64#include <linux/wait.h>
@@ -2458,9 +2460,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
2458 if (params.sack_delay == 0 && params.sack_freq == 0) 2460 if (params.sack_delay == 0 && params.sack_freq == 0)
2459 return 0; 2461 return 0;
2460 } else if (optlen == sizeof(struct sctp_assoc_value)) { 2462 } else if (optlen == sizeof(struct sctp_assoc_value)) {
2461 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 2463 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
2462 "in delayed_ack socket option deprecated\n"); 2464 pr_warn("Use struct sctp_sack_info instead\n");
2463 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
2464 if (copy_from_user(&params, optval, optlen)) 2465 if (copy_from_user(&params, optval, optlen))
2465 return -EFAULT; 2466 return -EFAULT;
2466 2467
@@ -2868,10 +2869,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
2868 int val; 2869 int val;
2869 2870
2870 if (optlen == sizeof(int)) { 2871 if (optlen == sizeof(int)) {
2871 printk(KERN_WARNING 2872 pr_warn("Use of int in maxseg socket option deprecated\n");
2872 "SCTP: Use of int in maxseg socket option deprecated\n"); 2873 pr_warn("Use struct sctp_assoc_value instead\n");
2873 printk(KERN_WARNING
2874 "SCTP: Use struct sctp_assoc_value instead\n");
2875 if (copy_from_user(&val, optval, optlen)) 2874 if (copy_from_user(&val, optval, optlen))
2876 return -EFAULT; 2875 return -EFAULT;
2877 params.assoc_id = 0; 2876 params.assoc_id = 0;
@@ -3121,10 +3120,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
3121 int assoc_id = 0; 3120 int assoc_id = 0;
3122 3121
3123 if (optlen == sizeof(int)) { 3122 if (optlen == sizeof(int)) {
3124 printk(KERN_WARNING 3123 pr_warn("Use of int in max_burst socket option deprecated\n");
3125 "SCTP: Use of int in max_burst socket option deprecated\n"); 3124 pr_warn("Use struct sctp_assoc_value instead\n");
3126 printk(KERN_WARNING
3127 "SCTP: Use struct sctp_assoc_value instead\n");
3128 if (copy_from_user(&val, optval, optlen)) 3125 if (copy_from_user(&val, optval, optlen))
3129 return -EFAULT; 3126 return -EFAULT;
3130 } else if (optlen == sizeof(struct sctp_assoc_value)) { 3127 } else if (optlen == sizeof(struct sctp_assoc_value)) {
@@ -3595,7 +3592,40 @@ out:
3595/* The SCTP ioctl handler. */ 3592/* The SCTP ioctl handler. */
3596SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) 3593SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
3597{ 3594{
3598 return -ENOIOCTLCMD; 3595 int rc = -ENOTCONN;
3596
3597 sctp_lock_sock(sk);
3598
3599 /*
3600 * SEQPACKET-style sockets in LISTENING state are valid, for
3601 * SCTP, so only discard TCP-style sockets in LISTENING state.
3602 */
3603 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
3604 goto out;
3605
3606 switch (cmd) {
3607 case SIOCINQ: {
3608 struct sk_buff *skb;
3609 unsigned int amount = 0;
3610
3611 skb = skb_peek(&sk->sk_receive_queue);
3612 if (skb != NULL) {
3613 /*
3614 * We will only return the amount of this packet since
3615 * that is all that will be read.
3616 */
3617 amount = skb->len;
3618 }
3619 rc = put_user(amount, (int __user *)arg);
3620 }
3621 break;
3622 default:
3623 rc = -ENOIOCTLCMD;
3624 break;
3625 }
3626out:
3627 sctp_release_sock(sk);
3628 return rc;
3599} 3629}
3600 3630
3601/* This is the function which gets called during socket creation to 3631/* This is the function which gets called during socket creation to
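
With the new sctp_ioctl(), SIOCINQ reports the length of the packet at the head of the receive queue rather than failing with -ENOIOCTLCMD. A user-space sketch of how an application might query it (assumes an already established one-to-one style SCTP socket descriptor):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>	/* SIOCINQ */

/* Returns the length of the next readable SCTP message, 0 if the queue
 * is empty, or -1 on error.  Only the packet at the head of the receive
 * queue is counted, since that is all the next read will return. */
static int next_message_size(int sctp_fd)
{
	int pending = 0;

	if (ioctl(sctp_fd, SIOCINQ, &pending) < 0) {
		perror("ioctl(SIOCINQ)");
		return -1;
	}
	return pending;
}
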
@@ -3854,7 +3884,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
3854 } 3884 }
3855 3885
3856out: 3886out:
3857 return (retval); 3887 return retval;
3858} 3888}
3859 3889
3860 3890
@@ -3910,7 +3940,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
3910 } 3940 }
3911 3941
3912out: 3942out:
3913 return (retval); 3943 return retval;
3914} 3944}
3915 3945
3916/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS) 3946/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS)
@@ -4281,9 +4311,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len,
4281 if (copy_from_user(&params, optval, len)) 4311 if (copy_from_user(&params, optval, len))
4282 return -EFAULT; 4312 return -EFAULT;
4283 } else if (len == sizeof(struct sctp_assoc_value)) { 4313 } else if (len == sizeof(struct sctp_assoc_value)) {
4284 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 4314 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
4285 "in delayed_ack socket option deprecated\n"); 4315 pr_warn("Use struct sctp_sack_info instead\n");
4286 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
4287 if (copy_from_user(&params, optval, len)) 4316 if (copy_from_user(&params, optval, len))
4288 return -EFAULT; 4317 return -EFAULT;
4289 } else 4318 } else
@@ -4929,10 +4958,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
4929 struct sctp_association *asoc; 4958 struct sctp_association *asoc;
4930 4959
4931 if (len == sizeof(int)) { 4960 if (len == sizeof(int)) {
4932 printk(KERN_WARNING 4961 pr_warn("Use of int in maxseg socket option deprecated\n");
4933 "SCTP: Use of int in maxseg socket option deprecated\n"); 4962 pr_warn("Use struct sctp_assoc_value instead\n");
4934 printk(KERN_WARNING
4935 "SCTP: Use struct sctp_assoc_value instead\n");
4936 params.assoc_id = 0; 4963 params.assoc_id = 0;
4937 } else if (len >= sizeof(struct sctp_assoc_value)) { 4964 } else if (len >= sizeof(struct sctp_assoc_value)) {
4938 len = sizeof(struct sctp_assoc_value); 4965 len = sizeof(struct sctp_assoc_value);
@@ -5023,10 +5050,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
5023 struct sctp_association *asoc; 5050 struct sctp_association *asoc;
5024 5051
5025 if (len == sizeof(int)) { 5052 if (len == sizeof(int)) {
5026 printk(KERN_WARNING 5053 pr_warn("Use of int in max_burst socket option deprecated\n");
5027 "SCTP: Use of int in max_burst socket option deprecated\n"); 5054 pr_warn("Use struct sctp_assoc_value instead\n");
5028 printk(KERN_WARNING
5029 "SCTP: Use struct sctp_assoc_value instead\n");
5030 params.assoc_id = 0; 5055 params.assoc_id = 0;
5031 } else if (len >= sizeof(struct sctp_assoc_value)) { 5056 } else if (len >= sizeof(struct sctp_assoc_value)) {
5032 len = sizeof(struct sctp_assoc_value); 5057 len = sizeof(struct sctp_assoc_value);
@@ -5569,7 +5594,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum)
5569 /* Note: sk->sk_num gets filled in if ephemeral port request. */ 5594 /* Note: sk->sk_num gets filled in if ephemeral port request. */
5570 ret = sctp_get_port_local(sk, &addr); 5595 ret = sctp_get_port_local(sk, &addr);
5571 5596
5572 return (ret ? 1 : 0); 5597 return ret ? 1 : 0;
5573} 5598}
5574 5599
5575/* 5600/*
@@ -5586,8 +5611,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
5586 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); 5611 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
5587 if (IS_ERR(tfm)) { 5612 if (IS_ERR(tfm)) {
5588 if (net_ratelimit()) { 5613 if (net_ratelimit()) {
5589 printk(KERN_INFO 5614 pr_info("failed to load transform for %s: %ld\n",
5590 "SCTP: failed to load transform for %s: %ld\n",
5591 sctp_hmac_alg, PTR_ERR(tfm)); 5615 sctp_hmac_alg, PTR_ERR(tfm));
5592 } 5616 }
5593 return -ENOSYS; 5617 return -ENOSYS;
@@ -5716,13 +5740,12 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
5716 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 5740 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
5717 mask |= POLLERR; 5741 mask |= POLLERR;
5718 if (sk->sk_shutdown & RCV_SHUTDOWN) 5742 if (sk->sk_shutdown & RCV_SHUTDOWN)
5719 mask |= POLLRDHUP; 5743 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
5720 if (sk->sk_shutdown == SHUTDOWN_MASK) 5744 if (sk->sk_shutdown == SHUTDOWN_MASK)
5721 mask |= POLLHUP; 5745 mask |= POLLHUP;
5722 5746
5723 /* Is it readable? Reconsider this code with TCP-style support. */ 5747 /* Is it readable? Reconsider this code with TCP-style support. */
5724 if (!skb_queue_empty(&sk->sk_receive_queue) || 5748 if (!skb_queue_empty(&sk->sk_receive_queue))
5725 (sk->sk_shutdown & RCV_SHUTDOWN))
5726 mask |= POLLIN | POLLRDNORM; 5749 mask |= POLLIN | POLLRDNORM;
5727 5750
5728 /* The association is either gone or not ready. */ 5751 /* The association is either gone or not ready. */
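
The sctp_poll() change means a receiver blocked in poll() is now woken with POLLIN | POLLRDNORM (in addition to POLLRDHUP) once the receive direction has been shut down, so the following read observes EOF instead of poll() hanging. A hedged user-space sketch of a reader relying on that behaviour:

#define _GNU_SOURCE		/* for POLLRDHUP */
#include <poll.h>

/* Block until the SCTP socket is readable or the peer has shut down its
 * sending direction; either way the next recv() will not block. */
static int wait_readable_or_eof(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDHUP };

	if (poll(&pfd, 1, -1) < 0)
		return -1;
	return (pfd.revents & (POLLIN | POLLRDHUP)) ? 0 : -1;
}
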
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 132046cb82f..d3ae493d234 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/slab.h> 53#include <linux/slab.h>
52#include <linux/types.h> 54#include <linux/types.h>
53#include <linux/random.h> 55#include <linux/random.h>
@@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
244 struct dst_entry *dst; 246 struct dst_entry *dst;
245 247
246 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { 248 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
247 printk(KERN_WARNING "%s: Reported pmtu %d too low, " 249 pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
248 "using default minimum of %d\n", 250 __func__, pmtu,
249 __func__, pmtu, 251 SCTP_DEFAULT_MINSEGMENT);
250 SCTP_DEFAULT_MINSEGMENT);
251 /* Use default minimum segment size and disable 252 /* Use default minimum segment size and disable
252 * pmtu discovery on this transport. 253 * pmtu discovery on this transport.
253 */ 254 */
diff --git a/net/socket.c b/net/socket.c
index 2270b941bcc..717a5f1c879 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -535,14 +535,13 @@ void sock_release(struct socket *sock)
535} 535}
536EXPORT_SYMBOL(sock_release); 536EXPORT_SYMBOL(sock_release);
537 537
538int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 538int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
539 union skb_shared_tx *shtx)
540{ 539{
541 shtx->flags = 0; 540 *tx_flags = 0;
542 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 541 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
543 shtx->hardware = 1; 542 *tx_flags |= SKBTX_HW_TSTAMP;
544 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 543 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
545 shtx->software = 1; 544 *tx_flags |= SKBTX_SW_TSTAMP;
546 return 0; 545 return 0;
547} 546}
548EXPORT_SYMBOL(sock_tx_timestamp); 547EXPORT_SYMBOL(sock_tx_timestamp);
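
sock_tx_timestamp() now fills a plain flag byte (SKBTX_HW_TSTAMP / SKBTX_SW_TSTAMP) instead of the old union skb_shared_tx bitfields. Assuming the companion skbuff change in this series that turns skb_shared_info's tx_flags into a __u8, a caller on the transmit path would look roughly like:

#include <linux/skbuff.h>
#include <net/sock.h>

/* Sketch of a transmit path under the new interface: fetch the flag byte
 * derived from the socket's SOCK_TIMESTAMPING_TX_* options and stash it
 * in the skb's shared info. */
static int apply_tx_timestamping(struct sock *sk, struct sk_buff *skb)
{
	__u8 tx_flags;
	int err = sock_tx_timestamp(sk, &tx_flags);

	if (err)
		return err;
	skb_shinfo(skb)->tx_flags = tx_flags;
	return 0;
}
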
@@ -1919,7 +1918,8 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1919 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1918 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1920 * checking falls down on this. 1919 * checking falls down on this.
1921 */ 1920 */
1922 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1921 if (copy_from_user(ctl_buf,
1922 (void __user __force *)msg_sys.msg_control,
1923 ctl_len)) 1923 ctl_len))
1924 goto out_freectl; 1924 goto out_freectl;
1925 msg_sys.msg_control = ctl_buf; 1925 msg_sys.msg_control = ctl_buf;
@@ -3054,14 +3054,19 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
3054 char *optval, int *optlen) 3054 char *optval, int *optlen)
3055{ 3055{
3056 mm_segment_t oldfs = get_fs(); 3056 mm_segment_t oldfs = get_fs();
3057 char __user *uoptval;
3058 int __user *uoptlen;
3057 int err; 3059 int err;
3058 3060
3061 uoptval = (char __user __force *) optval;
3062 uoptlen = (int __user __force *) optlen;
3063
3059 set_fs(KERNEL_DS); 3064 set_fs(KERNEL_DS);
3060 if (level == SOL_SOCKET) 3065 if (level == SOL_SOCKET)
3061 err = sock_getsockopt(sock, level, optname, optval, optlen); 3066 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3062 else 3067 else
3063 err = sock->ops->getsockopt(sock, level, optname, optval, 3068 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3064 optlen); 3069 uoptlen);
3065 set_fs(oldfs); 3070 set_fs(oldfs);
3066 return err; 3071 return err;
3067} 3072}
@@ -3071,13 +3076,16 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
3071 char *optval, unsigned int optlen) 3076 char *optval, unsigned int optlen)
3072{ 3077{
3073 mm_segment_t oldfs = get_fs(); 3078 mm_segment_t oldfs = get_fs();
3079 char __user *uoptval;
3074 int err; 3080 int err;
3075 3081
3082 uoptval = (char __user __force *) optval;
3083
3076 set_fs(KERNEL_DS); 3084 set_fs(KERNEL_DS);
3077 if (level == SOL_SOCKET) 3085 if (level == SOL_SOCKET)
3078 err = sock_setsockopt(sock, level, optname, optval, optlen); 3086 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3079 else 3087 else
3080 err = sock->ops->setsockopt(sock, level, optname, optval, 3088 err = sock->ops->setsockopt(sock, level, optname, uoptval,
3081 optlen); 3089 optlen);
3082 set_fs(oldfs); 3090 set_fs(oldfs);
3083 return err; 3091 return err;
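
The __force casts only affect sparse: kernel_getsockopt() and kernel_setsockopt() hand kernel pointers to interfaces declared __user, which is legitimate under set_fs(KERNEL_DS), and the casts state that explicitly without changing the generated code. For reference, a typical in-kernel caller (option and value illustrative only):

#include <linux/net.h>
#include <linux/socket.h>
#include <net/sock.h>

/* The option value lives in kernel memory; kernel_setsockopt() performs
 * the set_fs(KERNEL_DS) switch before calling the regular,
 * __user-typed setsockopt path. */
static int enable_reuseaddr(struct socket *sock)
{
	int one = 1;

	return kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
				 (char *)&one, sizeof(one));
}
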
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index dcfc66bab2b..597c493392a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1049,7 +1049,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
1049out: 1049out:
1050 if (acred->machine_cred != gss_cred->gc_machine_cred) 1050 if (acred->machine_cred != gss_cred->gc_machine_cred)
1051 return 0; 1051 return 0;
1052 return (rc->cr_uid == acred->uid); 1052 return rc->cr_uid == acred->uid;
1053} 1053}
1054 1054
1055/* 1055/*
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index 310b78e9945..c586e92bcf7 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -76,19 +76,19 @@ static int
76der_length_size( int length) 76der_length_size( int length)
77{ 77{
78 if (length < (1<<7)) 78 if (length < (1<<7))
79 return(1); 79 return 1;
80 else if (length < (1<<8)) 80 else if (length < (1<<8))
81 return(2); 81 return 2;
82#if (SIZEOF_INT == 2) 82#if (SIZEOF_INT == 2)
83 else 83 else
84 return(3); 84 return 3;
85#else 85#else
86 else if (length < (1<<16)) 86 else if (length < (1<<16))
87 return(3); 87 return 3;
88 else if (length < (1<<24)) 88 else if (length < (1<<24))
89 return(4); 89 return 4;
90 else 90 else
91 return(5); 91 return 5;
92#endif 92#endif
93} 93}
94 94
@@ -121,14 +121,14 @@ der_read_length(unsigned char **buf, int *bufsize)
121 int ret; 121 int ret;
122 122
123 if (*bufsize < 1) 123 if (*bufsize < 1)
124 return(-1); 124 return -1;
125 sf = *(*buf)++; 125 sf = *(*buf)++;
126 (*bufsize)--; 126 (*bufsize)--;
127 if (sf & 0x80) { 127 if (sf & 0x80) {
128 if ((sf &= 0x7f) > ((*bufsize)-1)) 128 if ((sf &= 0x7f) > ((*bufsize)-1))
129 return(-1); 129 return -1;
130 if (sf > SIZEOF_INT) 130 if (sf > SIZEOF_INT)
131 return (-1); 131 return -1;
132 ret = 0; 132 ret = 0;
133 for (; sf; sf--) { 133 for (; sf; sf--) {
134 ret = (ret<<8) + (*(*buf)++); 134 ret = (ret<<8) + (*(*buf)++);
@@ -138,7 +138,7 @@ der_read_length(unsigned char **buf, int *bufsize)
138 ret = sf; 138 ret = sf;
139 } 139 }
140 140
141 return(ret); 141 return ret;
142} 142}
143 143
144/* returns the length of a token, given the mech oid and the body size */ 144/* returns the length of a token, given the mech oid and the body size */
@@ -148,7 +148,7 @@ g_token_size(struct xdr_netobj *mech, unsigned int body_size)
148{ 148{
149 /* set body_size to sequence contents size */ 149 /* set body_size to sequence contents size */
150 body_size += 2 + (int) mech->len; /* NEED overflow check */ 150 body_size += 2 + (int) mech->len; /* NEED overflow check */
151 return(1 + der_length_size(body_size) + body_size); 151 return 1 + der_length_size(body_size) + body_size;
152} 152}
153 153
154EXPORT_SYMBOL_GPL(g_token_size); 154EXPORT_SYMBOL_GPL(g_token_size);
@@ -186,27 +186,27 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
186 int ret = 0; 186 int ret = 0;
187 187
188 if ((toksize-=1) < 0) 188 if ((toksize-=1) < 0)
189 return(G_BAD_TOK_HEADER); 189 return G_BAD_TOK_HEADER;
190 if (*buf++ != 0x60) 190 if (*buf++ != 0x60)
191 return(G_BAD_TOK_HEADER); 191 return G_BAD_TOK_HEADER;
192 192
193 if ((seqsize = der_read_length(&buf, &toksize)) < 0) 193 if ((seqsize = der_read_length(&buf, &toksize)) < 0)
194 return(G_BAD_TOK_HEADER); 194 return G_BAD_TOK_HEADER;
195 195
196 if (seqsize != toksize) 196 if (seqsize != toksize)
197 return(G_BAD_TOK_HEADER); 197 return G_BAD_TOK_HEADER;
198 198
199 if ((toksize-=1) < 0) 199 if ((toksize-=1) < 0)
200 return(G_BAD_TOK_HEADER); 200 return G_BAD_TOK_HEADER;
201 if (*buf++ != 0x06) 201 if (*buf++ != 0x06)
202 return(G_BAD_TOK_HEADER); 202 return G_BAD_TOK_HEADER;
203 203
204 if ((toksize-=1) < 0) 204 if ((toksize-=1) < 0)
205 return(G_BAD_TOK_HEADER); 205 return G_BAD_TOK_HEADER;
206 toid.len = *buf++; 206 toid.len = *buf++;
207 207
208 if ((toksize-=toid.len) < 0) 208 if ((toksize-=toid.len) < 0)
209 return(G_BAD_TOK_HEADER); 209 return G_BAD_TOK_HEADER;
210 toid.data = buf; 210 toid.data = buf;
211 buf+=toid.len; 211 buf+=toid.len;
212 212
@@ -217,17 +217,17 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
217 to return G_BAD_TOK_HEADER if the token header is in fact bad */ 217 to return G_BAD_TOK_HEADER if the token header is in fact bad */
218 218
219 if ((toksize-=2) < 0) 219 if ((toksize-=2) < 0)
220 return(G_BAD_TOK_HEADER); 220 return G_BAD_TOK_HEADER;
221 221
222 if (ret) 222 if (ret)
223 return(ret); 223 return ret;
224 224
225 if (!ret) { 225 if (!ret) {
226 *buf_in = buf; 226 *buf_in = buf;
227 *body_size = toksize; 227 *body_size = toksize;
228 } 228 }
229 229
230 return(ret); 230 return ret;
231} 231}
232 232
233EXPORT_SYMBOL_GPL(g_verify_token_header); 233EXPORT_SYMBOL_GPL(g_verify_token_header);
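
der_read_length(), shown above with its return statements de-parenthesized, decodes an ASN.1 DER definite length: a first byte below 0x80 is the length itself, otherwise its low seven bits give how many big-endian length bytes follow. A user-space restatement of the same logic:

#include <stdio.h>

/* On entry *buf points at the length field and *bufsize is the number of
 * bytes still available; both are advanced/consumed.  Returns the decoded
 * length, or -1 on truncated or oversized input. */
static int der_length(const unsigned char **buf, int *bufsize)
{
	unsigned char sf;
	int ret = 0;

	if (*bufsize < 1)
		return -1;
	sf = *(*buf)++;
	(*bufsize)--;
	if (!(sf & 0x80))
		return sf;		/* short form: length < 128 */

	sf &= 0x7f;			/* long form: sf = number of length bytes */
	if (sf > *bufsize - 1 || sf > (int)sizeof(int))
		return -1;
	while (sf--) {
		ret = (ret << 8) + *(*buf)++;
		(*bufsize)--;
	}
	return ret;
}

int main(void)
{
	const unsigned char enc[] = { 0x82, 0x01, 0x00 };	/* long form: 256 */
	const unsigned char *p = enc;
	int left = sizeof(enc);

	printf("decoded length: %d\n", der_length(&p, &left));	/* prints 256 */
	return 0;
}
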
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 415c013ba38..62ac90c62cb 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -162,5 +162,5 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
162 *seqnum = ((plain[0]) | 162 *seqnum = ((plain[0]) |
163 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); 163 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
164 164
165 return (0); 165 return 0;
166} 166}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 2689de39dc7..8b4061049d7 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -331,7 +331,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle)
331 *context_handle); 331 *context_handle);
332 332
333 if (!*context_handle) 333 if (!*context_handle)
334 return(GSS_S_NO_CONTEXT); 334 return GSS_S_NO_CONTEXT;
335 if ((*context_handle)->internal_ctx_id) 335 if ((*context_handle)->internal_ctx_id)
336 (*context_handle)->mech_type->gm_ops 336 (*context_handle)->mech_type->gm_ops
337 ->gss_delete_sec_context((*context_handle) 337 ->gss_delete_sec_context((*context_handle)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cace6049e4a..aa5dbda6608 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -376,7 +376,7 @@ int rpc_queue_empty(struct rpc_wait_queue *queue)
376 spin_lock_bh(&queue->lock); 376 spin_lock_bh(&queue->lock);
377 res = queue->qlen; 377 res = queue->qlen;
378 spin_unlock_bh(&queue->lock); 378 spin_unlock_bh(&queue->lock);
379 return (res == 0); 379 return res == 0;
380} 380}
381EXPORT_SYMBOL_GPL(rpc_queue_empty); 381EXPORT_SYMBOL_GPL(rpc_queue_empty);
382 382
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index c048543ffbe..2ddc351b3be 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -89,7 +89,7 @@ int tipc_addr_domain_valid(u32 addr)
89 89
90int tipc_addr_node_valid(u32 addr) 90int tipc_addr_node_valid(u32 addr)
91{ 91{
92 return (tipc_addr_domain_valid(addr) && tipc_node(addr)); 92 return tipc_addr_domain_valid(addr) && tipc_node(addr);
93} 93}
94 94
95int tipc_in_scope(u32 domain, u32 addr) 95int tipc_in_scope(u32 domain, u32 addr)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a008c668930..ecfaac10d0b 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -143,6 +143,19 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
143} 143}
144 144
145 145
146static void bclink_set_last_sent(void)
147{
148 if (bcl->next_out)
149 bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1);
150 else
151 bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
152}
153
154u32 tipc_bclink_get_last_sent(void)
155{
156 return bcl->fsm_msg_cnt;
157}
158
146/** 159/**
147 * bclink_set_gap - set gap according to contents of current deferred pkt queue 160 * bclink_set_gap - set gap according to contents of current deferred pkt queue
148 * 161 *
@@ -171,7 +184,7 @@ static void bclink_set_gap(struct tipc_node *n_ptr)
171 184
172static int bclink_ack_allowed(u32 n) 185static int bclink_ack_allowed(u32 n)
173{ 186{
174 return((n % TIPC_MIN_LINK_WIN) == tipc_own_tag); 187 return (n % TIPC_MIN_LINK_WIN) == tipc_own_tag;
175} 188}
176 189
177 190
@@ -237,8 +250,10 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
237 250
238 /* Try resolving broadcast link congestion, if necessary */ 251 /* Try resolving broadcast link congestion, if necessary */
239 252
240 if (unlikely(bcl->next_out)) 253 if (unlikely(bcl->next_out)) {
241 tipc_link_push_queue(bcl); 254 tipc_link_push_queue(bcl);
255 bclink_set_last_sent();
256 }
242 if (unlikely(released && !list_empty(&bcl->waiting_ports))) 257 if (unlikely(released && !list_empty(&bcl->waiting_ports)))
243 tipc_link_wakeup_ports(bcl, 0); 258 tipc_link_wakeup_ports(bcl, 0);
244 spin_unlock_bh(&bc_lock); 259 spin_unlock_bh(&bc_lock);
@@ -395,7 +410,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
395 if (unlikely(res == -ELINKCONG)) 410 if (unlikely(res == -ELINKCONG))
396 buf_discard(buf); 411 buf_discard(buf);
397 else 412 else
398 bcl->stats.sent_info++; 413 bclink_set_last_sent();
399 414
400 if (bcl->out_queue_size > bcl->stats.max_queue_sz) 415 if (bcl->out_queue_size > bcl->stats.max_queue_sz)
401 bcl->stats.max_queue_sz = bcl->out_queue_size; 416 bcl->stats.max_queue_sz = bcl->out_queue_size;
@@ -529,15 +544,6 @@ receive:
529 tipc_node_unlock(node); 544 tipc_node_unlock(node);
530} 545}
531 546
532u32 tipc_bclink_get_last_sent(void)
533{
534 u32 last_sent = mod(bcl->next_out_no - 1);
535
536 if (bcl->next_out)
537 last_sent = mod(buf_seqno(bcl->next_out) - 1);
538 return last_sent;
539}
540
541u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) 547u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
542{ 548{
543 return (n_ptr->bclink.supported && 549 return (n_ptr->bclink.supported &&
@@ -570,6 +576,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
570 msg = buf_msg(buf); 576 msg = buf_msg(buf);
571 msg_set_non_seq(msg, 1); 577 msg_set_non_seq(msg, 1);
572 msg_set_mc_netid(msg, tipc_net_id); 578 msg_set_mc_netid(msg, tipc_net_id);
579 bcl->stats.sent_info++;
573 } 580 }
574 581
575 /* Send buffer over bearers until all targets reached */ 582 /* Send buffer over bearers until all targets reached */
@@ -609,11 +616,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
609 bcbearer->remains = bcbearer->remains_new; 616 bcbearer->remains = bcbearer->remains_new;
610 } 617 }
611 618
612 /* Unable to reach all targets */ 619 /*
620 * Unable to reach all targets (indicate success, since currently
621 * there isn't code in place to properly block & unblock the
622 * pseudo-bearer used by the broadcast link)
623 */
613 624
614 bcbearer->bearer.publ.blocked = 1; 625 return TIPC_OK;
615 bcl->stats.bearer_congs++;
616 return 1;
617} 626}
618 627
619/** 628/**
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 52ae17b2583..9c10c6b7c12 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -63,7 +63,7 @@ static int media_name_valid(const char *name)
63 len = strlen(name); 63 len = strlen(name);
64 if ((len + 1) > TIPC_MAX_MEDIA_NAME) 64 if ((len + 1) > TIPC_MAX_MEDIA_NAME)
65 return 0; 65 return 0;
66 return (strspn(name, tipc_alphabet) == len); 66 return strspn(name, tipc_alphabet) == len;
67} 67}
68 68
69/** 69/**
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 69646811798..466b861dab9 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -169,6 +169,7 @@ void tipc_core_stop(void)
169 tipc_nametbl_stop(); 169 tipc_nametbl_stop();
170 tipc_ref_table_stop(); 170 tipc_ref_table_stop();
171 tipc_socket_stop(); 171 tipc_socket_stop();
172 tipc_log_resize(0);
172} 173}
173 174
174/** 175/**
@@ -203,7 +204,9 @@ static int __init tipc_init(void)
203{ 204{
204 int res; 205 int res;
205 206
206 tipc_log_resize(CONFIG_TIPC_LOG); 207 if (tipc_log_resize(CONFIG_TIPC_LOG) != 0)
208 warn("Unable to create log buffer\n");
209
207 info("Activated (version " TIPC_MOD_VER 210 info("Activated (version " TIPC_MOD_VER
208 " compiled " __DATE__ " " __TIME__ ")\n"); 211 " compiled " __DATE__ " " __TIME__ ")\n");
209 212
@@ -230,7 +233,6 @@ static void __exit tipc_exit(void)
230 tipc_core_stop_net(); 233 tipc_core_stop_net();
231 tipc_core_stop(); 234 tipc_core_stop();
232 info("Deactivated\n"); 235 info("Deactivated\n");
233 tipc_log_resize(0);
234} 236}
235 237
236module_init(tipc_init); 238module_init(tipc_init);
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 1885a7edb0c..6569d45bfb9 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -134,7 +134,7 @@ void tipc_printbuf_reset(struct print_buf *pb)
134 134
135int tipc_printbuf_empty(struct print_buf *pb) 135int tipc_printbuf_empty(struct print_buf *pb)
136{ 136{
137 return (!pb->buf || (pb->crs == pb->buf)); 137 return !pb->buf || (pb->crs == pb->buf);
138} 138}
139 139
140/** 140/**
@@ -169,7 +169,7 @@ int tipc_printbuf_validate(struct print_buf *pb)
169 tipc_printf(pb, err); 169 tipc_printf(pb, err);
170 } 170 }
171 } 171 }
172 return (pb->crs - pb->buf + 1); 172 return pb->crs - pb->buf + 1;
173} 173}
174 174
175/** 175/**
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fc1fcf5e6b5..f28d1ae9312 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -203,6 +203,14 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
203 return; 203 return;
204 } 204 }
205 spin_lock_bh(&n_ptr->lock); 205 spin_lock_bh(&n_ptr->lock);
206
207 /* Don't talk to neighbor during cleanup after last session */
208
209 if (n_ptr->cleanup_required) {
210 spin_unlock_bh(&n_ptr->lock);
211 return;
212 }
213
206 link = n_ptr->links[b_ptr->identity]; 214 link = n_ptr->links[b_ptr->identity];
207 if (!link) { 215 if (!link) {
208 dbg("creating link\n"); 216 dbg("creating link\n");
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 6230d16020c..6e988ba485f 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -72,17 +72,26 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
72{ 72{
73 struct sk_buff *clone; 73 struct sk_buff *clone;
74 struct net_device *dev; 74 struct net_device *dev;
75 int delta;
75 76
76 clone = skb_clone(buf, GFP_ATOMIC); 77 clone = skb_clone(buf, GFP_ATOMIC);
77 if (clone) { 78 if (!clone)
78 skb_reset_network_header(clone); 79 return 0;
79 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; 80
80 clone->dev = dev; 81 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
81 dev_hard_header(clone, dev, ETH_P_TIPC, 82 delta = dev->hard_header_len - skb_headroom(buf);
82 &dest->dev_addr.eth_addr, 83
83 dev->dev_addr, clone->len); 84 if ((delta > 0) &&
84 dev_queue_xmit(clone); 85 pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
86 kfree_skb(clone);
87 return 0;
85 } 88 }
89
90 skb_reset_network_header(clone);
91 clone->dev = dev;
92 dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr,
93 dev->dev_addr, clone->len);
94 dev_queue_xmit(clone);
86 return 0; 95 return 0;
87} 96}
88 97
@@ -92,15 +101,12 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
92 * Accept only packets explicitly sent to this node, or broadcast packets; 101 * Accept only packets explicitly sent to this node, or broadcast packets;
93 * ignores packets sent using Ethernet multicast, and traffic sent to other 102 * ignores packets sent using Ethernet multicast, and traffic sent to other
94 * nodes (which can happen if interface is running in promiscuous mode). 103 * nodes (which can happen if interface is running in promiscuous mode).
95 * Routine truncates any Ethernet padding/CRC appended to the message,
96 * and ensures message size matches actual length
97 */ 104 */
98 105
99static int recv_msg(struct sk_buff *buf, struct net_device *dev, 106static int recv_msg(struct sk_buff *buf, struct net_device *dev,
100 struct packet_type *pt, struct net_device *orig_dev) 107 struct packet_type *pt, struct net_device *orig_dev)
101{ 108{
102 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; 109 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
103 u32 size;
104 110
105 if (!net_eq(dev_net(dev), &init_net)) { 111 if (!net_eq(dev_net(dev), &init_net)) {
106 kfree_skb(buf); 112 kfree_skb(buf);
@@ -109,13 +115,9 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
109 115
110 if (likely(eb_ptr->bearer)) { 116 if (likely(eb_ptr->bearer)) {
111 if (likely(buf->pkt_type <= PACKET_BROADCAST)) { 117 if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
112 size = msg_size((struct tipc_msg *)buf->data); 118 buf->next = NULL;
113 skb_trim(buf, size); 119 tipc_recv_msg(buf, eb_ptr->bearer);
114 if (likely(buf->len == size)) { 120 return 0;
115 buf->next = NULL;
116 tipc_recv_msg(buf, eb_ptr->bearer);
117 return 0;
118 }
119 } 121 }
120 } 122 }
121 kfree_skb(buf); 123 kfree_skb(buf);
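
The reworked send_msg() grows the cloned buffer's headroom when the outgoing device needs more room for its link-layer header than the skb currently has, before dev_hard_header() writes into it. The general pattern, sketched in isolation:

#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Expand the skb's headroom when the outgoing device's hard header would
 * not fit in front of the current data. */
static int ensure_hard_header_room(struct sk_buff *skb, struct net_device *dev)
{
	int delta = dev->hard_header_len - skb_headroom(skb);

	if (delta > 0 &&
	    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
		return -ENOMEM;
	return 0;
}
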
@@ -133,6 +135,16 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
133 struct eth_bearer *eb_ptr = &eth_bearers[0]; 135 struct eth_bearer *eb_ptr = &eth_bearers[0];
134 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; 136 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
135 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; 137 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
138 int pending_dev = 0;
139
140 /* Find unused Ethernet bearer structure */
141
142 while (eb_ptr->dev) {
143 if (!eb_ptr->bearer)
144 pending_dev++;
145 if (++eb_ptr == stop)
146 return pending_dev ? -EAGAIN : -EDQUOT;
147 }
136 148
137 /* Find device with specified name */ 149 /* Find device with specified name */
138 150
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a3616b99529..b8cf1e9d0b8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -239,13 +239,13 @@ int tipc_link_is_up(struct link *l_ptr)
239{ 239{
240 if (!l_ptr) 240 if (!l_ptr)
241 return 0; 241 return 0;
242 return (link_working_working(l_ptr) || link_working_unknown(l_ptr)); 242 return link_working_working(l_ptr) || link_working_unknown(l_ptr);
243} 243}
244 244
245int tipc_link_is_active(struct link *l_ptr) 245int tipc_link_is_active(struct link *l_ptr)
246{ 246{
247 return ((l_ptr->owner->active_links[0] == l_ptr) || 247 return (l_ptr->owner->active_links[0] == l_ptr) ||
248 (l_ptr->owner->active_links[1] == l_ptr)); 248 (l_ptr->owner->active_links[1] == l_ptr);
249} 249}
250 250
251/** 251/**
@@ -1802,6 +1802,15 @@ static int link_recv_buf_validate(struct sk_buff *buf)
1802 return pskb_may_pull(buf, hdr_size); 1802 return pskb_may_pull(buf, hdr_size);
1803} 1803}
1804 1804
1805/**
1806 * tipc_recv_msg - process TIPC messages arriving from off-node
1807 * @head: pointer to message buffer chain
1808 * @tb_ptr: pointer to bearer message arrived on
1809 *
1810 * Invoked with no locks held. Bearer pointer must point to a valid bearer
1811 * structure (i.e. cannot be NULL), but bearer can be inactive.
1812 */
1813
1805void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) 1814void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1806{ 1815{
1807 read_lock_bh(&tipc_net_lock); 1816 read_lock_bh(&tipc_net_lock);
@@ -1819,6 +1828,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1819 1828
1820 head = head->next; 1829 head = head->next;
1821 1830
1831 /* Ensure bearer is still enabled */
1832
1833 if (unlikely(!b_ptr->active))
1834 goto cont;
1835
1822 /* Ensure message is well-formed */ 1836 /* Ensure message is well-formed */
1823 1837
1824 if (unlikely(!link_recv_buf_validate(buf))) 1838 if (unlikely(!link_recv_buf_validate(buf)))
@@ -1855,13 +1869,22 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1855 goto cont; 1869 goto cont;
1856 } 1870 }
1857 1871
1858 /* Locate unicast link endpoint that should handle message */ 1872 /* Locate neighboring node that sent message */
1859 1873
1860 n_ptr = tipc_node_find(msg_prevnode(msg)); 1874 n_ptr = tipc_node_find(msg_prevnode(msg));
1861 if (unlikely(!n_ptr)) 1875 if (unlikely(!n_ptr))
1862 goto cont; 1876 goto cont;
1863 tipc_node_lock(n_ptr); 1877 tipc_node_lock(n_ptr);
1864 1878
1879 /* Don't talk to neighbor during cleanup after last session */
1880
1881 if (n_ptr->cleanup_required) {
1882 tipc_node_unlock(n_ptr);
1883 goto cont;
1884 }
1885
1886 /* Locate unicast link endpoint that should handle message */
1887
1865 l_ptr = n_ptr->links[b_ptr->identity]; 1888 l_ptr = n_ptr->links[b_ptr->identity];
1866 if (unlikely(!l_ptr)) { 1889 if (unlikely(!l_ptr)) {
1867 tipc_node_unlock(n_ptr); 1890 tipc_node_unlock(n_ptr);
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 2e5385c47d3..26151d30589 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -279,12 +279,12 @@ static inline int between(u32 lower, u32 upper, u32 n)
279 279
280static inline int less_eq(u32 left, u32 right) 280static inline int less_eq(u32 left, u32 right)
281{ 281{
282 return (mod(right - left) < 32768u); 282 return mod(right - left) < 32768u;
283} 283}
284 284
285static inline int less(u32 left, u32 right) 285static inline int less(u32 left, u32 right)
286{ 286{
287 return (less_eq(left, right) && (mod(right) != mod(left))); 287 return less_eq(left, right) && (mod(right) != mod(left));
288} 288}
289 289
290static inline u32 lesser(u32 left, u32 right) 290static inline u32 lesser(u32 left, u32 right)
@@ -299,32 +299,32 @@ static inline u32 lesser(u32 left, u32 right)
299 299
300static inline int link_working_working(struct link *l_ptr) 300static inline int link_working_working(struct link *l_ptr)
301{ 301{
302 return (l_ptr->state == WORKING_WORKING); 302 return l_ptr->state == WORKING_WORKING;
303} 303}
304 304
305static inline int link_working_unknown(struct link *l_ptr) 305static inline int link_working_unknown(struct link *l_ptr)
306{ 306{
307 return (l_ptr->state == WORKING_UNKNOWN); 307 return l_ptr->state == WORKING_UNKNOWN;
308} 308}
309 309
310static inline int link_reset_unknown(struct link *l_ptr) 310static inline int link_reset_unknown(struct link *l_ptr)
311{ 311{
312 return (l_ptr->state == RESET_UNKNOWN); 312 return l_ptr->state == RESET_UNKNOWN;
313} 313}
314 314
315static inline int link_reset_reset(struct link *l_ptr) 315static inline int link_reset_reset(struct link *l_ptr)
316{ 316{
317 return (l_ptr->state == RESET_RESET); 317 return l_ptr->state == RESET_RESET;
318} 318}
319 319
320static inline int link_blocked(struct link *l_ptr) 320static inline int link_blocked(struct link *l_ptr)
321{ 321{
322 return (l_ptr->exp_msg_count || l_ptr->blocked); 322 return l_ptr->exp_msg_count || l_ptr->blocked;
323} 323}
324 324
325static inline int link_congested(struct link *l_ptr) 325static inline int link_congested(struct link *l_ptr)
326{ 326{
327 return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]); 327 return l_ptr->out_queue_size >= l_ptr->queue_limit[0];
328} 328}
329 329
330#endif 330#endif
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 995d2da35b0..031aad18efc 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -104,7 +104,7 @@ static inline u32 msg_user(struct tipc_msg *m)
104 104
105static inline u32 msg_isdata(struct tipc_msg *m) 105static inline u32 msg_isdata(struct tipc_msg *m)
106{ 106{
107 return (msg_user(m) <= TIPC_CRITICAL_IMPORTANCE); 107 return msg_user(m) <= TIPC_CRITICAL_IMPORTANCE;
108} 108}
109 109
110static inline void msg_set_user(struct tipc_msg *m, u32 n) 110static inline void msg_set_user(struct tipc_msg *m, u32 n)
@@ -289,7 +289,7 @@ static inline void msg_set_destnode(struct tipc_msg *m, u32 a)
289 289
290static inline int msg_is_dest(struct tipc_msg *m, u32 d) 290static inline int msg_is_dest(struct tipc_msg *m, u32 d)
291{ 291{
292 return(msg_short(m) || (msg_destnode(m) == d)); 292 return msg_short(m) || (msg_destnode(m) == d);
293} 293}
294 294
295static inline u32 msg_routed(struct tipc_msg *m) 295static inline u32 msg_routed(struct tipc_msg *m)
@@ -632,7 +632,7 @@ static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n)
632 632
633static inline u32 msg_max_pkt(struct tipc_msg *m) 633static inline u32 msg_max_pkt(struct tipc_msg *m)
634{ 634{
635 return (msg_bits(m, 9, 16, 0xffff) * 4); 635 return msg_bits(m, 9, 16, 0xffff) * 4;
636} 636}
637 637
638static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n) 638static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n)
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 8ba79620db3..9ca4b068923 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -116,7 +116,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock);
116 116
117static int hash(int x) 117static int hash(int x)
118{ 118{
119 return(x & (tipc_nametbl_size - 1)); 119 return x & (tipc_nametbl_size - 1);
120} 120}
121 121
122/** 122/**
@@ -613,8 +613,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
613} 613}
614 614
615/* 615/*
616 * tipc_nametbl_translate(): Translate tipc_name -> tipc_portid. 616 * tipc_nametbl_translate - translate name to port id
617 * Very time-critical.
618 * 617 *
619 * Note: on entry 'destnode' is the search domain used during translation; 618 * Note: on entry 'destnode' is the search domain used during translation;
620 * on exit it passes back the node address of the matching port (if any) 619 * on exit it passes back the node address of the matching port (if any)
@@ -685,7 +684,6 @@ found:
685 } 684 }
686 spin_unlock_bh(&seq->lock); 685 spin_unlock_bh(&seq->lock);
687not_found: 686not_found:
688 *destnode = 0;
689 read_unlock_bh(&tipc_nametbl_lock); 687 read_unlock_bh(&tipc_nametbl_lock);
690 return 0; 688 return 0;
691} 689}
@@ -877,7 +875,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
877 u32 index) 875 u32 index)
878{ 876{
879 char portIdStr[27]; 877 char portIdStr[27];
880 char *scopeStr; 878 const char *scope_str[] = {"", " zone", " cluster", " node"};
881 struct publication *publ = sseq->zone_list; 879 struct publication *publ = sseq->zone_list;
882 880
883 tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper); 881 tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper);
@@ -893,15 +891,8 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
893 tipc_node(publ->node), publ->ref); 891 tipc_node(publ->node), publ->ref);
894 tipc_printf(buf, "%-26s ", portIdStr); 892 tipc_printf(buf, "%-26s ", portIdStr);
895 if (depth > 3) { 893 if (depth > 3) {
896 if (publ->node != tipc_own_addr) 894 tipc_printf(buf, "%-10u %s", publ->key,
897 scopeStr = ""; 895 scope_str[publ->scope]);
898 else if (publ->scope == TIPC_NODE_SCOPE)
899 scopeStr = "node";
900 else if (publ->scope == TIPC_CLUSTER_SCOPE)
901 scopeStr = "cluster";
902 else
903 scopeStr = "zone";
904 tipc_printf(buf, "%-10u %s", publ->key, scopeStr);
905 } 896 }
906 897
907 publ = publ->zone_list_next; 898 publ = publ->zone_list_next;
@@ -951,24 +942,19 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
951 942
952static void nametbl_header(struct print_buf *buf, u32 depth) 943static void nametbl_header(struct print_buf *buf, u32 depth)
953{ 944{
954 tipc_printf(buf, "Type "); 945 const char *header[] = {
955 946 "Type ",
956 if (depth > 1) 947 "Lower Upper ",
957 tipc_printf(buf, "Lower Upper "); 948 "Port Identity ",
958 if (depth > 2) 949 "Publication Scope"
959 tipc_printf(buf, "Port Identity "); 950 };
960 if (depth > 3) 951
961 tipc_printf(buf, "Publication"); 952 int i;
962 953
963 tipc_printf(buf, "\n-----------"); 954 if (depth > 4)
964 955 depth = 4;
965 if (depth > 1) 956 for (i = 0; i < depth; i++)
966 tipc_printf(buf, "--------------------- "); 957 tipc_printf(buf, header[i]);
967 if (depth > 2)
968 tipc_printf(buf, "-------------------------- ");
969 if (depth > 3)
970 tipc_printf(buf, "------------------");
971
972 tipc_printf(buf, "\n"); 958 tipc_printf(buf, "\n");
973} 959}
974 960
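The rewritten nametbl_header() above replaces a ladder of depth checks with a lookup table indexed by depth. A small standalone re-creation of that idiom, with plain printf standing in for tipc_printf and the header strings copied from the hunk:

#include <stdio.h>

static void print_header(unsigned int depth)
{
    static const char *header[] = {
        "Type       ",
        "Lower      Upper      ",
        "Port Identity              ",
        "Publication Scope"
    };
    unsigned int i;

    if (depth > 4)
        depth = 4;                  /* clamp, exactly as the hunk does */
    for (i = 0; i < depth; i++)
        fputs(header[i], stdout);
    putchar('\n');
}

int main(void)
{
    print_header(2);                /* Type and Lower/Upper columns only */
    print_header(9);                /* clamped to all four columns */
    return 0;
}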
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f61b7694138..7e05af47a19 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -248,6 +248,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
248 248
249 /* Handle message for another node */ 249 /* Handle message for another node */
250 msg_dbg(msg, "NET>SEND>: "); 250 msg_dbg(msg, "NET>SEND>: ");
251 skb_trim(buf, msg_size(msg));
251 tipc_link_send(buf, dnode, msg_link_selector(msg)); 252 tipc_link_send(buf, dnode, msg_link_selector(msg));
252} 253}
253 254
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b634942caba..7c49cd056df 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -237,23 +237,22 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
237 237
238int tipc_node_has_active_links(struct tipc_node *n_ptr) 238int tipc_node_has_active_links(struct tipc_node *n_ptr)
239{ 239{
240 return (n_ptr && 240 return n_ptr->active_links[0] != NULL;
241 ((n_ptr->active_links[0]) || (n_ptr->active_links[1])));
242} 241}
243 242
244int tipc_node_has_redundant_links(struct tipc_node *n_ptr) 243int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
245{ 244{
246 return (n_ptr->working_links > 1); 245 return n_ptr->working_links > 1;
247} 246}
248 247
249static int tipc_node_has_active_routes(struct tipc_node *n_ptr) 248static int tipc_node_has_active_routes(struct tipc_node *n_ptr)
250{ 249{
251 return (n_ptr && (n_ptr->last_router >= 0)); 250 return n_ptr && (n_ptr->last_router >= 0);
252} 251}
253 252
254int tipc_node_is_up(struct tipc_node *n_ptr) 253int tipc_node_is_up(struct tipc_node *n_ptr)
255{ 254{
256 return (tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr)); 255 return tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr);
257} 256}
258 257
259struct tipc_node *tipc_node_attach_link(struct link *l_ptr) 258struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
@@ -384,6 +383,20 @@ static void node_established_contact(struct tipc_node *n_ptr)
384 tipc_highest_allowed_slave); 383 tipc_highest_allowed_slave);
385} 384}
386 385
386static void node_cleanup_finished(unsigned long node_addr)
387{
388 struct tipc_node *n_ptr;
389
390 read_lock_bh(&tipc_net_lock);
391 n_ptr = tipc_node_find(node_addr);
392 if (n_ptr) {
393 tipc_node_lock(n_ptr);
394 n_ptr->cleanup_required = 0;
395 tipc_node_unlock(n_ptr);
396 }
397 read_unlock_bh(&tipc_net_lock);
398}
399
387static void node_lost_contact(struct tipc_node *n_ptr) 400static void node_lost_contact(struct tipc_node *n_ptr)
388{ 401{
389 struct cluster *c_ptr; 402 struct cluster *c_ptr;
@@ -458,6 +471,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
458 tipc_k_signal((Handler)ns->handle_node_down, 471 tipc_k_signal((Handler)ns->handle_node_down,
459 (unsigned long)ns->usr_handle); 472 (unsigned long)ns->usr_handle);
460 } 473 }
474
475 /* Prevent re-contact with node until all cleanup is done */
476
477 n_ptr->cleanup_required = 1;
478 tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
461} 479}
462 480
463/** 481/**
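The new cleanup_required/node_cleanup_finished pair above blocks re-contact with a lost node until a deferred handler has run and cleared the flag. A simplified user-space sketch of the same pattern, with a pthread mutex and a worker thread standing in for tipc_node_lock() and tipc_k_signal(); the names and threading model are illustrative, not the kernel's (build with -pthread):

#include <pthread.h>
#include <stdio.h>

struct node {
    pthread_mutex_t lock;
    int cleanup_required;
};

static void *cleanup_finished(void *arg)       /* plays the deferred handler */
{
    struct node *n = arg;

    pthread_mutex_lock(&n->lock);
    n->cleanup_required = 0;                   /* re-contact allowed again */
    pthread_mutex_unlock(&n->lock);
    return NULL;
}

static void lost_contact(struct node *n, pthread_t *worker)
{
    pthread_mutex_lock(&n->lock);
    n->cleanup_required = 1;                   /* block re-contact for now */
    pthread_mutex_unlock(&n->lock);
    pthread_create(worker, NULL, cleanup_finished, n);   /* defer the reset */
}

int main(void)
{
    struct node n = { PTHREAD_MUTEX_INITIALIZER, 0 };
    pthread_t worker;

    lost_contact(&n, &worker);
    pthread_join(worker, NULL);
    printf("cleanup_required=%d\n", n.cleanup_required);  /* 0 once the handler ran */
    return 0;
}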
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 6f990da5d14..45f3db3a595 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -52,6 +52,7 @@
52 * @active_links: pointers to active links to node 52 * @active_links: pointers to active links to node
53 * @links: pointers to all links to node 53 * @links: pointers to all links to node
54 * @working_links: number of working links to node (both active and standby) 54 * @working_links: number of working links to node (both active and standby)
55 * @cleanup_required: non-zero if cleaning up after a prior loss of contact
55 * @link_cnt: number of links to node 56 * @link_cnt: number of links to node
56 * @permit_changeover: non-zero if node has redundant links to this system 57 * @permit_changeover: non-zero if node has redundant links to this system
57 * @routers: bitmap (used for multicluster communication) 58 * @routers: bitmap (used for multicluster communication)
@@ -78,6 +79,7 @@ struct tipc_node {
78 struct link *links[MAX_BEARERS]; 79 struct link *links[MAX_BEARERS];
79 int link_cnt; 80 int link_cnt;
80 int working_links; 81 int working_links;
82 int cleanup_required;
81 int permit_changeover; 83 int permit_changeover;
82 u32 routers[512/32]; 84 u32 routers[512/32];
83 int last_router; 85 int last_router;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 0737680e926..d760336f2ca 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -588,19 +588,10 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
588 if (!p_ptr) { 588 if (!p_ptr) {
589 err = TIPC_ERR_NO_PORT; 589 err = TIPC_ERR_NO_PORT;
590 } else if (p_ptr->publ.connected) { 590 } else if (p_ptr->publ.connected) {
591 if (port_peernode(p_ptr) != msg_orignode(msg)) 591 if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
592 (port_peerport(p_ptr) != msg_origport(msg))) {
592 err = TIPC_ERR_NO_PORT; 593 err = TIPC_ERR_NO_PORT;
593 if (port_peerport(p_ptr) != msg_origport(msg)) 594 } else if (msg_type(msg) == CONN_ACK) {
594 err = TIPC_ERR_NO_PORT;
595 if (!err && msg_routed(msg)) {
596 u32 seqno = msg_transp_seqno(msg);
597 u32 myno = ++p_ptr->last_in_seqno;
598 if (seqno != myno) {
599 err = TIPC_ERR_NO_PORT;
600 abort_buf = port_build_self_abort_msg(p_ptr, err);
601 }
602 }
603 if (msg_type(msg) == CONN_ACK) {
604 int wakeup = tipc_port_congested(p_ptr) && 595 int wakeup = tipc_port_congested(p_ptr) &&
605 p_ptr->publ.congested && 596 p_ptr->publ.congested &&
606 p_ptr->wakeup; 597 p_ptr->wakeup;
@@ -1473,7 +1464,7 @@ int tipc_forward2name(u32 ref,
1473 msg_set_destnode(msg, destnode); 1464 msg_set_destnode(msg, destnode);
1474 msg_set_destport(msg, destport); 1465 msg_set_destport(msg, destport);
1475 1466
1476 if (likely(destport || destnode)) { 1467 if (likely(destport)) {
1477 p_ptr->sent++; 1468 p_ptr->sent++;
1478 if (likely(destnode == tipc_own_addr)) 1469 if (likely(destnode == tipc_own_addr))
1479 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1470 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
@@ -1551,7 +1542,7 @@ int tipc_forward_buf2name(u32 ref,
1551 skb_push(buf, LONG_H_SIZE); 1542 skb_push(buf, LONG_H_SIZE);
1552 skb_copy_to_linear_data(buf, msg, LONG_H_SIZE); 1543 skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
1553 msg_dbg(buf_msg(buf),"PREP:"); 1544 msg_dbg(buf_msg(buf),"PREP:");
1554 if (likely(destport || destnode)) { 1545 if (likely(destport)) {
1555 p_ptr->sent++; 1546 p_ptr->sent++;
1556 if (destnode == tipc_own_addr) 1547 if (destnode == tipc_own_addr)
1557 return tipc_port_recv_msg(buf); 1548 return tipc_port_recv_msg(buf);
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 8d1652aab29..e74bd956373 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -157,7 +157,7 @@ static inline u32 tipc_peer_node(struct port *p_ptr)
157 157
158static inline int tipc_port_congested(struct port *p_ptr) 158static inline int tipc_port_congested(struct port *p_ptr)
159{ 159{
160 return((p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2)); 160 return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
161} 161}
162 162
163/** 163/**
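tipc_port_congested() above declares a port congested once the unacknowledged backlog reaches twice the flow-control window. A tiny standalone check of that arithmetic; the window value here is only an assumption for illustration, the real TIPC_FLOW_CONTROL_WIN is defined in port.h:

#include <stdio.h>

#define FLOW_CONTROL_WIN 512        /* assumed value, for illustration only */

static int port_congested(unsigned int sent, unsigned int acked)
{
    /* congested once unacked messages reach twice the window */
    return (sent - acked) >= (FLOW_CONTROL_WIN * 2);
}

int main(void)
{
    printf("%d\n", port_congested(1024, 0));    /* 1: 1024 unacked */
    printf("%d\n", port_congested(1024, 200));  /* 0: 824 unacked  */
    return 0;
}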
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 66e889ba48f..33217fc3d69 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -64,6 +64,7 @@ struct tipc_sock {
64 struct sock sk; 64 struct sock sk;
65 struct tipc_port *p; 65 struct tipc_port *p;
66 struct tipc_portid peer_name; 66 struct tipc_portid peer_name;
67 long conn_timeout;
67}; 68};
68 69
69#define tipc_sk(sk) ((struct tipc_sock *)(sk)) 70#define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -240,9 +241,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
240 sock->state = state; 241 sock->state = state;
241 242
242 sock_init_data(sock, sk); 243 sock_init_data(sock, sk);
243 sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
244 sk->sk_backlog_rcv = backlog_rcv; 244 sk->sk_backlog_rcv = backlog_rcv;
245 tipc_sk(sk)->p = tp_ptr; 245 tipc_sk(sk)->p = tp_ptr;
246 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
246 247
247 spin_unlock_bh(tp_ptr->lock); 248 spin_unlock_bh(tp_ptr->lock);
248 249
@@ -429,36 +430,55 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
429 * to handle any preventable race conditions, so TIPC will do the same ... 430 * to handle any preventable race conditions, so TIPC will do the same ...
430 * 431 *
431 * TIPC sets the returned events as follows: 432 * TIPC sets the returned events as follows:
432 * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty 433 *
433 * or if a connection-oriented socket is does not have an active connection 434 * socket state flags set
434 * (i.e. a read operation will not block). 435 * ------------ ---------
435 * b) POLLOUT is set except when a socket's connection has been terminated 436 * unconnected no read flags
436 * (i.e. a write operation will not block). 437 * no write flags
437 * c) POLLHUP is set when a socket's connection has been terminated. 438 *
438 * 439 * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
439 * IMPORTANT: The fact that a read or write operation will not block does NOT 440 * no write flags
440 * imply that the operation will succeed! 441 *
442 * connected POLLIN/POLLRDNORM if data in rx queue
443 * POLLOUT if port is not congested
444 *
445 * disconnecting POLLIN/POLLRDNORM/POLLHUP
446 * no write flags
447 *
448 * listening POLLIN if SYN in rx queue
449 * no write flags
450 *
451 * ready POLLIN/POLLRDNORM if data in rx queue
452 * [connectionless] POLLOUT (since port cannot be congested)
453 *
454 * IMPORTANT: The fact that a read or write operation is indicated does NOT
455 * imply that the operation will succeed, merely that it should be performed
456 * and will not block.
441 */ 457 */
442 458
443static unsigned int poll(struct file *file, struct socket *sock, 459static unsigned int poll(struct file *file, struct socket *sock,
444 poll_table *wait) 460 poll_table *wait)
445{ 461{
446 struct sock *sk = sock->sk; 462 struct sock *sk = sock->sk;
447 u32 mask; 463 u32 mask = 0;
448 464
449 poll_wait(file, sk_sleep(sk), wait); 465 poll_wait(file, sk_sleep(sk), wait);
450 466
451 if (!skb_queue_empty(&sk->sk_receive_queue) || 467 switch ((int)sock->state) {
452 (sock->state == SS_UNCONNECTED) || 468 case SS_READY:
453 (sock->state == SS_DISCONNECTING)) 469 case SS_CONNECTED:
454 mask = (POLLRDNORM | POLLIN); 470 if (!tipc_sk_port(sk)->congested)
455 else 471 mask |= POLLOUT;
456 mask = 0; 472 /* fall thru' */
457 473 case SS_CONNECTING:
458 if (sock->state == SS_DISCONNECTING) 474 case SS_LISTENING:
459 mask |= POLLHUP; 475 if (!skb_queue_empty(&sk->sk_receive_queue))
460 else 476 mask |= (POLLIN | POLLRDNORM);
461 mask |= POLLOUT; 477 break;
478 case SS_DISCONNECTING:
479 mask = (POLLIN | POLLRDNORM | POLLHUP);
480 break;
481 }
462 482
463 return mask; 483 return mask;
464} 484}
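The state table in the comment above is what user space observes through poll(2). A hedged sketch of reading those bits; it is demonstrated on an AF_UNIX socketpair so it runs anywhere, while the mask semantics described in the table are TIPC's for a connected stream socket:

#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>

static void report_poll_state(int fd)
{
    struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };

    if (poll(&pfd, 1, 0) < 0) {               /* non-blocking snapshot */
        perror("poll");
        return;
    }
    if (pfd.revents & POLLHUP)
        puts("connection terminated");
    if (pfd.revents & POLLIN)
        puts("receive queue has data");
    if (pfd.revents & POLLOUT)
        puts("writable (port not congested)");
}

int main(void)
{
    int sv[2];

    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
        perror("socketpair");
        return 1;
    }
    report_poll_state(sv[0]);                 /* expect: writable */
    send(sv[1], "x", 1, 0);
    report_poll_state(sv[0]);                 /* expect: data + writable */
    return 0;
}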
@@ -1026,9 +1046,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1026 struct sk_buff *buf; 1046 struct sk_buff *buf;
1027 struct tipc_msg *msg; 1047 struct tipc_msg *msg;
1028 unsigned int sz; 1048 unsigned int sz;
1029 int sz_to_copy; 1049 int sz_to_copy, target, needed;
1030 int sz_copied = 0; 1050 int sz_copied = 0;
1031 int needed;
1032 char __user *crs = m->msg_iov->iov_base; 1051 char __user *crs = m->msg_iov->iov_base;
1033 unsigned char *buf_crs; 1052 unsigned char *buf_crs;
1034 u32 err; 1053 u32 err;
@@ -1050,6 +1069,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1050 goto exit; 1069 goto exit;
1051 } 1070 }
1052 1071
1072 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1073
1053restart: 1074restart:
1054 1075
1055 /* Look for a message in receive queue; wait if necessary */ 1076 /* Look for a message in receive queue; wait if necessary */
@@ -1138,7 +1159,7 @@ restart:
1138 1159
1139 if ((sz_copied < buf_len) && /* didn't get all requested data */ 1160 if ((sz_copied < buf_len) && /* didn't get all requested data */
1140 (!skb_queue_empty(&sk->sk_receive_queue) || 1161 (!skb_queue_empty(&sk->sk_receive_queue) ||
1141 (flags & MSG_WAITALL)) && /* and more is ready or required */ 1162 (sz_copied < target)) && /* and more is ready or required */
1142 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ 1163 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
1143 (!err)) /* and haven't reached a FIN */ 1164 (!err)) /* and haven't reached a FIN */
1144 goto restart; 1165 goto restart;
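With the change above, the stream receive loop keeps restarting only until sock_rcvlowat() is satisfied, so MSG_WAITALL is bounded by SO_RCVLOWAT rather than by the full buffer length. A sketch of how that knob looks from user space; the socketpair here is AF_UNIX purely so the example runs, and the low-water behaviour being illustrated is the TIPC stream path:

#include <stdio.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Ask the kernel not to satisfy recv() until at least 'lowat' bytes are
 * queued; combined with MSG_WAITALL this is the 'target' the hunk above
 * computes via sock_rcvlowat(). */
static ssize_t recv_at_least(int fd, char *buf, size_t len, int lowat)
{
    if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat)) < 0)
        perror("SO_RCVLOWAT");                /* not fatal; default is 1 */
    return recv(fd, buf, len, MSG_WAITALL);
}

int main(void)
{
    int sv[2];
    char buf[16];
    ssize_t n;

    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
        return 1;
    send(sv[1], "hello", 5, 0);
    shutdown(sv[1], SHUT_WR);                 /* EOF ends the wait early */
    n = recv_at_least(sv[0], buf, sizeof(buf), 4);
    printf("got %zd bytes\n", n);             /* 5 */
    return 0;
}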
@@ -1174,7 +1195,7 @@ static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
1174 if (msg_connected(msg)) 1195 if (msg_connected(msg))
1175 threshold *= 4; 1196 threshold *= 4;
1176 1197
1177 return (queue_size >= threshold); 1198 return queue_size >= threshold;
1178} 1199}
1179 1200
1180/** 1201/**
@@ -1365,6 +1386,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1365 struct msghdr m = {NULL,}; 1386 struct msghdr m = {NULL,};
1366 struct sk_buff *buf; 1387 struct sk_buff *buf;
1367 struct tipc_msg *msg; 1388 struct tipc_msg *msg;
1389 long timeout;
1368 int res; 1390 int res;
1369 1391
1370 lock_sock(sk); 1392 lock_sock(sk);
@@ -1379,7 +1401,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1379 /* For now, TIPC does not support the non-blocking form of connect() */ 1401 /* For now, TIPC does not support the non-blocking form of connect() */
1380 1402
1381 if (flags & O_NONBLOCK) { 1403 if (flags & O_NONBLOCK) {
1382 res = -EWOULDBLOCK; 1404 res = -EOPNOTSUPP;
1383 goto exit; 1405 goto exit;
1384 } 1406 }
1385 1407
@@ -1425,11 +1447,12 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1425 1447
1426 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ 1448 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1427 1449
1450 timeout = tipc_sk(sk)->conn_timeout;
1428 release_sock(sk); 1451 release_sock(sk);
1429 res = wait_event_interruptible_timeout(*sk_sleep(sk), 1452 res = wait_event_interruptible_timeout(*sk_sleep(sk),
1430 (!skb_queue_empty(&sk->sk_receive_queue) || 1453 (!skb_queue_empty(&sk->sk_receive_queue) ||
1431 (sock->state != SS_CONNECTING)), 1454 (sock->state != SS_CONNECTING)),
1432 sk->sk_rcvtimeo); 1455 timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
1433 lock_sock(sk); 1456 lock_sock(sk);
1434 1457
1435 if (res > 0) { 1458 if (res > 0) {
@@ -1692,7 +1715,7 @@ static int setsockopt(struct socket *sock,
1692 res = tipc_set_portunreturnable(tport->ref, value); 1715 res = tipc_set_portunreturnable(tport->ref, value);
1693 break; 1716 break;
1694 case TIPC_CONN_TIMEOUT: 1717 case TIPC_CONN_TIMEOUT:
1695 sk->sk_rcvtimeo = msecs_to_jiffies(value); 1718 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value);
1696 /* no need to set "res", since already 0 at this point */ 1719 /* no need to set "res", since already 0 at this point */
1697 break; 1720 break;
1698 default: 1721 default:
@@ -1747,7 +1770,7 @@ static int getsockopt(struct socket *sock,
1747 res = tipc_portunreturnable(tport->ref, &value); 1770 res = tipc_portunreturnable(tport->ref, &value);
1748 break; 1771 break;
1749 case TIPC_CONN_TIMEOUT: 1772 case TIPC_CONN_TIMEOUT:
1750 value = jiffies_to_msecs(sk->sk_rcvtimeo); 1773 value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
1751 /* no need to set "res", since already 0 at this point */ 1774 /* no need to set "res", since already 0 at this point */
1752 break; 1775 break;
1753 case TIPC_NODE_RECVQ_DEPTH: 1776 case TIPC_NODE_RECVQ_DEPTH:
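Since the connect timeout now lives in its own conn_timeout field instead of sk_rcvtimeo, setting TIPC_CONN_TIMEOUT no longer disturbs receive timeouts. A hedged user-space sketch of setting it before connect(); it assumes <linux/tipc.h> provides the TIPC constants on this system, uses a hypothetical service name, and trims error handling:

#include <linux/tipc.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

#ifndef SOL_TIPC
#define SOL_TIPC 271                /* from linux/socket.h; assumed here */
#endif

int main(void)
{
    int fd = socket(AF_TIPC, SOCK_STREAM, 0);
    unsigned int timeout_ms = 5000;           /* wait at most 5 s for the ACK */
    struct sockaddr_tipc peer;

    if (fd < 0) {
        perror("socket");                     /* TIPC not available? */
        return 1;
    }
    setsockopt(fd, SOL_TIPC, TIPC_CONN_TIMEOUT, &timeout_ms, sizeof(timeout_ms));

    memset(&peer, 0, sizeof(peer));
    peer.family = AF_TIPC;
    peer.addrtype = TIPC_ADDR_NAME;           /* connect by service name */
    peer.addr.name.name.type = 1000;          /* hypothetical service type */
    peer.addr.name.name.instance = 1;
    if (connect(fd, (struct sockaddr *)&peer, sizeof(peer)) < 0)
        perror("connect");                    /* times out after ~5 s if unreachable */
    return 0;
}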
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ab6eab4c45e..1a5b9a6bd12 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -604,6 +604,6 @@ int tipc_ispublished(struct tipc_name const *name)
604{ 604{
605 u32 domain = 0; 605 u32 domain = 0;
606 606
607 return(tipc_nametbl_translate(name->type, name->instance,&domain) != 0); 607 return tipc_nametbl_translate(name->type, name->instance, &domain) != 0;
608} 608}
609 609
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0b39b2451ea..c586da3f4f1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2033,11 +2033,10 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
2033 if (sk->sk_shutdown == SHUTDOWN_MASK) 2033 if (sk->sk_shutdown == SHUTDOWN_MASK)
2034 mask |= POLLHUP; 2034 mask |= POLLHUP;
2035 if (sk->sk_shutdown & RCV_SHUTDOWN) 2035 if (sk->sk_shutdown & RCV_SHUTDOWN)
2036 mask |= POLLRDHUP; 2036 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2037 2037
2038 /* readable? */ 2038 /* readable? */
2039 if (!skb_queue_empty(&sk->sk_receive_queue) || 2039 if (!skb_queue_empty(&sk->sk_receive_queue))
2040 (sk->sk_shutdown & RCV_SHUTDOWN))
2041 mask |= POLLIN | POLLRDNORM; 2040 mask |= POLLIN | POLLRDNORM;
2042 2041
2043 /* Connection-based need to check for termination and startup */ 2042 /* Connection-based need to check for termination and startup */
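The unix_poll change above folds RCV_SHUTDOWN into the readable bits, so a peer's shutdown now shows up as POLLIN (and the following read() returns 0 for EOF) instead of only POLLRDHUP. A quick user-space check:

#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int sv[2];
    struct pollfd pfd;
    char c;

    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
        return 1;
    shutdown(sv[1], SHUT_WR);                 /* peer stops sending */

    pfd.fd = sv[0];
    pfd.events = POLLIN;
    poll(&pfd, 1, 0);
    printf("POLLIN set: %d\n", (pfd.revents & POLLIN) != 0);   /* 1 */
    printf("read returns %zd (EOF)\n", read(sv[0], &c, 1));    /* 0 */
    return 0;
}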
diff --git a/net/wireless/core.c b/net/wireless/core.c
index d6d046b9f6f..9c21ebf9780 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -253,11 +253,16 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
253 WARN_ON(err); 253 WARN_ON(err);
254 wdev->netdev->features |= NETIF_F_NETNS_LOCAL; 254 wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
255 } 255 }
256
257 return err;
256 } 258 }
257 259
258 wiphy_net_set(&rdev->wiphy, net); 260 wiphy_net_set(&rdev->wiphy, net);
259 261
260 return err; 262 err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev));
263 WARN_ON(err);
264
265 return 0;
261} 266}
262 267
263static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) 268static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
@@ -428,7 +433,7 @@ int wiphy_register(struct wiphy *wiphy)
428 433
429 /* sanity check ifmodes */ 434 /* sanity check ifmodes */
430 WARN_ON(!ifmodes); 435 WARN_ON(!ifmodes);
431 ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; 436 ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1;
432 if (WARN_ON(ifmodes != wiphy->interface_modes)) 437 if (WARN_ON(ifmodes != wiphy->interface_modes))
433 wiphy->interface_modes = ifmodes; 438 wiphy->interface_modes = ifmodes;
434 439
@@ -683,8 +688,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
683 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); 688 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work);
684 INIT_LIST_HEAD(&wdev->event_list); 689 INIT_LIST_HEAD(&wdev->event_list);
685 spin_lock_init(&wdev->event_lock); 690 spin_lock_init(&wdev->event_lock);
686 INIT_LIST_HEAD(&wdev->action_registrations); 691 INIT_LIST_HEAD(&wdev->mgmt_registrations);
687 spin_lock_init(&wdev->action_registrations_lock); 692 spin_lock_init(&wdev->mgmt_registrations_lock);
688 693
689 mutex_lock(&rdev->devlist_mtx); 694 mutex_lock(&rdev->devlist_mtx);
690 list_add_rcu(&wdev->list, &rdev->netdev_list); 695 list_add_rcu(&wdev->list, &rdev->netdev_list);
@@ -724,6 +729,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
724 dev->ethtool_ops = &cfg80211_ethtool_ops; 729 dev->ethtool_ops = &cfg80211_ethtool_ops;
725 730
726 if ((wdev->iftype == NL80211_IFTYPE_STATION || 731 if ((wdev->iftype == NL80211_IFTYPE_STATION ||
732 wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
727 wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) 733 wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
728 dev->priv_flags |= IFF_DONT_BRIDGE; 734 dev->priv_flags |= IFF_DONT_BRIDGE;
729 break; 735 break;
@@ -732,6 +738,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
732 case NL80211_IFTYPE_ADHOC: 738 case NL80211_IFTYPE_ADHOC:
733 cfg80211_leave_ibss(rdev, dev, true); 739 cfg80211_leave_ibss(rdev, dev, true);
734 break; 740 break;
741 case NL80211_IFTYPE_P2P_CLIENT:
735 case NL80211_IFTYPE_STATION: 742 case NL80211_IFTYPE_STATION:
736 wdev_lock(wdev); 743 wdev_lock(wdev);
737#ifdef CONFIG_CFG80211_WEXT 744#ifdef CONFIG_CFG80211_WEXT
@@ -804,7 +811,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
804 sysfs_remove_link(&dev->dev.kobj, "phy80211"); 811 sysfs_remove_link(&dev->dev.kobj, "phy80211");
805 list_del_rcu(&wdev->list); 812 list_del_rcu(&wdev->list);
806 rdev->devlist_generation++; 813 rdev->devlist_generation++;
807 cfg80211_mlme_purge_actions(wdev); 814 cfg80211_mlme_purge_registrations(wdev);
808#ifdef CONFIG_CFG80211_WEXT 815#ifdef CONFIG_CFG80211_WEXT
809 kfree(wdev->wext.keys); 816 kfree(wdev->wext.keys);
810#endif 817#endif
@@ -910,52 +917,3 @@ static void __exit cfg80211_exit(void)
910 destroy_workqueue(cfg80211_wq); 917 destroy_workqueue(cfg80211_wq);
911} 918}
912module_exit(cfg80211_exit); 919module_exit(cfg80211_exit);
913
914static int ___wiphy_printk(const char *level, const struct wiphy *wiphy,
915 struct va_format *vaf)
916{
917 if (!wiphy)
918 return printk("%s(NULL wiphy *): %pV", level, vaf);
919
920 return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf);
921}
922
923int __wiphy_printk(const char *level, const struct wiphy *wiphy,
924 const char *fmt, ...)
925{
926 struct va_format vaf;
927 va_list args;
928 int r;
929
930 va_start(args, fmt);
931
932 vaf.fmt = fmt;
933 vaf.va = &args;
934
935 r = ___wiphy_printk(level, wiphy, &vaf);
936 va_end(args);
937
938 return r;
939}
940EXPORT_SYMBOL(__wiphy_printk);
941
942#define define_wiphy_printk_level(func, kern_level) \
943int func(const struct wiphy *wiphy, const char *fmt, ...) \
944{ \
945 struct va_format vaf; \
946 va_list args; \
947 int r; \
948 \
949 va_start(args, fmt); \
950 \
951 vaf.fmt = fmt; \
952 vaf.va = &args; \
953 \
954 r = ___wiphy_printk(kern_level, wiphy, &vaf); \
955 va_end(args); \
956 \
957 return r; \
958} \
959EXPORT_SYMBOL(func);
960
961define_wiphy_printk_level(wiphy_debug, KERN_DEBUG);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 63d57ae399c..5d89310b358 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -86,7 +86,7 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
86static inline 86static inline
87bool wiphy_idx_valid(int wiphy_idx) 87bool wiphy_idx_valid(int wiphy_idx)
88{ 88{
89 return (wiphy_idx >= 0); 89 return wiphy_idx >= 0;
90} 90}
91 91
92 92
@@ -95,7 +95,10 @@ extern struct mutex cfg80211_mutex;
95extern struct list_head cfg80211_rdev_list; 95extern struct list_head cfg80211_rdev_list;
96extern int cfg80211_rdev_list_generation; 96extern int cfg80211_rdev_list_generation;
97 97
98#define assert_cfg80211_lock() WARN_ON(!mutex_is_locked(&cfg80211_mutex)) 98static inline void assert_cfg80211_lock(void)
99{
100 lockdep_assert_held(&cfg80211_mutex);
101}
99 102
100/* 103/*
101 * You can use this to mark a wiphy_idx as not having an associated wiphy. 104 * You can use this to mark a wiphy_idx as not having an associated wiphy.
@@ -202,8 +205,8 @@ static inline void wdev_unlock(struct wireless_dev *wdev)
202 mutex_unlock(&wdev->mtx); 205 mutex_unlock(&wdev->mtx);
203} 206}
204 207
205#define ASSERT_RDEV_LOCK(rdev) WARN_ON(!mutex_is_locked(&(rdev)->mtx)); 208#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx)
206#define ASSERT_WDEV_LOCK(wdev) WARN_ON(!mutex_is_locked(&(wdev)->mtx)); 209#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx)
207 210
208enum cfg80211_event_type { 211enum cfg80211_event_type {
209 EVENT_CONNECT_RESULT, 212 EVENT_CONNECT_RESULT,
@@ -331,16 +334,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
331 const u8 *resp_ie, size_t resp_ie_len, 334 const u8 *resp_ie, size_t resp_ie_len,
332 u16 status, bool wextev, 335 u16 status, bool wextev,
333 struct cfg80211_bss *bss); 336 struct cfg80211_bss *bss);
334int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 337int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
335 const u8 *match_data, int match_len); 338 u16 frame_type, const u8 *match_data,
336void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); 339 int match_len);
337void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); 340void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
338int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 341void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
339 struct net_device *dev, 342int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
340 struct ieee80211_channel *chan, 343 struct net_device *dev,
341 enum nl80211_channel_type channel_type, 344 struct ieee80211_channel *chan,
342 bool channel_type_valid, 345 enum nl80211_channel_type channel_type,
343 const u8 *buf, size_t len, u64 *cookie); 346 bool channel_type_valid,
347 const u8 *buf, size_t len, u64 *cookie);
344 348
345/* SME */ 349/* SME */
346int __cfg80211_connect(struct cfg80211_registered_device *rdev, 350int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d1a3fb99fdf..46f37116089 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -149,7 +149,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; 149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
150 const u8 *bssid = mgmt->bssid; 150 const u8 *bssid = mgmt->bssid;
151 int i; 151 int i;
152 bool found = false; 152 bool found = false, was_current = false;
153 153
154 ASSERT_WDEV_LOCK(wdev); 154 ASSERT_WDEV_LOCK(wdev);
155 155
@@ -159,6 +159,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
159 cfg80211_put_bss(&wdev->current_bss->pub); 159 cfg80211_put_bss(&wdev->current_bss->pub);
160 wdev->current_bss = NULL; 160 wdev->current_bss = NULL;
161 found = true; 161 found = true;
162 was_current = true;
162 } else for (i = 0; i < MAX_AUTH_BSSES; i++) { 163 } else for (i = 0; i < MAX_AUTH_BSSES; i++) {
163 if (wdev->auth_bsses[i] && 164 if (wdev->auth_bsses[i] &&
164 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) { 165 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) {
@@ -183,7 +184,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
183 184
184 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); 185 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL);
185 186
186 if (wdev->sme_state == CFG80211_SME_CONNECTED) { 187 if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) {
187 u16 reason_code; 188 u16 reason_code;
188 bool from_ap; 189 bool from_ap;
189 190
@@ -747,31 +748,51 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
747} 748}
748EXPORT_SYMBOL(cfg80211_new_sta); 749EXPORT_SYMBOL(cfg80211_new_sta);
749 750
750struct cfg80211_action_registration { 751struct cfg80211_mgmt_registration {
751 struct list_head list; 752 struct list_head list;
752 753
753 u32 nlpid; 754 u32 nlpid;
754 755
755 int match_len; 756 int match_len;
756 757
758 __le16 frame_type;
759
757 u8 match[]; 760 u8 match[];
758}; 761};
759 762
760int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 763int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
761 const u8 *match_data, int match_len) 764 u16 frame_type, const u8 *match_data,
765 int match_len)
762{ 766{
763 struct cfg80211_action_registration *reg, *nreg; 767 struct cfg80211_mgmt_registration *reg, *nreg;
764 int err = 0; 768 int err = 0;
769 u16 mgmt_type;
770
771 if (!wdev->wiphy->mgmt_stypes)
772 return -EOPNOTSUPP;
773
774 if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT)
775 return -EINVAL;
776
777 if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE))
778 return -EINVAL;
779
780 mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4;
781 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type)))
782 return -EINVAL;
765 783
766 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); 784 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL);
767 if (!nreg) 785 if (!nreg)
768 return -ENOMEM; 786 return -ENOMEM;
769 787
770 spin_lock_bh(&wdev->action_registrations_lock); 788 spin_lock_bh(&wdev->mgmt_registrations_lock);
771 789
772 list_for_each_entry(reg, &wdev->action_registrations, list) { 790 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
773 int mlen = min(match_len, reg->match_len); 791 int mlen = min(match_len, reg->match_len);
774 792
793 if (frame_type != le16_to_cpu(reg->frame_type))
794 continue;
795
775 if (memcmp(reg->match, match_data, mlen) == 0) { 796 if (memcmp(reg->match, match_data, mlen) == 0) {
776 err = -EALREADY; 797 err = -EALREADY;
777 break; 798 break;
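The registration path continued below validates the requested frame_type and reduces it to a subtype bit, (frame_type & IEEE80211_FCTL_STYPE) >> 4, which is tested against the per-iftype rx bitmap. A standalone illustration of that check, with the frame-control constants from ieee80211.h copied in:

#include <stdio.h>

#define FCTL_FTYPE   0x000c        /* IEEE80211_FCTL_FTYPE   */
#define FCTL_STYPE   0x00f0        /* IEEE80211_FCTL_STYPE   */
#define FTYPE_MGMT   0x0000        /* IEEE80211_FTYPE_MGMT   */
#define STYPE_ACTION 0x00d0        /* IEEE80211_STYPE_ACTION */
#define BIT(n)       (1U << (n))

static int rx_allowed(unsigned int frame_type, unsigned int rx_bitmap)
{
    unsigned int mgmt_type;

    if ((frame_type & FCTL_FTYPE) != FTYPE_MGMT)
        return 0;                              /* only management frames */
    if (frame_type & ~(FCTL_FTYPE | FCTL_STYPE))
        return 0;                              /* no stray bits allowed  */
    mgmt_type = (frame_type & FCTL_STYPE) >> 4;
    return (rx_bitmap & BIT(mgmt_type)) != 0;
}

int main(void)
{
    unsigned int rx = BIT(0x0d);               /* driver accepts Action frames only */

    printf("%d\n", rx_allowed(FTYPE_MGMT | STYPE_ACTION, rx));  /* 1 */
    printf("%d\n", rx_allowed(FTYPE_MGMT | 0x0040, rx));        /* 0: probe req not advertised */
    return 0;
}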
@@ -786,69 +807,83 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid,
786 memcpy(nreg->match, match_data, match_len); 807 memcpy(nreg->match, match_data, match_len);
787 nreg->match_len = match_len; 808 nreg->match_len = match_len;
788 nreg->nlpid = snd_pid; 809 nreg->nlpid = snd_pid;
789 list_add(&nreg->list, &wdev->action_registrations); 810 nreg->frame_type = cpu_to_le16(frame_type);
811 list_add(&nreg->list, &wdev->mgmt_registrations);
790 812
791 out: 813 out:
792 spin_unlock_bh(&wdev->action_registrations_lock); 814 spin_unlock_bh(&wdev->mgmt_registrations_lock);
793 return err; 815 return err;
794} 816}
795 817
796void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) 818void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
797{ 819{
798 struct cfg80211_action_registration *reg, *tmp; 820 struct cfg80211_mgmt_registration *reg, *tmp;
799 821
800 spin_lock_bh(&wdev->action_registrations_lock); 822 spin_lock_bh(&wdev->mgmt_registrations_lock);
801 823
802 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 824 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
803 if (reg->nlpid == nlpid) { 825 if (reg->nlpid == nlpid) {
804 list_del(&reg->list); 826 list_del(&reg->list);
805 kfree(reg); 827 kfree(reg);
806 } 828 }
807 } 829 }
808 830
809 spin_unlock_bh(&wdev->action_registrations_lock); 831 spin_unlock_bh(&wdev->mgmt_registrations_lock);
810} 832}
811 833
812void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) 834void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
813{ 835{
814 struct cfg80211_action_registration *reg, *tmp; 836 struct cfg80211_mgmt_registration *reg, *tmp;
815 837
816 spin_lock_bh(&wdev->action_registrations_lock); 838 spin_lock_bh(&wdev->mgmt_registrations_lock);
817 839
818 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 840 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
819 list_del(&reg->list); 841 list_del(&reg->list);
820 kfree(reg); 842 kfree(reg);
821 } 843 }
822 844
823 spin_unlock_bh(&wdev->action_registrations_lock); 845 spin_unlock_bh(&wdev->mgmt_registrations_lock);
824} 846}
825 847
826int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 848int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
827 struct net_device *dev, 849 struct net_device *dev,
828 struct ieee80211_channel *chan, 850 struct ieee80211_channel *chan,
829 enum nl80211_channel_type channel_type, 851 enum nl80211_channel_type channel_type,
830 bool channel_type_valid, 852 bool channel_type_valid,
831 const u8 *buf, size_t len, u64 *cookie) 853 const u8 *buf, size_t len, u64 *cookie)
832{ 854{
833 struct wireless_dev *wdev = dev->ieee80211_ptr; 855 struct wireless_dev *wdev = dev->ieee80211_ptr;
834 const struct ieee80211_mgmt *mgmt; 856 const struct ieee80211_mgmt *mgmt;
857 u16 stype;
858
859 if (!wdev->wiphy->mgmt_stypes)
860 return -EOPNOTSUPP;
835 861
836 if (rdev->ops->action == NULL) 862 if (!rdev->ops->mgmt_tx)
837 return -EOPNOTSUPP; 863 return -EOPNOTSUPP;
864
838 if (len < 24 + 1) 865 if (len < 24 + 1)
839 return -EINVAL; 866 return -EINVAL;
840 867
841 mgmt = (const struct ieee80211_mgmt *) buf; 868 mgmt = (const struct ieee80211_mgmt *) buf;
842 if (!ieee80211_is_action(mgmt->frame_control)) 869
870 if (!ieee80211_is_mgmt(mgmt->frame_control))
843 return -EINVAL; 871 return -EINVAL;
844 if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { 872
873 stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
874 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4)))
875 return -EINVAL;
876
877 if (ieee80211_is_action(mgmt->frame_control) &&
878 mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) {
845 /* Verify that we are associated with the destination AP */ 879 /* Verify that we are associated with the destination AP */
846 wdev_lock(wdev); 880 wdev_lock(wdev);
847 881
848 if (!wdev->current_bss || 882 if (!wdev->current_bss ||
849 memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, 883 memcmp(wdev->current_bss->pub.bssid, mgmt->bssid,
850 ETH_ALEN) != 0 || 884 ETH_ALEN) != 0 ||
851 (wdev->iftype == NL80211_IFTYPE_STATION && 885 ((wdev->iftype == NL80211_IFTYPE_STATION ||
886 wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
852 memcmp(wdev->current_bss->pub.bssid, mgmt->da, 887 memcmp(wdev->current_bss->pub.bssid, mgmt->da,
853 ETH_ALEN) != 0)) { 888 ETH_ALEN) != 0)) {
854 wdev_unlock(wdev); 889 wdev_unlock(wdev);
@@ -862,64 +897,75 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
862 return -EINVAL; 897 return -EINVAL;
863 898
864 /* Transmit the Action frame as requested by user space */ 899 /* Transmit the Action frame as requested by user space */
865 return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, 900 return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type,
866 channel_type_valid, buf, len, cookie); 901 channel_type_valid, buf, len, cookie);
867} 902}
868 903
869bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, 904bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
870 size_t len, gfp_t gfp) 905 size_t len, gfp_t gfp)
871{ 906{
872 struct wireless_dev *wdev = dev->ieee80211_ptr; 907 struct wireless_dev *wdev = dev->ieee80211_ptr;
873 struct wiphy *wiphy = wdev->wiphy; 908 struct wiphy *wiphy = wdev->wiphy;
874 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 909 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
875 struct cfg80211_action_registration *reg; 910 struct cfg80211_mgmt_registration *reg;
876 const u8 *action_data; 911 const struct ieee80211_txrx_stypes *stypes =
877 int action_data_len; 912 &wiphy->mgmt_stypes[wdev->iftype];
913 struct ieee80211_mgmt *mgmt = (void *)buf;
914 const u8 *data;
915 int data_len;
878 bool result = false; 916 bool result = false;
917 __le16 ftype = mgmt->frame_control &
918 cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
919 u16 stype;
879 920
880 /* frame length - min size excluding category */ 921 stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
881 action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1);
882 922
883 /* action data starts with category */ 923 if (!(stypes->rx & BIT(stype)))
884 action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; 924 return false;
885 925
886 spin_lock_bh(&wdev->action_registrations_lock); 926 data = buf + ieee80211_hdrlen(mgmt->frame_control);
927 data_len = len - ieee80211_hdrlen(mgmt->frame_control);
928
929 spin_lock_bh(&wdev->mgmt_registrations_lock);
930
931 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
932 if (reg->frame_type != ftype)
933 continue;
887 934
888 list_for_each_entry(reg, &wdev->action_registrations, list) { 935 if (reg->match_len > data_len)
889 if (reg->match_len > action_data_len)
890 continue; 936 continue;
891 937
892 if (memcmp(reg->match, action_data, reg->match_len)) 938 if (memcmp(reg->match, data, reg->match_len))
893 continue; 939 continue;
894 940
895 /* found match! */ 941 /* found match! */
896 942
897 /* Indicate the received Action frame to user space */ 943 /* Indicate the received Action frame to user space */
898 if (nl80211_send_action(rdev, dev, reg->nlpid, freq, 944 if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq,
899 buf, len, gfp)) 945 buf, len, gfp))
900 continue; 946 continue;
901 947
902 result = true; 948 result = true;
903 break; 949 break;
904 } 950 }
905 951
906 spin_unlock_bh(&wdev->action_registrations_lock); 952 spin_unlock_bh(&wdev->mgmt_registrations_lock);
907 953
908 return result; 954 return result;
909} 955}
910EXPORT_SYMBOL(cfg80211_rx_action); 956EXPORT_SYMBOL(cfg80211_rx_mgmt);
911 957
912void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, 958void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie,
913 const u8 *buf, size_t len, bool ack, gfp_t gfp) 959 const u8 *buf, size_t len, bool ack, gfp_t gfp)
914{ 960{
915 struct wireless_dev *wdev = dev->ieee80211_ptr; 961 struct wireless_dev *wdev = dev->ieee80211_ptr;
916 struct wiphy *wiphy = wdev->wiphy; 962 struct wiphy *wiphy = wdev->wiphy;
917 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 963 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
918 964
919 /* Indicate TX status of the Action frame to user space */ 965 /* Indicate TX status of the Action frame to user space */
920 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); 966 nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
921} 967}
922EXPORT_SYMBOL(cfg80211_action_tx_status); 968EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
923 969
924void cfg80211_cqm_rssi_notify(struct net_device *dev, 970void cfg80211_cqm_rssi_notify(struct net_device *dev,
925 enum nl80211_cqm_rssi_threshold_event rssi_event, 971 enum nl80211_cqm_rssi_threshold_event rssi_event,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 37902a54e9c..f15b1af2c76 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -136,6 +136,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
136 .len = sizeof(struct nl80211_sta_flag_update), 136 .len = sizeof(struct nl80211_sta_flag_update),
137 }, 137 },
138 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, 138 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
139 [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
140 [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
139 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, 141 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
140 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, 142 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
141 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, 143 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
@@ -156,6 +158,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
156 158
157 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, 159 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
158 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, 160 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
161 [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
159}; 162};
160 163
161/* policy for the attributes */ 164/* policy for the attributes */
@@ -407,12 +410,14 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
407 switch (wdev->iftype) { 410 switch (wdev->iftype) {
408 case NL80211_IFTYPE_AP: 411 case NL80211_IFTYPE_AP:
409 case NL80211_IFTYPE_AP_VLAN: 412 case NL80211_IFTYPE_AP_VLAN:
413 case NL80211_IFTYPE_P2P_GO:
410 break; 414 break;
411 case NL80211_IFTYPE_ADHOC: 415 case NL80211_IFTYPE_ADHOC:
412 if (!wdev->current_bss) 416 if (!wdev->current_bss)
413 return -ENOLINK; 417 return -ENOLINK;
414 break; 418 break;
415 case NL80211_IFTYPE_STATION: 419 case NL80211_IFTYPE_STATION:
420 case NL80211_IFTYPE_P2P_CLIENT:
416 if (wdev->sme_state != CFG80211_SME_CONNECTED) 421 if (wdev->sme_state != CFG80211_SME_CONNECTED)
417 return -ENOLINK; 422 return -ENOLINK;
418 break; 423 break;
@@ -437,6 +442,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
437 struct ieee80211_rate *rate; 442 struct ieee80211_rate *rate;
438 int i; 443 int i;
439 u16 ifmodes = dev->wiphy.interface_modes; 444 u16 ifmodes = dev->wiphy.interface_modes;
445 const struct ieee80211_txrx_stypes *mgmt_stypes =
446 dev->wiphy.mgmt_stypes;
440 447
441 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); 448 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY);
442 if (!hdr) 449 if (!hdr)
@@ -471,6 +478,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
471 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, 478 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
472 dev->wiphy.max_num_pmkids); 479 dev->wiphy.max_num_pmkids);
473 480
481 if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
482 NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
483
474 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 484 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
475 if (!nl_modes) 485 if (!nl_modes)
476 goto nla_put_failure; 486 goto nla_put_failure;
@@ -587,7 +597,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
587 CMD(flush_pmksa, FLUSH_PMKSA); 597 CMD(flush_pmksa, FLUSH_PMKSA);
588 CMD(remain_on_channel, REMAIN_ON_CHANNEL); 598 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
589 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); 599 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
590 CMD(action, ACTION); 600 CMD(mgmt_tx, FRAME);
591 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { 601 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
592 i++; 602 i++;
593 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); 603 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -608,6 +618,55 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
608 618
609 nla_nest_end(msg, nl_cmds); 619 nla_nest_end(msg, nl_cmds);
610 620
621 if (mgmt_stypes) {
622 u16 stypes;
623 struct nlattr *nl_ftypes, *nl_ifs;
624 enum nl80211_iftype ift;
625
626 nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
627 if (!nl_ifs)
628 goto nla_put_failure;
629
630 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
631 nl_ftypes = nla_nest_start(msg, ift);
632 if (!nl_ftypes)
633 goto nla_put_failure;
634 i = 0;
635 stypes = mgmt_stypes[ift].tx;
636 while (stypes) {
637 if (stypes & 1)
638 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
639 (i << 4) | IEEE80211_FTYPE_MGMT);
640 stypes >>= 1;
641 i++;
642 }
643 nla_nest_end(msg, nl_ftypes);
644 }
645
646 nla_nest_end(msg, nl_ifs);
647
648 nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
649 if (!nl_ifs)
650 goto nla_put_failure;
651
652 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
653 nl_ftypes = nla_nest_start(msg, ift);
654 if (!nl_ftypes)
655 goto nla_put_failure;
656 i = 0;
657 stypes = mgmt_stypes[ift].rx;
658 while (stypes) {
659 if (stypes & 1)
660 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
661 (i << 4) | IEEE80211_FTYPE_MGMT);
662 stypes >>= 1;
663 i++;
664 }
665 nla_nest_end(msg, nl_ftypes);
666 }
667 nla_nest_end(msg, nl_ifs);
668 }
669
611 return genlmsg_end(msg, hdr); 670 return genlmsg_end(msg, hdr);
612 671
613 nla_put_failure: 672 nla_put_failure:
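The nested-attribute loops above walk each iftype's tx/rx bitmap and emit one NL80211_ATTR_FRAME_TYPE per set bit, reconstructing the frame type as (i << 4) | IEEE80211_FTYPE_MGMT. The same expansion in isolation, with printf in place of NLA_PUT_U16:

#include <stdio.h>

#define FTYPE_MGMT 0x0000          /* IEEE80211_FTYPE_MGMT */

static void dump_frame_types(unsigned short stypes)
{
    unsigned int i = 0;

    while (stypes) {
        if (stypes & 1)
            printf("frame type 0x%04x\n", (i << 4) | FTYPE_MGMT);
        stypes >>= 1;
        i++;
    }
}

int main(void)
{
    /* bits 4 (probe request) and 13 (action) set, as a driver might advertise */
    dump_frame_types((1 << 4) | (1 << 13));    /* prints 0x0040 and 0x00d0 */
    return 0;
}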
@@ -709,7 +768,8 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
709 wdev->iftype == NL80211_IFTYPE_AP || 768 wdev->iftype == NL80211_IFTYPE_AP ||
710 wdev->iftype == NL80211_IFTYPE_WDS || 769 wdev->iftype == NL80211_IFTYPE_WDS ||
711 wdev->iftype == NL80211_IFTYPE_MESH_POINT || 770 wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
712 wdev->iftype == NL80211_IFTYPE_MONITOR; 771 wdev->iftype == NL80211_IFTYPE_MONITOR ||
772 wdev->iftype == NL80211_IFTYPE_P2P_GO;
713} 773}
714 774
715static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, 775static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
@@ -776,7 +836,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
776 struct cfg80211_registered_device *rdev; 836 struct cfg80211_registered_device *rdev;
777 struct net_device *netdev = NULL; 837 struct net_device *netdev = NULL;
778 struct wireless_dev *wdev; 838 struct wireless_dev *wdev;
779 int result, rem_txq_params = 0; 839 int result = 0, rem_txq_params = 0;
780 struct nlattr *nl_txq_params; 840 struct nlattr *nl_txq_params;
781 u32 changed; 841 u32 changed;
782 u8 retry_short = 0, retry_long = 0; 842 u8 retry_short = 0, retry_long = 0;
@@ -1636,7 +1696,8 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1636 if (err) 1696 if (err)
1637 goto unlock_rtnl; 1697 goto unlock_rtnl;
1638 1698
1639 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { 1699 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
1700 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
1640 err = -EOPNOTSUPP; 1701 err = -EOPNOTSUPP;
1641 goto out; 1702 goto out;
1642 } 1703 }
@@ -1728,7 +1789,8 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
1728 goto out; 1789 goto out;
1729 } 1790 }
1730 1791
1731 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { 1792 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
1793 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
1732 err = -EOPNOTSUPP; 1794 err = -EOPNOTSUPP;
1733 goto out; 1795 goto out;
1734 } 1796 }
@@ -2071,10 +2133,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2071 switch (dev->ieee80211_ptr->iftype) { 2133 switch (dev->ieee80211_ptr->iftype) {
2072 case NL80211_IFTYPE_AP: 2134 case NL80211_IFTYPE_AP:
2073 case NL80211_IFTYPE_AP_VLAN: 2135 case NL80211_IFTYPE_AP_VLAN:
2136 case NL80211_IFTYPE_P2P_GO:
2074 /* disallow mesh-specific things */ 2137 /* disallow mesh-specific things */
2075 if (params.plink_action) 2138 if (params.plink_action)
2076 err = -EINVAL; 2139 err = -EINVAL;
2077 break; 2140 break;
2141 case NL80211_IFTYPE_P2P_CLIENT:
2078 case NL80211_IFTYPE_STATION: 2142 case NL80211_IFTYPE_STATION:
2079 /* disallow everything but AUTHORIZED flag */ 2143 /* disallow everything but AUTHORIZED flag */
2080 if (params.plink_action) 2144 if (params.plink_action)
@@ -2176,7 +2240,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2176 goto out_rtnl; 2240 goto out_rtnl;
2177 2241
2178 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2242 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2179 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { 2243 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2244 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
2180 err = -EINVAL; 2245 err = -EINVAL;
2181 goto out; 2246 goto out;
2182 } 2247 }
@@ -2229,7 +2294,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
2229 2294
2230 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2295 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2231 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && 2296 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2232 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { 2297 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
2298 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
2233 err = -EINVAL; 2299 err = -EINVAL;
2234 goto out; 2300 goto out;
2235 } 2301 }
@@ -2603,7 +2669,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
2603 goto out; 2669 goto out;
2604 } 2670 }
2605 2671
2606 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { 2672 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2673 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
2607 err = -EOPNOTSUPP; 2674 err = -EOPNOTSUPP;
2608 goto out; 2675 goto out;
2609 } 2676 }
@@ -3306,6 +3373,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
3306 } 3373 }
3307 3374
3308 switch (wdev->iftype) { 3375 switch (wdev->iftype) {
3376 case NL80211_IFTYPE_P2P_CLIENT:
3309 case NL80211_IFTYPE_STATION: 3377 case NL80211_IFTYPE_STATION:
3310 if (intbss == wdev->current_bss) 3378 if (intbss == wdev->current_bss)
3311 NLA_PUT_U32(msg, NL80211_BSS_STATUS, 3379 NLA_PUT_U32(msg, NL80211_BSS_STATUS,
@@ -3572,12 +3640,28 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3572 if (err) 3640 if (err)
3573 goto unlock_rtnl; 3641 goto unlock_rtnl;
3574 3642
3643 if (key.idx >= 0) {
3644 int i;
3645 bool ok = false;
3646 for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) {
3647 if (key.p.cipher == rdev->wiphy.cipher_suites[i]) {
3648 ok = true;
3649 break;
3650 }
3651 }
3652 if (!ok) {
3653 err = -EINVAL;
3654 goto out;
3655 }
3656 }
3657
3575 if (!rdev->ops->auth) { 3658 if (!rdev->ops->auth) {
3576 err = -EOPNOTSUPP; 3659 err = -EOPNOTSUPP;
3577 goto out; 3660 goto out;
3578 } 3661 }
3579 3662
3580 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3663 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3664 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
3581 err = -EOPNOTSUPP; 3665 err = -EOPNOTSUPP;
3582 goto out; 3666 goto out;
3583 } 3667 }
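The new block above rejects an authentication request whose WEP key cipher is not in the wiphy's advertised cipher_suites list; the check is a plain linear scan. In isolation, with suite IDs following the 802.11 OUI-based numbering:

#include <stdio.h>

#define CIPHER_WEP40  0x000fac01
#define CIPHER_TKIP   0x000fac02
#define CIPHER_CCMP   0x000fac04
#define CIPHER_WEP104 0x000fac05

static int cipher_supported(unsigned int cipher,
                            const unsigned int *suites, int n_suites)
{
    int i;

    for (i = 0; i < n_suites; i++)
        if (cipher == suites[i])
            return 1;
    return 0;
}

int main(void)
{
    const unsigned int suites[] = { CIPHER_WEP40, CIPHER_WEP104,
                                    CIPHER_TKIP, CIPHER_CCMP };

    printf("%d\n", cipher_supported(CIPHER_CCMP, suites, 4));   /* 1 */
    printf("%d\n", cipher_supported(0x000fac06, suites, 4));    /* 0: AES-CMAC not listed */
    return 0;
}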
@@ -3624,7 +3708,8 @@ unlock_rtnl:
3624 return err; 3708 return err;
3625} 3709}
3626 3710
3627static int nl80211_crypto_settings(struct genl_info *info, 3711static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
3712 struct genl_info *info,
3628 struct cfg80211_crypto_settings *settings, 3713 struct cfg80211_crypto_settings *settings,
3629 int cipher_limit) 3714 int cipher_limit)
3630{ 3715{
@@ -3632,6 +3717,19 @@ static int nl80211_crypto_settings(struct genl_info *info,
3632 3717
3633 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; 3718 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT];
3634 3719
3720 if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
3721 u16 proto;
3722 proto = nla_get_u16(
3723 info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
3724 settings->control_port_ethertype = cpu_to_be16(proto);
3725 if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
3726 proto != ETH_P_PAE)
3727 return -EINVAL;
3728 if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT])
3729 settings->control_port_no_encrypt = true;
3730 } else
3731 settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);
3732
3635 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { 3733 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
3636 void *data; 3734 void *data;
3637 int len, i; 3735 int len, i;
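nl80211_crypto_settings() above defaults the control-port protocol to EAPOL (ETH_P_PAE, 0x888e) and only accepts a different ethertype when the wiphy advertises WIPHY_FLAG_CONTROL_PORT_PROTOCOL. A small sketch of that validation, using htons() in place of the kernel's cpu_to_be16() and a plain flag in place of the wiphy flag:

#include <arpa/inet.h>
#include <stdio.h>

#define ETH_P_PAE 0x888e           /* EAPOL, the default control-port protocol */

/* Returns the big-endian ethertype to use, or 0 if the request is invalid.
 * 'allow_other' stands in for WIPHY_FLAG_CONTROL_PORT_PROTOCOL. */
static unsigned short pick_control_port_proto(int have_attr,
                                              unsigned short requested,
                                              int allow_other)
{
    if (!have_attr)
        return htons(ETH_P_PAE);               /* attribute absent: EAPOL */
    if (!allow_other && requested != ETH_P_PAE)
        return 0;                              /* -EINVAL in the kernel */
    return htons(requested);
}

int main(void)
{
    printf("0x%04x\n", ntohs(pick_control_port_proto(0, 0, 0)));       /* 0x888e */
    printf("0x%04x\n", ntohs(pick_control_port_proto(1, 0x88c7, 1)));  /* 0x88c7, pre-auth */
    printf("%u\n", pick_control_port_proto(1, 0x88c7, 0));             /* 0: not allowed */
    return 0;
}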
@@ -3718,7 +3816,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3718 goto out; 3816 goto out;
3719 } 3817 }
3720 3818
3721 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3819 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3820 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
3722 err = -EOPNOTSUPP; 3821 err = -EOPNOTSUPP;
3723 goto out; 3822 goto out;
3724 } 3823 }
@@ -3759,7 +3858,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3759 if (info->attrs[NL80211_ATTR_PREV_BSSID]) 3858 if (info->attrs[NL80211_ATTR_PREV_BSSID])
3760 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); 3859 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
3761 3860
3762 err = nl80211_crypto_settings(info, &crypto, 1); 3861 err = nl80211_crypto_settings(rdev, info, &crypto, 1);
3763 if (!err) 3862 if (!err)
3764 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, 3863 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
3765 ssid, ssid_len, ie, ie_len, use_mfp, 3864 ssid, ssid_len, ie, ie_len, use_mfp,
@@ -3802,7 +3901,8 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3802 goto out; 3901 goto out;
3803 } 3902 }
3804 3903
3805 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3904 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3905 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
3806 err = -EOPNOTSUPP; 3906 err = -EOPNOTSUPP;
3807 goto out; 3907 goto out;
3808 } 3908 }
@@ -3868,7 +3968,8 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3868 goto out; 3968 goto out;
3869 } 3969 }
3870 3970
3871 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3971 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3972 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
3872 err = -EOPNOTSUPP; 3973 err = -EOPNOTSUPP;
3873 goto out; 3974 goto out;
3874 } 3975 }
@@ -4236,7 +4337,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
4236 4337
4237 connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; 4338 connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];
4238 4339
4239 err = nl80211_crypto_settings(info, &connect.crypto, 4340 err = nl80211_crypto_settings(rdev, info, &connect.crypto,
4240 NL80211_MAX_NR_CIPHER_SUITES); 4341 NL80211_MAX_NR_CIPHER_SUITES);
4241 if (err) 4342 if (err)
4242 return err; 4343 return err;
@@ -4246,7 +4347,8 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
4246 if (err) 4347 if (err)
4247 goto unlock_rtnl; 4348 goto unlock_rtnl;
4248 4349
4249 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4350 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4351 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4250 err = -EOPNOTSUPP; 4352 err = -EOPNOTSUPP;
4251 goto out; 4353 goto out;
4252 } 4354 }
@@ -4322,7 +4424,8 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
4322 if (err) 4424 if (err)
4323 goto unlock_rtnl; 4425 goto unlock_rtnl;
4324 4426
4325 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4427 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4428 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4326 err = -EOPNOTSUPP; 4429 err = -EOPNOTSUPP;
4327 goto out; 4430 goto out;
4328 } 4431 }
@@ -4410,7 +4513,8 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
4410 pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); 4513 pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
4411 pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 4514 pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
4412 4515
4413 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4516 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4517 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4414 err = -EOPNOTSUPP; 4518 err = -EOPNOTSUPP;
4415 goto out; 4519 goto out;
4416 } 4520 }
@@ -4455,7 +4559,8 @@ static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info)
4455 if (err) 4559 if (err)
4456 goto out_rtnl; 4560 goto out_rtnl;
4457 4561
4458 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4562 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4563 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4459 err = -EOPNOTSUPP; 4564 err = -EOPNOTSUPP;
4460 goto out; 4565 goto out;
4461 } 4566 }
@@ -4717,17 +4822,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
4717 return err; 4822 return err;
4718} 4823}
4719 4824
4720static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) 4825static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
4721{ 4826{
4722 struct cfg80211_registered_device *rdev; 4827 struct cfg80211_registered_device *rdev;
4723 struct net_device *dev; 4828 struct net_device *dev;
4829 u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION;
4724 int err; 4830 int err;
4725 4831
4726 if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) 4832 if (!info->attrs[NL80211_ATTR_FRAME_MATCH])
4727 return -EINVAL; 4833 return -EINVAL;
4728 4834
4729 if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) 4835 if (info->attrs[NL80211_ATTR_FRAME_TYPE])
4730 return -EINVAL; 4836 frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]);
4731 4837
4732 rtnl_lock(); 4838 rtnl_lock();
4733 4839
@@ -4736,18 +4842,20 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
4736 goto unlock_rtnl; 4842 goto unlock_rtnl;
4737 4843
4738 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && 4844 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4739 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { 4845 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
4846 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4740 err = -EOPNOTSUPP; 4847 err = -EOPNOTSUPP;
4741 goto out; 4848 goto out;
4742 } 4849 }
4743 4850
4744 /* not much point in registering if we can't reply */ 4851 /* not much point in registering if we can't reply */
4745 if (!rdev->ops->action) { 4852 if (!rdev->ops->mgmt_tx) {
4746 err = -EOPNOTSUPP; 4853 err = -EOPNOTSUPP;
4747 goto out; 4854 goto out;
4748 } 4855 }
4749 4856
4750 err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, 4857 err = cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid,
4858 frame_type,
4751 nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), 4859 nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
4752 nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); 4860 nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]));
4753 out: 4861 out:
@@ -4758,7 +4866,7 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
4758 return err; 4866 return err;
4759} 4867}
4760 4868
4761static int nl80211_action(struct sk_buff *skb, struct genl_info *info) 4869static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
4762{ 4870{
4763 struct cfg80211_registered_device *rdev; 4871 struct cfg80211_registered_device *rdev;
4764 struct net_device *dev; 4872 struct net_device *dev;
@@ -4781,13 +4889,14 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4781 if (err) 4889 if (err)
4782 goto unlock_rtnl; 4890 goto unlock_rtnl;
4783 4891
4784 if (!rdev->ops->action) { 4892 if (!rdev->ops->mgmt_tx) {
4785 err = -EOPNOTSUPP; 4893 err = -EOPNOTSUPP;
4786 goto out; 4894 goto out;
4787 } 4895 }
4788 4896
4789 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && 4897 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4790 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { 4898 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
4899 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4791 err = -EOPNOTSUPP; 4900 err = -EOPNOTSUPP;
4792 goto out; 4901 goto out;
4793 } 4902 }
@@ -4824,17 +4933,17 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4824 } 4933 }
4825 4934
4826 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 4935 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
4827 NL80211_CMD_ACTION); 4936 NL80211_CMD_FRAME);
4828 4937
4829 if (IS_ERR(hdr)) { 4938 if (IS_ERR(hdr)) {
4830 err = PTR_ERR(hdr); 4939 err = PTR_ERR(hdr);
4831 goto free_msg; 4940 goto free_msg;
4832 } 4941 }
4833 err = cfg80211_mlme_action(rdev, dev, chan, channel_type, 4942 err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type,
4834 channel_type_valid, 4943 channel_type_valid,
4835 nla_data(info->attrs[NL80211_ATTR_FRAME]), 4944 nla_data(info->attrs[NL80211_ATTR_FRAME]),
4836 nla_len(info->attrs[NL80211_ATTR_FRAME]), 4945 nla_len(info->attrs[NL80211_ATTR_FRAME]),
4837 &cookie); 4946 &cookie);
4838 if (err) 4947 if (err)
4839 goto free_msg; 4948 goto free_msg;
4840 4949
@@ -5005,7 +5114,8 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
5005 goto unlock_rdev; 5114 goto unlock_rdev;
5006 } 5115 }
5007 5116
5008 if (wdev->iftype != NL80211_IFTYPE_STATION) { 5117 if (wdev->iftype != NL80211_IFTYPE_STATION &&
5118 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) {
5009 err = -EOPNOTSUPP; 5119 err = -EOPNOTSUPP;
5010 goto unlock_rdev; 5120 goto unlock_rdev;
5011 } 5121 }
@@ -5333,14 +5443,14 @@ static struct genl_ops nl80211_ops[] = {
5333 .flags = GENL_ADMIN_PERM, 5443 .flags = GENL_ADMIN_PERM,
5334 }, 5444 },
5335 { 5445 {
5336 .cmd = NL80211_CMD_REGISTER_ACTION, 5446 .cmd = NL80211_CMD_REGISTER_FRAME,
5337 .doit = nl80211_register_action, 5447 .doit = nl80211_register_mgmt,
5338 .policy = nl80211_policy, 5448 .policy = nl80211_policy,
5339 .flags = GENL_ADMIN_PERM, 5449 .flags = GENL_ADMIN_PERM,
5340 }, 5450 },
5341 { 5451 {
5342 .cmd = NL80211_CMD_ACTION, 5452 .cmd = NL80211_CMD_FRAME,
5343 .doit = nl80211_action, 5453 .doit = nl80211_tx_mgmt,
5344 .policy = nl80211_policy, 5454 .policy = nl80211_policy,
5345 .flags = GENL_ADMIN_PERM, 5455 .flags = GENL_ADMIN_PERM,
5346 }, 5456 },
@@ -6040,9 +6150,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
6040 nl80211_mlme_mcgrp.id, gfp); 6150 nl80211_mlme_mcgrp.id, gfp);
6041} 6151}
6042 6152
6043int nl80211_send_action(struct cfg80211_registered_device *rdev, 6153int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
6044 struct net_device *netdev, u32 nlpid, 6154 struct net_device *netdev, u32 nlpid,
6045 int freq, const u8 *buf, size_t len, gfp_t gfp) 6155 int freq, const u8 *buf, size_t len, gfp_t gfp)
6046{ 6156{
6047 struct sk_buff *msg; 6157 struct sk_buff *msg;
6048 void *hdr; 6158 void *hdr;
@@ -6052,7 +6162,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
6052 if (!msg) 6162 if (!msg)
6053 return -ENOMEM; 6163 return -ENOMEM;
6054 6164
6055 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); 6165 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
6056 if (!hdr) { 6166 if (!hdr) {
6057 nlmsg_free(msg); 6167 nlmsg_free(msg);
6058 return -ENOMEM; 6168 return -ENOMEM;
@@ -6080,10 +6190,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
6080 return -ENOBUFS; 6190 return -ENOBUFS;
6081} 6191}
6082 6192
6083void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, 6193void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
6084 struct net_device *netdev, u64 cookie, 6194 struct net_device *netdev, u64 cookie,
6085 const u8 *buf, size_t len, bool ack, 6195 const u8 *buf, size_t len, bool ack,
6086 gfp_t gfp) 6196 gfp_t gfp)
6087{ 6197{
6088 struct sk_buff *msg; 6198 struct sk_buff *msg;
6089 void *hdr; 6199 void *hdr;
@@ -6092,7 +6202,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
6092 if (!msg) 6202 if (!msg)
6093 return; 6203 return;
6094 6204
6095 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); 6205 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS);
6096 if (!hdr) { 6206 if (!hdr) {
6097 nlmsg_free(msg); 6207 nlmsg_free(msg);
6098 return; 6208 return;
@@ -6179,7 +6289,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
6179 6289
6180 list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) 6290 list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list)
6181 list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) 6291 list_for_each_entry_rcu(wdev, &rdev->netdev_list, list)
6182 cfg80211_mlme_unregister_actions(wdev, notify->pid); 6292 cfg80211_mlme_unregister_socket(wdev, notify->pid);
6183 6293
6184 rcu_read_unlock(); 6294 rcu_read_unlock();
6185 6295
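
A minimal userspace sketch (not part of the patch) of the default-then-override pattern the renamed nl80211_register_mgmt() applies to the new NL80211_ATTR_FRAME_TYPE attribute: Action frames remain the default so existing userspace keeps working, and an explicit management type/subtype can override it. pick_frame_type() is a hypothetical helper, and the constant values are assumed to match linux/ieee80211.h.

/* Illustrative only -- not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define IEEE80211_FTYPE_MGMT	0x0000	/* assumed, as in linux/ieee80211.h */
#define IEEE80211_STYPE_ACTION	0x00d0

static uint16_t pick_frame_type(const uint16_t *attr_frame_type)
{
	/* old behaviour (Action frames) stays the default ... */
	uint16_t frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION;

	/* ... unless userspace supplied NL80211_ATTR_FRAME_TYPE */
	if (attr_frame_type)
		frame_type = *attr_frame_type;
	return frame_type;
}

int main(void)
{
	uint16_t probe_req = 0x0040;	/* mgmt, subtype Probe Request */

	printf("default:  0x%04x\n", (unsigned)pick_frame_type(NULL));
	printf("override: 0x%04x\n", (unsigned)pick_frame_type(&probe_req));
	return 0;
}

Built with any C compiler this prints 0x00d0 for the default and 0x0040 when a Probe Request subtype is passed in.
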
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 2ad7fbc7d9f..30d2f939150 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
74 struct net_device *dev, const u8 *mac_addr, 74 struct net_device *dev, const u8 *mac_addr,
75 struct station_info *sinfo, gfp_t gfp); 75 struct station_info *sinfo, gfp_t gfp);
76 76
77int nl80211_send_action(struct cfg80211_registered_device *rdev, 77int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
78 struct net_device *netdev, u32 nlpid, int freq, 78 struct net_device *netdev, u32 nlpid, int freq,
79 const u8 *buf, size_t len, gfp_t gfp); 79 const u8 *buf, size_t len, gfp_t gfp);
80void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, 80void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
81 struct net_device *netdev, u64 cookie, 81 struct net_device *netdev, u64 cookie,
82 const u8 *buf, size_t len, bool ack, 82 const u8 *buf, size_t len, bool ack,
83 gfp_t gfp); 83 gfp_t gfp);
84 84
85void 85void
86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, 86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 1332c445d1c..c774bc0f155 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -14,6 +14,7 @@
14 * See COPYING for more details. 14 * See COPYING for more details.
15 */ 15 */
16 16
17#include <linux/kernel.h>
17#include <net/cfg80211.h> 18#include <net/cfg80211.h>
18#include <net/ieee80211_radiotap.h> 19#include <net/ieee80211_radiotap.h>
19#include <asm/unaligned.h> 20#include <asm/unaligned.h>
@@ -45,7 +46,7 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = {
45}; 46};
46 47
47static const struct ieee80211_radiotap_namespace radiotap_ns = { 48static const struct ieee80211_radiotap_namespace radiotap_ns = {
48 .n_bits = sizeof(rtap_namespace_sizes) / sizeof(rtap_namespace_sizes[0]), 49 .n_bits = ARRAY_SIZE(rtap_namespace_sizes),
49 .align_size = rtap_namespace_sizes, 50 .align_size = rtap_namespace_sizes,
50}; 51};
51 52
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f180db0de66..d14bbf960c1 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -36,6 +36,7 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/random.h> 38#include <linux/random.h>
39#include <linux/ctype.h>
39#include <linux/nl80211.h> 40#include <linux/nl80211.h>
40#include <linux/platform_device.h> 41#include <linux/platform_device.h>
41#include <net/cfg80211.h> 42#include <net/cfg80211.h>
@@ -73,7 +74,11 @@ const struct ieee80211_regdomain *cfg80211_regdomain;
73 * - last_request 74 * - last_request
74 */ 75 */
75static DEFINE_MUTEX(reg_mutex); 76static DEFINE_MUTEX(reg_mutex);
76#define assert_reg_lock() WARN_ON(!mutex_is_locked(&reg_mutex)) 77
78static inline void assert_reg_lock(void)
79{
80 lockdep_assert_held(&reg_mutex);
81}
77 82
78/* Used to queue up regulatory hints */ 83/* Used to queue up regulatory hints */
79static LIST_HEAD(reg_requests_list); 84static LIST_HEAD(reg_requests_list);
@@ -181,14 +186,6 @@ static bool is_alpha2_set(const char *alpha2)
181 return false; 186 return false;
182} 187}
183 188
184static bool is_alpha_upper(char letter)
185{
186 /* ASCII A - Z */
187 if (letter >= 65 && letter <= 90)
188 return true;
189 return false;
190}
191
192static bool is_unknown_alpha2(const char *alpha2) 189static bool is_unknown_alpha2(const char *alpha2)
193{ 190{
194 if (!alpha2) 191 if (!alpha2)
@@ -220,7 +217,7 @@ static bool is_an_alpha2(const char *alpha2)
220{ 217{
221 if (!alpha2) 218 if (!alpha2)
222 return false; 219 return false;
223 if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1])) 220 if (isalpha(alpha2[0]) && isalpha(alpha2[1]))
224 return true; 221 return true;
225 return false; 222 return false;
226} 223}
@@ -1399,6 +1396,11 @@ static DECLARE_WORK(reg_work, reg_todo);
1399 1396
1400static void queue_regulatory_request(struct regulatory_request *request) 1397static void queue_regulatory_request(struct regulatory_request *request)
1401{ 1398{
1399 if (isalpha(request->alpha2[0]))
1400 request->alpha2[0] = toupper(request->alpha2[0]);
1401 if (isalpha(request->alpha2[1]))
1402 request->alpha2[1] = toupper(request->alpha2[1]);
1403
1402 spin_lock(&reg_requests_lock); 1404 spin_lock(&reg_requests_lock);
1403 list_add_tail(&request->list, &reg_requests_list); 1405 list_add_tail(&request->list, &reg_requests_list);
1404 spin_unlock(&reg_requests_lock); 1406 spin_unlock(&reg_requests_lock);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index a8c2d6b877a..f161b984454 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -411,7 +411,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
411 411
412 ASSERT_WDEV_LOCK(wdev); 412 ASSERT_WDEV_LOCK(wdev);
413 413
414 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 414 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
415 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
415 return; 416 return;
416 417
417 if (wdev->sme_state != CFG80211_SME_CONNECTING) 418 if (wdev->sme_state != CFG80211_SME_CONNECTING)
@@ -548,7 +549,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid,
548 549
549 ASSERT_WDEV_LOCK(wdev); 550 ASSERT_WDEV_LOCK(wdev);
550 551
551 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 552 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
553 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
552 return; 554 return;
553 555
554 if (wdev->sme_state != CFG80211_SME_CONNECTED) 556 if (wdev->sme_state != CFG80211_SME_CONNECTED)
@@ -644,7 +646,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
644 646
645 ASSERT_WDEV_LOCK(wdev); 647 ASSERT_WDEV_LOCK(wdev);
646 648
647 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 649 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
650 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
648 return; 651 return;
649 652
650 if (wdev->sme_state != CFG80211_SME_CONNECTED) 653 if (wdev->sme_state != CFG80211_SME_CONNECTED)
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9f2cef3e0ca..74a9e3cce45 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -110,6 +110,13 @@ static int wiphy_resume(struct device *dev)
110 return ret; 110 return ret;
111} 111}
112 112
113static const void *wiphy_namespace(struct device *d)
114{
115 struct wiphy *wiphy = container_of(d, struct wiphy, dev);
116
117 return wiphy_net(wiphy);
118}
119
113struct class ieee80211_class = { 120struct class ieee80211_class = {
114 .name = "ieee80211", 121 .name = "ieee80211",
115 .owner = THIS_MODULE, 122 .owner = THIS_MODULE,
@@ -120,6 +127,8 @@ struct class ieee80211_class = {
120#endif 127#endif
121 .suspend = wiphy_suspend, 128 .suspend = wiphy_suspend,
122 .resume = wiphy_resume, 129 .resume = wiphy_resume,
130 .ns_type = &net_ns_type_operations,
131 .namespace = wiphy_namespace,
123}; 132};
124 133
125int wiphy_sysfs_init(void) 134int wiphy_sysfs_init(void)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 0c8a1e8b769..fb5448f7d55 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -183,7 +183,14 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
183 return -EINVAL; 183 return -EINVAL;
184 break; 184 break;
185 default: 185 default:
186 return -EINVAL; 186 /*
187 * We don't know anything about this algorithm,
188 * allow using it -- but the driver must check
189 * all parameters! We still check below whether
190 * or not the driver supports this algorithm,
191 * of course.
192 */
193 break;
187 } 194 }
188 195
189 if (params->seq) { 196 if (params->seq) {
@@ -221,7 +228,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) =
221 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 228 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
222EXPORT_SYMBOL(bridge_tunnel_header); 229EXPORT_SYMBOL(bridge_tunnel_header);
223 230
224unsigned int ieee80211_hdrlen(__le16 fc) 231unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc)
225{ 232{
226 unsigned int hdrlen = 24; 233 unsigned int hdrlen = 24;
227 234
@@ -319,7 +326,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
319 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 326 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
320 case cpu_to_le16(IEEE80211_FCTL_TODS): 327 case cpu_to_le16(IEEE80211_FCTL_TODS):
321 if (unlikely(iftype != NL80211_IFTYPE_AP && 328 if (unlikely(iftype != NL80211_IFTYPE_AP &&
322 iftype != NL80211_IFTYPE_AP_VLAN)) 329 iftype != NL80211_IFTYPE_AP_VLAN &&
330 iftype != NL80211_IFTYPE_P2P_GO))
323 return -1; 331 return -1;
324 break; 332 break;
325 case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): 333 case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
@@ -347,7 +355,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
347 break; 355 break;
348 case cpu_to_le16(IEEE80211_FCTL_FROMDS): 356 case cpu_to_le16(IEEE80211_FCTL_FROMDS):
349 if ((iftype != NL80211_IFTYPE_STATION && 357 if ((iftype != NL80211_IFTYPE_STATION &&
350 iftype != NL80211_IFTYPE_MESH_POINT) || 358 iftype != NL80211_IFTYPE_P2P_CLIENT &&
359 iftype != NL80211_IFTYPE_MESH_POINT) ||
351 (is_multicast_ether_addr(dst) && 360 (is_multicast_ether_addr(dst) &&
352 !compare_ether_addr(src, addr))) 361 !compare_ether_addr(src, addr)))
353 return -1; 362 return -1;
@@ -424,6 +433,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
424 switch (iftype) { 433 switch (iftype) {
425 case NL80211_IFTYPE_AP: 434 case NL80211_IFTYPE_AP:
426 case NL80211_IFTYPE_AP_VLAN: 435 case NL80211_IFTYPE_AP_VLAN:
436 case NL80211_IFTYPE_P2P_GO:
427 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); 437 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
428 /* DA BSSID SA */ 438 /* DA BSSID SA */
429 memcpy(hdr.addr1, skb->data, ETH_ALEN); 439 memcpy(hdr.addr1, skb->data, ETH_ALEN);
@@ -432,6 +442,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
432 hdrlen = 24; 442 hdrlen = 24;
433 break; 443 break;
434 case NL80211_IFTYPE_STATION: 444 case NL80211_IFTYPE_STATION:
445 case NL80211_IFTYPE_P2P_CLIENT:
435 fc |= cpu_to_le16(IEEE80211_FCTL_TODS); 446 fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
436 /* BSSID SA DA */ 447 /* BSSID SA DA */
437 memcpy(hdr.addr1, bssid, ETH_ALEN); 448 memcpy(hdr.addr1, bssid, ETH_ALEN);
@@ -771,7 +782,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
771 782
772 /* if it's part of a bridge, reject changing type to station/ibss */ 783 /* if it's part of a bridge, reject changing type to station/ibss */
773 if ((dev->priv_flags & IFF_BRIDGE_PORT) && 784 if ((dev->priv_flags & IFF_BRIDGE_PORT) &&
774 (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION)) 785 (ntype == NL80211_IFTYPE_ADHOC ||
786 ntype == NL80211_IFTYPE_STATION ||
787 ntype == NL80211_IFTYPE_P2P_CLIENT))
775 return -EBUSY; 788 return -EBUSY;
776 789
777 if (ntype != otype) { 790 if (ntype != otype) {
@@ -782,6 +795,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
782 cfg80211_leave_ibss(rdev, dev, false); 795 cfg80211_leave_ibss(rdev, dev, false);
783 break; 796 break;
784 case NL80211_IFTYPE_STATION: 797 case NL80211_IFTYPE_STATION:
798 case NL80211_IFTYPE_P2P_CLIENT:
785 cfg80211_disconnect(rdev, dev, 799 cfg80211_disconnect(rdev, dev,
786 WLAN_REASON_DEAUTH_LEAVING, true); 800 WLAN_REASON_DEAUTH_LEAVING, true);
787 break; 801 break;
@@ -810,9 +824,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
810 if (dev->ieee80211_ptr->use_4addr) 824 if (dev->ieee80211_ptr->use_4addr)
811 break; 825 break;
812 /* fall through */ 826 /* fall through */
827 case NL80211_IFTYPE_P2P_CLIENT:
813 case NL80211_IFTYPE_ADHOC: 828 case NL80211_IFTYPE_ADHOC:
814 dev->priv_flags |= IFF_DONT_BRIDGE; 829 dev->priv_flags |= IFF_DONT_BRIDGE;
815 break; 830 break;
831 case NL80211_IFTYPE_P2P_GO:
816 case NL80211_IFTYPE_AP: 832 case NL80211_IFTYPE_AP:
817 case NL80211_IFTYPE_AP_VLAN: 833 case NL80211_IFTYPE_AP_VLAN:
818 case NL80211_IFTYPE_WDS: 834 case NL80211_IFTYPE_WDS:
@@ -823,7 +839,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
823 /* monitor can't bridge anyway */ 839 /* monitor can't bridge anyway */
824 break; 840 break;
825 case NL80211_IFTYPE_UNSPECIFIED: 841 case NL80211_IFTYPE_UNSPECIFIED:
826 case __NL80211_IFTYPE_AFTER_LAST: 842 case NUM_NL80211_IFTYPES:
827 /* not happening */ 843 /* not happening */
828 break; 844 break;
829 } 845 }
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 8f5116f5af1..dc675a3daa3 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -611,7 +611,7 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
611#endif 611#endif
612 612
613#ifdef CONFIG_CFG80211_WEXT 613#ifdef CONFIG_CFG80211_WEXT
614 if (dev->ieee80211_ptr && dev->ieee80211_ptr && 614 if (dev->ieee80211_ptr &&
615 dev->ieee80211_ptr->wiphy && 615 dev->ieee80211_ptr->wiphy &&
616 dev->ieee80211_ptr->wiphy->wext && 616 dev->ieee80211_ptr->wiphy->wext &&
617 dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) 617 dev->ieee80211_ptr->wiphy->wext->get_wireless_stats)
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 9818198add8..6fffe62d7c2 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -197,6 +197,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
197 wdev->wext.connect.ssid_len = len; 197 wdev->wext.connect.ssid_len = len;
198 198
199 wdev->wext.connect.crypto.control_port = false; 199 wdev->wext.connect.crypto.control_port = false;
200 wdev->wext.connect.crypto.control_port_ethertype =
201 cpu_to_be16(ETH_P_PAE);
200 202
201 err = cfg80211_mgd_wext_connect(rdev, wdev); 203 err = cfg80211_mgd_wext_connect(rdev, wdev);
202 out: 204 out:
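
The wext-sme.c hunk stores the EAPOL ethertype in big-endian form. A tiny userspace sketch of what that value looks like on the wire, assuming ETH_P_PAE is 0x888E as in linux/if_ether.h and using htons() in place of the kernel's cpu_to_be16():

/* Illustrative only -- not kernel code. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define ETH_P_PAE 0x888E	/* 802.1X Port Access Entity, assumed value */

int main(void)
{
	uint16_t be = htons(ETH_P_PAE);
	const unsigned char *p = (const unsigned char *)&be;

	/* network byte order: high byte first */
	printf("on the wire: %02x %02x\n", p[0], p[1]);	/* 88 8e */
	return 0;
}
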
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5e86d4e97dc..f7af98dff40 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -507,14 +507,14 @@ static int x25_listen(struct socket *sock, int backlog)
507 struct sock *sk = sock->sk; 507 struct sock *sk = sock->sk;
508 int rc = -EOPNOTSUPP; 508 int rc = -EOPNOTSUPP;
509 509
510 lock_kernel(); 510 lock_sock(sk);
511 if (sk->sk_state != TCP_LISTEN) { 511 if (sk->sk_state != TCP_LISTEN) {
512 memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN); 512 memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
513 sk->sk_max_ack_backlog = backlog; 513 sk->sk_max_ack_backlog = backlog;
514 sk->sk_state = TCP_LISTEN; 514 sk->sk_state = TCP_LISTEN;
515 rc = 0; 515 rc = 0;
516 } 516 }
517 unlock_kernel(); 517 release_sock(sk);
518 518
519 return rc; 519 return rc;
520} 520}
@@ -688,7 +688,6 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
688 struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; 688 struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
689 int len, i, rc = 0; 689 int len, i, rc = 0;
690 690
691 lock_kernel();
692 if (!sock_flag(sk, SOCK_ZAPPED) || 691 if (!sock_flag(sk, SOCK_ZAPPED) ||
693 addr_len != sizeof(struct sockaddr_x25) || 692 addr_len != sizeof(struct sockaddr_x25) ||
694 addr->sx25_family != AF_X25) { 693 addr->sx25_family != AF_X25) {
@@ -704,12 +703,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
704 } 703 }
705 } 704 }
706 705
706 lock_sock(sk);
707 x25_sk(sk)->source_addr = addr->sx25_addr; 707 x25_sk(sk)->source_addr = addr->sx25_addr;
708 x25_insert_socket(sk); 708 x25_insert_socket(sk);
709 sock_reset_flag(sk, SOCK_ZAPPED); 709 sock_reset_flag(sk, SOCK_ZAPPED);
710 release_sock(sk);
710 SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); 711 SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
711out: 712out:
712 unlock_kernel();
713 return rc; 713 return rc;
714} 714}
715 715
@@ -751,7 +751,6 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
751 struct x25_route *rt; 751 struct x25_route *rt;
752 int rc = 0; 752 int rc = 0;
753 753
754 lock_kernel();
755 lock_sock(sk); 754 lock_sock(sk);
756 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { 755 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
757 sock->state = SS_CONNECTED; 756 sock->state = SS_CONNECTED;
@@ -829,7 +828,6 @@ out_put_route:
829 x25_route_put(rt); 828 x25_route_put(rt);
830out: 829out:
831 release_sock(sk); 830 release_sock(sk);
832 unlock_kernel();
833 return rc; 831 return rc;
834} 832}
835 833
@@ -869,8 +867,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
869 struct sk_buff *skb; 867 struct sk_buff *skb;
870 int rc = -EINVAL; 868 int rc = -EINVAL;
871 869
872 lock_kernel(); 870 if (!sk)
873 if (!sk || sk->sk_state != TCP_LISTEN)
874 goto out; 871 goto out;
875 872
876 rc = -EOPNOTSUPP; 873 rc = -EOPNOTSUPP;
@@ -878,6 +875,10 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
878 goto out; 875 goto out;
879 876
880 lock_sock(sk); 877 lock_sock(sk);
878 rc = -EINVAL;
879 if (sk->sk_state != TCP_LISTEN)
880 goto out2;
881
881 rc = x25_wait_for_data(sk, sk->sk_rcvtimeo); 882 rc = x25_wait_for_data(sk, sk->sk_rcvtimeo);
882 if (rc) 883 if (rc)
883 goto out2; 884 goto out2;
@@ -897,7 +898,6 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
897out2: 898out2:
898 release_sock(sk); 899 release_sock(sk);
899out: 900out:
900 unlock_kernel();
901 return rc; 901 return rc;
902} 902}
903 903
@@ -909,7 +909,6 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
909 struct x25_sock *x25 = x25_sk(sk); 909 struct x25_sock *x25 = x25_sk(sk);
910 int rc = 0; 910 int rc = 0;
911 911
912 lock_kernel();
913 if (peer) { 912 if (peer) {
914 if (sk->sk_state != TCP_ESTABLISHED) { 913 if (sk->sk_state != TCP_ESTABLISHED) {
915 rc = -ENOTCONN; 914 rc = -ENOTCONN;
@@ -923,19 +922,6 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
923 *uaddr_len = sizeof(*sx25); 922 *uaddr_len = sizeof(*sx25);
924 923
925out: 924out:
926 unlock_kernel();
927 return rc;
928}
929
930static unsigned int x25_datagram_poll(struct file *file, struct socket *sock,
931 poll_table *wait)
932{
933 int rc;
934
935 lock_kernel();
936 rc = datagram_poll(file, sock, wait);
937 unlock_kernel();
938
939 return rc; 925 return rc;
940} 926}
941 927
@@ -1746,7 +1732,7 @@ static const struct proto_ops x25_proto_ops = {
1746 .socketpair = sock_no_socketpair, 1732 .socketpair = sock_no_socketpair,
1747 .accept = x25_accept, 1733 .accept = x25_accept,
1748 .getname = x25_getname, 1734 .getname = x25_getname,
1749 .poll = x25_datagram_poll, 1735 .poll = datagram_poll,
1750 .ioctl = x25_ioctl, 1736 .ioctl = x25_ioctl,
1751#ifdef CONFIG_COMPAT 1737#ifdef CONFIG_COMPAT
1752 .compat_ioctl = compat_x25_ioctl, 1738 .compat_ioctl = compat_x25_ioctl,