Diffstat (limited to 'net')
-rw-r--r--  net/802/fc.c | 2
-rw-r--r--  net/802/fddi.c | 12
-rw-r--r--  net/802/hippi.c | 2
-rw-r--r--  net/802/tr.c | 2
-rw-r--r--  net/8021q/vlan.c | 6
-rw-r--r--  net/8021q/vlan.h | 2
-rw-r--r--  net/8021q/vlan_core.c | 16
-rw-r--r--  net/8021q/vlan_dev.c | 13
-rw-r--r--  net/9p/client.c | 4
-rw-r--r--  net/9p/trans_fd.c | 2
-rw-r--r--  net/atm/common.c | 2
-rw-r--r--  net/atm/lec.c | 1
-rw-r--r--  net/ax25/af_ax25.c | 2
-rw-r--r--  net/ax25/ax25_route.c | 4
-rw-r--r--  net/bluetooth/af_bluetooth.c | 5
-rw-r--r--  net/bluetooth/rfcomm/core.c | 4
-rw-r--r--  net/bridge/br_if.c | 29
-rw-r--r--  net/bridge/br_input.c | 2
-rw-r--r--  net/bridge/br_netfilter.c | 107
-rw-r--r--  net/caif/caif_dev.c | 24
-rw-r--r--  net/caif/caif_socket.c | 26
-rw-r--r--  net/caif/cfcnfg.c | 49
-rw-r--r--  net/caif/cfctrl.c | 59
-rw-r--r--  net/caif/cfdbgl.c | 4
-rw-r--r--  net/caif/cfdgml.c | 11
-rw-r--r--  net/caif/cffrml.c | 14
-rw-r--r--  net/caif/cfmuxl.c | 14
-rw-r--r--  net/caif/cfpkt_skbuff.c | 48
-rw-r--r--  net/caif/cfrfml.c | 12
-rw-r--r--  net/caif/cfserl.c | 4
-rw-r--r--  net/caif/cfsrvl.c | 17
-rw-r--r--  net/caif/cfutill.c | 12
-rw-r--r--  net/caif/cfveil.c | 11
-rw-r--r--  net/caif/cfvidl.c | 6
-rw-r--r--  net/caif/chnl_net.c | 47
-rw-r--r--  net/can/raw.c | 4
-rw-r--r--  net/core/datagram.c | 5
-rw-r--r--  net/core/dev.c | 342
-rw-r--r--  net/core/ethtool.c | 88
-rw-r--r--  net/core/fib_rules.c | 11
-rw-r--r--  net/core/filter.c | 10
-rw-r--r--  net/core/flow.c | 82
-rw-r--r--  net/core/gen_estimator.c | 4
-rw-r--r--  net/core/iovec.c | 6
-rw-r--r--  net/core/neighbour.c | 10
-rw-r--r--  net/core/net-sysfs.c | 37
-rw-r--r--  net/core/net-sysfs.h | 4
-rw-r--r--  net/core/pktgen.c | 12
-rw-r--r--  net/core/rtnetlink.c | 31
-rw-r--r--  net/core/skbuff.c | 92
-rw-r--r--  net/core/sock.c | 4
-rw-r--r--  net/core/utils.c | 15
-rw-r--r--  net/dccp/ccid.h | 46
-rw-r--r--  net/dccp/ccids/Kconfig | 31
-rw-r--r--  net/dccp/ccids/ccid2.c | 287
-rw-r--r--  net/dccp/ccids/ccid2.h | 35
-rw-r--r--  net/dccp/ccids/ccid3.c | 253
-rw-r--r--  net/dccp/ccids/ccid3.h | 51
-rw-r--r--  net/dccp/ccids/lib/loss_interval.c | 2
-rw-r--r--  net/dccp/ccids/lib/packet_history.c | 39
-rw-r--r--  net/dccp/ccids/lib/packet_history.h | 22
-rw-r--r--  net/dccp/ccids/lib/tfrc.h | 1
-rw-r--r--  net/dccp/ccids/lib/tfrc_equation.c | 14
-rw-r--r--  net/dccp/options.c | 25
-rw-r--r--  net/decnet/dn_nsp_out.c | 8
-rw-r--r--  net/econet/af_econet.c | 6
-rw-r--r--  net/ethernet/eth.c | 8
-rw-r--r--  net/ipv4/Kconfig | 8
-rw-r--r--  net/ipv4/Makefile | 1
-rw-r--r--  net/ipv4/af_inet.c | 8
-rw-r--r--  net/ipv4/arp.c | 231
-rw-r--r--  net/ipv4/datagram.c | 2
-rw-r--r--  net/ipv4/devinet.c | 4
-rw-r--r--  net/ipv4/fib_frontend.c | 35
-rw-r--r--  net/ipv4/fib_trie.c | 55
-rw-r--r--  net/ipv4/gre.c | 151
-rw-r--r--  net/ipv4/icmp.c | 4
-rw-r--r--  net/ipv4/inet_diag.c | 2
-rw-r--r--  net/ipv4/ip_fragment.c | 6
-rw-r--r--  net/ipv4/ip_gre.c | 236
-rw-r--r--  net/ipv4/ip_options.c | 3
-rw-r--r--  net/ipv4/ip_output.c | 24
-rw-r--r--  net/ipv4/ipip.c | 215
-rw-r--r--  net/ipv4/ipmr.c | 428
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 2
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 31
-rw-r--r--  net/ipv4/protocol.c | 31
-rw-r--r--  net/ipv4/raw.c | 2
-rw-r--r--  net/ipv4/route.c | 83
-rw-r--r--  net/ipv4/tcp.c | 11
-rw-r--r--  net/ipv4/tcp_input.c | 29
-rw-r--r--  net/ipv4/tcp_ipv4.c | 2
-rw-r--r--  net/ipv4/tcp_minisocks.c | 2
-rw-r--r--  net/ipv4/tcp_output.c | 31
-rw-r--r--  net/ipv4/tcp_timer.c | 37
-rw-r--r--  net/ipv4/tcp_westwood.c | 2
-rw-r--r--  net/ipv4/tunnel4.c | 19
-rw-r--r--  net/ipv4/udp.c | 4
-rw-r--r--  net/ipv4/xfrm4_tunnel.c | 4
-rw-r--r--  net/ipv6/addrconf.c | 5
-rw-r--r--  net/ipv6/addrlabel.c | 5
-rw-r--r--  net/ipv6/af_inet6.c | 6
-rw-r--r--  net/ipv6/exthdrs_core.c | 4
-rw-r--r--  net/ipv6/ip6_output.c | 6
-rw-r--r--  net/ipv6/ip6_tunnel.c | 160
-rw-r--r--  net/ipv6/ip6mr.c | 4
-rw-r--r--  net/ipv6/ndisc.c | 26
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 14
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 2
-rw-r--r--  net/ipv6/protocol.c | 32
-rw-r--r--  net/ipv6/raw.c | 12
-rw-r--r--  net/ipv6/reassembly.c | 2
-rw-r--r--  net/ipv6/route.c | 24
-rw-r--r--  net/ipv6/sit.c | 168
-rw-r--r--  net/ipv6/tcp_ipv6.c | 2
-rw-r--r--  net/ipv6/tunnel6.c | 17
-rw-r--r--  net/ipv6/xfrm6_policy.c | 2
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 4
-rw-r--r--  net/irda/af_irda.c | 14
-rw-r--r--  net/irda/discovery.c | 2
-rw-r--r--  net/irda/ircomm/ircomm_tty.c | 4
-rw-r--r--  net/irda/irlan/irlan_eth.c | 32
-rw-r--r--  net/irda/irlan/irlan_event.c | 2
-rw-r--r--  net/irda/irlmp.c | 2
-rw-r--r--  net/irda/irlmp_frame.c | 2
-rw-r--r--  net/irda/irnet/irnet_irda.c | 22
-rw-r--r--  net/irda/irnet/irnet_ppp.c | 8
-rw-r--r--  net/irda/irnet/irnet_ppp.h | 3
-rw-r--r--  net/key/af_key.c | 4
-rw-r--r--  net/l2tp/l2tp_eth.c | 1
-rw-r--r--  net/l2tp/l2tp_ppp.c | 2
-rw-r--r--  net/mac80211/aes_ccm.c | 6
-rw-r--r--  net/mac80211/aes_cmac.c | 6
-rw-r--r--  net/mac80211/agg-rx.c | 22
-rw-r--r--  net/mac80211/cfg.c | 145
-rw-r--r--  net/mac80211/chan.c | 2
-rw-r--r--  net/mac80211/debugfs.c | 7
-rw-r--r--  net/mac80211/debugfs_key.c | 55
-rw-r--r--  net/mac80211/debugfs_netdev.c | 3
-rw-r--r--  net/mac80211/debugfs_sta.c | 2
-rw-r--r--  net/mac80211/driver-ops.h | 14
-rw-r--r--  net/mac80211/driver-trace.h | 42
-rw-r--r--  net/mac80211/ht.c | 28
-rw-r--r--  net/mac80211/ibss.c | 12
-rw-r--r--  net/mac80211/ieee80211_i.h | 109
-rw-r--r--  net/mac80211/iface.c | 401
-rw-r--r--  net/mac80211/key.c | 113
-rw-r--r--  net/mac80211/key.h | 10
-rw-r--r--  net/mac80211/main.c | 182
-rw-r--r--  net/mac80211/mlme.c | 122
-rw-r--r--  net/mac80211/offchannel.c | 26
-rw-r--r--  net/mac80211/pm.c | 3
-rw-r--r--  net/mac80211/rate.c | 11
-rw-r--r--  net/mac80211/rc80211_pid_debugfs.c | 2
-rw-r--r--  net/mac80211/rx.c | 757
-rw-r--r--  net/mac80211/scan.c | 69
-rw-r--r--  net/mac80211/sta_info.c | 40
-rw-r--r--  net/mac80211/sta_info.h | 16
-rw-r--r--  net/mac80211/status.c | 12
-rw-r--r--  net/mac80211/tx.c | 68
-rw-r--r--  net/mac80211/util.c | 82
-rw-r--r--  net/mac80211/wep.c | 2
-rw-r--r--  net/mac80211/work.c | 39
-rw-r--r--  net/mac80211/wpa.c | 34
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 17
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 13
-rw-r--r--  net/netfilter/ipvs/ip_vs_sched.c | 22
-rw-r--r--  net/netfilter/xt_hashlimit.c | 15
-rw-r--r--  net/packet/af_packet.c | 4
-rw-r--r--  net/phonet/Kconfig | 11
-rw-r--r--  net/phonet/af_phonet.c | 17
-rw-r--r--  net/phonet/datagram.c | 13
-rw-r--r--  net/phonet/pep.c | 477
-rw-r--r--  net/phonet/pn_dev.c | 5
-rw-r--r--  net/phonet/socket.c | 190
-rw-r--r--  net/rds/af_rds.c | 26
-rw-r--r--  net/rds/bind.c | 82
-rw-r--r--  net/rds/cong.c | 8
-rw-r--r--  net/rds/connection.c | 157
-rw-r--r--  net/rds/ib.c | 194
-rw-r--r--  net/rds/ib.h | 100
-rw-r--r--  net/rds/ib_cm.c | 184
-rw-r--r--  net/rds/ib_rdma.c | 318
-rw-r--r--  net/rds/ib_recv.c | 549
-rw-r--r--  net/rds/ib_send.c | 682
-rw-r--r--  net/rds/ib_stats.c | 2
-rw-r--r--  net/rds/ib_sysctl.c | 17
-rw-r--r--  net/rds/info.c | 12
-rw-r--r--  net/rds/iw.c | 4
-rw-r--r--  net/rds/iw.h | 11
-rw-r--r--  net/rds/iw_cm.c | 14
-rw-r--r--  net/rds/iw_rdma.c | 5
-rw-r--r--  net/rds/iw_recv.c | 24
-rw-r--r--  net/rds/iw_send.c | 93
-rw-r--r--  net/rds/iw_sysctl.c | 4
-rw-r--r--  net/rds/loop.c | 31
-rw-r--r--  net/rds/message.c | 118
-rw-r--r--  net/rds/page.c | 5
-rw-r--r--  net/rds/rdma.c | 339
-rw-r--r--  net/rds/rdma.h | 85
-rw-r--r--  net/rds/rdma_transport.c | 42
-rw-r--r--  net/rds/rds.h | 187
-rw-r--r--  net/rds/recv.c | 9
-rw-r--r--  net/rds/send.c | 544
-rw-r--r--  net/rds/stats.c | 6
-rw-r--r--  net/rds/sysctl.c | 4
-rw-r--r--  net/rds/tcp.c | 8
-rw-r--r--  net/rds/tcp.h | 9
-rw-r--r--  net/rds/tcp_connect.c | 2
-rw-r--r--  net/rds/tcp_listen.c | 6
-rw-r--r--  net/rds/tcp_recv.c | 14
-rw-r--r--  net/rds/tcp_send.c | 66
-rw-r--r--  net/rds/threads.c | 69
-rw-r--r--  net/rds/transport.c | 19
-rw-r--r--  net/rds/xlist.h | 80
-rw-r--r--  net/rfkill/input.c | 2
-rw-r--r--  net/rose/rose_link.c | 4
-rw-r--r--  net/sched/Kconfig | 10
-rw-r--r--  net/sched/Makefile | 1
-rw-r--r--  net/sched/act_csum.c | 595
-rw-r--r--  net/sched/cls_flow.c | 74
-rw-r--r--  net/sched/em_meta.c | 6
-rw-r--r--  net/sched/sch_api.c | 16
-rw-r--r--  net/sched/sch_generic.c | 8
-rw-r--r--  net/sched/sch_sfq.c | 33
-rw-r--r--  net/sctp/associola.c | 2
-rw-r--r--  net/sctp/chunk.c | 2
-rw-r--r--  net/sctp/inqueue.c | 2
-rw-r--r--  net/sctp/ipv6.c | 4
-rw-r--r--  net/sctp/objcnt.c | 5
-rw-r--r--  net/sctp/output.c | 2
-rw-r--r--  net/sctp/outqueue.c | 34
-rw-r--r--  net/sctp/probe.c | 4
-rw-r--r--  net/sctp/protocol.c | 19
-rw-r--r--  net/sctp/sm_make_chunk.c | 2
-rw-r--r--  net/sctp/sm_sideeffect.c | 21
-rw-r--r--  net/sctp/sm_statefuns.c | 20
-rw-r--r--  net/sctp/sm_statetable.c | 42
-rw-r--r--  net/sctp/socket.c | 85
-rw-r--r--  net/sctp/transport.c | 9
-rw-r--r--  net/socket.c | 30
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_generic_token.c | 44
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c | 2
-rw-r--r--  net/sunrpc/sched.c | 2
-rw-r--r--  net/tipc/addr.c | 2
-rw-r--r--  net/tipc/bcast.c | 41
-rw-r--r--  net/tipc/bearer.c | 2
-rw-r--r--  net/tipc/core.c | 6
-rw-r--r--  net/tipc/dbg.c | 4
-rw-r--r--  net/tipc/discover.c | 8
-rw-r--r--  net/tipc/eth_media.c | 48
-rw-r--r--  net/tipc/link.c | 31
-rw-r--r--  net/tipc/link.h | 16
-rw-r--r--  net/tipc/msg.h | 6
-rw-r--r--  net/tipc/name_table.c | 50
-rw-r--r--  net/tipc/net.c | 1
-rw-r--r--  net/tipc/node.c | 28
-rw-r--r--  net/tipc/node.h | 2
-rw-r--r--  net/tipc/port.c | 19
-rw-r--r--  net/tipc/port.h | 2
-rw-r--r--  net/tipc/socket.c | 83
-rw-r--r--  net/tipc/subscr.c | 2
-rw-r--r--  net/unix/af_unix.c | 5
-rw-r--r--  net/wireless/core.c | 66
-rw-r--r--  net/wireless/core.h | 32
-rw-r--r--  net/wireless/ibss.c | 19
-rw-r--r--  net/wireless/mlme.c | 152
-rw-r--r--  net/wireless/nl80211.c | 228
-rw-r--r--  net/wireless/nl80211.h | 14
-rw-r--r--  net/wireless/radiotap.c | 3
-rw-r--r--  net/wireless/reg.c | 22
-rw-r--r--  net/wireless/sme.c | 9
-rw-r--r--  net/wireless/sysfs.c | 9
-rw-r--r--  net/wireless/util.c | 28
-rw-r--r--  net/wireless/wext-core.c | 2
-rw-r--r--  net/wireless/wext-sme.c | 2
-rw-r--r--  net/x25/af_x25.c | 34
279 files changed, 8738 insertions, 5133 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index 34cf1ee014b8..1e49f2d4ea96 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -70,7 +70,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
 	if(daddr)
 	{
 		memcpy(fch->daddr,daddr,dev->addr_len);
-		return(hdr_len);
+		return hdr_len;
 	}
 	return -hdr_len;
 }
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 3ef0ab0a543a..94b3ad08f39a 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -82,10 +82,10 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
 	if (daddr != NULL)
 	{
 		memcpy(fddi->daddr, daddr, dev->addr_len);
-		return(hl);
+		return hl;
 	}

-	return(-hl);
+	return -hl;
 }


@@ -108,7 +108,7 @@ static int fddi_rebuild_header(struct sk_buff *skb)
 	{
 		printk("%s: Don't know how to resolve type %04X addresses.\n",
 		       skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
-		return(0);
+		return 0;
 	}
 }

@@ -162,7 +162,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)

 	/* Assume 802.2 SNAP frames, for now */

-	return(type);
+	return type;
 }

 EXPORT_SYMBOL(fddi_type_trans);
@@ -170,9 +170,9 @@ EXPORT_SYMBOL(fddi_type_trans);
 int fddi_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
-		return(-EINVAL);
+		return -EINVAL;
 	dev->mtu = new_mtu;
-	return(0);
+	return 0;
 }
 EXPORT_SYMBOL(fddi_change_mtu);

diff --git a/net/802/hippi.c b/net/802/hippi.c
index cd3e8e929529..91aca8780fd0 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -152,7 +152,7 @@ int hippi_change_mtu(struct net_device *dev, int new_mtu)
 	if ((new_mtu < 68) || (new_mtu > 65280))
 		return -EINVAL;
 	dev->mtu = new_mtu;
-	return(0);
+	return 0;
 }
 EXPORT_SYMBOL(hippi_change_mtu);

diff --git a/net/802/tr.c b/net/802/tr.c
index 1c6e596074df..5e20cf8a074b 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -145,7 +145,7 @@ static int tr_header(struct sk_buff *skb, struct net_device *dev,
 	{
 		memcpy(trh->daddr,daddr,dev->addr_len);
 		tr_source_route(skb, trh, dev);
-		return(hdr_len);
+		return hdr_len;
 	}

 	return -hdr_len;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a2ad15250575..25c21332e9c3 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -321,7 +321,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (new_dev == NULL)
 		return -ENOBUFS;

-	new_dev->real_num_tx_queues = real_dev->real_num_tx_queues;
+	netif_copy_real_num_queues(new_dev, real_dev);
 	dev_net_set(new_dev, net);
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
@@ -525,6 +525,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;

 	case NETDEV_UNREGISTER:
+		/* twiddle thumbs on netns device moves */
+		if (dev->reg_state != NETREG_UNREGISTERING)
+			break;
+
 		/* Delete all VLANs for this dev. */
 		grp->killall = 1;

diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 8d9503ad01da..b26ce343072c 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -25,6 +25,7 @@ struct vlan_priority_tci_mapping {
  * @rx_multicast: number of received multicast packets
  * @syncp: synchronization point for 64bit counters
  * @rx_errors: number of errors
+ * @rx_dropped: number of dropped packets
  */
 struct vlan_rx_stats {
 	u64 rx_packets;
@@ -32,6 +33,7 @@ struct vlan_rx_stats {
 	u64 rx_multicast;
 	struct u64_stats_sync syncp;
 	unsigned long rx_errors;
+	unsigned long rx_dropped;
 };

 /**
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 0eb96f7e44be..b6d55a9304f2 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		skb->pkt_type = PACKET_OTHERHOST;
 	}

-	return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+	return polling ? netif_receive_skb(skb) : netif_rx(skb);

 drop:
 	dev_kfree_skb_any(skb);
@@ -38,12 +38,12 @@ drop:
 }
 EXPORT_SYMBOL(__vlan_hwaccel_rx);

-int vlan_hwaccel_do_receive(struct sk_buff *skb)
+void vlan_hwaccel_do_receive(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 	struct vlan_rx_stats *rx_stats;

-	skb->dev = vlan_dev_info(dev)->real_dev;
+	skb->dev = vlan_dev_real_dev(dev);
 	netif_nit_deliver(skb);

 	skb->dev = dev;
@@ -72,7 +72,6 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
 		break;
 	}
 	u64_stats_update_end(&rx_stats->syncp);
-	return 0;
 }

 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -112,9 +111,12 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 	}

 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow =
-			p->dev == skb->dev && !compare_ether_header(
-				skb_mac_header(p), skb_gro_mac_header(skb));
+		unsigned long diffs;
+
+		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+		diffs |= compare_ether_header(skb_mac_header(p),
+					      skb_gro_mac_header(skb));
+		NAPI_GRO_CB(p)->same_flow = !diffs;
 		NAPI_GRO_CB(p)->flush = 0;
 	}

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3bccdd12a264..f6fbcc0f1af9 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -177,8 +177,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	} else {
 		skb->dev = vlan_dev;

-		rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
-				       smp_processor_id());
+		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
+
 		u64_stats_update_begin(&rx_stats->syncp);
 		rx_stats->rx_packets++;
 		rx_stats->rx_bytes += skb->len;
@@ -225,7 +225,10 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		}
 	}

-	netif_rx(skb);
+	if (unlikely(netif_rx(skb) == NET_RX_DROP)) {
+		if (rx_stats)
+			rx_stats->rx_dropped++;
+	}
 	rcu_read_unlock();
 	return NET_RX_SUCCESS;

@@ -843,13 +846,15 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
 			accum.rx_packets += rxpackets;
 			accum.rx_bytes += rxbytes;
 			accum.rx_multicast += rxmulticast;
-			/* rx_errors is an ulong, not protected by syncp */
+			/* rx_errors, rx_dropped are ulong, not protected by syncp */
 			accum.rx_errors += p->rx_errors;
+			accum.rx_dropped += p->rx_dropped;
 		}
 		stats->rx_packets = accum.rx_packets;
 		stats->rx_bytes = accum.rx_bytes;
 		stats->rx_errors = accum.rx_errors;
 		stats->multicast = accum.rx_multicast;
+		stats->rx_dropped = accum.rx_dropped;
 	}
 	return stats;
 }
diff --git a/net/9p/client.c b/net/9p/client.c
index dc6f2f26d023..f34b9f510818 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -61,13 +61,13 @@ static const match_table_t tokens = {

 inline int p9_is_proto_dotl(struct p9_client *clnt)
 {
-	return (clnt->proto_version == p9_proto_2000L);
+	return clnt->proto_version == p9_proto_2000L;
 }
 EXPORT_SYMBOL(p9_is_proto_dotl);

 inline int p9_is_proto_dotu(struct p9_client *clnt)
 {
-	return (clnt->proto_version == p9_proto_2000u);
+	return clnt->proto_version == p9_proto_2000u;
 }
 EXPORT_SYMBOL(p9_is_proto_dotu);

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c85109d809ca..078eb162d9bf 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -222,7 +222,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	}
 }

-static unsigned int
+static int
 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 {
 	int ret, n;
diff --git a/net/atm/common.c b/net/atm/common.c
index 940404a73b3d..1b9c52a02cd3 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -792,7 +792,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 	default:
 		if (level == SOL_SOCKET)
 			return -EINVAL;
-			break;
+		break;
 	}
 	if (!vcc->dev || !vcc->dev->ops->getsockopt)
 		return -EINVAL;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d98bde1a0ac8..181d70c73d70 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -220,7 +220,6 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
 static int lec_open(struct net_device *dev)
 {
 	netif_start_queue(dev);
-	memset(&dev->stats, 0, sizeof(struct net_device_stats));

 	return 0;
 }
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index cfdfd7e2a172..26eaebf4aaa9 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1103,7 +1103,7 @@ done:
 out:
 	release_sock(sk);

-	return 0;
+	return err;
 }

 /*
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 7805945a5fd6..a1690845dc6e 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -412,7 +412,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
 {
 	ax25_uid_assoc *user;
 	ax25_route *ax25_rt;
-	int err;
+	int err = 0;

 	if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
 		return -EHOSTUNREACH;
@@ -453,7 +453,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
 put:
 	ax25_put_route(ax25_rt);

-	return 0;
+	return err;
 }

 struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 421c45bd1b95..ed0f22f57668 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -297,13 +297,12 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
 		mask |= POLLERR;

 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP;
+		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;

-	if (!skb_queue_empty(&sk->sk_receive_queue) ||
-			(sk->sk_shutdown & RCV_SHUTDOWN))
+	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;

 	if (sk->sk_state == BT_CLOSED)
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7dca91bb8c57..15ea84ba344e 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -179,13 +179,13 @@ static unsigned char rfcomm_crc_table[256] = {
 /* FCS on 2 bytes */
 static inline u8 __fcs(u8 *data)
 {
-	return (0xff - __crc(data));
+	return 0xff - __crc(data);
 }

 /* FCS on 3 bytes */
 static inline u8 __fcs2(u8 *data)
 {
-	return (0xff - rfcomm_crc_table[__crc(data) ^ data[2]]);
+	return 0xff - rfcomm_crc_table[__crc(data) ^ data[2]];
 }

 /* Check FCS */
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c03d2c3ff03e..89ad25a76202 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -61,30 +61,27 @@ static int port_cost(struct net_device *dev)
 }


-/*
- * Check for port carrier transistions.
- * Called from work queue to allow for calling functions that
- * might sleep (such as speed check), and to debounce.
- */
+/* Check for port carrier transistions. */
 void br_port_carrier_check(struct net_bridge_port *p)
 {
 	struct net_device *dev = p->dev;
 	struct net_bridge *br = p->br;

-	if (netif_carrier_ok(dev))
+	if (netif_running(dev) && netif_carrier_ok(dev))
 		p->path_cost = port_cost(dev);

-	if (netif_running(br->dev)) {
-		spin_lock_bh(&br->lock);
-		if (netif_carrier_ok(dev)) {
-			if (p->state == BR_STATE_DISABLED)
-				br_stp_enable_port(p);
-		} else {
-			if (p->state != BR_STATE_DISABLED)
-				br_stp_disable_port(p);
-		}
-		spin_unlock_bh(&br->lock);
-	}
+	if (!netif_running(br->dev))
+		return;
+
+	spin_lock_bh(&br->lock);
+	if (netif_running(dev) && netif_carrier_ok(dev)) {
+		if (p->state == BR_STATE_DISABLED)
+			br_stp_enable_port(p);
+	} else {
+		if (p->state != BR_STATE_DISABLED)
+			br_stp_disable_port(p);
+	}
+	spin_unlock_bh(&br->lock);
 }

 static void release_nbp(struct kobject *kobj)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 826cd5221536..6d04cfdf4541 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -141,7 +141,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);

-	if (skb->pkt_type == PACKET_LOOPBACK)
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 		return skb;

 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 137f23259a93..77f7b5fda45a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -209,6 +209,72 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb)
 	skb->protocol = htons(ETH_P_PPP_SES);
 }

+/* When handing a packet over to the IP layer
+ * check whether we have a skb that is in the
+ * expected format
+ */
+
+int br_parse_ip_options(struct sk_buff *skb)
+{
+	struct ip_options *opt;
+	struct iphdr *iph;
+	struct net_device *dev = skb->dev;
+	u32 len;
+
+	iph = ip_hdr(skb);
+	opt = &(IPCB(skb)->opt);
+
+	/* Basic sanity checks */
+	if (iph->ihl < 5 || iph->version != 4)
+		goto inhdr_error;
+
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto inhdr_error;
+
+	iph = ip_hdr(skb);
+	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+		goto inhdr_error;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len) {
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
+		goto drop;
+	} else if (len < (iph->ihl*4))
+		goto inhdr_error;
+
+	if (pskb_trim_rcsum(skb, len)) {
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
+		goto drop;
+	}
+
+	/* Zero out the CB buffer if no options present */
+	if (iph->ihl == 5) {
+		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+		return 0;
+	}
+
+	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
+	if (ip_options_compile(dev_net(dev), opt, skb))
+		goto inhdr_error;
+
+	/* Check correct handling of SRR option */
+	if (unlikely(opt->srr)) {
+		struct in_device *in_dev = __in_dev_get_rcu(dev);
+		if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev))
+			goto drop;
+
+		if (ip_options_rcv_srr(skb))
+			goto drop;
+	}
+
+	return 0;
+
+inhdr_error:
+	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
+drop:
+	return -1;
+}
+
 /* Fill in the header for fragmented IP packets handled by
  * the IPv4 connection tracking code.
  */
@@ -549,7 +615,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 {
 	struct net_bridge_port *p;
 	struct net_bridge *br;
-	struct iphdr *iph;
 	__u32 len = nf_bridge_encap_header_len(skb);

 	if (unlikely(!pskb_may_pull(skb, len)))
@@ -578,28 +643,9 @@

 	nf_bridge_pull_encap_header_rcsum(skb);

-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		goto inhdr_error;
-
-	iph = ip_hdr(skb);
-	if (iph->ihl < 5 || iph->version != 4)
-		goto inhdr_error;
-
-	if (!pskb_may_pull(skb, 4 * iph->ihl))
-		goto inhdr_error;
-
-	iph = ip_hdr(skb);
-	if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
-		goto inhdr_error;
-
-	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < 4 * iph->ihl)
-		goto inhdr_error;
-
-	pskb_trim_rcsum(skb, len);
-
-	/* BUG: Should really parse the IP options here. */
-	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+	if (br_parse_ip_options(skb))
+		/* Drop invalid packet */
+		goto out;

 	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
@@ -614,8 +660,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,

 	return NF_STOLEN;

-inhdr_error:
-//	IP_INC_STATS_BH(IpInHdrErrors);
 out:
 	return NF_DROP;
 }
@@ -759,14 +803,19 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
+	int ret;
+
 	if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
 	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
 	    !skb_is_gso(skb)) {
-		/* BUG: Should really parse the IP options here. */
-		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
-		return ip_fragment(skb, br_dev_queue_push_xmit);
+		if (br_parse_ip_options(skb))
+			/* Drop invalid packet */
+			return NF_DROP;
+		ret = ip_fragment(skb, br_dev_queue_push_xmit);
 	} else
-		return br_dev_queue_push_xmit(skb);
+		ret = br_dev_queue_push_xmit(skb);
+
+	return ret;
 }
 #else
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 0b586e9d1378..b99369a055d1 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -9,6 +9,8 @@
  * and Sakari Ailus <sakari.ailus@nokia.com>
  */

+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
 #include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -171,7 +173,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
 	net = dev_net(dev);
 	pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
 	caifd = caif_get(dev);
-	if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
+	if (!caifd || !caifd->layer.up || !caifd->layer.up->receive)
 		return NET_RX_DROP;

 	if (caifd->layer.up->receive(caifd->layer.up, pkt))
@@ -214,7 +216,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,

 	switch (what) {
 	case NETDEV_REGISTER:
-		pr_info("CAIF: %s():register %s\n", __func__, dev->name);
+		netdev_info(dev, "register\n");
 		caifd = caif_device_alloc(dev);
 		if (caifd == NULL)
 			break;
@@ -225,14 +227,13 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 		break;

 	case NETDEV_UP:
-		pr_info("CAIF: %s(): up %s\n", __func__, dev->name);
+		netdev_info(dev, "up\n");
 		caifd = caif_get(dev);
 		if (caifd == NULL)
 			break;
 		caifdev = netdev_priv(dev);
 		if (atomic_read(&caifd->state) == NETDEV_UP) {
-			pr_info("CAIF: %s():%s already up\n",
-				__func__, dev->name);
+			netdev_info(dev, "already up\n");
 			break;
 		}
 		atomic_set(&caifd->state, what);
@@ -273,7 +274,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 		caifd = caif_get(dev);
 		if (caifd == NULL)
 			break;
-		pr_info("CAIF: %s():going down %s\n", __func__, dev->name);
+		netdev_info(dev, "going down\n");

 		if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
 		    atomic_read(&caifd->state) == NETDEV_DOWN)
@@ -295,11 +296,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 		caifd = caif_get(dev);
 		if (caifd == NULL)
 			break;
-		pr_info("CAIF: %s(): down %s\n", __func__, dev->name);
+		netdev_info(dev, "down\n");
 		if (atomic_read(&caifd->in_use))
-			pr_warning("CAIF: %s(): "
-				   "Unregistering an active CAIF device: %s\n",
-				   __func__, dev->name);
+			netdev_warn(dev,
+				    "Unregistering an active CAIF device\n");
 		cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
 		dev_put(dev);
 		atomic_set(&caifd->state, what);
@@ -307,7 +307,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,

 	case NETDEV_UNREGISTER:
 		caifd = caif_get(dev);
-		pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name);
+		netdev_info(dev, "unregister\n");
 		atomic_set(&caifd->state, what);
 		caif_device_destroy(dev);
 		break;
@@ -391,7 +391,7 @@ static int __init caif_device_init(void)
 	int result;
 	cfg = cfcnfg_create();
 	if (!cfg) {
-		pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__);
+		pr_warn("can't create cfcnfg\n");
 		goto err_cfcnfg_create_failed;
 	}
 	result = register_pernet_device(&caif_net_ops);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 8ce904786116..4d918f8f4e67 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -4,6 +4,8 @@
  * License terms: GNU General Public License (GPL) version 2
  */

+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -28,9 +30,6 @@
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(AF_CAIF);

-#define CAIF_DEF_SNDBUF (4096*10)
-#define CAIF_DEF_RCVBUF (4096*100)
-
 /*
  * CAIF state is re-using the TCP socket states.
  * caif_states stored in sk_state reflect the state as reported by
@@ -157,9 +156,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)

 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 	    (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
-		trace_printk("CAIF: %s():"
-			" sending flow OFF (queue len = %d %d)\n",
-			__func__,
+		pr_debug("sending flow OFF (queue len = %d %d)\n",
 			atomic_read(&cf_sk->sk.sk_rmem_alloc),
 			sk_rcvbuf_lowwater(cf_sk));
 		set_rx_flow_off(cf_sk);
@@ -172,9 +169,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		return err;
 	if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
 		set_rx_flow_off(cf_sk);
-		trace_printk("CAIF: %s():"
-			" sending flow OFF due to rmem_schedule\n",
-			__func__);
+		pr_debug("sending flow OFF due to rmem_schedule\n");
 		dbfs_atomic_inc(&cnt.num_rx_flow_off);
 		caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
 	}
@@ -275,8 +270,7 @@ static void caif_ctrl_cb(struct cflayer *layr,
 		break;

 	default:
-		pr_debug("CAIF: %s(): Unexpected flow command %d\n",
-			__func__, flow);
+		pr_debug("Unexpected flow command %d\n", flow);
 	}
 }

@@ -536,8 +530,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,

 		/* Slight paranoia, probably not needed. */
 		if (unlikely(loopcnt++ > 1000)) {
-			pr_warning("CAIF: %s(): transmit retries failed,"
-				" error = %d\n", __func__, ret);
+			pr_warn("transmit retries failed, error = %d\n", ret);
 			break;
 		}

@@ -902,8 +895,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	cf_sk->maxframe = dev->mtu - (headroom + tailroom);
 	dev_put(dev);
 	if (cf_sk->maxframe < 1) {
-		pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n",
-			__func__, dev->mtu);
+		pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
 		err = -ENODEV;
 		goto out;
 	}
@@ -1123,10 +1115,6 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 	/* Store the protocol */
 	sk->sk_protocol = (unsigned char) protocol;

-	/* Sendbuf dictates the amount of outbound packets not yet sent */
-	sk->sk_sndbuf = CAIF_DEF_SNDBUF;
-	sk->sk_rcvbuf = CAIF_DEF_RCVBUF;
-
 	/*
 	 * Lock in order to try to stop someone from opening the socket
 	 * too early.
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 1c29189b344d..41adafd18914 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -3,6 +3,9 @@
  * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
  * License terms: GNU General Public License (GPL) version 2
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
 #include <linux/kernel.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
@@ -78,7 +81,7 @@ struct cfcnfg *cfcnfg_create(void)
 	/* Initiate this layer */
 	this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
 	if (!this) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return NULL;
 	}
 	this->mux = cfmuxl_create();
@@ -106,7 +109,7 @@ struct cfcnfg *cfcnfg_create(void)
 	layer_set_up(this->ctrl, this);
 	return this;
 out_of_mem:
-	pr_warning("CAIF: %s(): Out of memory\n", __func__);
+	pr_warn("Out of memory\n");
 	kfree(this->mux);
 	kfree(this->ctrl);
 	kfree(this);
@@ -194,7 +197,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
 	caif_assert(adap_layer != NULL);
 	channel_id = adap_layer->id;
 	if (adap_layer->dn == NULL || channel_id == 0) {
-		pr_err("CAIF: %s():adap_layer->id is 0\n", __func__);
+		pr_err("adap_layer->dn == NULL or adap_layer->id is 0\n");
 		ret = -ENOTCONN;
 		goto end;
 	}
@@ -204,9 +207,8 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
 	layer_set_up(servl, NULL);
 	ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
 	if (servl == NULL) {
-		pr_err("CAIF: %s(): PROTOCOL ERROR "
-		       "- Error removing service_layer Channel_Id(%d)",
-			__func__, channel_id);
+		pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)",
+			channel_id);
 		ret = -EINVAL;
 		goto end;
 	}
@@ -216,18 +218,14 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)

 	phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
 	if (phyinfo == NULL) {
-		pr_warning("CAIF: %s(): "
-			"No interface to send disconnect to\n",
-			__func__);
+		pr_warn("No interface to send disconnect to\n");
 		ret = -ENODEV;
 		goto end;
 	}
 	if (phyinfo->id != phyid ||
 	    phyinfo->phy_layer->id != phyid ||
 	    phyinfo->frm_layer->id != phyid) {
-		pr_err("CAIF: %s(): "
-			"Inconsistency in phy registration\n",
-			__func__);
+		pr_err("Inconsistency in phy registration\n");
 		ret = -EINVAL;
 		goto end;
 	}
@@ -276,21 +274,20 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 {
 	struct cflayer *frml;
 	if (adap_layer == NULL) {
-		pr_err("CAIF: %s(): adap_layer is zero", __func__);
+		pr_err("adap_layer is zero\n");
 		return -EINVAL;
 	}
 	if (adap_layer->receive == NULL) {
-		pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__);
+		pr_err("adap_layer->receive is NULL\n");
 		return -EINVAL;
 	}
 	if (adap_layer->ctrlcmd == NULL) {
-		pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__);
+		pr_err("adap_layer->ctrlcmd == NULL\n");
 		return -EINVAL;
 	}
 	frml = cnfg->phy_layers[param->phyid].frm_layer;
 	if (frml == NULL) {
-		pr_err("CAIF: %s(): Specified PHY type does not exist!",
-			__func__);
+		pr_err("Specified PHY type does not exist!\n");
 		return -ENODEV;
 	}
 	caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
@@ -330,9 +327,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 	struct net_device *netdev;

 	if (adapt_layer == NULL) {
-		pr_debug("CAIF: %s(): link setup response "
-				"but no client exist, send linkdown back\n",
-				__func__);
+		pr_debug("link setup response but no client exist, send linkdown back\n");
 		cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
 		return;
 	}
@@ -374,13 +369,11 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 		servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
 		break;
 	default:
-		pr_err("CAIF: %s(): Protocol error. "
-			"Link setup response - unknown channel type\n",
-			__func__);
+		pr_err("Protocol error. Link setup response - unknown channel type\n");
 		return;
 	}
 	if (!servicel) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return;
 	}
 	layer_set_dn(servicel, cnfg->mux);
@@ -418,7 +411,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
 		}
 	}
 	if (*phyid == 0) {
-		pr_err("CAIF: %s(): No Available PHY ID\n", __func__);
+		pr_err("No Available PHY ID\n");
 		return;
 	}

@@ -427,7 +420,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
 		phy_driver =
 		    cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
 		if (!phy_driver) {
-			pr_warning("CAIF: %s(): Out of memory\n", __func__);
+			pr_warn("Out of memory\n");
 			return;
 		}

@@ -436,7 +429,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
 		phy_driver = NULL;
 		break;
 	default:
-		pr_err("CAIF: %s(): %d", __func__, phy_type);
+		pr_err("%d\n", phy_type);
 		return;
 		break;
 	}
@@ -455,7 +448,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
 	phy_layer->type = phy_type;
 	frml = cffrml_create(*phyid, fcs);
 	if (!frml) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return;
 	}
 	cnfg->phy_layers[*phyid].frm_layer = frml;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 563145fdc4c3..08f267a109aa 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -4,6 +4,8 @@
  * License terms: GNU General Public License (GPL) version 2
  */

+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
@@ -36,7 +38,7 @@ struct cflayer *cfctrl_create(void)
 	struct cfctrl *this =
 		kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
 	if (!this) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return NULL;
 	}
 	caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -132,9 +134,7 @@ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
 	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
 		if (cfctrl_req_eq(req, p)) {
 			if (p != first)
-				pr_warning("CAIF: %s(): Requests are not "
-					"received in order\n",
-					__func__);
+				pr_warn("Requests are not received in order\n");

 			atomic_set(&ctrl->rsp_seq_no,
 				p->sequence_no);
@@ -177,7 +177,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
 	int ret;
 	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
 	if (!pkt) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return;
 	}
 	caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -189,8 +189,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
 	ret =
 	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
 	if (ret < 0) {
-		pr_err("CAIF: %s(): Could not transmit enum message\n",
-			__func__);
+		pr_err("Could not transmit enum message\n");
 		cfpkt_destroy(pkt);
 	}
 }
@@ -208,7 +207,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
 	char utility_name[16];
 	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
 	if (!pkt) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return -ENOMEM;
 	}
 	cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
@@ -253,13 +252,13 @@ int cfctrl_linkup_request(struct cflayer *layer,
 			       param->u.utility.paramlen);
 		break;
 	default:
-		pr_warning("CAIF: %s():Request setup of bad link type = %d\n",
-			   __func__, param->linktype);
+		pr_warn("Request setup of bad link type = %d\n",
+			param->linktype);
 		return -EINVAL;
 	}
 	req = kzalloc(sizeof(*req), GFP_KERNEL);
 	if (!req) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return -ENOMEM;
 	}
 	req->client_layer = user_layer;
@@ -276,8 +275,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
 	ret =
 	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
 	if (ret < 0) {
-		pr_err("CAIF: %s(): Could not transmit linksetup request\n",
-			__func__);
+		pr_err("Could not transmit linksetup request\n");
 		cfpkt_destroy(pkt);
 		return -ENODEV;
 	}
@@ -291,7 +289,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
 	struct cfctrl *cfctrl = container_obj(layer);
 	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
 	if (!pkt) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return -ENOMEM;
 	}
 	cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
@@ -300,8 +298,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
 	ret =
 	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
 	if (ret < 0) {
-		pr_err("CAIF: %s(): Could not transmit link-down request\n",
-			__func__);
+		pr_err("Could not transmit link-down request\n");
 		cfpkt_destroy(pkt);
 	}
 	return ret;
@@ -313,7 +310,7 @@ void cfctrl_sleep_req(struct cflayer *layer)
 	struct cfctrl *cfctrl = container_obj(layer);
 	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
 	if (!pkt) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return;
 	}
 	cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
@@ -330,7 +327,7 @@ void cfctrl_wake_req(struct cflayer *layer)
 	struct cfctrl *cfctrl = container_obj(layer);
 	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
 	if (!pkt) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return;
 	}
 	cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
@@ -347,7 +344,7 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
 	struct cfctrl *cfctrl = container_obj(layer);
 	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
 	if (!pkt) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		pr_warn("Out of memory\n");
 		return;
 	}
 	cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
@@ -364,12 +361,11 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
 	struct cfctrl_request_info *p, *tmp;
 	struct cfctrl *ctrl = container_obj(layr);
 	spin_lock(&ctrl->info_list_lock);
-	pr_warning("CAIF: %s(): enter\n", __func__);
+	pr_warn("enter\n");

 	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
 		if (p->client_layer == adap_layer) {
-			pr_warning("CAIF: %s(): cancel req :%d\n", __func__,
-					p->sequence_no);
+			pr_warn("cancel req :%d\n", p->sequence_no);
 			list_del(&p->list);
 			kfree(p);
 		}
@@ -520,9 +516,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 			cfpkt_extr_head(pkt, &param, len);
 			break;
 		default:
-			pr_warning("CAIF: %s(): Request setup "
-				   "- invalid link type (%d)",
-				   __func__, serv);
+			pr_warn("Request setup - invalid link type (%d)\n",
+				serv);
 			goto error;
 		}

@@ -532,9 +527,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)

 		if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
 		    cfpkt_erroneous(pkt)) {
-			pr_err("CAIF: %s(): Invalid O/E bit or parse "
-			       "error on CAIF control channel",
-			       __func__);
+			pr_err("Invalid O/E bit or parse error on CAIF control channel\n");
 			cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
 					       0,
 					       req ? req->client_layer
@@ -556,8 +549,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 		cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
 		break;
 	case CFCTRL_CMD_LINK_ERR:
-		pr_err("CAIF: %s(): Frame Error Indication received\n",
-			__func__);
+		pr_err("Frame Error Indication received\n");
 		cfctrl->res.linkerror_ind();
 		break;
 	case CFCTRL_CMD_ENUM:
@@ -576,7 +568,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 		cfctrl->res.radioset_rsp();
 		break;
 	default:
-		pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__);
+		pr_err("Unrecognized Control Frame\n");
 		goto error;
 		break;
 	}
@@ -595,8 +587,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 	case CAIF_CTRLCMD_FLOW_OFF_IND:
 		spin_lock(&this->info_list_lock);
 		if (!list_empty(&this->list)) {
-			pr_debug("CAIF: %s(): Received flow off in "
-				 "control layer", __func__);
+			pr_debug("Received flow off in control layer\n");
 		}
 		spin_unlock(&this->info_list_lock);
 		break;
@@ -620,7 +611,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
 		if (!ctrl->loop_linkused[linkid])
 			goto found;
 	spin_unlock(&ctrl->loop_linkid_lock);
-	pr_err("CAIF: %s(): Out of link-ids\n", __func__);
+	pr_err("Out of link-ids\n");
 	return -EINVAL;
 found:
 	if (!ctrl->loop_linkused[linkid])
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 676648cac8dd..496fda9ac66f 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -17,7 +19,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
17{ 19{
18 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 20 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
19 if (!dbg) { 21 if (!dbg) {
20 pr_warning("CAIF: %s(): Out of memory\n", __func__); 22 pr_warn("Out of memory\n");
21 return NULL; 23 return NULL;
22 } 24 }
23 caif_assert(offsetof(struct cfsrvl, layer) == 0); 25 caif_assert(offsetof(struct cfsrvl, layer) == 0);
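The cfdbgl.c hunk above, like the rest of the CAIF files below, adds a pr_fmt definition so the converted pr_warn()/pr_err() call sites can drop the hand-written "CAIF: %s():" prefix and the trailing __func__ argument. A minimal userspace sketch of the same preprocessor trick, with hypothetical names and printf standing in for printk:

#include <stdio.h>

/* Define the prefix before the helper macros expand it, just as the kernel
 * expects pr_fmt to be defined before the printk helpers are included. */
#define pr_fmt(fmt) "cfdbgl" ":%s(): " fmt, __func__

/* Simplified stand-ins for the kernel's pr_warn()/pr_err() helpers. */
#define pr_warn(fmt, ...) printf("WARNING: " pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...)  printf("ERROR: "   pr_fmt(fmt), ##__VA_ARGS__)

static void *create_layer(void)
{
	/* The call site states only the message; the module prefix and the
	 * function name are pasted in by pr_fmt at preprocessing time. */
	pr_warn("Out of memory\n");
	return NULL;
}

int main(void)
{
	create_layer();	/* prints: WARNING: cfdbgl:create_layer(): Out of memory */
	return 0;
}

The "%s" consumed by __func__ in the prefix is why every converted call site in these hunks can lose both the "CAIF: %s():" text and the explicit __func__ argument.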
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index ed9d53aff280..d3ed264ad6c4 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!dgm) { 30 if (!dgm) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -49,14 +51,14 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
49 caif_assert(layr->ctrlcmd != NULL); 51 caif_assert(layr->ctrlcmd != NULL);
50 52
51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 53 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
52 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 54 pr_err("Packet is erroneous!\n");
53 cfpkt_destroy(pkt); 55 cfpkt_destroy(pkt);
54 return -EPROTO; 56 return -EPROTO;
55 } 57 }
56 58
57 if ((cmd & DGM_CMD_BIT) == 0) { 59 if ((cmd & DGM_CMD_BIT) == 0) {
58 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) { 60 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
59 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 61 pr_err("Packet is erroneous!\n");
60 cfpkt_destroy(pkt); 62 cfpkt_destroy(pkt);
61 return -EPROTO; 63 return -EPROTO;
62 } 64 }
@@ -75,8 +77,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
75 return 0; 77 return 0;
76 default: 78 default:
77 cfpkt_destroy(pkt); 79 cfpkt_destroy(pkt);
78 pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n", 80 pr_info("Unknown datagram control %d (0x%x)\n", cmd, cmd);
79 __func__, cmd, cmd);
80 return -EPROTO; 81 return -EPROTO;
81 } 82 }
82} 83}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index e86a4ca3b217..a445043931ae 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -6,6 +6,8 @@
6 * License terms: GNU General Public License (GPL) version 2 6 * License terms: GNU General Public License (GPL) version 2
7 */ 7 */
8 8
9#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
10
9#include <linux/stddef.h> 11#include <linux/stddef.h>
10#include <linux/spinlock.h> 12#include <linux/spinlock.h>
11#include <linux/slab.h> 13#include <linux/slab.h>
@@ -32,7 +34,7 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
32{ 34{
33 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); 35 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
34 if (!this) { 36 if (!this) {
35 pr_warning("CAIF: %s(): Out of memory\n", __func__); 37 pr_warn("Out of memory\n");
36 return NULL; 38 return NULL;
37 } 39 }
38 caif_assert(offsetof(struct cffrml, layer) == 0); 40 caif_assert(offsetof(struct cffrml, layer) == 0);
@@ -83,7 +85,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
83 85
84 if (cfpkt_setlen(pkt, len) < 0) { 86 if (cfpkt_setlen(pkt, len) < 0) {
85 ++cffrml_rcv_error; 87 ++cffrml_rcv_error;
86 pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len); 88 pr_err("Framing length error (%d)\n", len);
87 cfpkt_destroy(pkt); 89 cfpkt_destroy(pkt);
88 return -EPROTO; 90 return -EPROTO;
89 } 91 }
@@ -99,14 +101,14 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
99 cfpkt_add_trail(pkt, &tmp, 2); 101 cfpkt_add_trail(pkt, &tmp, 2);
100 ++cffrml_rcv_error; 102 ++cffrml_rcv_error;
101 ++cffrml_rcv_checsum_error; 103 ++cffrml_rcv_checsum_error;
102 pr_info("CAIF: %s(): Frame checksum error " 104 pr_info("Frame checksum error (0x%x != 0x%x)\n",
103 "(0x%x != 0x%x)\n", __func__, hdrchks, pktchks); 105 hdrchks, pktchks);
104 return -EILSEQ; 106 return -EILSEQ;
105 } 107 }
106 } 108 }
107 if (cfpkt_erroneous(pkt)) { 109 if (cfpkt_erroneous(pkt)) {
108 ++cffrml_rcv_error; 110 ++cffrml_rcv_error;
109 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 111 pr_err("Packet is erroneous!\n");
110 cfpkt_destroy(pkt); 112 cfpkt_destroy(pkt);
111 return -EPROTO; 113 return -EPROTO;
112 } 114 }
@@ -132,7 +134,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
132 cfpkt_add_head(pkt, &tmp, 2); 134 cfpkt_add_head(pkt, &tmp, 2);
133 cfpkt_info(pkt)->hdr_len += 2; 135 cfpkt_info(pkt)->hdr_len += 2;
134 if (cfpkt_erroneous(pkt)) { 136 if (cfpkt_erroneous(pkt)) {
135 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 137 pr_err("Packet is erroneous!\n");
136 return -EPROTO; 138 return -EPROTO;
137 } 139 }
138 ret = layr->dn->transmit(layr->dn, pkt); 140 ret = layr->dn->transmit(layr->dn, pkt);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 80c8d332b258..46f34b2e0478 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/stddef.h> 9#include <linux/stddef.h>
7#include <linux/spinlock.h> 10#include <linux/spinlock.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -190,7 +193,7 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
190 u8 id; 193 u8 id;
191 struct cflayer *up; 194 struct cflayer *up;
192 if (cfpkt_extr_head(pkt, &id, 1) < 0) { 195 if (cfpkt_extr_head(pkt, &id, 1) < 0) {
193 pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__); 196 pr_err("erroneous Caif Packet\n");
194 cfpkt_destroy(pkt); 197 cfpkt_destroy(pkt);
195 return -EPROTO; 198 return -EPROTO;
196 } 199 }
@@ -199,8 +202,8 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
199 up = get_up(muxl, id); 202 up = get_up(muxl, id);
200 spin_unlock(&muxl->receive_lock); 203 spin_unlock(&muxl->receive_lock);
201 if (up == NULL) { 204 if (up == NULL) {
202 pr_info("CAIF: %s():Received data on unknown link ID = %d " 205 pr_info("Received data on unknown link ID = %d (0x%x) up == NULL",
203 "(0x%x) up == NULL", __func__, id, id); 206 id, id);
204 cfpkt_destroy(pkt); 207 cfpkt_destroy(pkt);
205 /* 208 /*
206 * Don't return ERROR, since modem misbehaves and sends out 209 * Don't return ERROR, since modem misbehaves and sends out
@@ -223,9 +226,8 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
223 struct caif_payload_info *info = cfpkt_info(pkt); 226 struct caif_payload_info *info = cfpkt_info(pkt);
224 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info); 227 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
225 if (dn == NULL) { 228 if (dn == NULL) {
226 pr_warning("CAIF: %s(): Send data on unknown phy " 229 pr_warn("Send data on unknown phy ID = %d (0x%x)\n",
227 "ID = %d (0x%x)\n", 230 info->dev_info->id, info->dev_info->id);
228 __func__, info->dev_info->id, info->dev_info->id);
229 return -ENOTCONN; 231 return -ENOTCONN;
230 } 232 }
231 info->hdr_len += 1; 233 info->hdr_len += 1;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index c49a6695793a..d7e865e2ff65 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/string.h> 9#include <linux/string.h>
8#include <linux/skbuff.h> 10#include <linux/skbuff.h>
9#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -12,11 +14,12 @@
12#define PKT_PREFIX 48 14#define PKT_PREFIX 48
13#define PKT_POSTFIX 2 15#define PKT_POSTFIX 2
14#define PKT_LEN_WHEN_EXTENDING 128 16#define PKT_LEN_WHEN_EXTENDING 128
15#define PKT_ERROR(pkt, errmsg) do { \ 17#define PKT_ERROR(pkt, errmsg) \
16 cfpkt_priv(pkt)->erronous = true; \ 18do { \
17 skb_reset_tail_pointer(&pkt->skb); \ 19 cfpkt_priv(pkt)->erronous = true; \
18 pr_warning("CAIF: " errmsg);\ 20 skb_reset_tail_pointer(&pkt->skb); \
19 } while (0) 21 pr_warn(errmsg); \
22} while (0)
20 23
21struct cfpktq { 24struct cfpktq {
22 struct sk_buff_head head; 25 struct sk_buff_head head;
@@ -130,13 +133,13 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
130 return -EPROTO; 133 return -EPROTO;
131 134
132 if (unlikely(len > skb->len)) { 135 if (unlikely(len > skb->len)) {
133 PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n"); 136 PKT_ERROR(pkt, "read beyond end of packet\n");
134 return -EPROTO; 137 return -EPROTO;
135 } 138 }
136 139
137 if (unlikely(len > skb_headlen(skb))) { 140 if (unlikely(len > skb_headlen(skb))) {
138 if (unlikely(skb_linearize(skb) != 0)) { 141 if (unlikely(skb_linearize(skb) != 0)) {
139 PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n"); 142 PKT_ERROR(pkt, "linearize failed\n");
140 return -EPROTO; 143 return -EPROTO;
141 } 144 }
142 } 145 }
@@ -156,11 +159,11 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
156 return -EPROTO; 159 return -EPROTO;
157 160
158 if (unlikely(skb_linearize(skb) != 0)) { 161 if (unlikely(skb_linearize(skb) != 0)) {
159 PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n"); 162 PKT_ERROR(pkt, "linearize failed\n");
160 return -EPROTO; 163 return -EPROTO;
161 } 164 }
162 if (unlikely(skb->data + len > skb_tail_pointer(skb))) { 165 if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
163 PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n"); 166 PKT_ERROR(pkt, "read beyond end of packet\n");
164 return -EPROTO; 167 return -EPROTO;
165 } 168 }
166 from = skb_tail_pointer(skb) - len; 169 from = skb_tail_pointer(skb) - len;
@@ -202,7 +205,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
202 205
203 /* Make sure data is writable */ 206 /* Make sure data is writable */
204 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) { 207 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
205 PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n"); 208 PKT_ERROR(pkt, "cow failed\n");
206 return -EPROTO; 209 return -EPROTO;
207 } 210 }
208 /* 211 /*
@@ -211,8 +214,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
211 * lengths of the top SKB. 214 * lengths of the top SKB.
212 */ 215 */
213 if (lastskb != skb) { 216 if (lastskb != skb) {
214 pr_warning("CAIF: %s(): Packet is non-linear\n", 217 pr_warn("Packet is non-linear\n");
215 __func__);
216 skb->len += len; 218 skb->len += len;
217 skb->data_len += len; 219 skb->data_len += len;
218 } 220 }
@@ -242,14 +244,14 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
242 if (unlikely(is_erronous(pkt))) 244 if (unlikely(is_erronous(pkt)))
243 return -EPROTO; 245 return -EPROTO;
244 if (unlikely(skb_headroom(skb) < len)) { 246 if (unlikely(skb_headroom(skb) < len)) {
245 PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n"); 247 PKT_ERROR(pkt, "no headroom\n");
246 return -EPROTO; 248 return -EPROTO;
247 } 249 }
248 250
249 /* Make sure data is writable */ 251 /* Make sure data is writable */
250 ret = skb_cow_data(skb, 0, &lastskb); 252 ret = skb_cow_data(skb, 0, &lastskb);
251 if (unlikely(ret < 0)) { 253 if (unlikely(ret < 0)) {
252 PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n"); 254 PKT_ERROR(pkt, "cow failed\n");
253 return ret; 255 return ret;
254 } 256 }
255 257
@@ -283,7 +285,7 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt,
283 if (unlikely(is_erronous(pkt))) 285 if (unlikely(is_erronous(pkt)))
284 return -EPROTO; 286 return -EPROTO;
285 if (unlikely(skb_linearize(&pkt->skb) != 0)) { 287 if (unlikely(skb_linearize(&pkt->skb) != 0)) {
286 PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n"); 288 PKT_ERROR(pkt, "linearize failed\n");
287 return -EPROTO; 289 return -EPROTO;
288 } 290 }
289 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); 291 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
@@ -309,7 +311,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
309 311
310 /* Need to expand SKB */ 312 /* Need to expand SKB */
311 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len))) 313 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len)))
312 PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n"); 314 PKT_ERROR(pkt, "skb_pad_trail failed\n");
313 315
314 return cfpkt_getlen(pkt); 316 return cfpkt_getlen(pkt);
315} 317}
@@ -380,8 +382,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
380 return NULL; 382 return NULL;
381 383
382 if (skb->data + pos > skb_tail_pointer(skb)) { 384 if (skb->data + pos > skb_tail_pointer(skb)) {
383 PKT_ERROR(pkt, 385 PKT_ERROR(pkt, "trying to split beyond end of packet\n");
384 "cfpkt_split: trying to split beyond end of packet");
385 return NULL; 386 return NULL;
386 } 387 }
387 388
@@ -455,17 +456,17 @@ int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
455 return -EPROTO; 456 return -EPROTO;
456 /* Make sure SKB is writable */ 457 /* Make sure SKB is writable */
457 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { 458 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
458 PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n"); 459 PKT_ERROR(pkt, "skb_cow_data failed\n");
459 return -EPROTO; 460 return -EPROTO;
460 } 461 }
461 462
462 if (unlikely(skb_linearize(skb) != 0)) { 463 if (unlikely(skb_linearize(skb) != 0)) {
463 PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n"); 464 PKT_ERROR(pkt, "linearize failed\n");
464 return -EPROTO; 465 return -EPROTO;
465 } 466 }
466 467
467 if (unlikely(skb_tailroom(skb) < buflen)) { 468 if (unlikely(skb_tailroom(skb) < buflen)) {
468 PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n"); 469 PKT_ERROR(pkt, "buffer too short - failed\n");
469 return -EPROTO; 470 return -EPROTO;
470 } 471 }
471 472
@@ -483,14 +484,13 @@ int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
483 return -EPROTO; 484 return -EPROTO;
484 485
485 if (unlikely(buflen > skb->len)) { 486 if (unlikely(buflen > skb->len)) {
486 PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large " 487 PKT_ERROR(pkt, "buflen too large - failed\n");
487 "- failed\n");
488 return -EPROTO; 488 return -EPROTO;
489 } 489 }
490 490
491 if (unlikely(buflen > skb_headlen(skb))) { 491 if (unlikely(buflen > skb_headlen(skb))) {
492 if (unlikely(skb_linearize(skb) != 0)) { 492 if (unlikely(skb_linearize(skb) != 0)) {
493 PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n"); 493 PKT_ERROR(pkt, "linearize failed\n");
494 return -EPROTO; 494 return -EPROTO;
495 } 495 }
496 } 496 }
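The reworked PKT_ERROR macro above keeps its multi-statement body inside do { ... } while (0). A small standalone sketch (hypothetical names) of why that wrapper matters when the macro is used as a single statement under an if without braces:

#include <stdio.h>
#include <stdbool.h>

static bool erroneous;

/* Wrapping the body in do { } while (0) makes the macro behave as one
 * statement, so "if (cond) PKT_ERROR(...); else ..." parses and runs as
 * intended instead of splitting into two statements. */
#define PKT_ERROR(errmsg)			\
do {						\
	erroneous = true;			\
	fprintf(stderr, "%s", errmsg);		\
} while (0)

int main(void)
{
	int len = 10, headroom = 4;

	if (len > headroom)
		PKT_ERROR("read beyond end of packet\n");
	else
		printf("packet ok\n");

	return erroneous ? 1 : 0;
}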
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 9a699242d104..bde8481e8d25 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -48,7 +50,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
48 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); 50 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
49 51
50 if (!this) { 52 if (!this) {
51 pr_warning("CAIF: %s(): Out of memory\n", __func__); 53 pr_warn("Out of memory\n");
52 return NULL; 54 return NULL;
53 } 55 }
54 56
@@ -178,9 +180,7 @@ out:
178 cfpkt_destroy(rfml->incomplete_frm); 180 cfpkt_destroy(rfml->incomplete_frm);
179 rfml->incomplete_frm = NULL; 181 rfml->incomplete_frm = NULL;
180 182
181 pr_info("CAIF: %s(): " 183 pr_info("Connection error %d triggered on RFM link\n", err);
182 "Connection error %d triggered on RFM link\n",
183 __func__, err);
184 184
185 /* Trigger connection error upon failure.*/ 185 /* Trigger connection error upon failure.*/
186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
@@ -280,9 +280,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
280out: 280out:
281 281
282 if (err != 0) { 282 if (err != 0) {
283 pr_info("CAIF: %s(): " 283 pr_info("Connection error %d triggered on RFM link\n", err);
284 "Connection error %d triggered on RFM link\n",
285 __func__, err);
286 /* Trigger connection error upon failure.*/ 284 /* Trigger connection error upon failure.*/
287 285
288 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 286 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index a11fbd68a13d..9297f7dea9d8 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -34,7 +36,7 @@ struct cflayer *cfserl_create(int type, int instance, bool use_stx)
34{ 36{
35 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); 37 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
36 if (!this) { 38 if (!this) {
37 pr_warning("CAIF: %s(): Out of memory\n", __func__); 39 pr_warn("Out of memory\n");
38 return NULL; 40 return NULL;
39 } 41 }
40 caif_assert(offsetof(struct cfserl, layer) == 0); 42 caif_assert(offsetof(struct cfserl, layer) == 0);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index f40939a91211..ab5e542526bf 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/errno.h> 11#include <linux/errno.h>
@@ -79,8 +81,7 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
79 layr->up->ctrlcmd(layr->up, ctrl, phyid); 81 layr->up->ctrlcmd(layr->up, ctrl, phyid);
80 break; 82 break;
81 default: 83 default:
82 pr_warning("CAIF: %s(): " 84 pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl);
83 "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
84 /* We have both modem and phy flow on, send flow on */ 85 /* We have both modem and phy flow on, send flow on */
85 layr->up->ctrlcmd(layr->up, ctrl, phyid); 86 layr->up->ctrlcmd(layr->up, ctrl, phyid);
86 service->phy_flow_on = true; 87 service->phy_flow_on = true;
@@ -107,14 +108,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
107 u8 flow_on = SRVL_FLOW_ON; 108 u8 flow_on = SRVL_FLOW_ON;
108 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 109 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
109 if (!pkt) { 110 if (!pkt) {
110 pr_warning("CAIF: %s(): Out of memory\n", 111 pr_warn("Out of memory\n");
111 __func__);
112 return -ENOMEM; 112 return -ENOMEM;
113 } 113 }
114 114
115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { 115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
116 pr_err("CAIF: %s(): Packet is erroneous!\n", 116 pr_err("Packet is erroneous!\n");
117 __func__);
118 cfpkt_destroy(pkt); 117 cfpkt_destroy(pkt);
119 return -EPROTO; 118 return -EPROTO;
120 } 119 }
@@ -131,14 +130,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
131 u8 flow_off = SRVL_FLOW_OFF; 130 u8 flow_off = SRVL_FLOW_OFF;
132 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 131 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
133 if (!pkt) { 132 if (!pkt) {
134 pr_warning("CAIF: %s(): Out of memory\n", 133 pr_warn("Out of memory\n");
135 __func__);
136 return -ENOMEM; 134 return -ENOMEM;
137 } 135 }
138 136
139 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { 137 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
140 pr_err("CAIF: %s(): Packet is erroneous!\n", 138 pr_err("Packet is erroneous!\n");
141 __func__);
142 cfpkt_destroy(pkt); 139 cfpkt_destroy(pkt);
143 return -EPROTO; 140 return -EPROTO;
144 } 141 }
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 02795aff57a4..efad410e4c82 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!util) { 30 if (!util) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
47 caif_assert(layr->up->receive != NULL); 49 caif_assert(layr->up->receive != NULL);
48 caif_assert(layr->up->ctrlcmd != NULL); 50 caif_assert(layr->up->ctrlcmd != NULL);
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -64,16 +66,14 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
64 cfpkt_destroy(pkt); 66 cfpkt_destroy(pkt);
65 return 0; 67 return 0;
66 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */ 68 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */
67 pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n", 69 pr_err("REMOTE SHUTDOWN REQUEST RECEIVED\n");
68 __func__);
69 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0); 70 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
70 service->open = false; 71 service->open = false;
71 cfpkt_destroy(pkt); 72 cfpkt_destroy(pkt);
72 return 0; 73 return 0;
73 default: 74 default:
74 cfpkt_destroy(pkt); 75 cfpkt_destroy(pkt);
75 pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n", 76 pr_warn("Unknown service control %d (0x%x)\n", cmd, cmd);
76 __func__, cmd, cmd);
77 return -EPROTO; 77 return -EPROTO;
78 } 78 }
79} 79}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 77cc09faac9a..3b425b189a99 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -25,7 +27,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
25{ 27{
26 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 28 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
27 if (!vei) { 29 if (!vei) {
28 pr_warning("CAIF: %s(): Out of memory\n", __func__); 30 pr_warn("Out of memory\n");
29 return NULL; 31 return NULL;
30 } 32 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0); 33 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
47 49
48 50
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -67,8 +69,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
67 cfpkt_destroy(pkt); 69 cfpkt_destroy(pkt);
68 return 0; 70 return 0;
69 default: /* SET RS232 PIN */ 71 default: /* SET RS232 PIN */
70 pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n", 72 pr_warn("Unknown VEI control packet %d (0x%x)!\n", cmd, cmd);
71 __func__, cmd, cmd);
72 cfpkt_destroy(pkt); 73 cfpkt_destroy(pkt);
73 return -EPROTO; 74 return -EPROTO;
74 } 75 }
@@ -86,7 +87,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
86 caif_assert(layr->dn->transmit != NULL); 87 caif_assert(layr->dn->transmit != NULL);
87 88
88 if (cfpkt_add_head(pkt, &tmp, 1) < 0) { 89 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
89 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 90 pr_err("Packet is erroneous!\n");
90 return -EPROTO; 91 return -EPROTO;
91 } 92 }
92 93
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index ada6ee2d48f5..bf6fef2a0eff 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -21,7 +23,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
21{ 23{
22 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 24 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
23 if (!vid) { 25 if (!vid) {
24 pr_warning("CAIF: %s(): Out of memory\n", __func__); 26 pr_warn("Out of memory\n");
25 return NULL; 27 return NULL;
26 } 28 }
27 caif_assert(offsetof(struct cfsrvl, layer) == 0); 29 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -38,7 +40,7 @@ static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
38{ 40{
39 u32 videoheader; 41 u32 videoheader;
40 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) { 42 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
41 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 43 pr_err("Packet is erroneous!\n");
42 cfpkt_destroy(pkt); 44 cfpkt_destroy(pkt);
43 return -EPROTO; 45 return -EPROTO;
44 } 46 }
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 4293e190ec53..84a422c98941 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -5,6 +5,8 @@
5 * License terms: GNU General Public License (GPL) version 2 5 * License terms: GNU General Public License (GPL) version 2
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
9
8#include <linux/version.h> 10#include <linux/version.h>
9#include <linux/fs.h> 11#include <linux/fs.h>
10#include <linux/init.h> 12#include <linux/init.h>
@@ -28,9 +30,6 @@
28#define CONNECT_TIMEOUT (5 * HZ) 30#define CONNECT_TIMEOUT (5 * HZ)
29#define CAIF_NET_DEFAULT_QUEUE_LEN 500 31#define CAIF_NET_DEFAULT_QUEUE_LEN 500
30 32
31#undef pr_debug
32#define pr_debug pr_warning
33
34/*This list is protected by the rtnl lock. */ 33/*This list is protected by the rtnl lock. */
35static LIST_HEAD(chnl_net_list); 34static LIST_HEAD(chnl_net_list);
36 35
@@ -142,8 +141,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
142 int phyid) 141 int phyid)
143{ 142{
144 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); 143 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
145 pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n", 144 pr_debug("NET flowctrl func called flow: %s\n",
146 __func__,
147 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : 145 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
148 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : 146 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
149 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : 147 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
@@ -196,12 +194,12 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
196 priv = netdev_priv(dev); 194 priv = netdev_priv(dev);
197 195
198 if (skb->len > priv->netdev->mtu) { 196 if (skb->len > priv->netdev->mtu) {
199 pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__); 197 pr_warn("Size of skb exceeded MTU\n");
200 return -ENOSPC; 198 return -ENOSPC;
201 } 199 }
202 200
203 if (!priv->flowenabled) { 201 if (!priv->flowenabled) {
204 pr_debug("CAIF: %s(): dropping packets flow off\n", __func__); 202 pr_debug("dropping packets flow off\n");
205 return NETDEV_TX_BUSY; 203 return NETDEV_TX_BUSY;
206 } 204 }
207 205
@@ -237,7 +235,7 @@ static int chnl_net_open(struct net_device *dev)
237 ASSERT_RTNL(); 235 ASSERT_RTNL();
238 priv = netdev_priv(dev); 236 priv = netdev_priv(dev);
239 if (!priv) { 237 if (!priv) {
240 pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__); 238 pr_debug("chnl_net_open: no priv\n");
241 return -ENODEV; 239 return -ENODEV;
242 } 240 }
243 241
@@ -246,18 +244,17 @@ static int chnl_net_open(struct net_device *dev)
246 result = caif_connect_client(&priv->conn_req, &priv->chnl, 244 result = caif_connect_client(&priv->conn_req, &priv->chnl,
247 &llifindex, &headroom, &tailroom); 245 &llifindex, &headroom, &tailroom);
248 if (result != 0) { 246 if (result != 0) {
249 pr_debug("CAIF: %s(): err: " 247 pr_debug("err: "
250 "Unable to register and open device," 248 "Unable to register and open device,"
251 " Err:%d\n", 249 " Err:%d\n",
252 __func__, 250 result);
253 result);
254 goto error; 251 goto error;
255 } 252 }
256 253
257 lldev = dev_get_by_index(dev_net(dev), llifindex); 254 lldev = dev_get_by_index(dev_net(dev), llifindex);
258 255
259 if (lldev == NULL) { 256 if (lldev == NULL) {
260 pr_debug("CAIF: %s(): no interface?\n", __func__); 257 pr_debug("no interface?\n");
261 result = -ENODEV; 258 result = -ENODEV;
262 goto error; 259 goto error;
263 } 260 }
@@ -279,9 +276,7 @@ static int chnl_net_open(struct net_device *dev)
279 dev_put(lldev); 276 dev_put(lldev);
280 277
281 if (mtu < 100) { 278 if (mtu < 100) {
282 pr_warning("CAIF: %s(): " 279 pr_warn("CAIF Interface MTU too small (%d)\n", mtu);
283 "CAIF Interface MTU too small (%d)\n",
284 __func__, mtu);
285 result = -ENODEV; 280 result = -ENODEV;
286 goto error; 281 goto error;
287 } 282 }
@@ -296,33 +291,32 @@ static int chnl_net_open(struct net_device *dev)
296 rtnl_lock(); 291 rtnl_lock();
297 292
298 if (result == -ERESTARTSYS) { 293 if (result == -ERESTARTSYS) {
299 pr_debug("CAIF: %s(): wait_event_interruptible" 294 pr_debug("wait_event_interruptible woken by a signal\n");
300 " woken by a signal\n", __func__);
301 result = -ERESTARTSYS; 295 result = -ERESTARTSYS;
302 goto error; 296 goto error;
303 } 297 }
304 298
305 if (result == 0) { 299 if (result == 0) {
306 pr_debug("CAIF: %s(): connect timeout\n", __func__); 300 pr_debug("connect timeout\n");
307 caif_disconnect_client(&priv->chnl); 301 caif_disconnect_client(&priv->chnl);
308 priv->state = CAIF_DISCONNECTED; 302 priv->state = CAIF_DISCONNECTED;
309 pr_debug("CAIF: %s(): state disconnected\n", __func__); 303 pr_debug("state disconnected\n");
310 result = -ETIMEDOUT; 304 result = -ETIMEDOUT;
311 goto error; 305 goto error;
312 } 306 }
313 307
314 if (priv->state != CAIF_CONNECTED) { 308 if (priv->state != CAIF_CONNECTED) {
315 pr_debug("CAIF: %s(): connect failed\n", __func__); 309 pr_debug("connect failed\n");
316 result = -ECONNREFUSED; 310 result = -ECONNREFUSED;
317 goto error; 311 goto error;
318 } 312 }
319 pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); 313 pr_debug("CAIF Netdevice connected\n");
320 return 0; 314 return 0;
321 315
322error: 316error:
323 caif_disconnect_client(&priv->chnl); 317 caif_disconnect_client(&priv->chnl);
324 priv->state = CAIF_DISCONNECTED; 318 priv->state = CAIF_DISCONNECTED;
325 pr_debug("CAIF: %s(): state disconnected\n", __func__); 319 pr_debug("state disconnected\n");
326 return result; 320 return result;
327 321
328} 322}
@@ -413,7 +407,7 @@ static void caif_netlink_parms(struct nlattr *data[],
413 struct caif_connect_request *conn_req) 407 struct caif_connect_request *conn_req)
414{ 408{
415 if (!data) { 409 if (!data) {
416 pr_warning("CAIF: %s: no params data found\n", __func__); 410 pr_warn("no params data found\n");
417 return; 411 return;
418 } 412 }
419 if (data[IFLA_CAIF_IPV4_CONNID]) 413 if (data[IFLA_CAIF_IPV4_CONNID])
@@ -442,8 +436,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
442 436
443 ret = register_netdevice(dev); 437 ret = register_netdevice(dev);
444 if (ret) 438 if (ret)
445 pr_warning("CAIF: %s(): device rtml registration failed\n", 439 pr_warn("device rtml registration failed\n");
446 __func__);
447 return ret; 440 return ret;
448} 441}
449 442
diff --git a/net/can/raw.c b/net/can/raw.c
index a10e3338f084..7d77e67e57af 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -647,12 +647,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
647 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 647 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
648 if (err < 0) 648 if (err < 0)
649 goto free_skb; 649 goto free_skb;
650 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 650 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
651 if (err < 0) 651 if (err < 0)
652 goto free_skb; 652 goto free_skb;
653 653
654 /* to be able to check the received tx sock reference in raw_rcv() */ 654 /* to be able to check the received tx sock reference in raw_rcv() */
655 skb_tx(skb)->prevent_sk_orphan = 1; 655 skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
656 656
657 skb->dev = dev; 657 skb->dev = dev;
658 skb->sk = sk; 658 skb->sk = sk;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 251997a95483..4df1b7a6c1bf 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -746,13 +746,12 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
746 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 746 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
747 mask |= POLLERR; 747 mask |= POLLERR;
748 if (sk->sk_shutdown & RCV_SHUTDOWN) 748 if (sk->sk_shutdown & RCV_SHUTDOWN)
749 mask |= POLLRDHUP; 749 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
750 if (sk->sk_shutdown == SHUTDOWN_MASK) 750 if (sk->sk_shutdown == SHUTDOWN_MASK)
751 mask |= POLLHUP; 751 mask |= POLLHUP;
752 752
753 /* readable? */ 753 /* readable? */
754 if (!skb_queue_empty(&sk->sk_receive_queue) || 754 if (!skb_queue_empty(&sk->sk_receive_queue))
755 (sk->sk_shutdown & RCV_SHUTDOWN))
756 mask |= POLLIN | POLLRDNORM; 755 mask |= POLLIN | POLLRDNORM;
757 756
758 /* Connection-based need to check for termination and startup */ 757 /* Connection-based need to check for termination and startup */
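The datagram_poll() change above reports POLLIN | POLLRDNORM as soon as the receive side is shut down, instead of only when data is queued, so the readable check below it no longer needs to test RCV_SHUTDOWN. A compact model of the new mask computation, in plain C with simplified flag values (not the kernel function itself):

#include <stdio.h>
#include <stdbool.h>

/* Simplified stand-ins for the poll and shutdown flag values. */
#define POLLIN      0x001
#define POLLHUP     0x010
#define POLLRDNORM  0x040
#define POLLRDHUP   0x2000

#define RCV_SHUTDOWN   1
#define SEND_SHUTDOWN  2
#define SHUTDOWN_MASK  (RCV_SHUTDOWN | SEND_SHUTDOWN)

/* Mirrors the patched receive-side logic: a half-closed receive direction
 * is always readable, whether or not anything is queued. */
static unsigned int rx_poll_mask(int shutdown, bool queue_empty)
{
	unsigned int mask = 0;

	if (shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (!queue_empty)
		mask |= POLLIN | POLLRDNORM;
	return mask;
}

int main(void)
{
	printf("RCV_SHUTDOWN, empty queue: 0x%x\n", rx_poll_mask(RCV_SHUTDOWN, true));
	printf("no shutdown, data queued:  0x%x\n", rx_poll_mask(0, false));
	return 0;
}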
diff --git a/net/core/dev.c b/net/core/dev.c
index 660dd41aaaa6..a313bab1b754 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,7 @@
129#include <linux/random.h> 129#include <linux/random.h>
130#include <trace/events/napi.h> 130#include <trace/events/napi.h>
131#include <linux/pci.h> 131#include <linux/pci.h>
132#include <linux/inetdevice.h>
132 133
133#include "net-sysfs.h" 134#include "net-sysfs.h"
134 135
@@ -371,6 +372,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
371 * --ANK (980803) 372 * --ANK (980803)
372 */ 373 */
373 374
375static inline struct list_head *ptype_head(const struct packet_type *pt)
376{
377 if (pt->type == htons(ETH_P_ALL))
378 return &ptype_all;
379 else
380 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
381}
382
374/** 383/**
375 * dev_add_pack - add packet handler 384 * dev_add_pack - add packet handler
376 * @pt: packet type declaration 385 * @pt: packet type declaration
@@ -386,16 +395,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
386 395
387void dev_add_pack(struct packet_type *pt) 396void dev_add_pack(struct packet_type *pt)
388{ 397{
389 int hash; 398 struct list_head *head = ptype_head(pt);
390 399
391 spin_lock_bh(&ptype_lock); 400 spin_lock(&ptype_lock);
392 if (pt->type == htons(ETH_P_ALL)) 401 list_add_rcu(&pt->list, head);
393 list_add_rcu(&pt->list, &ptype_all); 402 spin_unlock(&ptype_lock);
394 else {
395 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
396 list_add_rcu(&pt->list, &ptype_base[hash]);
397 }
398 spin_unlock_bh(&ptype_lock);
399} 403}
400EXPORT_SYMBOL(dev_add_pack); 404EXPORT_SYMBOL(dev_add_pack);
401 405
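The dev_add_pack() rework above pushes the list selection into a ptype_head() helper: ETH_P_ALL taps share one catch-all list, every other EtherType hashes into a small bucket array on the low bits of the type. A toy standalone version of just that selection rule (hypothetical names):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>		/* htons()/ntohs() */

#define ETH_P_ALL	0x0003
#define ETH_P_IP	0x0800
#define PTYPE_HASH_SIZE	16
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
#define PTYPE_ALL	(-1)	/* marker for the catch-all list */

/* Same rule as ptype_head(): wildcard handlers go to one list, everything
 * else to ptype_base[ntohs(type) & PTYPE_HASH_MASK]. */
static int ptype_bucket(uint16_t type_be)
{
	if (type_be == htons(ETH_P_ALL))
		return PTYPE_ALL;
	return ntohs(type_be) & PTYPE_HASH_MASK;
}

int main(void)
{
	printf("ETH_P_IP  -> bucket %d\n", ptype_bucket(htons(ETH_P_IP)));
	printf("ETH_P_ALL -> bucket %d (catch-all)\n", ptype_bucket(htons(ETH_P_ALL)));
	return 0;
}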
@@ -414,15 +418,10 @@ EXPORT_SYMBOL(dev_add_pack);
414 */ 418 */
415void __dev_remove_pack(struct packet_type *pt) 419void __dev_remove_pack(struct packet_type *pt)
416{ 420{
417 struct list_head *head; 421 struct list_head *head = ptype_head(pt);
418 struct packet_type *pt1; 422 struct packet_type *pt1;
419 423
420 spin_lock_bh(&ptype_lock); 424 spin_lock(&ptype_lock);
421
422 if (pt->type == htons(ETH_P_ALL))
423 head = &ptype_all;
424 else
425 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
426 425
427 list_for_each_entry(pt1, head, list) { 426 list_for_each_entry(pt1, head, list) {
428 if (pt == pt1) { 427 if (pt == pt1) {
@@ -433,7 +432,7 @@ void __dev_remove_pack(struct packet_type *pt)
433 432
434 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); 433 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
435out: 434out:
436 spin_unlock_bh(&ptype_lock); 435 spin_unlock(&ptype_lock);
437} 436}
438EXPORT_SYMBOL(__dev_remove_pack); 437EXPORT_SYMBOL(__dev_remove_pack);
439 438
@@ -1568,6 +1567,41 @@ void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1568} 1567}
1569EXPORT_SYMBOL(netif_set_real_num_tx_queues); 1568EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1570 1569
1570#ifdef CONFIG_RPS
1571/**
1572 * netif_set_real_num_rx_queues - set actual number of RX queues used
1573 * @dev: Network device
1574 * @rxq: Actual number of RX queues
1575 *
1576 * This must be called either with the rtnl_lock held or before
1577 * registration of the net device. Returns 0 on success, or a
1578 * negative error code. If called before registration, it also
1579 * sets the maximum number of queues, and always succeeds.
1580 */
1581int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1582{
1583 int rc;
1584
1585 if (dev->reg_state == NETREG_REGISTERED) {
1586 ASSERT_RTNL();
1587
1588 if (rxq > dev->num_rx_queues)
1589 return -EINVAL;
1590
1591 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1592 rxq);
1593 if (rc)
1594 return rc;
1595 } else {
1596 dev->num_rx_queues = rxq;
1597 }
1598
1599 dev->real_num_rx_queues = rxq;
1600 return 0;
1601}
1602EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1603#endif
1604
1571static inline void __netif_reschedule(struct Qdisc *q) 1605static inline void __netif_reschedule(struct Qdisc *q)
1572{ 1606{
1573 struct softnet_data *sd; 1607 struct softnet_data *sd;
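The kerneldoc added above spells out the calling rules for netif_set_real_num_rx_queues(): before registration it also sets the maximum queue count and always succeeds, afterwards it must run under the rtnl lock and stay at or below the allocated maximum. A hedged sketch of how a driver might use it at probe time; the toy_* names and helper are hypothetical, not a real driver:

/* Hypothetical probe fragment: allocate for the worst case, then trim the
 * advertised RX/TX queue counts to what the hardware actually provides.
 * Because this runs before register_netdev(), the RX call also caps
 * num_rx_queues and cannot fail; after registration it would have to be
 * made under rtnl_lock(). */
static int toy_probe(void)
{
	struct net_device *dev;
	unsigned int usable = toy_count_hw_rings();	/* hypothetical helper */
	int err;

	dev = alloc_etherdev_mq(sizeof(struct toy_priv), TOY_MAX_QUEUES);
	if (!dev)
		return -ENOMEM;

	err = netif_set_real_num_rx_queues(dev, usable);
	if (err)
		goto out_free;
	netif_set_real_num_tx_queues(dev, usable);

	err = register_netdev(dev);
	if (err)
		goto out_free;
	return 0;

out_free:
	free_netdev(dev);
	return err;
}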
@@ -1902,14 +1936,14 @@ static int dev_gso_segment(struct sk_buff *skb)
1902 1936
1903/* 1937/*
1904 * Try to orphan skb early, right before transmission by the device. 1938 * Try to orphan skb early, right before transmission by the device.
1905 * We cannot orphan skb if tx timestamp is requested, since 1939 * We cannot orphan skb if tx timestamp is requested or the sk-reference
1906 * drivers need to call skb_tstamp_tx() to send the timestamp. 1940 * is needed on driver level for other reasons, e.g. see net/can/raw.c
1907 */ 1941 */
1908static inline void skb_orphan_try(struct sk_buff *skb) 1942static inline void skb_orphan_try(struct sk_buff *skb)
1909{ 1943{
1910 struct sock *sk = skb->sk; 1944 struct sock *sk = skb->sk;
1911 1945
1912 if (sk && !skb_tx(skb)->flags) { 1946 if (sk && !skb_shinfo(skb)->tx_flags) {
1913 /* skb_tx_hash() wont be able to get sk. 1947 /* skb_tx_hash() wont be able to get sk.
1914 * We copy sk_hash into skb->rxhash 1948 * We copy sk_hash into skb->rxhash
1915 */ 1949 */
@@ -1930,7 +1964,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
1930 struct net_device *dev) 1964 struct net_device *dev)
1931{ 1965{
1932 return skb_is_nonlinear(skb) && 1966 return skb_is_nonlinear(skb) &&
1933 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || 1967 ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
1934 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || 1968 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
1935 illegal_highdma(dev, skb)))); 1969 illegal_highdma(dev, skb))));
1936} 1970}
@@ -2143,6 +2177,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2143 return rc; 2177 return rc;
2144} 2178}
2145 2179
2180static DEFINE_PER_CPU(int, xmit_recursion);
2181#define RECURSION_LIMIT 3
2182
2146/** 2183/**
2147 * dev_queue_xmit - transmit a buffer 2184 * dev_queue_xmit - transmit a buffer
2148 * @skb: buffer to transmit 2185 * @skb: buffer to transmit
@@ -2208,10 +2245,15 @@ int dev_queue_xmit(struct sk_buff *skb)
2208 2245
2209 if (txq->xmit_lock_owner != cpu) { 2246 if (txq->xmit_lock_owner != cpu) {
2210 2247
2248 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2249 goto recursion_alert;
2250
2211 HARD_TX_LOCK(dev, txq, cpu); 2251 HARD_TX_LOCK(dev, txq, cpu);
2212 2252
2213 if (!netif_tx_queue_stopped(txq)) { 2253 if (!netif_tx_queue_stopped(txq)) {
2254 __this_cpu_inc(xmit_recursion);
2214 rc = dev_hard_start_xmit(skb, dev, txq); 2255 rc = dev_hard_start_xmit(skb, dev, txq);
2256 __this_cpu_dec(xmit_recursion);
2215 if (dev_xmit_complete(rc)) { 2257 if (dev_xmit_complete(rc)) {
2216 HARD_TX_UNLOCK(dev, txq); 2258 HARD_TX_UNLOCK(dev, txq);
2217 goto out; 2259 goto out;
@@ -2223,7 +2265,9 @@ int dev_queue_xmit(struct sk_buff *skb)
2223 "queue packet!\n", dev->name); 2265 "queue packet!\n", dev->name);
2224 } else { 2266 } else {
2225 /* Recursion is detected! It is possible, 2267 /* Recursion is detected! It is possible,
2226 * unfortunately */ 2268 * unfortunately
2269 */
2270recursion_alert:
2227 if (net_ratelimit()) 2271 if (net_ratelimit())
2228 printk(KERN_CRIT "Dead loop on virtual device " 2272 printk(KERN_CRIT "Dead loop on virtual device "
2229 "%s, fix it urgently!\n", dev->name); 2273 "%s, fix it urgently!\n", dev->name);
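The hunks above add a per-CPU xmit_recursion counter so that a chain of virtual devices re-entering dev_queue_xmit() is cut off after RECURSION_LIMIT levels rather than chewing through the stack. A standalone sketch of the same guard, with a thread-local counter standing in for the per-CPU variable (GCC/Clang __thread):

#include <stdio.h>

#define RECURSION_LIMIT 3

/* Stand-in for the kernel's per-CPU counter: one counter per thread. */
static __thread int xmit_recursion;

/* A toy "virtual device" whose transmit path re-enters itself, the way
 * stacked devices can re-enter dev_queue_xmit(). */
static int toy_queue_xmit(int stacked_devices_left)
{
	int rc;

	if (xmit_recursion > RECURSION_LIMIT) {
		fprintf(stderr, "Dead loop on virtual device, giving up\n");
		return -1;
	}

	if (stacked_devices_left == 0)
		return 0;	/* reached a real device, packet is out */

	xmit_recursion++;
	rc = toy_queue_xmit(stacked_devices_left - 1);	/* nested transmit */
	xmit_recursion--;
	return rc;
}

int main(void)
{
	printf("shallow stack: %d\n", toy_queue_xmit(2));	/* 0: delivered */
	printf("looping stack: %d\n", toy_queue_xmit(100));	/* -1: capped */
	return 0;
}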
@@ -2259,69 +2303,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2259 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2303 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2260} 2304}
2261 2305
2262#ifdef CONFIG_RPS
2263
2264/* One global table that all flow-based protocols share. */
2265struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2266EXPORT_SYMBOL(rps_sock_flow_table);
2267
2268/* 2306/*
2269 * get_rps_cpu is called from netif_receive_skb and returns the target 2307 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2270 * CPU from the RPS map of the receiving queue for a given skb. 2308 * and src/dst port numbers. Returns a non-zero hash number on success
2271 * rcu_read_lock must be held on entry. 2309 * and 0 on failure.
2272 */ 2310 */
2273static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2311__u32 __skb_get_rxhash(struct sk_buff *skb)
2274 struct rps_dev_flow **rflowp)
2275{ 2312{
2313 int nhoff, hash = 0, poff;
2276 struct ipv6hdr *ip6; 2314 struct ipv6hdr *ip6;
2277 struct iphdr *ip; 2315 struct iphdr *ip;
2278 struct netdev_rx_queue *rxqueue;
2279 struct rps_map *map;
2280 struct rps_dev_flow_table *flow_table;
2281 struct rps_sock_flow_table *sock_flow_table;
2282 int cpu = -1;
2283 u8 ip_proto; 2316 u8 ip_proto;
2284 u16 tcpu;
2285 u32 addr1, addr2, ihl; 2317 u32 addr1, addr2, ihl;
2286 union { 2318 union {
2287 u32 v32; 2319 u32 v32;
2288 u16 v16[2]; 2320 u16 v16[2];
2289 } ports; 2321 } ports;
2290 2322
2291 if (skb_rx_queue_recorded(skb)) { 2323 nhoff = skb_network_offset(skb);
2292 u16 index = skb_get_rx_queue(skb);
2293 if (unlikely(index >= dev->num_rx_queues)) {
2294 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2295 "on queue %u, but number of RX queues is %u\n",
2296 dev->name, index, dev->num_rx_queues);
2297 goto done;
2298 }
2299 rxqueue = dev->_rx + index;
2300 } else
2301 rxqueue = dev->_rx;
2302
2303 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2304 goto done;
2305
2306 if (skb->rxhash)
2307 goto got_hash; /* Skip hash computation on packet header */
2308 2324
2309 switch (skb->protocol) { 2325 switch (skb->protocol) {
2310 case __constant_htons(ETH_P_IP): 2326 case __constant_htons(ETH_P_IP):
2311 if (!pskb_may_pull(skb, sizeof(*ip))) 2327 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2312 goto done; 2328 goto done;
2313 2329
2314 ip = (struct iphdr *) skb->data; 2330 ip = (struct iphdr *) (skb->data + nhoff);
2315 ip_proto = ip->protocol; 2331 if (ip->frag_off & htons(IP_MF | IP_OFFSET))
2332 ip_proto = 0;
2333 else
2334 ip_proto = ip->protocol;
2316 addr1 = (__force u32) ip->saddr; 2335 addr1 = (__force u32) ip->saddr;
2317 addr2 = (__force u32) ip->daddr; 2336 addr2 = (__force u32) ip->daddr;
2318 ihl = ip->ihl; 2337 ihl = ip->ihl;
2319 break; 2338 break;
2320 case __constant_htons(ETH_P_IPV6): 2339 case __constant_htons(ETH_P_IPV6):
2321 if (!pskb_may_pull(skb, sizeof(*ip6))) 2340 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2322 goto done; 2341 goto done;
2323 2342
2324 ip6 = (struct ipv6hdr *) skb->data; 2343 ip6 = (struct ipv6hdr *) (skb->data + nhoff);
2325 ip_proto = ip6->nexthdr; 2344 ip_proto = ip6->nexthdr;
2326 addr1 = (__force u32) ip6->saddr.s6_addr32[3]; 2345 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2327 addr2 = (__force u32) ip6->daddr.s6_addr32[3]; 2346 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2330,33 +2349,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2330 default: 2349 default:
2331 goto done; 2350 goto done;
2332 } 2351 }
2333 switch (ip_proto) { 2352
2334 case IPPROTO_TCP: 2353 ports.v32 = 0;
2335 case IPPROTO_UDP: 2354 poff = proto_ports_offset(ip_proto);
2336 case IPPROTO_DCCP: 2355 if (poff >= 0) {
2337 case IPPROTO_ESP: 2356 nhoff += ihl * 4 + poff;
2338 case IPPROTO_AH: 2357 if (pskb_may_pull(skb, nhoff + 4)) {
2339 case IPPROTO_SCTP: 2358 ports.v32 = * (__force u32 *) (skb->data + nhoff);
2340 case IPPROTO_UDPLITE:
2341 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2342 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2343 if (ports.v16[1] < ports.v16[0]) 2359 if (ports.v16[1] < ports.v16[0])
2344 swap(ports.v16[0], ports.v16[1]); 2360 swap(ports.v16[0], ports.v16[1]);
2345 break;
2346 } 2361 }
2347 default:
2348 ports.v32 = 0;
2349 break;
2350 } 2362 }
2351 2363
2352 /* get a consistent hash (same value on both flow directions) */ 2364 /* get a consistent hash (same value on both flow directions) */
2353 if (addr2 < addr1) 2365 if (addr2 < addr1)
2354 swap(addr1, addr2); 2366 swap(addr1, addr2);
2355 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2356 if (!skb->rxhash)
2357 skb->rxhash = 1;
2358 2367
2359got_hash: 2368 hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2369 if (!hash)
2370 hash = 1;
2371
2372done:
2373 return hash;
2374}
2375EXPORT_SYMBOL(__skb_get_rxhash);
2376
2377#ifdef CONFIG_RPS
2378
2379/* One global table that all flow-based protocols share. */
2380struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2381EXPORT_SYMBOL(rps_sock_flow_table);
2382
2383/*
2384 * get_rps_cpu is called from netif_receive_skb and returns the target
2385 * CPU from the RPS map of the receiving queue for a given skb.
2386 * rcu_read_lock must be held on entry.
2387 */
2388static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2389 struct rps_dev_flow **rflowp)
2390{
2391 struct netdev_rx_queue *rxqueue;
2392 struct rps_map *map = NULL;
2393 struct rps_dev_flow_table *flow_table;
2394 struct rps_sock_flow_table *sock_flow_table;
2395 int cpu = -1;
2396 u16 tcpu;
2397
2398 if (skb_rx_queue_recorded(skb)) {
2399 u16 index = skb_get_rx_queue(skb);
2400 if (unlikely(index >= dev->real_num_rx_queues)) {
2401 WARN_ONCE(dev->real_num_rx_queues > 1,
2402 "%s received packet on queue %u, but number "
2403 "of RX queues is %u\n",
2404 dev->name, index, dev->real_num_rx_queues);
2405 goto done;
2406 }
2407 rxqueue = dev->_rx + index;
2408 } else
2409 rxqueue = dev->_rx;
2410
2411 if (rxqueue->rps_map) {
2412 map = rcu_dereference(rxqueue->rps_map);
2413 if (map && map->len == 1) {
2414 tcpu = map->cpus[0];
2415 if (cpu_online(tcpu))
2416 cpu = tcpu;
2417 goto done;
2418 }
2419 } else if (!rxqueue->rps_flow_table) {
2420 goto done;
2421 }
2422
2423 skb_reset_network_header(skb);
2424 if (!skb_get_rxhash(skb))
2425 goto done;
2426
2360 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2427 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2361 sock_flow_table = rcu_dereference(rps_sock_flow_table); 2428 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2362 if (flow_table && sock_flow_table) { 2429 if (flow_table && sock_flow_table) {
@@ -2396,7 +2463,6 @@ got_hash:
2396 } 2463 }
2397 } 2464 }
2398 2465
2399 map = rcu_dereference(rxqueue->rps_map);
2400 if (map) { 2466 if (map) {
2401 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 2467 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2402 2468
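The new __skb_get_rxhash() above orders the address pair and the port pair before hashing so that both directions of a flow land on the same value, and reserves 0 to mean "no hash computed". A standalone sketch of that canonicalisation; the mixing arithmetic is a simple stand-in, not the kernel's jhash_3words():

#include <stdio.h>
#include <stdint.h>

static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
			  uint16_t sport, uint16_t dport)
{
	uint32_t ports, hash;

	/* Canonical ordering: A->B and B->A hash identically. */
	if (dport < sport) {
		uint16_t tp = sport; sport = dport; dport = tp;
	}
	if (daddr < saddr) {
		uint32_t ta = saddr; saddr = daddr; daddr = ta;
	}
	ports = ((uint32_t)sport << 16) | dport;

	hash  = saddr * 2654435761u;	/* simple multiplicative mixing */
	hash ^= daddr * 2246822519u;
	hash ^= ports * 3266489917u;
	hash ^= hash >> 16;

	return hash ? hash : 1;		/* 0 is reserved for "no hash" */
}

int main(void)
{
	uint32_t a = 0x0a000001, b = 0x0a000002;	/* 10.0.0.1, 10.0.0.2 */

	printf("A->B: 0x%08x\n", flow_hash(a, b, 40000, 80));
	printf("B->A: 0x%08x\n", flow_hash(b, a, 80, 40000));	/* same value */
	return 0;
}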
@@ -2654,7 +2720,7 @@ static int ing_filter(struct sk_buff *skb)
2654 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 2720 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2655 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 2721 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2656 2722
2657 rxq = &dev->rx_queue; 2723 rxq = &dev->ingress_queue;
2658 2724
2659 q = rxq->qdisc; 2725 q = rxq->qdisc;
2660 if (q != &noop_qdisc) { 2726 if (q != &noop_qdisc) {
@@ -2671,7 +2737,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2671 struct packet_type **pt_prev, 2737 struct packet_type **pt_prev,
2672 int *ret, struct net_device *orig_dev) 2738 int *ret, struct net_device *orig_dev)
2673{ 2739{
2674 if (skb->dev->rx_queue.qdisc == &noop_qdisc) 2740 if (skb->dev->ingress_queue.qdisc == &noop_qdisc)
2675 goto out; 2741 goto out;
2676 2742
2677 if (*pt_prev) { 2743 if (*pt_prev) {
@@ -2828,8 +2894,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
2828 if (!netdev_tstamp_prequeue) 2894 if (!netdev_tstamp_prequeue)
2829 net_timestamp_check(skb); 2895 net_timestamp_check(skb);
2830 2896
2831 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2897 if (vlan_tx_tag_present(skb))
2832 return NET_RX_SUCCESS; 2898 vlan_hwaccel_do_receive(skb);
2833 2899
2834 /* if we've gotten here through NAPI, check netpoll */ 2900 /* if we've gotten here through NAPI, check netpoll */
2835 if (netpoll_receive_skb(skb)) 2901 if (netpoll_receive_skb(skb))
@@ -3050,7 +3116,7 @@ out:
3050 return netif_receive_skb(skb); 3116 return netif_receive_skb(skb);
3051} 3117}
3052 3118
3053static void napi_gro_flush(struct napi_struct *napi) 3119inline void napi_gro_flush(struct napi_struct *napi)
3054{ 3120{
3055 struct sk_buff *skb, *next; 3121 struct sk_buff *skb, *next;
3056 3122
@@ -3063,6 +3129,7 @@ static void napi_gro_flush(struct napi_struct *napi)
3063 napi->gro_count = 0; 3129 napi->gro_count = 0;
3064 napi->gro_list = NULL; 3130 napi->gro_list = NULL;
3065} 3131}
3132EXPORT_SYMBOL(napi_gro_flush);
3066 3133
3067enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3134enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3068{ 3135{
@@ -3077,7 +3144,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3077 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3144 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3078 goto normal; 3145 goto normal;
3079 3146
3080 if (skb_is_gso(skb) || skb_has_frags(skb)) 3147 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3081 goto normal; 3148 goto normal;
3082 3149
3083 rcu_read_lock(); 3150 rcu_read_lock();
@@ -3156,16 +3223,18 @@ normal:
3156} 3223}
3157EXPORT_SYMBOL(dev_gro_receive); 3224EXPORT_SYMBOL(dev_gro_receive);
3158 3225
3159static gro_result_t 3226static inline gro_result_t
3160__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3227__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3161{ 3228{
3162 struct sk_buff *p; 3229 struct sk_buff *p;
3163 3230
3164 for (p = napi->gro_list; p; p = p->next) { 3231 for (p = napi->gro_list; p; p = p->next) {
3165 NAPI_GRO_CB(p)->same_flow = 3232 unsigned long diffs;
3166 (p->dev == skb->dev) && 3233
3167 !compare_ether_header(skb_mac_header(p), 3234 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3235 diffs |= compare_ether_header(skb_mac_header(p),
3168 skb_gro_mac_header(skb)); 3236 skb_gro_mac_header(skb));
3237 NAPI_GRO_CB(p)->same_flow = !diffs;
3169 NAPI_GRO_CB(p)->flush = 0; 3238 NAPI_GRO_CB(p)->flush = 0;
3170 } 3239 }
3171 3240
@@ -4871,7 +4940,7 @@ static void __netdev_init_queue_locks_one(struct net_device *dev,
4871static void netdev_init_queue_locks(struct net_device *dev) 4940static void netdev_init_queue_locks(struct net_device *dev)
4872{ 4941{
4873 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); 4942 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4874 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); 4943 __netdev_init_queue_locks_one(dev, &dev->ingress_queue, NULL);
4875} 4944}
4876 4945
4877unsigned long netdev_fix_features(unsigned long features, const char *name) 4946unsigned long netdev_fix_features(unsigned long features, const char *name)
@@ -4941,6 +5010,34 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
4941} 5010}
4942EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5011EXPORT_SYMBOL(netif_stacked_transfer_operstate);
4943 5012
5013static int netif_alloc_rx_queues(struct net_device *dev)
5014{
5015#ifdef CONFIG_RPS
5016 unsigned int i, count = dev->num_rx_queues;
5017
5018 if (count) {
5019 struct netdev_rx_queue *rx;
5020
5021 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5022 if (!rx) {
5023 pr_err("netdev: Unable to allocate %u rx queues.\n",
5024 count);
5025 return -ENOMEM;
5026 }
5027 dev->_rx = rx;
5028 atomic_set(&rx->count, count);
5029
5030 /*
5031 * Set a pointer to first element in the array which holds the
5032 * reference count.
5033 */
5034 for (i = 0; i < count; i++)
5035 rx[i].first = rx;
5036 }
5037#endif
5038 return 0;
5039}
5040
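netif_alloc_rx_queues() allocates the whole RX queue array once and makes every element point back at element zero, which is the only element whose reference count is meaningful. A rough userspace sketch of that layout follows; struct rx_queue and the plain int counter are stand-ins for the kernel's netdev_rx_queue and atomic_t.

#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for struct netdev_rx_queue: only the fields that
 * matter for the "first element owns the refcount" layout */
struct rx_queue {
	struct rx_queue *first;   /* points at element 0 of the array */
	int count;                /* meaningful only in element 0 */
};

static struct rx_queue *alloc_rx_queues(unsigned int n)
{
	struct rx_queue *rx = calloc(n, sizeof(*rx));
	unsigned int i;

	if (!rx)
		return NULL;
	rx->count = n;                    /* element 0 holds the count */
	for (i = 0; i < n; i++)
		rx[i].first = rx;         /* every element can find element 0 */
	return rx;
}

int main(void)
{
	struct rx_queue *rx = alloc_rx_queues(4);

	if (!rx)
		return 1;
	/* any queue can reach the shared counter through ->first */
	printf("queues in array: %d\n", rx[3].first->count);
	free(rx);
	return 0;
}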
4944/** 5041/**
4945 * register_netdevice - register a network device 5042 * register_netdevice - register a network device
4946 * @dev: device to register 5043 * @dev: device to register
@@ -4978,24 +5075,10 @@ int register_netdevice(struct net_device *dev)
4978 5075
4979 dev->iflink = -1; 5076 dev->iflink = -1;
4980 5077
4981#ifdef CONFIG_RPS 5078 ret = netif_alloc_rx_queues(dev);
4982 if (!dev->num_rx_queues) { 5079 if (ret)
4983 /* 5080 goto out;
4984 * Allocate a single RX queue if driver never called
4985 * alloc_netdev_mq
4986 */
4987
4988 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4989 if (!dev->_rx) {
4990 ret = -ENOMEM;
4991 goto out;
4992 }
4993 5081
4994 dev->_rx->first = dev->_rx;
4995 atomic_set(&dev->_rx->count, 1);
4996 dev->num_rx_queues = 1;
4997 }
4998#endif
4999 /* Init, if this function is available */ 5082 /* Init, if this function is available */
5000 if (dev->netdev_ops->ndo_init) { 5083 if (dev->netdev_ops->ndo_init) {
5001 ret = dev->netdev_ops->ndo_init(dev); 5084 ret = dev->netdev_ops->ndo_init(dev);
@@ -5035,6 +5118,12 @@ int register_netdevice(struct net_device *dev)
5035 if (dev->features & NETIF_F_SG) 5118 if (dev->features & NETIF_F_SG)
5036 dev->features |= NETIF_F_GSO; 5119 dev->features |= NETIF_F_GSO;
5037 5120
5121 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5122 * vlan_dev_init() will do the dev->features check, so these features
5123 * are enabled only if supported by underlying device.
5124 */
5125 dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
5126
5038 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5127 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5039 ret = notifier_to_errno(ret); 5128 ret = notifier_to_errno(ret);
5040 if (ret) 5129 if (ret)
@@ -5264,7 +5353,7 @@ void netdev_run_todo(void)
5264 5353
5265 /* paranoia */ 5354 /* paranoia */
5266 BUG_ON(atomic_read(&dev->refcnt)); 5355 BUG_ON(atomic_read(&dev->refcnt));
5267 WARN_ON(dev->ip_ptr); 5356 WARN_ON(rcu_dereference_raw(dev->ip_ptr));
5268 WARN_ON(dev->ip6_ptr); 5357 WARN_ON(dev->ip6_ptr);
5269 WARN_ON(dev->dn_ptr); 5358 WARN_ON(dev->dn_ptr);
5270 5359
@@ -5363,7 +5452,7 @@ static void netdev_init_one_queue(struct net_device *dev,
5363 5452
5364static void netdev_init_queues(struct net_device *dev) 5453static void netdev_init_queues(struct net_device *dev)
5365{ 5454{
5366 netdev_init_one_queue(dev, &dev->rx_queue, NULL); 5455 netdev_init_one_queue(dev, &dev->ingress_queue, NULL);
5367 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5456 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5368 spin_lock_init(&dev->tx_global_lock); 5457 spin_lock_init(&dev->tx_global_lock);
5369} 5458}
@@ -5386,10 +5475,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5386 struct net_device *dev; 5475 struct net_device *dev;
5387 size_t alloc_size; 5476 size_t alloc_size;
5388 struct net_device *p; 5477 struct net_device *p;
5389#ifdef CONFIG_RPS
5390 struct netdev_rx_queue *rx;
5391 int i;
5392#endif
5393 5478
5394 BUG_ON(strlen(name) >= sizeof(dev->name)); 5479 BUG_ON(strlen(name) >= sizeof(dev->name));
5395 5480
@@ -5415,29 +5500,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5415 goto free_p; 5500 goto free_p;
5416 } 5501 }
5417 5502
5418#ifdef CONFIG_RPS
5419 rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5420 if (!rx) {
5421 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5422 "rx queues.\n");
5423 goto free_tx;
5424 }
5425
5426 atomic_set(&rx->count, queue_count);
5427
5428 /*
5429 * Set a pointer to first element in the array which holds the
5430 * reference count.
5431 */
5432 for (i = 0; i < queue_count; i++)
5433 rx[i].first = rx;
5434#endif
5435 5503
5436 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5504 dev = PTR_ALIGN(p, NETDEV_ALIGN);
5437 dev->padded = (char *)dev - (char *)p; 5505 dev->padded = (char *)dev - (char *)p;
5438 5506
5439 if (dev_addr_init(dev)) 5507 if (dev_addr_init(dev))
5440 goto free_rx; 5508 goto free_tx;
5441 5509
5442 dev_mc_init(dev); 5510 dev_mc_init(dev);
5443 dev_uc_init(dev); 5511 dev_uc_init(dev);
@@ -5449,8 +5517,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5449 dev->real_num_tx_queues = queue_count; 5517 dev->real_num_tx_queues = queue_count;
5450 5518
5451#ifdef CONFIG_RPS 5519#ifdef CONFIG_RPS
5452 dev->_rx = rx;
5453 dev->num_rx_queues = queue_count; 5520 dev->num_rx_queues = queue_count;
5521 dev->real_num_rx_queues = queue_count;
5454#endif 5522#endif
5455 5523
5456 dev->gso_max_size = GSO_MAX_SIZE; 5524 dev->gso_max_size = GSO_MAX_SIZE;
@@ -5467,11 +5535,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5467 strcpy(dev->name, name); 5535 strcpy(dev->name, name);
5468 return dev; 5536 return dev;
5469 5537
5470free_rx:
5471#ifdef CONFIG_RPS
5472 kfree(rx);
5473free_tx: 5538free_tx:
5474#endif
5475 kfree(tx); 5539 kfree(tx);
5476free_p: 5540free_p:
5477 kfree(p); 5541 kfree(p);
@@ -5658,6 +5722,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5658 5722
5659 /* Notify protocols, that we are about to destroy 5723 /* Notify protocols, that we are about to destroy
5660 this device. They should clean all the things. 5724 this device. They should clean all the things.
5725
5726 Note that dev->reg_state stays at NETREG_REGISTERED.
5727 This is wanted because this way 8021q and macvlan know
5728 the device is just moving and can keep their slaves up.
5661 */ 5729 */
5662 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5730 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5663 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); 5731 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 7a85367b3c2f..7d7e572cedc7 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -19,6 +19,7 @@
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/vmalloc.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23 24
24/* 25/*
@@ -205,18 +206,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
205 struct ethtool_drvinfo info; 206 struct ethtool_drvinfo info;
206 const struct ethtool_ops *ops = dev->ethtool_ops; 207 const struct ethtool_ops *ops = dev->ethtool_ops;
207 208
208 if (!ops->get_drvinfo)
209 return -EOPNOTSUPP;
210
211 memset(&info, 0, sizeof(info)); 209 memset(&info, 0, sizeof(info));
212 info.cmd = ETHTOOL_GDRVINFO; 210 info.cmd = ETHTOOL_GDRVINFO;
213 ops->get_drvinfo(dev, &info); 211 if (ops && ops->get_drvinfo) {
212 ops->get_drvinfo(dev, &info);
213 } else if (dev->dev.parent && dev->dev.parent->driver) {
214 strlcpy(info.bus_info, dev_name(dev->dev.parent),
215 sizeof(info.bus_info));
216 strlcpy(info.driver, dev->dev.parent->driver->name,
217 sizeof(info.driver));
218 } else {
219 return -EOPNOTSUPP;
220 }
214 221
215 /* 222 /*
216 * this method of obtaining string set info is deprecated; 223 * this method of obtaining string set info is deprecated;
217 * Use ETHTOOL_GSSET_INFO instead. 224 * Use ETHTOOL_GSSET_INFO instead.
218 */ 225 */
219 if (ops->get_sset_count) { 226 if (ops && ops->get_sset_count) {
220 int rc; 227 int rc;
221 228
222 rc = ops->get_sset_count(dev, ETH_SS_TEST); 229 rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -229,9 +236,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
229 if (rc >= 0) 236 if (rc >= 0)
230 info.n_priv_flags = rc; 237 info.n_priv_flags = rc;
231 } 238 }
232 if (ops->get_regs_len) 239 if (ops && ops->get_regs_len)
233 info.regdump_len = ops->get_regs_len(dev); 240 info.regdump_len = ops->get_regs_len(dev);
234 if (ops->get_eeprom_len) 241 if (ops && ops->get_eeprom_len)
235 info.eedump_len = ops->get_eeprom_len(dev); 242 info.eedump_len = ops->get_eeprom_len(dev);
236 243
237 if (copy_to_user(useraddr, &info, sizeof(info))) 244 if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -479,6 +486,38 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
479 list->count++; 486 list->count++;
480} 487}
481 488
489/*
490 * ethtool does not (or did not) set masks for flow parameters that are
491 * not specified, so if both value and mask are 0 then this must be
492 * treated as equivalent to a mask with all bits set. Implement that
493 * here rather than in drivers.
494 */
495static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
496{
497 struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
498 struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
499
500 if (fs->flow_type != TCP_V4_FLOW &&
501 fs->flow_type != UDP_V4_FLOW &&
502 fs->flow_type != SCTP_V4_FLOW)
503 return;
504
505 if (!(entry->ip4src | mask->ip4src))
506 mask->ip4src = htonl(0xffffffff);
507 if (!(entry->ip4dst | mask->ip4dst))
508 mask->ip4dst = htonl(0xffffffff);
509 if (!(entry->psrc | mask->psrc))
510 mask->psrc = htons(0xffff);
511 if (!(entry->pdst | mask->pdst))
512 mask->pdst = htons(0xffff);
513 if (!(entry->tos | mask->tos))
514 mask->tos = 0xff;
515 if (!(fs->vlan_tag | fs->vlan_tag_mask))
516 fs->vlan_tag_mask = 0xffff;
517 if (!(fs->data | fs->data_mask))
518 fs->data_mask = 0xffffffffffffffffULL;
519}
520
482static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, 521static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
483 void __user *useraddr) 522 void __user *useraddr)
484{ 523{
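The comment before rx_ntuple_fix_masks() is the whole story: a field whose value and mask are both zero was never specified by the user, so it must behave as if the mask had every bit set (ignore the field). Below is a small self-contained sketch of the same convention with an invented two-field filter and a match() helper, assuming the usual ntuple semantics where a set mask bit means "don't care".

#include <stdio.h>
#include <stdint.h>

/* Hypothetical two-field n-tuple filter.  As in the ethtool interface the
 * mask is a "don't care" mask: a set bit is ignored when matching.
 * value==0 and mask==0 together mean "field not specified". */
struct filter {
	uint32_t ip4src, ip4src_mask;
	uint16_t psrc,   psrc_mask;
};

static void fix_masks(struct filter *f)
{
	/* an unspecified field becomes an all-ones (ignore everything) mask */
	if (!(f->ip4src | f->ip4src_mask))
		f->ip4src_mask = 0xffffffff;
	if (!(f->psrc | f->psrc_mask))
		f->psrc_mask = 0xffff;
}

static int match(const struct filter *f, uint32_t saddr, uint16_t sport)
{
	return !((saddr ^ f->ip4src) & ~f->ip4src_mask) &&
	       !((sport ^ f->psrc)   & (uint16_t)~f->psrc_mask);
}

int main(void)
{
	struct filter f = { .psrc = 80 };   /* only the source port was given */

	fix_masks(&f);
	printf("port 80, any addr: %d\n", match(&f, 0x0a000001, 80)); /* 1 */
	printf("port 22, any addr: %d\n", match(&f, 0x0a000001, 22)); /* 0 */
	return 0;
}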
@@ -493,6 +532,8 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
493 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 532 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
494 return -EFAULT; 533 return -EFAULT;
495 534
535 rx_ntuple_fix_masks(&cmd.fs);
536
496 /* 537 /*
497 * Cache filter in dev struct for GET operation only if 538 * Cache filter in dev struct for GET operation only if
498 * the underlying driver doesn't have its own GET operation, and 539 * the underlying driver doesn't have its own GET operation, and
@@ -667,19 +708,19 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
667 break; 708 break;
668 case IP_USER_FLOW: 709 case IP_USER_FLOW:
669 sprintf(p, "\tSrc IP addr: 0x%x\n", 710 sprintf(p, "\tSrc IP addr: 0x%x\n",
670 fsc->fs.h_u.raw_ip4_spec.ip4src); 711 fsc->fs.h_u.usr_ip4_spec.ip4src);
671 p += ETH_GSTRING_LEN; 712 p += ETH_GSTRING_LEN;
672 num_strings++; 713 num_strings++;
673 sprintf(p, "\tSrc IP mask: 0x%x\n", 714 sprintf(p, "\tSrc IP mask: 0x%x\n",
674 fsc->fs.m_u.raw_ip4_spec.ip4src); 715 fsc->fs.m_u.usr_ip4_spec.ip4src);
675 p += ETH_GSTRING_LEN; 716 p += ETH_GSTRING_LEN;
676 num_strings++; 717 num_strings++;
677 sprintf(p, "\tDest IP addr: 0x%x\n", 718 sprintf(p, "\tDest IP addr: 0x%x\n",
678 fsc->fs.h_u.raw_ip4_spec.ip4dst); 719 fsc->fs.h_u.usr_ip4_spec.ip4dst);
679 p += ETH_GSTRING_LEN; 720 p += ETH_GSTRING_LEN;
680 num_strings++; 721 num_strings++;
681 sprintf(p, "\tDest IP mask: 0x%x\n", 722 sprintf(p, "\tDest IP mask: 0x%x\n",
682 fsc->fs.m_u.raw_ip4_spec.ip4dst); 723 fsc->fs.m_u.usr_ip4_spec.ip4dst);
683 p += ETH_GSTRING_LEN; 724 p += ETH_GSTRING_LEN;
684 num_strings++; 725 num_strings++;
685 break; 726 break;
@@ -775,7 +816,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
775 if (regs.len > reglen) 816 if (regs.len > reglen)
776 regs.len = reglen; 817 regs.len = reglen;
777 818
778 regbuf = kmalloc(reglen, GFP_USER); 819 regbuf = vmalloc(reglen);
779 if (!regbuf) 820 if (!regbuf)
780 return -ENOMEM; 821 return -ENOMEM;
781 822
@@ -790,7 +831,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
790 ret = 0; 831 ret = 0;
791 832
792 out: 833 out:
793 kfree(regbuf); 834 vfree(regbuf);
794 return ret; 835 return ret;
795} 836}
796 837
@@ -1175,8 +1216,11 @@ static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
1175 return -EFAULT; 1216 return -EFAULT;
1176 1217
1177 if (edata.data) { 1218 if (edata.data) {
1178 if (!dev->ethtool_ops->get_rx_csum || 1219 u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
1179 !dev->ethtool_ops->get_rx_csum(dev)) 1220 dev->ethtool_ops->get_rx_csum(dev) :
1221 ethtool_op_get_rx_csum(dev);
1222
1223 if (!rxcsum)
1180 return -EINVAL; 1224 return -EINVAL;
1181 dev->features |= NETIF_F_GRO; 1225 dev->features |= NETIF_F_GRO;
1182 } else 1226 } else
@@ -1402,14 +1446,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1402 if (!dev || !netif_device_present(dev)) 1446 if (!dev || !netif_device_present(dev))
1403 return -ENODEV; 1447 return -ENODEV;
1404 1448
1405 if (!dev->ethtool_ops)
1406 return -EOPNOTSUPP;
1407
1408 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) 1449 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1409 return -EFAULT; 1450 return -EFAULT;
1410 1451
1452 if (!dev->ethtool_ops) {
1453 /* ETHTOOL_GDRVINFO does not require any driver support.
1454 * It is also unprivileged and does not change anything,
1455 * so we can take a shortcut to it. */
1456 if (ethcmd == ETHTOOL_GDRVINFO)
1457 return ethtool_get_drvinfo(dev, useraddr);
1458 else
1459 return -EOPNOTSUPP;
1460 }
1461
1411 /* Allow some commands to be done by anyone */ 1462 /* Allow some commands to be done by anyone */
1412 switch (ethcmd) { 1463 switch (ethcmd) {
1464 case ETHTOOL_GSET:
1413 case ETHTOOL_GDRVINFO: 1465 case ETHTOOL_GDRVINFO:
1414 case ETHTOOL_GMSGLVL: 1466 case ETHTOOL_GMSGLVL:
1415 case ETHTOOL_GCOALESCE: 1467 case ETHTOOL_GCOALESCE:
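Taken together, the ethtool changes above let ETHTOOL_GDRVINFO work for devices that register no ethtool_ops at all: dev_ethtool() short-circuits to ethtool_get_drvinfo(), which falls back to the name of the parent device's driver and its bus id. The sketch below mimics only that fallback order; the structures, the "e1000e" driver name and the PCI address are invented for illustration and are not the kernel's device model.

#include <stdio.h>
#include <string.h>

/* invented, much-simplified stand-ins for the kernel structures */
struct drvinfo   { char driver[32]; char bus_info[32]; };
struct driver    { const char *name; };
struct device    { const char *bus_id; struct driver *driver; };
struct netdevice {
	struct device *parent;                        /* dev.parent        */
	void (*get_drvinfo)(struct drvinfo *info);    /* optional ops hook */
};

static int get_drvinfo(struct netdevice *nd, struct drvinfo *info)
{
	memset(info, 0, sizeof(*info));
	if (nd->get_drvinfo) {
		nd->get_drvinfo(info);                /* driver knows best */
	} else if (nd->parent && nd->parent->driver) {
		/* fall back to what the device model already knows */
		snprintf(info->driver, sizeof(info->driver), "%s",
			 nd->parent->driver->name);
		snprintf(info->bus_info, sizeof(info->bus_info), "%s",
			 nd->parent->bus_id);
	} else {
		return -1;                            /* really nothing known */
	}
	return 0;
}

int main(void)
{
	struct driver drv = { "e1000e" };
	struct device pdev = { "0000:00:19.0", &drv };
	struct netdevice nd = { &pdev, NULL };        /* no ethtool ops */
	struct drvinfo info;

	if (get_drvinfo(&nd, &info) == 0)
		printf("driver=%s bus=%s\n", info.driver, info.bus_info);
	return 0;
}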
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 42e84e08a1be..332c2e31d048 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -182,7 +182,8 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
182{ 182{
183 int ret = 0; 183 int ret = 0;
184 184
185 if (rule->iifindex && (rule->iifindex != fl->iif)) 185 if (rule->iifindex && (rule->iifindex != fl->iif) &&
186 !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
186 goto out; 187 goto out;
187 188
188 if (rule->oifindex && (rule->oifindex != fl->oif)) 189 if (rule->oifindex && (rule->oifindex != fl->oif))
@@ -225,9 +226,11 @@ jumped:
225 err = ops->action(rule, fl, flags, arg); 226 err = ops->action(rule, fl, flags, arg);
226 227
227 if (err != -EAGAIN) { 228 if (err != -EAGAIN) {
228 fib_rule_get(rule); 229 if (likely(atomic_inc_not_zero(&rule->refcnt))) {
229 arg->rule = rule; 230 arg->rule = rule;
230 goto out; 231 goto out;
232 }
233 break;
231 } 234 }
232 } 235 }
233 236
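The fib_rules change only hands a matched rule back to the caller if its reference count can be raised from a non-zero value, so a rule that is already on its way to being freed is never resurrected. A minimal sketch of that "increment unless zero" primitive using C11 atomics follows; the kernel's atomic_inc_not_zero() gives the same guarantee with its own implementation.

#include <stdatomic.h>
#include <stdio.h>

/* Increment *ref unless it is already 0; returns 1 on success, 0 otherwise.
 * A zero count means the object is being torn down and must not be revived. */
static int ref_get_unless_zero(atomic_int *ref)
{
	int old = atomic_load(ref);

	while (old != 0) {
		if (atomic_compare_exchange_weak(ref, &old, old + 1))
			return 1;
		/* old was reloaded by the failed CAS; retry */
	}
	return 0;
}

int main(void)
{
	atomic_int live = 2, dying = 0;

	printf("live rule : %d (count now %d)\n",
	       ref_get_unless_zero(&live), atomic_load(&live));
	printf("dying rule: %d (count now %d)\n",
	       ref_get_unless_zero(&dying), atomic_load(&dying));
	return 0;
}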
diff --git a/net/core/filter.c b/net/core/filter.c
index 52b051f82a01..7adf50352918 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -638,10 +638,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
638 return err; 638 return err;
639 } 639 }
640 640
641 rcu_read_lock_bh(); 641 old_fp = rcu_dereference_protected(sk->sk_filter,
642 old_fp = rcu_dereference_bh(sk->sk_filter); 642 sock_owned_by_user(sk));
643 rcu_assign_pointer(sk->sk_filter, fp); 643 rcu_assign_pointer(sk->sk_filter, fp);
644 rcu_read_unlock_bh();
645 644
646 if (old_fp) 645 if (old_fp)
647 sk_filter_delayed_uncharge(sk, old_fp); 646 sk_filter_delayed_uncharge(sk, old_fp);
@@ -654,14 +653,13 @@ int sk_detach_filter(struct sock *sk)
654 int ret = -ENOENT; 653 int ret = -ENOENT;
655 struct sk_filter *filter; 654 struct sk_filter *filter;
656 655
657 rcu_read_lock_bh(); 656 filter = rcu_dereference_protected(sk->sk_filter,
658 filter = rcu_dereference_bh(sk->sk_filter); 657 sock_owned_by_user(sk));
659 if (filter) { 658 if (filter) {
660 rcu_assign_pointer(sk->sk_filter, NULL); 659 rcu_assign_pointer(sk->sk_filter, NULL);
661 sk_filter_delayed_uncharge(sk, filter); 660 sk_filter_delayed_uncharge(sk, filter);
662 ret = 0; 661 ret = 0;
663 } 662 }
664 rcu_read_unlock_bh();
665 return ret; 663 return ret;
666} 664}
667EXPORT_SYMBOL_GPL(sk_detach_filter); 665EXPORT_SYMBOL_GPL(sk_detach_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
index f67dcbfe54ef..127c8a7ffd61 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -53,8 +53,7 @@ struct flow_flush_info {
53 53
54struct flow_cache { 54struct flow_cache {
55 u32 hash_shift; 55 u32 hash_shift;
56 unsigned long order; 56 struct flow_cache_percpu __percpu *percpu;
57 struct flow_cache_percpu *percpu;
58 struct notifier_block hotcpu_notifier; 57 struct notifier_block hotcpu_notifier;
59 int low_watermark; 58 int low_watermark;
60 int high_watermark; 59 int high_watermark;
@@ -64,7 +63,7 @@ struct flow_cache {
64atomic_t flow_cache_genid = ATOMIC_INIT(0); 63atomic_t flow_cache_genid = ATOMIC_INIT(0);
65EXPORT_SYMBOL(flow_cache_genid); 64EXPORT_SYMBOL(flow_cache_genid);
66static struct flow_cache flow_cache_global; 65static struct flow_cache flow_cache_global;
67static struct kmem_cache *flow_cachep; 66static struct kmem_cache *flow_cachep __read_mostly;
68 67
69static DEFINE_SPINLOCK(flow_cache_gc_lock); 68static DEFINE_SPINLOCK(flow_cache_gc_lock);
70static LIST_HEAD(flow_cache_gc_list); 69static LIST_HEAD(flow_cache_gc_list);
@@ -177,15 +176,11 @@ static u32 flow_hash_code(struct flow_cache *fc,
177{ 176{
178 u32 *k = (u32 *) key; 177 u32 *k = (u32 *) key;
179 178
180 return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) 179 return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
181 & (flow_cache_hash_size(fc) - 1)); 180 & (flow_cache_hash_size(fc) - 1);
182} 181}
183 182
184#if (BITS_PER_LONG == 64) 183typedef unsigned long flow_compare_t;
185typedef u64 flow_compare_t;
186#else
187typedef u32 flow_compare_t;
188#endif
189 184
190/* I hear what you're saying, use memcmp. But memcmp cannot make 185/* I hear what you're saying, use memcmp. But memcmp cannot make
191 * important assumptions that we can here, such as alignment and 186 * important assumptions that we can here, such as alignment and
@@ -357,62 +352,73 @@ void flow_cache_flush(void)
357 put_online_cpus(); 352 put_online_cpus();
358} 353}
359 354
360static void __init flow_cache_cpu_prepare(struct flow_cache *fc, 355static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
361 struct flow_cache_percpu *fcp)
362{ 356{
363 fcp->hash_table = (struct hlist_head *) 357 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
364 __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); 358 size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
365 if (!fcp->hash_table)
366 panic("NET: failed to allocate flow cache order %lu\n", fc->order);
367 359
368 fcp->hash_rnd_recalc = 1; 360 if (!fcp->hash_table) {
369 fcp->hash_count = 0; 361 fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
370 tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); 362 if (!fcp->hash_table) {
363 pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
364 return -ENOMEM;
365 }
366 fcp->hash_rnd_recalc = 1;
367 fcp->hash_count = 0;
368 tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
369 }
370 return 0;
371} 371}
372 372
373static int flow_cache_cpu(struct notifier_block *nfb, 373static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
374 unsigned long action, 374 unsigned long action,
375 void *hcpu) 375 void *hcpu)
376{ 376{
377 struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); 377 struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
378 int cpu = (unsigned long) hcpu; 378 int res, cpu = (unsigned long) hcpu;
379 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); 379 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
380 380
381 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) 381 switch (action) {
382 case CPU_UP_PREPARE:
383 case CPU_UP_PREPARE_FROZEN:
384 res = flow_cache_cpu_prepare(fc, cpu);
385 if (res)
386 return notifier_from_errno(res);
387 break;
388 case CPU_DEAD:
389 case CPU_DEAD_FROZEN:
382 __flow_cache_shrink(fc, fcp, 0); 390 __flow_cache_shrink(fc, fcp, 0);
391 break;
392 }
383 return NOTIFY_OK; 393 return NOTIFY_OK;
384} 394}
385 395
386static int flow_cache_init(struct flow_cache *fc) 396static int __init flow_cache_init(struct flow_cache *fc)
387{ 397{
388 unsigned long order;
389 int i; 398 int i;
390 399
391 fc->hash_shift = 10; 400 fc->hash_shift = 10;
392 fc->low_watermark = 2 * flow_cache_hash_size(fc); 401 fc->low_watermark = 2 * flow_cache_hash_size(fc);
393 fc->high_watermark = 4 * flow_cache_hash_size(fc); 402 fc->high_watermark = 4 * flow_cache_hash_size(fc);
394 403
395 for (order = 0;
396 (PAGE_SIZE << order) <
397 (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
398 order++)
399 /* NOTHING */;
400 fc->order = order;
401 fc->percpu = alloc_percpu(struct flow_cache_percpu); 404 fc->percpu = alloc_percpu(struct flow_cache_percpu);
405 if (!fc->percpu)
406 return -ENOMEM;
402 407
403 setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, 408 for_each_online_cpu(i) {
404 (unsigned long) fc); 409 if (flow_cache_cpu_prepare(fc, i))
405 fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; 410 return -ENOMEM;
406 add_timer(&fc->rnd_timer); 411 }
407
408 for_each_possible_cpu(i)
409 flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
410
411 fc->hotcpu_notifier = (struct notifier_block){ 412 fc->hotcpu_notifier = (struct notifier_block){
412 .notifier_call = flow_cache_cpu, 413 .notifier_call = flow_cache_cpu,
413 }; 414 };
414 register_hotcpu_notifier(&fc->hotcpu_notifier); 415 register_hotcpu_notifier(&fc->hotcpu_notifier);
415 416
417 setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
418 (unsigned long) fc);
419 fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
420 add_timer(&fc->rnd_timer);
421
416 return 0; 422 return 0;
417} 423}
418 424
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6743146e4d6b..7c2373321b74 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -274,9 +274,9 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
274 while ((e = gen_find_node(bstats, rate_est))) { 274 while ((e = gen_find_node(bstats, rate_est))) {
275 rb_erase(&e->node, &est_root); 275 rb_erase(&e->node, &est_root);
276 276
277 write_lock_bh(&est_lock); 277 write_lock(&est_lock);
278 e->bstats = NULL; 278 e->bstats = NULL;
279 write_unlock_bh(&est_lock); 279 write_unlock(&est_lock);
280 280
281 list_del_rcu(&e->list); 281 list_del_rcu(&e->list);
282 call_rcu(&e->e_rcu, __gen_kill_estimator); 282 call_rcu(&e->e_rcu, __gen_kill_estimator);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index e6b133b77ccb..72aceb1fe4fa 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -42,7 +42,9 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
42 42
43 if (m->msg_namelen) { 43 if (m->msg_namelen) {
44 if (mode == VERIFY_READ) { 44 if (mode == VERIFY_READ) {
45 err = move_addr_to_kernel(m->msg_name, m->msg_namelen, 45 void __user *namep;
46 namep = (void __user __force *) m->msg_name;
47 err = move_addr_to_kernel(namep, m->msg_namelen,
46 address); 48 address);
47 if (err < 0) 49 if (err < 0)
48 return err; 50 return err;
@@ -53,7 +55,7 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
53 } 55 }
54 56
55 size = m->msg_iovlen * sizeof(struct iovec); 57 size = m->msg_iovlen * sizeof(struct iovec);
56 if (copy_from_user(iov, m->msg_iov, size)) 58 if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
57 return -EFAULT; 59 return -EFAULT;
58 60
59 m->msg_iov = iov; 61 m->msg_iov = iov;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a4e0a7482c2b..b142a0d76072 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
122 122
123unsigned long neigh_rand_reach_time(unsigned long base) 123unsigned long neigh_rand_reach_time(unsigned long base)
124{ 124{
125 return (base ? (net_random() % base) + (base >> 1) : 0); 125 return base ? (net_random() % base) + (base >> 1) : 0;
126} 126}
127EXPORT_SYMBOL(neigh_rand_reach_time); 127EXPORT_SYMBOL(neigh_rand_reach_time);
128 128
@@ -766,9 +766,9 @@ next_elt:
766static __inline__ int neigh_max_probes(struct neighbour *n) 766static __inline__ int neigh_max_probes(struct neighbour *n)
767{ 767{
768 struct neigh_parms *p = n->parms; 768 struct neigh_parms *p = n->parms;
769 return (n->nud_state & NUD_PROBE ? 769 return (n->nud_state & NUD_PROBE) ?
770 p->ucast_probes : 770 p->ucast_probes :
771 p->ucast_probes + p->app_probes + p->mcast_probes); 771 p->ucast_probes + p->app_probes + p->mcast_probes;
772} 772}
773 773
774static void neigh_invalidate(struct neighbour *neigh) 774static void neigh_invalidate(struct neighbour *neigh)
@@ -1210,7 +1210,9 @@ int neigh_resolve_output(struct sk_buff *skb)
1210 if (!neigh_event_send(neigh, skb)) { 1210 if (!neigh_event_send(neigh, skb)) {
1211 int err; 1211 int err;
1212 struct net_device *dev = neigh->dev; 1212 struct net_device *dev = neigh->dev;
1213 if (dev->header_ops->cache && !dst->hh) { 1213 if (dev->header_ops->cache &&
1214 !dst->hh &&
1215 !(dst->flags & DST_NOCACHE)) {
1214 write_lock_bh(&neigh->lock); 1216 write_lock_bh(&neigh->lock);
1215 if (!dst->hh) 1217 if (!dst->hh)
1216 neigh_hh_init(neigh, dst, dst->ops->protocol); 1218 neigh_hh_init(neigh, dst, dst->ops->protocol);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index af4dfbadf2a0..fa81fd0a488f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -515,7 +515,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
515 return attribute->store(queue, attribute, buf, count); 515 return attribute->store(queue, attribute, buf, count);
516} 516}
517 517
518static struct sysfs_ops rx_queue_sysfs_ops = { 518static const struct sysfs_ops rx_queue_sysfs_ops = {
519 .show = rx_queue_attr_show, 519 .show = rx_queue_attr_show,
520 .store = rx_queue_attr_store, 520 .store = rx_queue_attr_store,
521}; 521};
@@ -742,34 +742,38 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
742 return error; 742 return error;
743} 743}
744 744
745static int rx_queue_register_kobjects(struct net_device *net) 745int
746net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
746{ 747{
747 int i; 748 int i;
748 int error = 0; 749 int error = 0;
749 750
750 net->queues_kset = kset_create_and_add("queues", 751 for (i = old_num; i < new_num; i++) {
751 NULL, &net->dev.kobj);
752 if (!net->queues_kset)
753 return -ENOMEM;
754 for (i = 0; i < net->num_rx_queues; i++) {
755 error = rx_queue_add_kobject(net, i); 752 error = rx_queue_add_kobject(net, i);
756 if (error) 753 if (error) {
754 new_num = old_num;
757 break; 755 break;
756 }
758 } 757 }
759 758
760 if (error) 759 while (--i >= new_num)
761 while (--i >= 0) 760 kobject_put(&net->_rx[i].kobj);
762 kobject_put(&net->_rx[i].kobj);
763 761
764 return error; 762 return error;
765} 763}
766 764
767static void rx_queue_remove_kobjects(struct net_device *net) 765static int rx_queue_register_kobjects(struct net_device *net)
768{ 766{
769 int i; 767 net->queues_kset = kset_create_and_add("queues",
768 NULL, &net->dev.kobj);
769 if (!net->queues_kset)
770 return -ENOMEM;
771 return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
772}
770 773
771 for (i = 0; i < net->num_rx_queues; i++) 774static void rx_queue_remove_kobjects(struct net_device *net)
772 kobject_put(&net->_rx[i].kobj); 775{
776 net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
773 kset_unregister(net->queues_kset); 777 kset_unregister(net->queues_kset);
774} 778}
775#endif /* CONFIG_RPS */ 779#endif /* CONFIG_RPS */
@@ -789,12 +793,13 @@ static const void *net_netlink_ns(struct sock *sk)
789 return sock_net(sk); 793 return sock_net(sk);
790} 794}
791 795
792static struct kobj_ns_type_operations net_ns_type_operations = { 796struct kobj_ns_type_operations net_ns_type_operations = {
793 .type = KOBJ_NS_TYPE_NET, 797 .type = KOBJ_NS_TYPE_NET,
794 .current_ns = net_current_ns, 798 .current_ns = net_current_ns,
795 .netlink_ns = net_netlink_ns, 799 .netlink_ns = net_netlink_ns,
796 .initial_ns = net_initial_ns, 800 .initial_ns = net_initial_ns,
797}; 801};
802EXPORT_SYMBOL_GPL(net_ns_type_operations);
798 803
799static void net_kobj_ns_exit(struct net *net) 804static void net_kobj_ns_exit(struct net *net)
800{ 805{
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 805555e8b187..778e1571548d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,4 +4,8 @@
4int netdev_kobject_init(void); 4int netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7#ifdef CONFIG_RPS
8int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
9#endif
10
7#endif 11#endif
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 10a1ea72010d..2c0df0f95b3d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -729,16 +729,14 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen,
729 *num = 0; 729 *num = 0;
730 730
731 for (; i < maxlen; i++) { 731 for (; i < maxlen; i++) {
732 int value;
732 char c; 733 char c;
733 *num <<= 4; 734 *num <<= 4;
734 if (get_user(c, &user_buffer[i])) 735 if (get_user(c, &user_buffer[i]))
735 return -EFAULT; 736 return -EFAULT;
736 if ((c >= '0') && (c <= '9')) 737 value = hex_to_bin(c);
737 *num |= c - '0'; 738 if (value >= 0)
738 else if ((c >= 'a') && (c <= 'f')) 739 *num |= value;
739 *num |= c - 'a' + 10;
740 else if ((c >= 'A') && (c <= 'F'))
741 *num |= c - 'A' + 10;
742 else 740 else
743 break; 741 break;
744 } 742 }
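hex32_arg() now relies on the generic hex_to_bin() helper instead of three open-coded character-range tests; the xdigit2bin() hunk in net/core/utils.c further down makes the same substitution. Below is a standalone sketch of the shift-and-or parsing loop, with a local hex_to_bin() standing in for the one the kernel keeps in lib/hexdump.c; it is the same idea, not a byte-for-byte copy of the pktgen code.

#include <ctype.h>
#include <stdio.h>

/* local stand-in for the kernel's hex_to_bin(): value of a hex digit,
 * or -1 if the character is not a hex digit */
static int hex_to_bin(char ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	ch = tolower((unsigned char)ch);
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	return -1;
}

/* parse up to maxlen hex digits from s into *num, return digits consumed */
static int hex32_arg(const char *s, unsigned int maxlen, unsigned long *num)
{
	unsigned int i;

	*num = 0;
	for (i = 0; i < maxlen; i++) {
		int value = hex_to_bin(s[i]);

		if (value < 0)
			break;
		*num = (*num << 4) | value;
	}
	return i;
}

int main(void)
{
	unsigned long v;
	int n = hex32_arg("00c0ffee stop", 8, &v);

	printf("consumed %d digits, value 0x%lx\n", n, v); /* 8, 0xc0ffee */
	return 0;
}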
@@ -3907,8 +3905,6 @@ static void __exit pg_cleanup(void)
3907{ 3905{
3908 struct pktgen_thread *t; 3906 struct pktgen_thread *t;
3909 struct list_head *q, *n; 3907 struct list_head *q, *n;
3910 wait_queue_head_t queue;
3911 init_waitqueue_head(&queue);
3912 3908
3913 /* Stop all interfaces & threads */ 3909 /* Stop all interfaces & threads */
3914 3910
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f78d821bd935..b2a718dfd720 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -612,36 +612,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
612 612
613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) 613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
614{ 614{
615 struct rtnl_link_stats64 a; 615 memcpy(v, b, sizeof(*b));
616
617 a.rx_packets = b->rx_packets;
618 a.tx_packets = b->tx_packets;
619 a.rx_bytes = b->rx_bytes;
620 a.tx_bytes = b->tx_bytes;
621 a.rx_errors = b->rx_errors;
622 a.tx_errors = b->tx_errors;
623 a.rx_dropped = b->rx_dropped;
624 a.tx_dropped = b->tx_dropped;
625
626 a.multicast = b->multicast;
627 a.collisions = b->collisions;
628
629 a.rx_length_errors = b->rx_length_errors;
630 a.rx_over_errors = b->rx_over_errors;
631 a.rx_crc_errors = b->rx_crc_errors;
632 a.rx_frame_errors = b->rx_frame_errors;
633 a.rx_fifo_errors = b->rx_fifo_errors;
634 a.rx_missed_errors = b->rx_missed_errors;
635
636 a.tx_aborted_errors = b->tx_aborted_errors;
637 a.tx_carrier_errors = b->tx_carrier_errors;
638 a.tx_fifo_errors = b->tx_fifo_errors;
639 a.tx_heartbeat_errors = b->tx_heartbeat_errors;
640 a.tx_window_errors = b->tx_window_errors;
641
642 a.rx_compressed = b->rx_compressed;
643 a.tx_compressed = b->tx_compressed;
644 memcpy(v, &a, sizeof(a));
645} 616}
646 617
647/* All VF info */ 618/* All VF info */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c83b421341c0..752c1972b3a7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -202,8 +202,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
202 skb->data = data; 202 skb->data = data;
203 skb_reset_tail_pointer(skb); 203 skb_reset_tail_pointer(skb);
204 skb->end = skb->tail + size; 204 skb->end = skb->tail + size;
205 kmemcheck_annotate_bitfield(skb, flags1);
206 kmemcheck_annotate_bitfield(skb, flags2);
207#ifdef NET_SKBUFF_DATA_USES_OFFSET 205#ifdef NET_SKBUFF_DATA_USES_OFFSET
208 skb->mac_header = ~0U; 206 skb->mac_header = ~0U;
209#endif 207#endif
@@ -340,7 +338,7 @@ static void skb_release_data(struct sk_buff *skb)
340 put_page(skb_shinfo(skb)->frags[i].page); 338 put_page(skb_shinfo(skb)->frags[i].page);
341 } 339 }
342 340
343 if (skb_has_frags(skb)) 341 if (skb_has_frag_list(skb))
344 skb_drop_fraglist(skb); 342 skb_drop_fraglist(skb);
345 343
346 kfree(skb->head); 344 kfree(skb->head);
@@ -685,16 +683,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
685 683
686struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) 684struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
687{ 685{
688 int headerlen = skb->data - skb->head; 686 int headerlen = skb_headroom(skb);
689 /* 687 unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
690 * Allocate the copy buffer 688 struct sk_buff *n = alloc_skb(size, gfp_mask);
691 */ 689
692 struct sk_buff *n;
693#ifdef NET_SKBUFF_DATA_USES_OFFSET
694 n = alloc_skb(skb->end + skb->data_len, gfp_mask);
695#else
696 n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
697#endif
698 if (!n) 690 if (!n)
699 return NULL; 691 return NULL;
700 692
@@ -726,20 +718,14 @@ EXPORT_SYMBOL(skb_copy);
726 718
727struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) 719struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
728{ 720{
729 /* 721 unsigned int size = skb_end_pointer(skb) - skb->head;
730 * Allocate the copy buffer 722 struct sk_buff *n = alloc_skb(size, gfp_mask);
731 */ 723
732 struct sk_buff *n;
733#ifdef NET_SKBUFF_DATA_USES_OFFSET
734 n = alloc_skb(skb->end, gfp_mask);
735#else
736 n = alloc_skb(skb->end - skb->head, gfp_mask);
737#endif
738 if (!n) 724 if (!n)
739 goto out; 725 goto out;
740 726
741 /* Set the data pointer */ 727 /* Set the data pointer */
742 skb_reserve(n, skb->data - skb->head); 728 skb_reserve(n, skb_headroom(skb));
743 /* Set the tail pointer and length */ 729 /* Set the tail pointer and length */
744 skb_put(n, skb_headlen(skb)); 730 skb_put(n, skb_headlen(skb));
745 /* Copy the bytes */ 731 /* Copy the bytes */
@@ -759,7 +745,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
759 skb_shinfo(n)->nr_frags = i; 745 skb_shinfo(n)->nr_frags = i;
760 } 746 }
761 747
762 if (skb_has_frags(skb)) { 748 if (skb_has_frag_list(skb)) {
763 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 749 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
764 skb_clone_fraglist(n); 750 skb_clone_fraglist(n);
765 } 751 }
@@ -791,12 +777,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
791{ 777{
792 int i; 778 int i;
793 u8 *data; 779 u8 *data;
794#ifdef NET_SKBUFF_DATA_USES_OFFSET 780 int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
795 int size = nhead + skb->end + ntail;
796#else
797 int size = nhead + (skb->end - skb->head) + ntail;
798#endif
799 long off; 781 long off;
782 bool fastpath;
800 783
801 BUG_ON(nhead < 0); 784 BUG_ON(nhead < 0);
802 785
@@ -810,23 +793,36 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
810 goto nodata; 793 goto nodata;
811 794
812 /* Copy only real data... and, alas, header. This should be 795 /* Copy only real data... and, alas, header. This should be
813 * optimized for the cases when header is void. */ 796 * optimized for the cases when header is void.
814#ifdef NET_SKBUFF_DATA_USES_OFFSET 797 */
815 memcpy(data + nhead, skb->head, skb->tail); 798 memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
816#else 799
817 memcpy(data + nhead, skb->head, skb->tail - skb->head); 800 memcpy((struct skb_shared_info *)(data + size),
818#endif 801 skb_shinfo(skb),
819 memcpy(data + size, skb_end_pointer(skb),
820 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); 802 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
821 803
822 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 804 /* Check if we can avoid taking references on fragments if we own
823 get_page(skb_shinfo(skb)->frags[i].page); 805 * the last reference on skb->head. (see skb_release_data())
806 */
807 if (!skb->cloned)
808 fastpath = true;
809 else {
810 int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
824 811
825 if (skb_has_frags(skb)) 812 fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
826 skb_clone_fraglist(skb); 813 }
827 814
828 skb_release_data(skb); 815 if (fastpath) {
816 kfree(skb->head);
817 } else {
818 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
819 get_page(skb_shinfo(skb)->frags[i].page);
829 820
821 if (skb_has_frag_list(skb))
822 skb_clone_fraglist(skb);
823
824 skb_release_data(skb);
825 }
830 off = (data + nhead) - skb->head; 826 off = (data + nhead) - skb->head;
831 827
832 skb->head = data; 828 skb->head = data;
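The pskb_expand_head() hunk adds a fast path: when dataref shows that the caller holds the only reference on the skb head (with the extra bias when nohdr is set), there is no need to take page references or go through skb_release_data(); the old head is simply freed. The toy below keeps only the "sole owner, free directly; otherwise just drop our reference" decision, with a plain counter in a shared block instead of the real dataref encoding.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* toy shared data block; refcnt plays the role of skb_shinfo()->dataref */
struct datablk {
	int refcnt;
	size_t len;
	unsigned char bytes[64];
};

/* Reallocate with more headroom.  If the caller holds the only reference,
 * the old block can be freed directly (fast path); otherwise our reference
 * is dropped and the block stays alive for its other users (slow path). */
static struct datablk *expand_head(struct datablk *old, size_t headroom)
{
	struct datablk *n = calloc(1, sizeof(*n));

	if (!n)
		return NULL;
	n->refcnt = 1;
	n->len = old->len;
	memcpy(n->bytes + headroom, old->bytes, old->len);

	if (old->refcnt == 1)
		free(old);              /* sole owner: no other bookkeeping */
	else
		old->refcnt--;          /* others still reference the block */
	return n;
}

int main(void)
{
	struct datablk *d = calloc(1, sizeof(*d));

	if (!d)
		return 1;
	d->refcnt = 1;
	d->len = 4;
	memcpy(d->bytes, "abcd", 4);
	d = expand_head(d, 8);
	if (!d)
		return 1;
	printf("byte at offset 8: %c\n", d->bytes[8]);   /* 'a' */
	free(d);
	return 0;
}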
@@ -1099,7 +1095,7 @@ drop_pages:
1099 for (; i < nfrags; i++) 1095 for (; i < nfrags; i++)
1100 put_page(skb_shinfo(skb)->frags[i].page); 1096 put_page(skb_shinfo(skb)->frags[i].page);
1101 1097
1102 if (skb_has_frags(skb)) 1098 if (skb_has_frag_list(skb))
1103 skb_drop_fraglist(skb); 1099 skb_drop_fraglist(skb);
1104 goto done; 1100 goto done;
1105 } 1101 }
@@ -1194,7 +1190,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1194 /* Optimization: no fragments, no reasons to preestimate 1190 /* Optimization: no fragments, no reasons to preestimate
1195 * size of pulled pages. Superb. 1191 * size of pulled pages. Superb.
1196 */ 1192 */
1197 if (!skb_has_frags(skb)) 1193 if (!skb_has_frag_list(skb))
1198 goto pull_pages; 1194 goto pull_pages;
1199 1195
1200 /* Estimate size of pulled pages. */ 1196 /* Estimate size of pulled pages. */
@@ -2323,7 +2319,7 @@ next_skb:
2323 st->frag_data = NULL; 2319 st->frag_data = NULL;
2324 } 2320 }
2325 2321
2326 if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { 2322 if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
2327 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 2323 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2328 st->frag_idx = 0; 2324 st->frag_idx = 0;
2329 goto next_skb; 2325 goto next_skb;
@@ -2893,7 +2889,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2893 return -ENOMEM; 2889 return -ENOMEM;
2894 2890
2895 /* Easy case. Most of packets will go this way. */ 2891 /* Easy case. Most of packets will go this way. */
2896 if (!skb_has_frags(skb)) { 2892 if (!skb_has_frag_list(skb)) {
2897 /* A little of trouble, not enough of space for trailer. 2893 /* A little of trouble, not enough of space for trailer.
2898 * This should not happen, when stack is tuned to generate 2894 * This should not happen, when stack is tuned to generate
2899 * good frames. OK, on miss we reallocate and reserve even more 2895 * good frames. OK, on miss we reallocate and reserve even more
@@ -2928,7 +2924,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2928 2924
2929 if (skb1->next == NULL && tailbits) { 2925 if (skb1->next == NULL && tailbits) {
2930 if (skb_shinfo(skb1)->nr_frags || 2926 if (skb_shinfo(skb1)->nr_frags ||
2931 skb_has_frags(skb1) || 2927 skb_has_frag_list(skb1) ||
2932 skb_tailroom(skb1) < tailbits) 2928 skb_tailroom(skb1) < tailbits)
2933 ntail = tailbits + 128; 2929 ntail = tailbits + 128;
2934 } 2930 }
@@ -2937,7 +2933,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2937 skb_cloned(skb1) || 2933 skb_cloned(skb1) ||
2938 ntail || 2934 ntail ||
2939 skb_shinfo(skb1)->nr_frags || 2935 skb_shinfo(skb1)->nr_frags ||
2940 skb_has_frags(skb1)) { 2936 skb_has_frag_list(skb1)) {
2941 struct sk_buff *skb2; 2937 struct sk_buff *skb2;
2942 2938
2943 /* Fuck, we are miserable poor guys... */ 2939 /* Fuck, we are miserable poor guys... */
@@ -3020,7 +3016,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3020 } else { 3016 } else {
3021 /* 3017 /*
3022 * no hardware time stamps available, 3018 * no hardware time stamps available,
3023 * so keep the skb_shared_tx and only 3019 * so keep the shared tx_flags and only
3024 * store software time stamp 3020 * store software time stamp
3025 */ 3021 */
3026 skb->tstamp = ktime_get_real(); 3022 skb->tstamp = ktime_get_real();
diff --git a/net/core/sock.c b/net/core/sock.c
index ef30e9d286e7..42365deeba27 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1557,6 +1557,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1557EXPORT_SYMBOL(sock_alloc_send_skb); 1557EXPORT_SYMBOL(sock_alloc_send_skb);
1558 1558
1559static void __lock_sock(struct sock *sk) 1559static void __lock_sock(struct sock *sk)
1560 __releases(&sk->sk_lock.slock)
1561 __acquires(&sk->sk_lock.slock)
1560{ 1562{
1561 DEFINE_WAIT(wait); 1563 DEFINE_WAIT(wait);
1562 1564
@@ -1573,6 +1575,8 @@ static void __lock_sock(struct sock *sk)
1573} 1575}
1574 1576
1575static void __release_sock(struct sock *sk) 1577static void __release_sock(struct sock *sk)
1578 __releases(&sk->sk_lock.slock)
1579 __acquires(&sk->sk_lock.slock)
1576{ 1580{
1577 struct sk_buff *skb = sk->sk_backlog.head; 1581 struct sk_buff *skb = sk->sk_backlog.head;
1578 1582
diff --git a/net/core/utils.c b/net/core/utils.c
index f41854470539..5fea0ab21902 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -75,7 +75,7 @@ __be32 in_aton(const char *str)
75 str++; 75 str++;
76 } 76 }
77 } 77 }
78 return(htonl(l)); 78 return htonl(l);
79} 79}
80EXPORT_SYMBOL(in_aton); 80EXPORT_SYMBOL(in_aton);
81 81
@@ -92,18 +92,19 @@ EXPORT_SYMBOL(in_aton);
92 92
93static inline int xdigit2bin(char c, int delim) 93static inline int xdigit2bin(char c, int delim)
94{ 94{
95 int val;
96
95 if (c == delim || c == '\0') 97 if (c == delim || c == '\0')
96 return IN6PTON_DELIM; 98 return IN6PTON_DELIM;
97 if (c == ':') 99 if (c == ':')
98 return IN6PTON_COLON_MASK; 100 return IN6PTON_COLON_MASK;
99 if (c == '.') 101 if (c == '.')
100 return IN6PTON_DOT; 102 return IN6PTON_DOT;
101 if (c >= '0' && c <= '9') 103
102 return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); 104 val = hex_to_bin(c);
103 if (c >= 'a' && c <= 'f') 105 if (val >= 0)
104 return (IN6PTON_XDIGIT | (c - 'a' + 10)); 106 return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0);
105 if (c >= 'A' && c <= 'F') 107
106 return (IN6PTON_XDIGIT | (c - 'A' + 10));
107 if (delim == -1) 108 if (delim == -1)
108 return IN6PTON_DELIM; 109 return IN6PTON_DELIM;
109 return IN6PTON_UNKNOWN; 110 return IN6PTON_UNKNOWN;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6df6f8ac9636..6d16a9070ff0 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -62,18 +62,14 @@ struct ccid_operations {
62 void (*ccid_hc_tx_exit)(struct sock *sk); 62 void (*ccid_hc_tx_exit)(struct sock *sk);
63 void (*ccid_hc_rx_packet_recv)(struct sock *sk, 63 void (*ccid_hc_rx_packet_recv)(struct sock *sk,
64 struct sk_buff *skb); 64 struct sk_buff *skb);
65 int (*ccid_hc_rx_parse_options)(struct sock *sk, 65 int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
66 unsigned char option, 66 u8 opt, u8 *val, u8 len);
67 unsigned char len, u16 idx,
68 unsigned char* value);
69 int (*ccid_hc_rx_insert_options)(struct sock *sk, 67 int (*ccid_hc_rx_insert_options)(struct sock *sk,
70 struct sk_buff *skb); 68 struct sk_buff *skb);
71 void (*ccid_hc_tx_packet_recv)(struct sock *sk, 69 void (*ccid_hc_tx_packet_recv)(struct sock *sk,
72 struct sk_buff *skb); 70 struct sk_buff *skb);
73 int (*ccid_hc_tx_parse_options)(struct sock *sk, 71 int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
74 unsigned char option, 72 u8 opt, u8 *val, u8 len);
75 unsigned char len, u16 idx,
76 unsigned char* value);
77 int (*ccid_hc_tx_send_packet)(struct sock *sk, 73 int (*ccid_hc_tx_send_packet)(struct sock *sk,
78 struct sk_buff *skb); 74 struct sk_buff *skb);
79 void (*ccid_hc_tx_packet_sent)(struct sock *sk, 75 void (*ccid_hc_tx_packet_sent)(struct sock *sk,
@@ -168,27 +164,31 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
168 ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb); 164 ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
169} 165}
170 166
167/**
168 * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver
169 * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
170 * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
171 * @val: value of @opt
172 * @len: length of @val in bytes
173 */
171static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, 174static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
172 unsigned char option, 175 u8 pkt, u8 opt, u8 *val, u8 len)
173 unsigned char len, u16 idx,
174 unsigned char* value)
175{ 176{
176 int rc = 0; 177 if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
177 if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL) 178 return 0;
178 rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx, 179 return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
179 value);
180 return rc;
181} 180}
182 181
182/**
183 * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender
184 * Arguments are analogous to ccid_hc_tx_parse_options()
185 */
183static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, 186static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
184 unsigned char option, 187 u8 pkt, u8 opt, u8 *val, u8 len)
185 unsigned char len, u16 idx,
186 unsigned char* value)
187{ 188{
188 int rc = 0; 189 if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
189 if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL) 190 return 0;
190 rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value); 191 return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
191 return rc;
192} 192}
193 193
194static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, 194static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44e..0581143cb800 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
47 47
48 If in doubt, say N. 48 If in doubt, say N.
49 49
50config IP_DCCP_CCID3_RTO
51 int "Use higher bound for nofeedback timer"
52 default 100
53 depends on IP_DCCP_CCID3 && EXPERIMENTAL
54 ---help---
55 Use higher lower bound for nofeedback timer expiration.
56
57 The TFRC nofeedback timer normally expires after the maximum of 4
58 RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
59 with a small RTT this can mean a high processing load and reduced
60 performance, since then the nofeedback timer is triggered very
61 frequently.
62
63 This option enables to set a higher lower bound for the nofeedback
64 value. Values in units of milliseconds can be set here.
65
66 A value of 0 disables this feature by enforcing the value specified
67 in RFC 3448. The following values have been suggested as bounds for
68 experimental use:
69 * 16-20ms to match the typical multimedia inter-frame interval
70 * 100ms as a reasonable compromise [default]
71 * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
72
73 The default of 100ms is a compromise between a large value for
74 efficient DCCP implementations, and a small value to avoid disrupting
75 the network in times of congestion.
76
77 The purpose of the nofeedback timer is to slow DCCP down when there
78 is serious network congestion: experimenting with larger values should
79 therefore not be performed on WANs.
80
81config IP_DCCP_TFRC_LIB 50config IP_DCCP_TFRC_LIB
82 def_bool y if IP_DCCP_CCID3 51 def_bool y if IP_DCCP_CCID3
83 52
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be1..dc18172b1e59 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
25 */ 25 */
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include "../feat.h" 27#include "../feat.h"
28#include "../ccid.h"
29#include "../dccp.h"
30#include "ccid2.h" 28#include "ccid2.h"
31 29
32 30
33#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 31#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
34static int ccid2_debug; 32static int ccid2_debug;
35#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) 33#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
36
37static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
38{
39 int len = 0;
40 int pipe = 0;
41 struct ccid2_seq *seqp = hc->tx_seqh;
42
43 /* there is data in the chain */
44 if (seqp != hc->tx_seqt) {
45 seqp = seqp->ccid2s_prev;
46 len++;
47 if (!seqp->ccid2s_acked)
48 pipe++;
49
50 while (seqp != hc->tx_seqt) {
51 struct ccid2_seq *prev = seqp->ccid2s_prev;
52
53 len++;
54 if (!prev->ccid2s_acked)
55 pipe++;
56
57 /* packets are sent sequentially */
58 BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
59 prev->ccid2s_seq ) >= 0);
60 BUG_ON(time_before(seqp->ccid2s_sent,
61 prev->ccid2s_sent));
62
63 seqp = prev;
64 }
65 }
66
67 BUG_ON(pipe != hc->tx_pipe);
68 ccid2_pr_debug("len of chain=%d\n", len);
69
70 do {
71 seqp = seqp->ccid2s_prev;
72 len++;
73 } while (seqp != hc->tx_seqh);
74
75 ccid2_pr_debug("total len=%d\n", len);
76 BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
77}
78#else 34#else
79#define ccid2_pr_debug(format, a...) 35#define ccid2_pr_debug(format, a...)
80#define ccid2_hc_tx_check_sanity(hc)
81#endif 36#endif
82 37
83static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) 38static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -156,19 +111,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
156 dp->dccps_l_ack_ratio = val; 111 dp->dccps_l_ack_ratio = val;
157} 112}
158 113
159static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
160{
161 ccid2_pr_debug("change SRTT to %ld\n", val);
162 hc->tx_srtt = val;
163}
164
165static void ccid2_start_rto_timer(struct sock *sk);
166
167static void ccid2_hc_tx_rto_expire(unsigned long data) 114static void ccid2_hc_tx_rto_expire(unsigned long data)
168{ 115{
169 struct sock *sk = (struct sock *)data; 116 struct sock *sk = (struct sock *)data;
170 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 117 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
171 long s;
172 118
173 bh_lock_sock(sk); 119 bh_lock_sock(sk);
174 if (sock_owned_by_user(sk)) { 120 if (sock_owned_by_user(sk)) {
@@ -178,23 +124,19 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
178 124
179 ccid2_pr_debug("RTO_EXPIRE\n"); 125 ccid2_pr_debug("RTO_EXPIRE\n");
180 126
181 ccid2_hc_tx_check_sanity(hc);
182
183 /* back-off timer */ 127 /* back-off timer */
184 hc->tx_rto <<= 1; 128 hc->tx_rto <<= 1;
129 if (hc->tx_rto > DCCP_RTO_MAX)
130 hc->tx_rto = DCCP_RTO_MAX;
185 131
186 s = hc->tx_rto / HZ; 132 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
187 if (s > 60)
188 hc->tx_rto = 60 * HZ;
189
190 ccid2_start_rto_timer(sk);
191 133
192 /* adjust pipe, cwnd etc */ 134 /* adjust pipe, cwnd etc */
193 hc->tx_ssthresh = hc->tx_cwnd / 2; 135 hc->tx_ssthresh = hc->tx_cwnd / 2;
194 if (hc->tx_ssthresh < 2) 136 if (hc->tx_ssthresh < 2)
195 hc->tx_ssthresh = 2; 137 hc->tx_ssthresh = 2;
196 hc->tx_cwnd = 1; 138 hc->tx_cwnd = 1;
197 hc->tx_pipe = 0; 139 hc->tx_pipe = 0;
198 140
199 /* clear state about stuff we sent */ 141 /* clear state about stuff we sent */
200 hc->tx_seqt = hc->tx_seqh; 142 hc->tx_seqt = hc->tx_seqh;
@@ -204,22 +146,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
204 hc->tx_rpseq = 0; 146 hc->tx_rpseq = 0;
205 hc->tx_rpdupack = -1; 147 hc->tx_rpdupack = -1;
206 ccid2_change_l_ack_ratio(sk, 1); 148 ccid2_change_l_ack_ratio(sk, 1);
207 ccid2_hc_tx_check_sanity(hc);
208out: 149out:
209 bh_unlock_sock(sk); 150 bh_unlock_sock(sk);
210 sock_put(sk); 151 sock_put(sk);
211} 152}
212 153
213static void ccid2_start_rto_timer(struct sock *sk)
214{
215 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
216
217 ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
218
219 BUG_ON(timer_pending(&hc->tx_rtotimer));
220 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
221}
222
223static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) 154static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
224{ 155{
225 struct dccp_sock *dp = dccp_sk(sk); 156 struct dccp_sock *dp = dccp_sk(sk);
@@ -230,7 +161,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
230 161
231 hc->tx_seqh->ccid2s_seq = dp->dccps_gss; 162 hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
232 hc->tx_seqh->ccid2s_acked = 0; 163 hc->tx_seqh->ccid2s_acked = 0;
233 hc->tx_seqh->ccid2s_sent = jiffies; 164 hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
234 165
235 next = hc->tx_seqh->ccid2s_next; 166 next = hc->tx_seqh->ccid2s_next;
236 /* check if we need to alloc more space */ 167 /* check if we need to alloc more space */
@@ -296,23 +227,20 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
296 } 227 }
297#endif 228#endif
298 229
299 /* setup RTO timer */ 230 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
300 if (!timer_pending(&hc->tx_rtotimer))
301 ccid2_start_rto_timer(sk);
302 231
303#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 232#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
304 do { 233 do {
305 struct ccid2_seq *seqp = hc->tx_seqt; 234 struct ccid2_seq *seqp = hc->tx_seqt;
306 235
307 while (seqp != hc->tx_seqh) { 236 while (seqp != hc->tx_seqh) {
308 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", 237 ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
309 (unsigned long long)seqp->ccid2s_seq, 238 (unsigned long long)seqp->ccid2s_seq,
310 seqp->ccid2s_acked, seqp->ccid2s_sent); 239 seqp->ccid2s_acked, seqp->ccid2s_sent);
311 seqp = seqp->ccid2s_next; 240 seqp = seqp->ccid2s_next;
312 } 241 }
313 } while (0); 242 } while (0);
314 ccid2_pr_debug("=========\n"); 243 ccid2_pr_debug("=========\n");
315 ccid2_hc_tx_check_sanity(hc);
316#endif 244#endif
317} 245}
318 246
@@ -378,17 +306,87 @@ out_invalid_option:
378 return -1; 306 return -1;
379} 307}
380 308
381static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) 309/**
310 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
311 * This code is almost identical with TCP's tcp_rtt_estimator(), since
312 * - it has a higher sampling frequency (recommended by RFC 1323),
313 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
314 * - it is simple (cf. more complex proposals such as Eifel timer or research
315 * which suggests that the gain should be set according to window size),
316 * - in tests it was found to work well with CCID2 [gerrit].
317 */
318static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
382{ 319{
383 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 320 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
321 long m = mrtt ? : 1;
384 322
385 sk_stop_timer(sk, &hc->tx_rtotimer); 323 if (hc->tx_srtt == 0) {
386 ccid2_pr_debug("deleted RTO timer\n"); 324 /* First measurement m */
325 hc->tx_srtt = m << 3;
326 hc->tx_mdev = m << 1;
327
328 hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
329 hc->tx_rttvar = hc->tx_mdev_max;
330
331 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
332 } else {
333 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
334 m -= (hc->tx_srtt >> 3);
335 hc->tx_srtt += m;
336
337 /* Similarly, update scaled mdev with regard to |m| */
338 if (m < 0) {
339 m = -m;
340 m -= (hc->tx_mdev >> 2);
341 /*
342 * This neutralises RTO increase when RTT < SRTT - mdev
343 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
344 * in Linux TCP", USENIX 2002, pp. 49-62).
345 */
346 if (m > 0)
347 m >>= 3;
348 } else {
349 m -= (hc->tx_mdev >> 2);
350 }
351 hc->tx_mdev += m;
352
353 if (hc->tx_mdev > hc->tx_mdev_max) {
354 hc->tx_mdev_max = hc->tx_mdev;
355 if (hc->tx_mdev_max > hc->tx_rttvar)
356 hc->tx_rttvar = hc->tx_mdev_max;
357 }
358
359 /*
360 * Decay RTTVAR at most once per flight, exploiting that
361 * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
362 * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
363 * GAR is a useful bound for FlightSize = pipe.
364 * AWL is probably too low here, as it over-estimates pipe.
365 */
366 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
367 if (hc->tx_mdev_max < hc->tx_rttvar)
368 hc->tx_rttvar -= (hc->tx_rttvar -
369 hc->tx_mdev_max) >> 2;
370 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
371 hc->tx_mdev_max = tcp_rto_min(sk);
372 }
373 }
374
375 /*
376 * Set RTO from SRTT and RTTVAR
377 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
378 * This agrees with RFC 4341, 5:
379 * "Because DCCP does not retransmit data, DCCP does not require
380 * TCP's recommended minimum timeout of one second".
381 */
382 hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
383
384 if (hc->tx_rto > DCCP_RTO_MAX)
385 hc->tx_rto = DCCP_RTO_MAX;
387} 386}
388 387
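To make the scaled arithmetic in ccid2_rtt_estimator() easier to follow, here is a rough userspace sketch of the same RFC 2988 update. Names such as srtt8 and rto_min4 are made up for the example, and the per-flight RTTVAR decay and the dampening of negative errors are deliberately left out; this is an illustration, not the kernel code.

#include <stdio.h>
#include <stdint.h>

/* srtt8 holds 8*SRTT, mdev4/rttvar4 hold 4*RTTVAR, all in one time unit. */
struct rtt_state { uint32_t srtt8, mdev4, rttvar4, rto; };

static void rtt_sample(struct rtt_state *s, uint32_t m, uint32_t rto_min4)
{
	if (m == 0)
		m = 1;				/* never feed a zero sample */

	if (s->srtt8 == 0) {			/* first measurement */
		s->srtt8   = m << 3;		/* SRTT := m   (times 8) */
		s->mdev4   = m << 1;		/* mdev := m/2 (times 4) */
		s->rttvar4 = s->mdev4 > rto_min4 ? s->mdev4 : rto_min4;
	} else {
		int32_t err = (int32_t)m - (int32_t)(s->srtt8 >> 3);

		s->srtt8 += err;		/* SRTT += (m - SRTT) / 8 */
		if (err < 0)
			err = -err;
		/* mdev += (|m - SRTT| - mdev) / 4 */
		s->mdev4 += err - (int32_t)(s->mdev4 >> 2);
		if (s->mdev4 > s->rttvar4)
			s->rttvar4 = s->mdev4;
	}
	s->rto = (s->srtt8 >> 3) + s->rttvar4;	/* RTO = SRTT + 4 * RTTVAR */
}

int main(void)
{
	struct rtt_state s = { 0, 0, 0, 0 };
	uint32_t samples[] = { 100, 120, 80, 300, 90 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		rtt_sample(&s, samples[i], 4 * 50);	/* floor of 50 units, scaled by 4 */
		printf("sample=%u srtt=%u rto=%u\n",
		       samples[i], s.srtt8 >> 3, s.rto);
	}
	return 0;
}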
389static inline void ccid2_new_ack(struct sock *sk, 388static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
390 struct ccid2_seq *seqp, 389 unsigned int *maxincr)
391 unsigned int *maxincr)
392{ 390{
393 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 391 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
394 392
@@ -402,93 +400,27 @@ static inline void ccid2_new_ack(struct sock *sk,
402 hc->tx_cwnd += 1; 400 hc->tx_cwnd += 1;
403 hc->tx_packets_acked = 0; 401 hc->tx_packets_acked = 0;
404 } 402 }
405 403 /*
406 /* update RTO */ 404 * FIXME: RTT is sampled several times per acknowledgment (for each
407 if (hc->tx_srtt == -1 || 405 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
408 time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { 406 * This causes the RTT to be over-estimated, since the older entries
409 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; 407 * in the Ack Vector have earlier sending times.
410 int s; 408 * The cleanest solution is to not use the ccid2s_sent field at all
411 409 * and instead use DCCP timestamps: requires changes in other places.
412 /* first measurement */ 410 */
413 if (hc->tx_srtt == -1) { 411 ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
414 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
415 r, jiffies,
416 (unsigned long long)seqp->ccid2s_seq);
417 ccid2_change_srtt(hc, r);
418 hc->tx_rttvar = r >> 1;
419 } else {
420 /* RTTVAR */
421 long tmp = hc->tx_srtt - r;
422 long srtt;
423
424 if (tmp < 0)
425 tmp *= -1;
426
427 tmp >>= 2;
428 hc->tx_rttvar *= 3;
429 hc->tx_rttvar >>= 2;
430 hc->tx_rttvar += tmp;
431
432 /* SRTT */
433 srtt = hc->tx_srtt;
434 srtt *= 7;
435 srtt >>= 3;
436 tmp = r >> 3;
437 srtt += tmp;
438 ccid2_change_srtt(hc, srtt);
439 }
440 s = hc->tx_rttvar << 2;
441 /* clock granularity is 1 when based on jiffies */
442 if (!s)
443 s = 1;
444 hc->tx_rto = hc->tx_srtt + s;
445
446 /* must be at least a second */
447 s = hc->tx_rto / HZ;
448 /* DCCP doesn't require this [but I like it cuz my code sux] */
449#if 1
450 if (s < 1)
451 hc->tx_rto = HZ;
452#endif
453 /* max 60 seconds */
454 if (s > 60)
455 hc->tx_rto = HZ * 60;
456
457 hc->tx_lastrtt = jiffies;
458
459 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
460 hc->tx_srtt, hc->tx_rttvar,
461 hc->tx_rto, HZ, r);
462 }
463
464 /* we got a new ack, so re-start RTO timer */
465 ccid2_hc_tx_kill_rto_timer(sk);
466 ccid2_start_rto_timer(sk);
467}
468
469static void ccid2_hc_tx_dec_pipe(struct sock *sk)
470{
471 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
472
473 if (hc->tx_pipe == 0)
474 DCCP_BUG("pipe == 0");
475 else
476 hc->tx_pipe--;
477
478 if (hc->tx_pipe == 0)
479 ccid2_hc_tx_kill_rto_timer(sk);
480} 412}
481 413
482static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) 414static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
483{ 415{
484 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 416 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
485 417
486 if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { 418 if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
487 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 419 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
488 return; 420 return;
489 } 421 }
490 422
491 hc->tx_last_cong = jiffies; 423 hc->tx_last_cong = ccid2_time_stamp;
492 424
493 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; 425 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
494 hc->tx_ssthresh = max(hc->tx_cwnd, 2U); 426 hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
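The congestion-event hunk above combines two small idioms worth spelling out: the (s32)(a - b) < 0 test, which stays correct when the 32-bit timestamp wraps, and the multiplicative decrease with a floor of one packet. A minimal, self-contained illustration (not kernel code):

#include <stdio.h>
#include <stdint.h>

/* true if 32-bit timestamp a lies "before" b, even across a wrap */
static int before32(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	uint32_t last_cong = 0xFFFFFFF0u;	/* just before the counter wraps */
	uint32_t sent      = 0x00000010u;	/* sent shortly after the wrap   */
	uint32_t cwnd = 5, ssthresh;

	if (before32(sent, last_cong)) {
		puts("loss within the same RTT as the last event - ignored");
	} else {
		cwnd = cwnd / 2 ? cwnd / 2 : 1;	/* halve, but never below 1     */
		ssthresh = cwnd > 2 ? cwnd : 2;	/* ssthresh stays >= 2 packets  */
		printf("new cwnd=%u ssthresh=%u\n", cwnd, ssthresh);
	}
	return 0;
}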
@@ -510,7 +442,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
510 int done = 0; 442 int done = 0;
511 unsigned int maxincr = 0; 443 unsigned int maxincr = 0;
512 444
513 ccid2_hc_tx_check_sanity(hc);
514 /* check reverse path congestion */ 445 /* check reverse path congestion */
515 seqno = DCCP_SKB_CB(skb)->dccpd_seq; 446 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
516 447
@@ -620,7 +551,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
620 seqp->ccid2s_acked = 1; 551 seqp->ccid2s_acked = 1;
621 ccid2_pr_debug("Got ack for %llu\n", 552 ccid2_pr_debug("Got ack for %llu\n",
622 (unsigned long long)seqp->ccid2s_seq); 553 (unsigned long long)seqp->ccid2s_seq);
623 ccid2_hc_tx_dec_pipe(sk); 554 hc->tx_pipe--;
624 } 555 }
625 if (seqp == hc->tx_seqt) { 556 if (seqp == hc->tx_seqt) {
626 done = 1; 557 done = 1;
@@ -677,7 +608,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
677 * one ack vector. 608 * one ack vector.
678 */ 609 */
679 ccid2_congestion_event(sk, seqp); 610 ccid2_congestion_event(sk, seqp);
680 ccid2_hc_tx_dec_pipe(sk); 611 hc->tx_pipe--;
681 } 612 }
682 if (seqp == hc->tx_seqt) 613 if (seqp == hc->tx_seqt)
683 break; 614 break;
@@ -695,7 +626,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
695 hc->tx_seqt = hc->tx_seqt->ccid2s_next; 626 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
696 } 627 }
697 628
698 ccid2_hc_tx_check_sanity(hc); 629 /* restart RTO timer if not all outstanding data has been acked */
630 if (hc->tx_pipe == 0)
631 sk_stop_timer(sk, &hc->tx_rtotimer);
632 else
633 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
699} 634}
700 635
701static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 636static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +642,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
707 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ 642 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
708 hc->tx_ssthresh = ~0U; 643 hc->tx_ssthresh = ~0U;
709 644
710 /* 645 /* Use larger initial windows (RFC 4341, section 5). */
711 * RFC 4341, 5: "The cwnd parameter is initialized to at most four 646 hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
712 * packets for new connections, following the rules from [RFC3390]".
713 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
714 */
715 hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
716 647
717 /* Make sure that Ack Ratio is enabled and within bounds. */ 648 /* Make sure that Ack Ratio is enabled and within bounds. */
718 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); 649 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
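rfc3390_bytes_to_packets() converts RFC 3390's byte-based initial window, min(4*MSS, max(2*MSS, 4380 bytes)), into the packet-based cwnd that CCID-2 uses. Assuming the usual reading of that rule, the mapping reduces to the sketch below (the 1095 and 2190 thresholds follow from 4380/4 and 4380/2):

#include <stdio.h>
#include <stdint.h>

/* MSS <= 1095 -> 4 packets, MSS <= 2190 -> 3 packets, larger MSS -> 2 packets */
static uint32_t iw_packets(uint32_t mss)
{
	return mss <= 1095 ? 4 : (mss > 2190 ? 2 : 3);
}

int main(void)
{
	uint32_t sizes[] = { 536, 1460, 4352 };

	for (unsigned i = 0; i < 3; i++)
		printf("MSS=%u -> initial cwnd=%u packets\n",
		       sizes[i], iw_packets(sizes[i]));
	return 0;
}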
@@ -723,15 +654,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
723 if (ccid2_hc_tx_alloc_seq(hc)) 654 if (ccid2_hc_tx_alloc_seq(hc))
724 return -ENOMEM; 655 return -ENOMEM;
725 656
726 hc->tx_rto = 3 * HZ; 657 hc->tx_rto = DCCP_TIMEOUT_INIT;
727 ccid2_change_srtt(hc, -1);
728 hc->tx_rttvar = -1;
729 hc->tx_rpdupack = -1; 658 hc->tx_rpdupack = -1;
730 hc->tx_last_cong = jiffies; 659 hc->tx_last_cong = ccid2_time_stamp;
731 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 660 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
732 (unsigned long)sk); 661 (unsigned long)sk);
733
734 ccid2_hc_tx_check_sanity(hc);
735 return 0; 662 return 0;
736} 663}
737 664
@@ -740,7 +667,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
740 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 667 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
741 int i; 668 int i;
742 669
743 ccid2_hc_tx_kill_rto_timer(sk); 670 sk_stop_timer(sk, &hc->tx_rtotimer);
744 671
745 for (i = 0; i < hc->tx_seqbufc; i++) 672 for (i = 0; i < hc->tx_seqbufc; i++)
746 kfree(hc->tx_seqbuf[i]); 673 kfree(hc->tx_seqbuf[i]);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103bb..9731c2dc1487 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
18#ifndef _DCCP_CCID2_H_ 18#ifndef _DCCP_CCID2_H_
19#define _DCCP_CCID2_H_ 19#define _DCCP_CCID2_H_
20 20
21#include <linux/dccp.h>
22#include <linux/timer.h> 21#include <linux/timer.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include "../ccid.h" 23#include "../ccid.h"
24#include "../dccp.h"
25
26/*
27 * CCID-2 timestamping faces the same issues as TCP timestamping.
28 * Hence we reuse/share as much of the code as possible.
29 */
30#define ccid2_time_stamp tcp_time_stamp
31
25/* NUMDUPACK parameter from RFC 4341, p. 6 */ 32/* NUMDUPACK parameter from RFC 4341, p. 6 */
26#define NUMDUPACK 3 33#define NUMDUPACK 3
27 34
28struct sock;
29
30struct ccid2_seq { 35struct ccid2_seq {
31 u64 ccid2s_seq; 36 u64 ccid2s_seq;
32 unsigned long ccid2s_sent; 37 u32 ccid2s_sent;
33 int ccid2s_acked; 38 int ccid2s_acked;
34 struct ccid2_seq *ccid2s_prev; 39 struct ccid2_seq *ccid2s_prev;
35 struct ccid2_seq *ccid2s_next; 40 struct ccid2_seq *ccid2s_next;
@@ -42,7 +47,12 @@ struct ccid2_seq {
42 * struct ccid2_hc_tx_sock - CCID2 TX half connection 47 * struct ccid2_hc_tx_sock - CCID2 TX half connection
43 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 48 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) 49 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
45 * @tx_lastrtt: time RTT was last measured 50 * @tx_srtt: smoothed RTT estimate, scaled by 2^3
51 * @tx_mdev: smoothed RTT variation, scaled by 2^2
52 * @tx_mdev_max: maximum of @mdev during one flight
53 * @tx_rttvar: moving average/maximum of @mdev_max
54 * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
55 * @tx_rtt_seq: to decay RTTVAR at most once per flight
46 * @tx_rpseq: last consecutive seqno 56 * @tx_rpseq: last consecutive seqno
47 * @tx_rpdupack: dupacks since rpseq 57 * @tx_rpdupack: dupacks since rpseq
48 */ 58 */
@@ -55,14 +65,19 @@ struct ccid2_hc_tx_sock {
55 int tx_seqbufc; 65 int tx_seqbufc;
56 struct ccid2_seq *tx_seqh; 66 struct ccid2_seq *tx_seqh;
57 struct ccid2_seq *tx_seqt; 67 struct ccid2_seq *tx_seqt;
58 long tx_rto; 68
59 long tx_srtt; 69 /* RTT measurement: variables/principles are the same as in TCP */
60 long tx_rttvar; 70 u32 tx_srtt,
61 unsigned long tx_lastrtt; 71 tx_mdev,
72 tx_mdev_max,
73 tx_rttvar,
74 tx_rto;
75 u64 tx_rtt_seq:48;
62 struct timer_list tx_rtotimer; 76 struct timer_list tx_rtotimer;
77
63 u64 tx_rpseq; 78 u64 tx_rpseq;
64 int tx_rpdupack; 79 int tx_rpdupack;
65 unsigned long tx_last_cong; 80 u32 tx_last_cong;
66 u64 tx_high_ack; 81 u64 tx_high_ack;
67}; 82};
68 83
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 95f752986497..c3f3a25bbd7a 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -54,7 +54,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
54 [TFRC_SSTATE_NO_SENT] = "NO_SENT", 54 [TFRC_SSTATE_NO_SENT] = "NO_SENT",
55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", 55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
56 [TFRC_SSTATE_FBACK] = "FBACK", 56 [TFRC_SSTATE_FBACK] = "FBACK",
57 [TFRC_SSTATE_TERM] = "TERM",
58 }; 57 };
59 58
60 return ccid3_state_names[state]; 59 return ccid3_state_names[state];
@@ -91,19 +90,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
91 return scaled_div(w_init << 6, hc->tx_rtt); 90 return scaled_div(w_init << 6, hc->tx_rtt);
92} 91}
93 92
94/* 93/**
95 * Recalculate t_ipi and delta (should be called whenever X changes) 94 * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
95 * This respects the granularity of X_inst (64 * bytes/second).
96 */ 96 */
97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) 97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
98{ 98{
99 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
100 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); 99 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
101 100
102 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ 101 ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
103 hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); 102 hc->tx_s, (unsigned)(hc->tx_x >> 6));
104
105 ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi,
106 hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6));
107} 103}
108 104
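Since X is carried in units of 64*bytes/second and t_ipi in microseconds, the division above is easy to check by hand. A small worked example with the 64x scaling written out explicitly (this is an illustration of the arithmetic, not the kernel's scaled_div32() helper):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t s = 1460;			/* segment size, bytes      */
	uint64_t x_bps = 1000000 / 8;		/* 1 Mbit/s = 125000 byte/s */
	uint64_t x_scaled = x_bps << 6;		/* X in 64*bytes/second     */

	/* t_ipi [usec] = (s << 6) * 10^6 / X_scaled == s * 10^6 / X[bytes/s] */
	uint64_t t_ipi = (s << 6) * 1000000ULL / x_scaled;

	printf("t_ipi = %llu usec (~%llu packets/s)\n",
	       (unsigned long long)t_ipi,
	       (unsigned long long)(1000000ULL / t_ipi));
	return 0;
}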
109static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) 105static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
@@ -211,16 +207,19 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
211 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, 207 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
212 ccid3_tx_state_name(hc->tx_state)); 208 ccid3_tx_state_name(hc->tx_state));
213 209
210 /* Ignore and do not restart after leaving the established state */
211 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
212 goto out;
213
214 /* Reset feedback state to "no feedback received" */
214 if (hc->tx_state == TFRC_SSTATE_FBACK) 215 if (hc->tx_state == TFRC_SSTATE_FBACK)
215 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 216 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
216 else if (hc->tx_state != TFRC_SSTATE_NO_FBACK)
217 goto out;
218 217
219 /* 218 /*
220 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 219 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
220 * RTO is 0 if and only if no feedback has been received yet.
221 */ 221 */
222 if (hc->tx_t_rto == 0 || /* no feedback received yet */ 222 if (hc->tx_t_rto == 0 || hc->tx_p == 0) {
223 hc->tx_p == 0) {
224 223
225 /* halve send rate directly */ 224 /* halve send rate directly */
226 hc->tx_x = max(hc->tx_x / 2, 225 hc->tx_x = max(hc->tx_x / 2,
@@ -256,7 +255,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
256 * Set new timeout for the nofeedback timer. 255 * Set new timeout for the nofeedback timer.
257 * See comments in packet_recv() regarding the value of t_RTO. 256 * See comments in packet_recv() regarding the value of t_RTO.
258 */ 257 */
259 if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ 258 if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */
260 t_nfb = TFRC_INITIAL_TIMEOUT; 259 t_nfb = TFRC_INITIAL_TIMEOUT;
261 else 260 else
262 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); 261 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
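Put together, the nofeedback-timer path above halves the allowed rate (never below one segment per t_mbi seconds) and re-arms the timer for max(t_RTO, 2 * t_ipi). A rough model in plain bytes/second and microseconds, leaving out the 64x fixed-point scaling of the real code:

#include <stdio.h>
#include <stdint.h>

#define T_MBI_SECONDS	64	/* back-off interval t_mbi, RFC 3448, 4.3 */

int main(void)
{
	uint64_t x = 125000;		/* current allowed rate, bytes/s */
	uint64_t s = 1460;		/* segment size, bytes           */
	uint64_t t_rto = 500000;	/* current RTO, usec             */

	uint64_t x_min = s / T_MBI_SECONDS;	/* floor: one packet per 64 s */
	x = x / 2 > x_min ? x / 2 : x_min;

	uint64_t t_ipi = s * 1000000ULL / x;	/* new inter-packet interval  */
	uint64_t t_nfb = t_rto > 2 * t_ipi ? t_rto : 2 * t_ipi;

	printf("X=%llu bytes/s, t_ipi=%llu usec, next nofeedback timeout=%llu usec\n",
	       (unsigned long long)x, (unsigned long long)t_ipi,
	       (unsigned long long)t_nfb);
	return 0;
}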
@@ -290,8 +289,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
290 if (unlikely(skb->len == 0)) 289 if (unlikely(skb->len == 0))
291 return -EBADMSG; 290 return -EBADMSG;
292 291
293 switch (hc->tx_state) { 292 if (hc->tx_state == TFRC_SSTATE_NO_SENT) {
294 case TFRC_SSTATE_NO_SENT:
295 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + 293 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies +
296 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); 294 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
297 hc->tx_last_win_count = 0; 295 hc->tx_last_win_count = 0;
@@ -326,27 +324,22 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
326 ccid3_update_send_interval(hc); 324 ccid3_update_send_interval(hc);
327 325
328 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 326 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
329 break; 327
330 case TFRC_SSTATE_NO_FBACK: 328 } else {
331 case TFRC_SSTATE_FBACK:
332 delay = ktime_us_delta(hc->tx_t_nom, now); 329 delay = ktime_us_delta(hc->tx_t_nom, now);
333 ccid3_pr_debug("delay=%ld\n", (long)delay); 330 ccid3_pr_debug("delay=%ld\n", (long)delay);
334 /* 331 /*
335 * Scheduling of packet transmissions [RFC 3448, 4.6] 332 * Scheduling of packet transmissions (RFC 5348, 8.3)
336 * 333 *
337 * if (t_now > t_nom - delta) 334 * if (t_now > t_nom - delta)
338 * // send the packet now 335 * // send the packet now
339 * else 336 * else
340 * // send the packet in (t_nom - t_now) milliseconds. 337 * // send the packet in (t_nom - t_now) milliseconds.
341 */ 338 */
342 if (delay - (s64)hc->tx_delta >= 1000) 339 if (delay >= TFRC_T_DELTA)
343 return (u32)delay / 1000L; 340 return (u32)delay / USEC_PER_MSEC;
344 341
345 ccid3_hc_tx_update_win_count(hc, now); 342 ccid3_hc_tx_update_win_count(hc, now);
346 break;
347 case TFRC_SSTATE_TERM:
348 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
349 return -EINVAL;
350 } 343 }
351 344
352 /* prepare to send now (add options etc.) */ 345 /* prepare to send now (add options etc.) */
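The scheduling rule quoted in the comment can be restated compactly: if the nominal send time is at least t_delta in the future, tell the caller how many milliseconds to wait, otherwise send immediately. A sketch assuming the 1 ms t_delta of the HZ >= 500 case:

#include <stdio.h>
#include <stdint.h>

#define T_DELTA_USEC	1000	/* assumed t_delta of one millisecond */

/* returns milliseconds to wait, or 0 meaning "send now" */
static long schedule_packet(int64_t t_nom_usec, int64_t now_usec)
{
	int64_t delay = t_nom_usec - now_usec;

	if (delay >= T_DELTA_USEC)
		return (long)(delay / 1000);
	return 0;
}

int main(void)
{
	printf("%ld ms\n", schedule_packet(5000, 1000));	/* 4 ms early -> wait 4 ms  */
	printf("%ld ms\n", schedule_packet(1500, 1000));	/* 0.5 ms early -> send now */
	return 0;
}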
@@ -372,48 +365,34 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
372static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 365static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
373{ 366{
374 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 367 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
375 struct ccid3_options_received *opt_recv; 368 struct tfrc_tx_hist_entry *acked;
376 ktime_t now; 369 ktime_t now;
377 unsigned long t_nfb; 370 unsigned long t_nfb;
378 u32 pinv, r_sample; 371 u32 r_sample;
379 372
380 /* we are only interested in ACKs */ 373 /* we are only interested in ACKs */
381 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || 374 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
382 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) 375 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
383 return; 376 return;
384 /* ... and only in the established state */
385 if (hc->tx_state != TFRC_SSTATE_FBACK &&
386 hc->tx_state != TFRC_SSTATE_NO_FBACK)
387 return;
388
389 opt_recv = &hc->tx_options_received;
390 now = ktime_get_real();
391
392 /* Estimate RTT from history if ACK number is valid */
393 r_sample = tfrc_tx_hist_rtt(hc->tx_hist,
394 DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
395 if (r_sample == 0) {
396 DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
397 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
398 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
399 return;
400 }
401
402 /* Update receive rate in units of 64 * bytes/second */
403 hc->tx_x_recv = opt_recv->ccid3or_receive_rate;
404 hc->tx_x_recv <<= 6;
405
406 /* Update loss event rate (which is scaled by 1e6) */
407 pinv = opt_recv->ccid3or_loss_event_rate;
408 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
409 hc->tx_p = 0;
410 else /* can not exceed 100% */
411 hc->tx_p = scaled_div(1, pinv);
412 /* 377 /*
413 * Validate new RTT sample and update moving average 378 * Locate the acknowledged packet in the TX history.
379 *
380 * Returning "entry not found" here can for instance happen when
381 * - the host has not sent out anything (e.g. a passive server),
382 * - the Ack is outdated (packet with higher Ack number was received),
383 * - it is a bogus Ack (for a packet not sent on this connection).
414 */ 384 */
415 r_sample = dccp_sample_rtt(sk, r_sample); 385 acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb));
386 if (acked == NULL)
387 return;
388 /* For the sake of RTT sampling, ignore/remove all older entries */
389 tfrc_tx_hist_purge(&acked->next);
390
391 /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
392 now = ktime_get_real();
393 r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
416 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); 394 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9);
395
417 /* 396 /*
418 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 397 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
419 */ 398 */
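The new receive path above boils down to: look the Ack up in a LIFO TX history, drop everything older, and feed (now - send time) into a 9/10 moving average. A self-contained userspace model; struct hist, ewma9() and the numbers are hypothetical stand-ins for the tfrc helpers:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct hist { struct hist *next; uint64_t seqno; uint64_t sent_usec; };

static struct hist *hist_find(struct hist *h, uint64_t seqno)
{
	while (h != NULL && h->seqno != seqno)
		h = h->next;
	return h;
}

static void hist_purge(struct hist **headp)
{
	while (*headp) {
		struct hist *next = (*headp)->next;
		free(*headp);
		*headp = next;
	}
}

static uint32_t ewma9(uint32_t avg, uint32_t sample)
{
	return avg ? (9 * avg + sample) / 10 : sample;
}

int main(void)
{
	struct hist *head = NULL;
	uint32_t rtt = 0;

	for (uint64_t seq = 1; seq <= 3; seq++) {	/* send packets 1..3 */
		struct hist *e = malloc(sizeof(*e));
		e->seqno = seq;
		e->sent_usec = seq * 1000;
		e->next = head;				/* newest first (LIFO) */
		head = e;
	}

	struct hist *acked = hist_find(head, 2);	/* Ack for packet 2 at t=10000 usec */
	if (acked) {
		rtt = ewma9(rtt, (uint32_t)(10000 - acked->sent_usec));
		hist_purge(&acked->next);		/* drop entries older than it */
	}
	printf("rtt=%u usec, oldest kept seqno=%llu\n",
	       rtt, (unsigned long long)(acked ? acked->seqno : 0));
	hist_purge(&head);
	return 0;
}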
@@ -461,13 +440,12 @@ done_computing_x:
461 sk->sk_write_space(sk); 440 sk->sk_write_space(sk);
462 441
463 /* 442 /*
464 * Update timeout interval for the nofeedback timer. 443 * Update timeout interval for the nofeedback timer. In order to control
465 * We use a configuration option to increase the lower bound. 444 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
466 * This can help avoid triggering the nofeedback timer too 445 * tunable RTAX_RTO_MIN value as the lower bound.
467 * often ('spinning') on LANs with small RTTs.
468 */ 446 */
469 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * 447 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
470 (USEC_PER_SEC / 1000))); 448 USEC_PER_SEC/HZ * tcp_rto_min(sk));
471 /* 449 /*
472 * Schedule no feedback timer to expire in 450 * Schedule no feedback timer to expire in
473 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) 451 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -482,66 +460,41 @@ done_computing_x:
482 jiffies + usecs_to_jiffies(t_nfb)); 460 jiffies + usecs_to_jiffies(t_nfb));
483} 461}
484 462
485static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, 463static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
486 unsigned char len, u16 idx, 464 u8 option, u8 *optval, u8 optlen)
487 unsigned char *value)
488{ 465{
489 int rc = 0;
490 const struct dccp_sock *dp = dccp_sk(sk);
491 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 466 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
492 struct ccid3_options_received *opt_recv;
493 __be32 opt_val; 467 __be32 opt_val;
494 468
495 opt_recv = &hc->tx_options_received;
496
497 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
498 opt_recv->ccid3or_seqno = dp->dccps_gsr;
499 opt_recv->ccid3or_loss_event_rate = ~0;
500 opt_recv->ccid3or_loss_intervals_idx = 0;
501 opt_recv->ccid3or_loss_intervals_len = 0;
502 opt_recv->ccid3or_receive_rate = 0;
503 }
504
505 switch (option) { 469 switch (option) {
470 case TFRC_OPT_RECEIVE_RATE:
506 case TFRC_OPT_LOSS_EVENT_RATE: 471 case TFRC_OPT_LOSS_EVENT_RATE:
507 if (unlikely(len != 4)) { 472 /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
508 DCCP_WARN("%s(%p), invalid len %d " 473 if (packet_type == DCCP_PKT_DATA)
509 "for TFRC_OPT_LOSS_EVENT_RATE\n", 474 break;
510 dccp_role(sk), sk, len); 475 if (unlikely(optlen != 4)) {
511 rc = -EINVAL; 476 DCCP_WARN("%s(%p), invalid len %d for %u\n",
512 } else { 477 dccp_role(sk), sk, optlen, option);
513 opt_val = get_unaligned((__be32 *)value); 478 return -EINVAL;
514 opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
515 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
516 dccp_role(sk), sk,
517 opt_recv->ccid3or_loss_event_rate);
518 } 479 }
519 break; 480 opt_val = ntohl(get_unaligned((__be32 *)optval));
520 case TFRC_OPT_LOSS_INTERVALS: 481
521 opt_recv->ccid3or_loss_intervals_idx = idx; 482 if (option == TFRC_OPT_RECEIVE_RATE) {
522 opt_recv->ccid3or_loss_intervals_len = len; 483 /* Receive Rate is kept in units of 64 bytes/second */
523 ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", 484 hc->tx_x_recv = opt_val;
524 dccp_role(sk), sk, 485 hc->tx_x_recv <<= 6;
525 opt_recv->ccid3or_loss_intervals_idx, 486
526 opt_recv->ccid3or_loss_intervals_len);
527 break;
528 case TFRC_OPT_RECEIVE_RATE:
529 if (unlikely(len != 4)) {
530 DCCP_WARN("%s(%p), invalid len %d "
531 "for TFRC_OPT_RECEIVE_RATE\n",
532 dccp_role(sk), sk, len);
533 rc = -EINVAL;
534 } else {
535 opt_val = get_unaligned((__be32 *)value);
536 opt_recv->ccid3or_receive_rate = ntohl(opt_val);
537 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", 487 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
538 dccp_role(sk), sk, 488 dccp_role(sk), sk, opt_val);
539 opt_recv->ccid3or_receive_rate); 489 } else {
490 /* Update the fixpoint Loss Event Rate fraction */
491 hc->tx_p = tfrc_invert_loss_event_rate(opt_val);
492
493 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
494 dccp_role(sk), sk, opt_val);
540 } 495 }
541 break;
542 } 496 }
543 497 return 0;
544 return rc;
545} 498}
546 499
547static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) 500static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -559,42 +512,36 @@ static void ccid3_hc_tx_exit(struct sock *sk)
559{ 512{
560 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 513 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
561 514
562 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
563 sk_stop_timer(sk, &hc->tx_no_feedback_timer); 515 sk_stop_timer(sk, &hc->tx_no_feedback_timer);
564
565 tfrc_tx_hist_purge(&hc->tx_hist); 516 tfrc_tx_hist_purge(&hc->tx_hist);
566} 517}
567 518
568static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 519static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
569{ 520{
570 struct ccid3_hc_tx_sock *hc; 521 info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
571 522 info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
572 /* Listen socks doesn't have a private CCID block */
573 if (sk->sk_state == DCCP_LISTEN)
574 return;
575
576 hc = ccid3_hc_tx_sk(sk);
577 info->tcpi_rto = hc->tx_t_rto;
578 info->tcpi_rtt = hc->tx_rtt;
579} 523}
580 524
581static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, 525static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
582 u32 __user *optval, int __user *optlen) 526 u32 __user *optval, int __user *optlen)
583{ 527{
584 const struct ccid3_hc_tx_sock *hc; 528 const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
529 struct tfrc_tx_info tfrc;
585 const void *val; 530 const void *val;
586 531
587 /* Listen socks doesn't have a private CCID block */
588 if (sk->sk_state == DCCP_LISTEN)
589 return -EINVAL;
590
591 hc = ccid3_hc_tx_sk(sk);
592 switch (optname) { 532 switch (optname) {
593 case DCCP_SOCKOPT_CCID_TX_INFO: 533 case DCCP_SOCKOPT_CCID_TX_INFO:
594 if (len < sizeof(hc->tx_tfrc)) 534 if (len < sizeof(tfrc))
595 return -EINVAL; 535 return -EINVAL;
596 len = sizeof(hc->tx_tfrc); 536 tfrc.tfrctx_x = hc->tx_x;
597 val = &hc->tx_tfrc; 537 tfrc.tfrctx_x_recv = hc->tx_x_recv;
538 tfrc.tfrctx_x_calc = hc->tx_x_calc;
539 tfrc.tfrctx_rtt = hc->tx_rtt;
540 tfrc.tfrctx_p = hc->tx_p;
541 tfrc.tfrctx_rto = hc->tx_t_rto;
542 tfrc.tfrctx_ipi = hc->tx_t_ipi;
543 len = sizeof(tfrc);
544 val = &tfrc;
598 break; 545 break;
599 default: 546 default:
600 return -ENOPROTOOPT; 547 return -ENOPROTOOPT;
@@ -624,7 +571,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
624 static const char *const ccid3_rx_state_names[] = { 571 static const char *const ccid3_rx_state_names[] = {
625 [TFRC_RSTATE_NO_DATA] = "NO_DATA", 572 [TFRC_RSTATE_NO_DATA] = "NO_DATA",
626 [TFRC_RSTATE_DATA] = "DATA", 573 [TFRC_RSTATE_DATA] = "DATA",
627 [TFRC_RSTATE_TERM] = "TERM",
628 }; 574 };
629 575
630 return ccid3_rx_state_names[state]; 576 return ccid3_rx_state_names[state];
@@ -650,14 +596,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
650{ 596{
651 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 597 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
652 struct dccp_sock *dp = dccp_sk(sk); 598 struct dccp_sock *dp = dccp_sk(sk);
653 ktime_t now; 599 ktime_t now = ktime_get_real();
654 s64 delta = 0; 600 s64 delta = 0;
655 601
656 if (unlikely(hc->rx_state == TFRC_RSTATE_TERM))
657 return;
658
659 now = ktime_get_real();
660
661 switch (fbtype) { 602 switch (fbtype) {
662 case CCID3_FBACK_INITIAL: 603 case CCID3_FBACK_INITIAL:
663 hc->rx_x_recv = 0; 604 hc->rx_x_recv = 0;
@@ -701,14 +642,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
701 642
702static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) 643static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
703{ 644{
704 const struct ccid3_hc_rx_sock *hc; 645 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
705 __be32 x_recv, pinv; 646 __be32 x_recv, pinv;
706 647
707 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) 648 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
708 return 0; 649 return 0;
709 650
710 hc = ccid3_hc_rx_sk(sk);
711
712 if (dccp_packet_without_ack(skb)) 651 if (dccp_packet_without_ack(skb))
713 return 0; 652 return 0;
714 653
@@ -749,10 +688,11 @@ static u32 ccid3_first_li(struct sock *sk)
749 x_recv = scaled_div32(hc->rx_bytes_recv, delta); 688 x_recv = scaled_div32(hc->rx_bytes_recv, delta);
750 if (x_recv == 0) { /* would also trigger divide-by-zero */ 689 if (x_recv == 0) { /* would also trigger divide-by-zero */
751 DCCP_WARN("X_recv==0\n"); 690 DCCP_WARN("X_recv==0\n");
752 if ((x_recv = hc->rx_x_recv) == 0) { 691 if (hc->rx_x_recv == 0) {
753 DCCP_BUG("stored value of X_recv is zero"); 692 DCCP_BUG("stored value of X_recv is zero");
754 return ~0U; 693 return ~0U;
755 } 694 }
695 x_recv = hc->rx_x_recv;
756 } 696 }
757 697
758 fval = scaled_div(hc->rx_s, hc->rx_rtt); 698 fval = scaled_div(hc->rx_s, hc->rx_rtt);
@@ -862,46 +802,31 @@ static void ccid3_hc_rx_exit(struct sock *sk)
862{ 802{
863 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 803 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
864 804
865 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
866
867 tfrc_rx_hist_purge(&hc->rx_hist); 805 tfrc_rx_hist_purge(&hc->rx_hist);
868 tfrc_lh_cleanup(&hc->rx_li_hist); 806 tfrc_lh_cleanup(&hc->rx_li_hist);
869} 807}
870 808
871static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 809static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
872{ 810{
873 const struct ccid3_hc_rx_sock *hc; 811 info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state;
874
875 /* Listen socks doesn't have a private CCID block */
876 if (sk->sk_state == DCCP_LISTEN)
877 return;
878
879 hc = ccid3_hc_rx_sk(sk);
880 info->tcpi_ca_state = hc->rx_state;
881 info->tcpi_options |= TCPI_OPT_TIMESTAMPS; 812 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
882 info->tcpi_rcv_rtt = hc->rx_rtt; 813 info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt;
883} 814}
884 815
885static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, 816static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
886 u32 __user *optval, int __user *optlen) 817 u32 __user *optval, int __user *optlen)
887{ 818{
888 const struct ccid3_hc_rx_sock *hc; 819 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
889 struct tfrc_rx_info rx_info; 820 struct tfrc_rx_info rx_info;
890 const void *val; 821 const void *val;
891 822
892 /* Listen socks doesn't have a private CCID block */
893 if (sk->sk_state == DCCP_LISTEN)
894 return -EINVAL;
895
896 hc = ccid3_hc_rx_sk(sk);
897 switch (optname) { 823 switch (optname) {
898 case DCCP_SOCKOPT_CCID_RX_INFO: 824 case DCCP_SOCKOPT_CCID_RX_INFO:
899 if (len < sizeof(rx_info)) 825 if (len < sizeof(rx_info))
900 return -EINVAL; 826 return -EINVAL;
901 rx_info.tfrcrx_x_recv = hc->rx_x_recv; 827 rx_info.tfrcrx_x_recv = hc->rx_x_recv;
902 rx_info.tfrcrx_rtt = hc->rx_rtt; 828 rx_info.tfrcrx_rtt = hc->rx_rtt;
903 rx_info.tfrcrx_p = hc->rx_pinv == 0 ? ~0U : 829 rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv);
904 scaled_div(1, hc->rx_pinv);
905 len = sizeof(rx_info); 830 len = sizeof(rx_info);
906 val = &rx_info; 831 val = &rx_info;
907 break; 832 break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 032635776653..1a9933c29672 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,35 +42,36 @@
42#include "lib/tfrc.h" 42#include "lib/tfrc.h"
43#include "../ccid.h" 43#include "../ccid.h"
44 44
45/* Two seconds as per RFC 3448 4.2 */ 45/* Two seconds as per RFC 5348, 4.2 */
46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
47 47
48/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
49#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
50
51/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 48/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
52#define TFRC_T_MBI 64 49#define TFRC_T_MBI 64
53 50
51/*
52 * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are
53 * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
54 * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
55 * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
56 * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
57 */
58#if (HZ >= 500)
59# define TFRC_T_DELTA USEC_PER_MSEC
60#else
61# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
62#endif
63
54enum ccid3_options { 64enum ccid3_options {
55 TFRC_OPT_LOSS_EVENT_RATE = 192, 65 TFRC_OPT_LOSS_EVENT_RATE = 192,
56 TFRC_OPT_LOSS_INTERVALS = 193, 66 TFRC_OPT_LOSS_INTERVALS = 193,
57 TFRC_OPT_RECEIVE_RATE = 194, 67 TFRC_OPT_RECEIVE_RATE = 194,
58}; 68};
59 69
60struct ccid3_options_received {
61 u64 ccid3or_seqno:48,
62 ccid3or_loss_intervals_idx:16;
63 u16 ccid3or_loss_intervals_len;
64 u32 ccid3or_loss_event_rate;
65 u32 ccid3or_receive_rate;
66};
67
68/* TFRC sender states */ 70/* TFRC sender states */
69enum ccid3_hc_tx_states { 71enum ccid3_hc_tx_states {
70 TFRC_SSTATE_NO_SENT = 1, 72 TFRC_SSTATE_NO_SENT = 1,
71 TFRC_SSTATE_NO_FBACK, 73 TFRC_SSTATE_NO_FBACK,
72 TFRC_SSTATE_FBACK, 74 TFRC_SSTATE_FBACK,
73 TFRC_SSTATE_TERM,
74}; 75};
75 76
76/** 77/**
@@ -90,19 +91,16 @@ enum ccid3_hc_tx_states {
90 * @tx_no_feedback_timer: Handle to no feedback timer 91 * @tx_no_feedback_timer: Handle to no feedback timer
91 * @tx_t_ld: Time last doubled during slow start 92 * @tx_t_ld: Time last doubled during slow start
92 * @tx_t_nom: Nominal send time of next packet 93 * @tx_t_nom: Nominal send time of next packet
93 * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs
94 * @tx_hist: Packet history 94 * @tx_hist: Packet history
95 * @tx_options_received: Parsed set of retrieved options
96 */ 95 */
97struct ccid3_hc_tx_sock { 96struct ccid3_hc_tx_sock {
98 struct tfrc_tx_info tx_tfrc; 97 u64 tx_x;
99#define tx_x tx_tfrc.tfrctx_x 98 u64 tx_x_recv;
100#define tx_x_recv tx_tfrc.tfrctx_x_recv 99 u32 tx_x_calc;
101#define tx_x_calc tx_tfrc.tfrctx_x_calc 100 u32 tx_rtt;
102#define tx_rtt tx_tfrc.tfrctx_rtt 101 u32 tx_p;
103#define tx_p tx_tfrc.tfrctx_p 102 u32 tx_t_rto;
104#define tx_t_rto tx_tfrc.tfrctx_rto 103 u32 tx_t_ipi;
105#define tx_t_ipi tx_tfrc.tfrctx_ipi
106 u16 tx_s; 104 u16 tx_s;
107 enum ccid3_hc_tx_states tx_state:8; 105 enum ccid3_hc_tx_states tx_state:8;
108 u8 tx_last_win_count; 106 u8 tx_last_win_count;
@@ -110,9 +108,7 @@ struct ccid3_hc_tx_sock {
110 struct timer_list tx_no_feedback_timer; 108 struct timer_list tx_no_feedback_timer;
111 ktime_t tx_t_ld; 109 ktime_t tx_t_ld;
112 ktime_t tx_t_nom; 110 ktime_t tx_t_nom;
113 u32 tx_delta;
114 struct tfrc_tx_hist_entry *tx_hist; 111 struct tfrc_tx_hist_entry *tx_hist;
115 struct ccid3_options_received tx_options_received;
116}; 112};
117 113
118static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) 114static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -126,21 +122,16 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
126enum ccid3_hc_rx_states { 122enum ccid3_hc_rx_states {
127 TFRC_RSTATE_NO_DATA = 1, 123 TFRC_RSTATE_NO_DATA = 1,
128 TFRC_RSTATE_DATA, 124 TFRC_RSTATE_DATA,
129 TFRC_RSTATE_TERM = 127,
130}; 125};
131 126
132/** 127/**
133 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket 128 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
134 * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3)
135 * @rx_rtt: Receiver estimate of rtt (non-standard)
136 * @rx_p: Current loss event rate (RFC 3448 5.4)
137 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) 129 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1)
138 * @rx_state: Receiver state, one of %ccid3_hc_rx_states 130 * @rx_state: Receiver state, one of %ccid3_hc_rx_states
139 * @rx_bytes_recv: Total sum of DCCP payload bytes 131 * @rx_bytes_recv: Total sum of DCCP payload bytes
140 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) 132 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3)
141 * @rx_rtt: Receiver estimate of RTT 133 * @rx_rtt: Receiver estimate of RTT
142 * @rx_tstamp_last_feedback: Time at which last feedback was sent 134 * @rx_tstamp_last_feedback: Time at which last feedback was sent
143 * @rx_tstamp_last_ack: Time at which last feedback was sent
144 * @rx_hist: Packet history (loss detection + RTT sampling) 135 * @rx_hist: Packet history (loss detection + RTT sampling)
145 * @rx_li_hist: Loss Interval database 136 * @rx_li_hist: Loss Interval database
146 * @rx_s: Received packet size in bytes 137 * @rx_s: Received packet size in bytes
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 8fc3cbf79071..497723c4d4bb 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
116 cur->li_length = len; 116 cur->li_length = len;
117 tfrc_lh_calc_i_mean(lh); 117 tfrc_lh_calc_i_mean(lh);
118 118
119 return (lh->i_mean < old_i_mean); 119 return lh->i_mean < old_i_mean;
120} 120}
121 121
122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ 122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 3a4f414e94a0..de8fe294bf0b 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -38,18 +38,6 @@
38#include "packet_history.h" 38#include "packet_history.h"
39#include "../../dccp.h" 39#include "../../dccp.h"
40 40
41/**
42 * tfrc_tx_hist_entry - Simple singly-linked TX history list
43 * @next: next oldest entry (LIFO order)
44 * @seqno: sequence number of this entry
45 * @stamp: send time of packet with sequence number @seqno
46 */
47struct tfrc_tx_hist_entry {
48 struct tfrc_tx_hist_entry *next;
49 u64 seqno;
50 ktime_t stamp;
51};
52
53/* 41/*
54 * Transmitter History Routines 42 * Transmitter History Routines
55 */ 43 */
@@ -71,15 +59,6 @@ void tfrc_tx_packet_history_exit(void)
71 } 59 }
72} 60}
73 61
74static struct tfrc_tx_hist_entry *
75 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
76{
77 while (head != NULL && head->seqno != seqno)
78 head = head->next;
79
80 return head;
81}
82
83int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) 62int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
84{ 63{
85 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); 64 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -107,24 +86,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
107 *headp = NULL; 86 *headp = NULL;
108} 87}
109 88
110u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
111 const ktime_t now)
112{
113 u32 rtt = 0;
114 struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
115
116 if (packet != NULL) {
117 rtt = ktime_us_delta(now, packet->stamp);
118 /*
119 * Garbage-collect older (irrelevant) entries:
120 */
121 tfrc_tx_hist_purge(&packet->next);
122 }
123
124 return rtt;
125}
126
127
128/* 89/*
129 * Receiver History Routines 90 * Receiver History Routines
130 */ 91 */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 7df6c5299999..7ee4a9d9d335 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include "tfrc.h" 41#include "tfrc.h"
42 42
43struct tfrc_tx_hist_entry; 43/**
44 * tfrc_tx_hist_entry - Simple singly-linked TX history list
45 * @next: next oldest entry (LIFO order)
46 * @seqno: sequence number of this entry
47 * @stamp: send time of packet with sequence number @seqno
48 */
49struct tfrc_tx_hist_entry {
50 struct tfrc_tx_hist_entry *next;
51 u64 seqno;
52 ktime_t stamp;
53};
54
55static inline struct tfrc_tx_hist_entry *
56 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
57{
58 while (head != NULL && head->seqno != seqno)
59 head = head->next;
60 return head;
61}
44 62
45extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); 63extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
46extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); 64extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
47extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
48 const u64 seqno, const ktime_t now);
49 65
50/* Subtraction a-b modulo-16, respects circular wrap-around */ 66/* Subtraction a-b modulo-16, respects circular wrap-around */
51#define SUB16(a, b) (((a) + 16 - (b)) & 0xF) 67#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 01bb48e96c2e..f8ee3f549770 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -57,6 +57,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
57 57
58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); 58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); 59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
60extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
60 61
61extern int tfrc_tx_packet_history_init(void); 62extern int tfrc_tx_packet_history_init(void);
62extern void tfrc_tx_packet_history_exit(void); 63extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 22ca1cf0eb55..a052a4377e26 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -687,3 +687,17 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
687 index = tfrc_binsearch(fvalue, 0); 687 index = tfrc_binsearch(fvalue, 0);
688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; 688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
689} 689}
690
691/**
692 * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
693 * When @loss_event_rate is large, there is a chance that p is truncated to 0.
694 * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
695 */
696u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
697{
698 if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
699 return 0;
700 if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
701 return 1000000;
702 return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
703}
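A quick illustration of how the new helper maps the on-wire inverse loss rate into the 10^6-scaled p used by the equation code. SMALLEST_P below is a stand-in value; the real TFRC_SMALLEST_P comes from the first entry of the lookup table:

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

#define SMALLEST_P	40U	/* hypothetical floor, not the kernel constant */

static uint32_t invert_loss_event_rate(uint32_t pinv)
{
	if (pinv == UINT_MAX)		/* "no loss seen" marker, RFC 4342, 8.5 */
		return 0;
	if (pinv == 0)			/* degenerate 1/0, treat as 100% loss   */
		return 1000000;
	uint32_t p = 1000000 / pinv;
	return p > SMALLEST_P ? p : SMALLEST_P;
}

int main(void)
{
	printf("pinv=UINT_MAX -> p=%u\n", invert_loss_event_rate(UINT_MAX));
	printf("pinv=100      -> p=%u (1%%)\n", invert_loss_event_rate(100));
	printf("pinv=10^8     -> p=%u (floored)\n", invert_loss_event_rate(100000000));
	return 0;
}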
diff --git a/net/dccp/options.c b/net/dccp/options.c
index bfda087bd90d..92718511eac5 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -96,18 +96,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
96 } 96 }
97 97
98 /* 98 /*
99 * CCID-Specific Options (from RFC 4340, sec. 10.3):
100 *
101 * Option numbers 128 through 191 are for options sent from the
102 * HC-Sender to the HC-Receiver; option numbers 192 through 255
103 * are for options sent from the HC-Receiver to the HC-Sender.
104 *
105 * CCID-specific options are ignored during connection setup, as 99 * CCID-specific options are ignored during connection setup, as
106 * negotiation may still be in progress (see RFC 4340, 10.3). 100 * negotiation may still be in progress (see RFC 4340, 10.3).
107 * The same applies to Ack Vectors, as these depend on the CCID. 101 * The same applies to Ack Vectors, as these depend on the CCID.
108 *
109 */ 102 */
110 if (dreq != NULL && (opt >= 128 || 103 if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
111 opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) 104 opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
112 goto ignore_option; 105 goto ignore_option;
113 106
@@ -226,23 +219,15 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
226 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", 219 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
227 dccp_role(sk), elapsed_time); 220 dccp_role(sk), elapsed_time);
228 break; 221 break;
229 case 128 ... 191: { 222 case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
230 const u16 idx = value - options;
231
232 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, 223 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
233 opt, len, idx, 224 pkt_type, opt, value, len))
234 value) != 0)
235 goto out_invalid_option; 225 goto out_invalid_option;
236 }
237 break; 226 break;
238 case 192 ... 255: { 227 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
239 const u16 idx = value - options;
240
241 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, 228 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
242 opt, len, idx, 229 pkt_type, opt, value, len))
243 value) != 0)
244 goto out_invalid_option; 230 goto out_invalid_option;
245 }
246 break; 231 break;
247 default: 232 default:
248 DCCP_CRIT("DCCP(%p): option %d(len=%d) not " 233 DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index baeb1eaf011b..2ef115277bea 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -693,22 +693,22 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
693 aux = scp->accessdata.acc_userl; 693 aux = scp->accessdata.acc_userl;
694 *skb_put(skb, 1) = aux; 694 *skb_put(skb, 1) = aux;
695 if (aux > 0) 695 if (aux > 0)
696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux); 696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
697 697
698 aux = scp->accessdata.acc_passl; 698 aux = scp->accessdata.acc_passl;
699 *skb_put(skb, 1) = aux; 699 *skb_put(skb, 1) = aux;
700 if (aux > 0) 700 if (aux > 0)
701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux); 701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
702 702
703 aux = scp->accessdata.acc_accl; 703 aux = scp->accessdata.acc_accl;
704 *skb_put(skb, 1) = aux; 704 *skb_put(skb, 1) = aux;
705 if (aux > 0) 705 if (aux > 0)
706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); 706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
707 707
708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); 708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
709 *skb_put(skb, 1) = aux; 709 *skb_put(skb, 1) = aux;
710 if (aux > 0) 710 if (aux > 0)
711 memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux); 711 memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux);
712 712
713 scp->persist = dn_nsp_persist(sk); 713 scp->persist = dn_nsp_persist(sk);
714 scp->persist_fxn = dn_nsp_retrans_conninit; 714 scp->persist_fxn = dn_nsp_retrans_conninit;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index dc54bd0d083b..f8c1ae4b41f0 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -392,7 +392,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
392 dev_queue_xmit(skb); 392 dev_queue_xmit(skb);
393 dev_put(dev); 393 dev_put(dev);
394 mutex_unlock(&econet_mutex); 394 mutex_unlock(&econet_mutex);
395 return(len); 395 return len;
396 396
397 out_free: 397 out_free:
398 kfree_skb(skb); 398 kfree_skb(skb);
@@ -637,7 +637,7 @@ static int econet_create(struct net *net, struct socket *sock, int protocol,
637 eo->num = protocol; 637 eo->num = protocol;
638 638
639 econet_insert_socket(&econet_sklist, sk); 639 econet_insert_socket(&econet_sklist, sk);
640 return(0); 640 return 0;
641out: 641out:
642 return err; 642 return err;
643} 643}
@@ -1009,7 +1009,6 @@ static int __init aun_udp_initialise(void)
1009 struct sockaddr_in sin; 1009 struct sockaddr_in sin;
1010 1010
1011 skb_queue_head_init(&aun_queue); 1011 skb_queue_head_init(&aun_queue);
1012 spin_lock_init(&aun_queue_lock);
1013 setup_timer(&ab_cleanup_timer, ab_cleanup, 0); 1012 setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
1014 ab_cleanup_timer.expires = jiffies + (HZ*2); 1013 ab_cleanup_timer.expires = jiffies + (HZ*2);
1015 add_timer(&ab_cleanup_timer); 1014 add_timer(&ab_cleanup_timer);
@@ -1167,7 +1166,6 @@ static int __init econet_proto_init(void)
1167 goto out; 1166 goto out;
1168 sock_register(&econet_family_ops); 1167 sock_register(&econet_family_ops);
1169#ifdef CONFIG_ECONET_AUNUDP 1168#ifdef CONFIG_ECONET_AUNUDP
1170 spin_lock_init(&aun_queue_lock);
1171 aun_udp_initialise(); 1169 aun_udp_initialise();
1172#endif 1170#endif
1173#ifdef CONFIG_ECONET_NATIVE 1171#ifdef CONFIG_ECONET_NATIVE
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 215c83986a9d..f00ef2f1d814 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -367,7 +367,7 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
367EXPORT_SYMBOL(alloc_etherdev_mq); 367EXPORT_SYMBOL(alloc_etherdev_mq);
368 368
369static size_t _format_mac_addr(char *buf, int buflen, 369static size_t _format_mac_addr(char *buf, int buflen,
370 const unsigned char *addr, int len) 370 const unsigned char *addr, int len)
371{ 371{
372 int i; 372 int i;
373 char *cp = buf; 373 char *cp = buf;
@@ -376,7 +376,7 @@ static size_t _format_mac_addr(char *buf, int buflen,
376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]); 376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
377 if (i == len - 1) 377 if (i == len - 1)
378 break; 378 break;
379 cp += strlcpy(cp, ":", buflen - (cp - buf)); 379 cp += scnprintf(cp, buflen - (cp - buf), ":");
380 } 380 }
381 return cp - buf; 381 return cp - buf;
382} 382}
@@ -386,7 +386,7 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
386 size_t l; 386 size_t l;
387 387
388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len); 388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
389 l += strlcpy(buf + l, "\n", PAGE_SIZE - l); 389 l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
390 return ((ssize_t) l); 390 return (ssize_t)l;
391} 391}
392EXPORT_SYMBOL(sysfs_format_mac); 392EXPORT_SYMBOL(sysfs_format_mac);
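After this change every byte and every separator of the MAC address goes through the same bounded printf-style path, so the returned length always matches what was written. A userspace sketch of the same loop using snprintf() (which, unlike the kernel's scnprintf(), returns the would-be length; that is harmless here because the buffer is large enough):

#include <stdio.h>
#include <string.h>

static size_t format_mac(char *buf, size_t buflen,
			 const unsigned char *addr, size_t len)
{
	char *cp = buf;

	for (size_t i = 0; i < len; i++) {
		cp += snprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
		if (i < len - 1)
			cp += snprintf(cp, buflen - (cp - buf), ":");
	}
	return cp - buf;
}

int main(void)
{
	unsigned char mac[6] = { 0x00, 0x1a, 0x2b, 0x3c, 0x4d, 0x5e };
	char buf[32];

	size_t n = format_mac(buf, sizeof(buf), mac, sizeof(mac));
	printf("%s (%zu chars)\n", buf, n);	/* 00:1a:2b:3c:4d:5e (17 chars) */
	return 0;
}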
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7cd7760144f7..e848e6c062cd 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -215,9 +215,15 @@ config NET_IPIP
215 be inserted in and removed from the running kernel whenever you 215 be inserted in and removed from the running kernel whenever you
216 want). Most people won't need this and can say N. 216 want). Most people won't need this and can say N.
217 217
218config NET_IPGRE_DEMUX
219 tristate "IP: GRE demultiplexer"
220 help
 221	  This is a helper module to demultiplex GRE packets based on the GRE version field.
222 Required by ip_gre and pptp modules.
223
218config NET_IPGRE 224config NET_IPGRE
219 tristate "IP: GRE tunnels over IP" 225 tristate "IP: GRE tunnels over IP"
220 depends on IPV6 || IPV6=n 226 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
221 help 227 help
222 Tunneling means encapsulating data of one protocol type within 228 Tunneling means encapsulating data of one protocol type within
223 another protocol and sending it over a channel that understands the 229 another protocol and sending it over a channel that understands the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 80ff87ce43aa..4978d22f9a75 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
21obj-$(CONFIG_IP_MROUTE) += ipmr.o 21obj-$(CONFIG_IP_MROUTE) += ipmr.o
22obj-$(CONFIG_NET_IPIP) += ipip.o 22obj-$(CONFIG_NET_IPIP) += ipip.o
23obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
23obj-$(CONFIG_NET_IPGRE) += ip_gre.o 24obj-$(CONFIG_NET_IPGRE) += ip_gre.o
24obj-$(CONFIG_SYN_COOKIES) += syncookies.o 25obj-$(CONFIG_SYN_COOKIES) += syncookies.o
25obj-$(CONFIG_INET_AH) += ah4.o 26obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6a1100c25a9f..f581f77d1097 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -227,18 +227,16 @@ EXPORT_SYMBOL(inet_ehash_secret);
227 227
228/* 228/*
229 * inet_ehash_secret must be set exactly once 229 * inet_ehash_secret must be set exactly once
230 * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
231 */ 230 */
232void build_ehash_secret(void) 231void build_ehash_secret(void)
233{ 232{
234 u32 rnd; 233 u32 rnd;
234
235 do { 235 do {
236 get_random_bytes(&rnd, sizeof(rnd)); 236 get_random_bytes(&rnd, sizeof(rnd));
237 } while (rnd == 0); 237 } while (rnd == 0);
238 spin_lock_bh(&inetsw_lock); 238
239 if (!inet_ehash_secret) 239 cmpxchg(&inet_ehash_secret, 0, rnd);
240 inet_ehash_secret = rnd;
241 spin_unlock_bh(&inetsw_lock);
242} 240}
243EXPORT_SYMBOL(build_ehash_secret); 241EXPORT_SYMBOL(build_ehash_secret);
244 242
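The cmpxchg() above implements a lock-free "set exactly once": only the first writer's value sticks, and later callers are no-ops. The same idiom with C11 atomics standing in for the kernel primitive (illustration only):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdatomic.h>

static _Atomic uint32_t secret;

static void set_secret_once(void)
{
	uint32_t rnd, expected = 0;

	do {
		rnd = (uint32_t)rand();
	} while (rnd == 0);		/* 0 means "not initialised yet" */

	/* Succeeds only if secret is still 0; otherwise leaves it untouched. */
	atomic_compare_exchange_strong(&secret, &expected, rnd);
}

int main(void)
{
	set_secret_once();
	uint32_t first = atomic_load(&secret);

	set_secret_once();		/* second call must not change it */
	printf("secret unchanged: %s\n",
	       atomic_load(&secret) == first ? "yes" : "no");
	return 0;
}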
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96c1955b3e2f..d9031ad67826 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -55,7 +55,7 @@
55 * Stuart Cheshire : Metricom and grat arp fixes 55 * Stuart Cheshire : Metricom and grat arp fixes
56 * *** FOR 2.1 clean this up *** 56 * *** FOR 2.1 clean this up ***
57 * Lawrence V. Stefani: (08/12/96) Added FDDI support. 57 * Lawrence V. Stefani: (08/12/96) Added FDDI support.
58 * Alan Cox : Took the AP1000 nasty FDDI hack and 58 * Alan Cox : Took the AP1000 nasty FDDI hack and
59 * folded into the mainstream FDDI code. 59 * folded into the mainstream FDDI code.
60 * Ack spit, Linus how did you allow that 60 * Ack spit, Linus how did you allow that
61 * one in... 61 * one in...
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(clip_tbl_hook);
120#endif 120#endif
121 121
122#include <asm/system.h> 122#include <asm/system.h>
123#include <asm/uaccess.h> 123#include <linux/uaccess.h>
124 124
125#include <linux/netfilter_arp.h> 125#include <linux/netfilter_arp.h>
126 126
@@ -161,7 +161,7 @@ static const struct neigh_ops arp_direct_ops = {
161 .queue_xmit = dev_queue_xmit, 161 .queue_xmit = dev_queue_xmit,
162}; 162};
163 163
164const struct neigh_ops arp_broken_ops = { 164static const struct neigh_ops arp_broken_ops = {
165 .family = AF_INET, 165 .family = AF_INET,
166 .solicit = arp_solicit, 166 .solicit = arp_solicit,
167 .error_report = arp_error_report, 167 .error_report = arp_error_report,
@@ -170,35 +170,34 @@ const struct neigh_ops arp_broken_ops = {
170 .hh_output = dev_queue_xmit, 170 .hh_output = dev_queue_xmit,
171 .queue_xmit = dev_queue_xmit, 171 .queue_xmit = dev_queue_xmit,
172}; 172};
173EXPORT_SYMBOL(arp_broken_ops);
174 173
175struct neigh_table arp_tbl = { 174struct neigh_table arp_tbl = {
176 .family = AF_INET, 175 .family = AF_INET,
177 .entry_size = sizeof(struct neighbour) + 4, 176 .entry_size = sizeof(struct neighbour) + 4,
178 .key_len = 4, 177 .key_len = 4,
179 .hash = arp_hash, 178 .hash = arp_hash,
180 .constructor = arp_constructor, 179 .constructor = arp_constructor,
181 .proxy_redo = parp_redo, 180 .proxy_redo = parp_redo,
182 .id = "arp_cache", 181 .id = "arp_cache",
183 .parms = { 182 .parms = {
184 .tbl = &arp_tbl, 183 .tbl = &arp_tbl,
185 .base_reachable_time = 30 * HZ, 184 .base_reachable_time = 30 * HZ,
186 .retrans_time = 1 * HZ, 185 .retrans_time = 1 * HZ,
187 .gc_staletime = 60 * HZ, 186 .gc_staletime = 60 * HZ,
188 .reachable_time = 30 * HZ, 187 .reachable_time = 30 * HZ,
189 .delay_probe_time = 5 * HZ, 188 .delay_probe_time = 5 * HZ,
190 .queue_len = 3, 189 .queue_len = 3,
191 .ucast_probes = 3, 190 .ucast_probes = 3,
192 .mcast_probes = 3, 191 .mcast_probes = 3,
193 .anycast_delay = 1 * HZ, 192 .anycast_delay = 1 * HZ,
194 .proxy_delay = (8 * HZ) / 10, 193 .proxy_delay = (8 * HZ) / 10,
195 .proxy_qlen = 64, 194 .proxy_qlen = 64,
196 .locktime = 1 * HZ, 195 .locktime = 1 * HZ,
197 }, 196 },
198 .gc_interval = 30 * HZ, 197 .gc_interval = 30 * HZ,
199 .gc_thresh1 = 128, 198 .gc_thresh1 = 128,
200 .gc_thresh2 = 512, 199 .gc_thresh2 = 512,
201 .gc_thresh3 = 1024, 200 .gc_thresh3 = 1024,
202}; 201};
203EXPORT_SYMBOL(arp_tbl); 202EXPORT_SYMBOL(arp_tbl);
204 203
@@ -233,7 +232,7 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev)
233 232
234static int arp_constructor(struct neighbour *neigh) 233static int arp_constructor(struct neighbour *neigh)
235{ 234{
236 __be32 addr = *(__be32*)neigh->primary_key; 235 __be32 addr = *(__be32 *)neigh->primary_key;
237 struct net_device *dev = neigh->dev; 236 struct net_device *dev = neigh->dev;
238 struct in_device *in_dev; 237 struct in_device *in_dev;
239 struct neigh_parms *parms; 238 struct neigh_parms *parms;
@@ -296,16 +295,19 @@ static int arp_constructor(struct neighbour *neigh)
296 neigh->ops = &arp_broken_ops; 295 neigh->ops = &arp_broken_ops;
297 neigh->output = neigh->ops->output; 296 neigh->output = neigh->ops->output;
298 return 0; 297 return 0;
298#else
299 break;
299#endif 300#endif
300 ;} 301 }
301#endif 302#endif
302 if (neigh->type == RTN_MULTICAST) { 303 if (neigh->type == RTN_MULTICAST) {
303 neigh->nud_state = NUD_NOARP; 304 neigh->nud_state = NUD_NOARP;
304 arp_mc_map(addr, neigh->ha, dev, 1); 305 arp_mc_map(addr, neigh->ha, dev, 1);
305 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { 306 } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
306 neigh->nud_state = NUD_NOARP; 307 neigh->nud_state = NUD_NOARP;
307 memcpy(neigh->ha, dev->dev_addr, dev->addr_len); 308 memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
308 } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) { 309 } else if (neigh->type == RTN_BROADCAST ||
310 (dev->flags & IFF_POINTOPOINT)) {
309 neigh->nud_state = NUD_NOARP; 311 neigh->nud_state = NUD_NOARP;
310 memcpy(neigh->ha, dev->broadcast, dev->addr_len); 312 memcpy(neigh->ha, dev->broadcast, dev->addr_len);
311 } 313 }
@@ -315,7 +317,7 @@ static int arp_constructor(struct neighbour *neigh)
315 else 317 else
316 neigh->ops = &arp_generic_ops; 318 neigh->ops = &arp_generic_ops;
317 319
318 if (neigh->nud_state&NUD_VALID) 320 if (neigh->nud_state & NUD_VALID)
319 neigh->output = neigh->ops->connected_output; 321 neigh->output = neigh->ops->connected_output;
320 else 322 else
321 neigh->output = neigh->ops->output; 323 neigh->output = neigh->ops->output;
@@ -334,7 +336,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
334 __be32 saddr = 0; 336 __be32 saddr = 0;
335 u8 *dst_ha = NULL; 337 u8 *dst_ha = NULL;
336 struct net_device *dev = neigh->dev; 338 struct net_device *dev = neigh->dev;
337 __be32 target = *(__be32*)neigh->primary_key; 339 __be32 target = *(__be32 *)neigh->primary_key;
338 int probes = atomic_read(&neigh->probes); 340 int probes = atomic_read(&neigh->probes);
339 struct in_device *in_dev; 341 struct in_device *in_dev;
340 342
@@ -347,7 +349,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
347 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 349 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
348 default: 350 default:
349 case 0: /* By default announce any local IP */ 351 case 0: /* By default announce any local IP */
350 if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL) 352 if (skb && inet_addr_type(dev_net(dev),
353 ip_hdr(skb)->saddr) == RTN_LOCAL)
351 saddr = ip_hdr(skb)->saddr; 354 saddr = ip_hdr(skb)->saddr;
352 break; 355 break;
353 case 1: /* Restrict announcements of saddr in same subnet */ 356 case 1: /* Restrict announcements of saddr in same subnet */
@@ -369,16 +372,21 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
369 if (!saddr) 372 if (!saddr)
370 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); 373 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
371 374
372 if ((probes -= neigh->parms->ucast_probes) < 0) { 375 probes -= neigh->parms->ucast_probes;
373 if (!(neigh->nud_state&NUD_VALID)) 376 if (probes < 0) {
374 printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n"); 377 if (!(neigh->nud_state & NUD_VALID))
378 printk(KERN_DEBUG
379 "trying to ucast probe in NUD_INVALID\n");
375 dst_ha = neigh->ha; 380 dst_ha = neigh->ha;
376 read_lock_bh(&neigh->lock); 381 read_lock_bh(&neigh->lock);
377 } else if ((probes -= neigh->parms->app_probes) < 0) { 382 } else {
383 probes -= neigh->parms->app_probes;
384 if (probes < 0) {
378#ifdef CONFIG_ARPD 385#ifdef CONFIG_ARPD
379 neigh_app_ns(neigh); 386 neigh_app_ns(neigh);
380#endif 387#endif
381 return; 388 return;
389 }
382 } 390 }
383 391
384 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, 392 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
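The restructured hunk above only unpicks the embedded assignments; the probe budget itself is unchanged: the first ucast_probes attempts go unicast to the cached hardware address, the next app_probes are handed to a userspace resolver (ARPD), and anything beyond that falls back to broadcast. A standalone sketch of that control flow, with illustrative names (classify_probe and the enum are not kernel symbols):

#include <stdio.h>

enum probe_target { PROBE_UNICAST, PROBE_APP, PROBE_BROADCAST };

/* Mirrors the order of checks in arp_solicit(): subtract the unicast
 * budget first, then the application-probe budget. */
static enum probe_target classify_probe(int probes, int ucast_probes, int app_probes)
{
        probes -= ucast_probes;
        if (probes < 0)
                return PROBE_UNICAST;   /* dst_ha = neigh->ha path */

        probes -= app_probes;
        if (probes < 0)
                return PROBE_APP;       /* neigh_app_ns() and return */

        return PROBE_BROADCAST;         /* arp_send() with a NULL dst_ha */
}

int main(void)
{
        for (int i = 0; i < 8; i++)
                printf("probe %d -> %d\n", i, classify_probe(i, 3, 2));
        return 0;
}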
@@ -451,7 +459,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
451 * is allowed to use this function, it is scheduled to be removed. --ANK 459 * is allowed to use this function, it is scheduled to be removed. --ANK
452 */ 460 */
453 461
454static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev) 462static int arp_set_predefined(int addr_hint, unsigned char *haddr,
463 __be32 paddr, struct net_device *dev)
455{ 464{
456 switch (addr_hint) { 465 switch (addr_hint) {
457 case RTN_LOCAL: 466 case RTN_LOCAL:
@@ -483,7 +492,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
483 492
484 paddr = skb_rtable(skb)->rt_gateway; 493 paddr = skb_rtable(skb)->rt_gateway;
485 494
486 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) 495 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
496 paddr, dev))
487 return 0; 497 return 0;
488 498
489 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); 499 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -515,13 +525,14 @@ int arp_bind_neighbour(struct dst_entry *dst)
515 return -EINVAL; 525 return -EINVAL;
516 if (n == NULL) { 526 if (n == NULL) {
517 __be32 nexthop = ((struct rtable *)dst)->rt_gateway; 527 __be32 nexthop = ((struct rtable *)dst)->rt_gateway;
518 if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) 528 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
519 nexthop = 0; 529 nexthop = 0;
520 n = __neigh_lookup_errno( 530 n = __neigh_lookup_errno(
521#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 531#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
522 dev->type == ARPHRD_ATM ? clip_tbl_hook : 532 dev->type == ARPHRD_ATM ?
533 clip_tbl_hook :
523#endif 534#endif
524 &arp_tbl, &nexthop, dev); 535 &arp_tbl, &nexthop, dev);
525 if (IS_ERR(n)) 536 if (IS_ERR(n))
526 return PTR_ERR(n); 537 return PTR_ERR(n);
527 dst->neighbour = n; 538 dst->neighbour = n;
@@ -543,8 +554,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
543 554
544 if (!IN_DEV_PROXY_ARP(in_dev)) 555 if (!IN_DEV_PROXY_ARP(in_dev))
545 return 0; 556 return 0;
546 557 imi = IN_DEV_MEDIUM_ID(in_dev);
547 if ((imi = IN_DEV_MEDIUM_ID(in_dev)) == 0) 558 if (imi == 0)
548 return 1; 559 return 1;
549 if (imi == -1) 560 if (imi == -1)
550 return 0; 561 return 0;
@@ -555,7 +566,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
555 if (out_dev) 566 if (out_dev)
556 omi = IN_DEV_MEDIUM_ID(out_dev); 567 omi = IN_DEV_MEDIUM_ID(out_dev);
557 568
558 return (omi != imi && omi != -1); 569 return omi != imi && omi != -1;
559} 570}
560 571
561/* 572/*
@@ -685,7 +696,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
685 arp->ar_pln = 4; 696 arp->ar_pln = 4;
686 arp->ar_op = htons(type); 697 arp->ar_op = htons(type);
687 698
688 arp_ptr=(unsigned char *)(arp+1); 699 arp_ptr = (unsigned char *)(arp + 1);
689 700
690 memcpy(arp_ptr, src_hw, dev->addr_len); 701 memcpy(arp_ptr, src_hw, dev->addr_len);
691 arp_ptr += dev->addr_len; 702 arp_ptr += dev->addr_len;
@@ -735,9 +746,8 @@ void arp_send(int type, int ptype, __be32 dest_ip,
735 746
736 skb = arp_create(type, ptype, dest_ip, dev, src_ip, 747 skb = arp_create(type, ptype, dest_ip, dev, src_ip,
737 dest_hw, src_hw, target_hw); 748 dest_hw, src_hw, target_hw);
738 if (skb == NULL) { 749 if (skb == NULL)
739 return; 750 return;
740 }
741 751
742 arp_xmit(skb); 752 arp_xmit(skb);
743} 753}
@@ -815,7 +825,7 @@ static int arp_process(struct sk_buff *skb)
815/* 825/*
816 * Extract fields 826 * Extract fields
817 */ 827 */
818 arp_ptr= (unsigned char *)(arp+1); 828 arp_ptr = (unsigned char *)(arp + 1);
819 sha = arp_ptr; 829 sha = arp_ptr;
820 arp_ptr += dev->addr_len; 830 arp_ptr += dev->addr_len;
821 memcpy(&sip, arp_ptr, 4); 831 memcpy(&sip, arp_ptr, 4);
@@ -869,16 +879,17 @@ static int arp_process(struct sk_buff *skb)
869 addr_type = rt->rt_type; 879 addr_type = rt->rt_type;
870 880
871 if (addr_type == RTN_LOCAL) { 881 if (addr_type == RTN_LOCAL) {
872 int dont_send = 0; 882 int dont_send;
873 883
874 if (!dont_send) 884 dont_send = arp_ignore(in_dev, sip, tip);
875 dont_send |= arp_ignore(in_dev,sip,tip);
876 if (!dont_send && IN_DEV_ARPFILTER(in_dev)) 885 if (!dont_send && IN_DEV_ARPFILTER(in_dev))
877 dont_send |= arp_filter(sip,tip,dev); 886 dont_send |= arp_filter(sip, tip, dev);
878 if (!dont_send) { 887 if (!dont_send) {
879 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 888 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
880 if (n) { 889 if (n) {
881 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 890 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
891 dev, tip, sha, dev->dev_addr,
892 sha);
882 neigh_release(n); 893 neigh_release(n);
883 } 894 }
884 } 895 }
@@ -887,8 +898,7 @@ static int arp_process(struct sk_buff *skb)
887 if (addr_type == RTN_UNICAST && 898 if (addr_type == RTN_UNICAST &&
888 (arp_fwd_proxy(in_dev, dev, rt) || 899 (arp_fwd_proxy(in_dev, dev, rt) ||
889 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || 900 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
890 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) 901 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
891 {
892 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 902 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
893 if (n) 903 if (n)
894 neigh_release(n); 904 neigh_release(n);
@@ -896,9 +906,12 @@ static int arp_process(struct sk_buff *skb)
896 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || 906 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
897 skb->pkt_type == PACKET_HOST || 907 skb->pkt_type == PACKET_HOST ||
898 in_dev->arp_parms->proxy_delay == 0) { 908 in_dev->arp_parms->proxy_delay == 0) {
899 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 909 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
910 dev, tip, sha, dev->dev_addr,
911 sha);
900 } else { 912 } else {
901 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); 913 pneigh_enqueue(&arp_tbl,
914 in_dev->arp_parms, skb);
902 return 0; 915 return 0;
903 } 916 }
904 goto out; 917 goto out;
@@ -939,7 +952,8 @@ static int arp_process(struct sk_buff *skb)
939 if (arp->ar_op != htons(ARPOP_REPLY) || 952 if (arp->ar_op != htons(ARPOP_REPLY) ||
940 skb->pkt_type != PACKET_HOST) 953 skb->pkt_type != PACKET_HOST)
941 state = NUD_STALE; 954 state = NUD_STALE;
942 neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0); 955 neigh_update(n, sha, state,
956 override ? NEIGH_UPDATE_F_OVERRIDE : 0);
943 neigh_release(n); 957 neigh_release(n);
944 } 958 }
945 959
@@ -975,7 +989,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
975 arp->ar_pln != 4) 989 arp->ar_pln != 4)
976 goto freeskb; 990 goto freeskb;
977 991
978 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 992 skb = skb_share_check(skb, GFP_ATOMIC);
993 if (skb == NULL)
979 goto out_of_mem; 994 goto out_of_mem;
980 995
981 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); 996 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
@@ -1019,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1019 return -EINVAL; 1034 return -EINVAL;
1020 if (!dev && (r->arp_flags & ATF_COM)) { 1035 if (!dev && (r->arp_flags & ATF_COM)) {
1021 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, 1036 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
1022 r->arp_ha.sa_data); 1037 r->arp_ha.sa_data);
1023 if (!dev) 1038 if (!dev)
1024 return -ENODEV; 1039 return -ENODEV;
1025 } 1040 }
@@ -1033,7 +1048,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1033} 1048}
1034 1049
1035static int arp_req_set(struct net *net, struct arpreq *r, 1050static int arp_req_set(struct net *net, struct arpreq *r,
1036 struct net_device * dev) 1051 struct net_device *dev)
1037{ 1052{
1038 __be32 ip; 1053 __be32 ip;
1039 struct neighbour *neigh; 1054 struct neighbour *neigh;
@@ -1046,10 +1061,11 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1046 if (r->arp_flags & ATF_PERM) 1061 if (r->arp_flags & ATF_PERM)
1047 r->arp_flags |= ATF_COM; 1062 r->arp_flags |= ATF_COM;
1048 if (dev == NULL) { 1063 if (dev == NULL) {
1049 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1064 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1050 .tos = RTO_ONLINK } } }; 1065 .tos = RTO_ONLINK } };
1051 struct rtable * rt; 1066 struct rtable *rt;
1052 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1067 err = ip_route_output_key(net, &rt, &fl);
1068 if (err != 0)
1053 return err; 1069 return err;
1054 dev = rt->dst.dev; 1070 dev = rt->dst.dev;
1055 ip_rt_put(rt); 1071 ip_rt_put(rt);
@@ -1083,9 +1099,9 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1083 unsigned state = NUD_STALE; 1099 unsigned state = NUD_STALE;
1084 if (r->arp_flags & ATF_PERM) 1100 if (r->arp_flags & ATF_PERM)
1085 state = NUD_PERMANENT; 1101 state = NUD_PERMANENT;
1086 err = neigh_update(neigh, (r->arp_flags&ATF_COM) ? 1102 err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
1087 r->arp_ha.sa_data : NULL, state, 1103 r->arp_ha.sa_data : NULL, state,
1088 NEIGH_UPDATE_F_OVERRIDE| 1104 NEIGH_UPDATE_F_OVERRIDE |
1089 NEIGH_UPDATE_F_ADMIN); 1105 NEIGH_UPDATE_F_ADMIN);
1090 neigh_release(neigh); 1106 neigh_release(neigh);
1091 } 1107 }
@@ -1094,12 +1110,12 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1094 1110
1095static unsigned arp_state_to_flags(struct neighbour *neigh) 1111static unsigned arp_state_to_flags(struct neighbour *neigh)
1096{ 1112{
1097 unsigned flags = 0;
1098 if (neigh->nud_state&NUD_PERMANENT) 1113 if (neigh->nud_state&NUD_PERMANENT)
1099 flags = ATF_PERM|ATF_COM; 1114 return ATF_PERM | ATF_COM;
1100 else if (neigh->nud_state&NUD_VALID) 1115 else if (neigh->nud_state&NUD_VALID)
1101 flags = ATF_COM; 1116 return ATF_COM;
1102 return flags; 1117 else
1118 return 0;
1103} 1119}
1104 1120
1105/* 1121/*
@@ -1142,7 +1158,7 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
1142} 1158}
1143 1159
1144static int arp_req_delete(struct net *net, struct arpreq *r, 1160static int arp_req_delete(struct net *net, struct arpreq *r,
1145 struct net_device * dev) 1161 struct net_device *dev)
1146{ 1162{
1147 int err; 1163 int err;
1148 __be32 ip; 1164 __be32 ip;
@@ -1153,10 +1169,11 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1153 1169
1154 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; 1170 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1155 if (dev == NULL) { 1171 if (dev == NULL) {
1156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1172 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1157 .tos = RTO_ONLINK } } }; 1173 .tos = RTO_ONLINK } };
1158 struct rtable * rt; 1174 struct rtable *rt;
1159 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1175 err = ip_route_output_key(net, &rt, &fl);
1176 if (err != 0)
1160 return err; 1177 return err;
1161 dev = rt->dst.dev; 1178 dev = rt->dst.dev;
1162 ip_rt_put(rt); 1179 ip_rt_put(rt);
@@ -1166,7 +1183,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1166 err = -ENXIO; 1183 err = -ENXIO;
1167 neigh = neigh_lookup(&arp_tbl, &ip, dev); 1184 neigh = neigh_lookup(&arp_tbl, &ip, dev);
1168 if (neigh) { 1185 if (neigh) {
1169 if (neigh->nud_state&~NUD_NOARP) 1186 if (neigh->nud_state & ~NUD_NOARP)
1170 err = neigh_update(neigh, NULL, NUD_FAILED, 1187 err = neigh_update(neigh, NULL, NUD_FAILED,
1171 NEIGH_UPDATE_F_OVERRIDE| 1188 NEIGH_UPDATE_F_OVERRIDE|
1172 NEIGH_UPDATE_F_ADMIN); 1189 NEIGH_UPDATE_F_ADMIN);
@@ -1186,24 +1203,24 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1186 struct net_device *dev = NULL; 1203 struct net_device *dev = NULL;
1187 1204
1188 switch (cmd) { 1205 switch (cmd) {
1189 case SIOCDARP: 1206 case SIOCDARP:
1190 case SIOCSARP: 1207 case SIOCSARP:
1191 if (!capable(CAP_NET_ADMIN)) 1208 if (!capable(CAP_NET_ADMIN))
1192 return -EPERM; 1209 return -EPERM;
1193 case SIOCGARP: 1210 case SIOCGARP:
1194 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1211 err = copy_from_user(&r, arg, sizeof(struct arpreq));
1195 if (err) 1212 if (err)
1196 return -EFAULT; 1213 return -EFAULT;
1197 break; 1214 break;
1198 default: 1215 default:
1199 return -EINVAL; 1216 return -EINVAL;
1200 } 1217 }
1201 1218
1202 if (r.arp_pa.sa_family != AF_INET) 1219 if (r.arp_pa.sa_family != AF_INET)
1203 return -EPFNOSUPPORT; 1220 return -EPFNOSUPPORT;
1204 1221
1205 if (!(r.arp_flags & ATF_PUBL) && 1222 if (!(r.arp_flags & ATF_PUBL) &&
1206 (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB))) 1223 (r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
1207 return -EINVAL; 1224 return -EINVAL;
1208 if (!(r.arp_flags & ATF_NETMASK)) 1225 if (!(r.arp_flags & ATF_NETMASK))
1209 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = 1226 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
@@ -1211,7 +1228,8 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1211 rtnl_lock(); 1228 rtnl_lock();
1212 if (r.arp_dev[0]) { 1229 if (r.arp_dev[0]) {
1213 err = -ENODEV; 1230 err = -ENODEV;
1214 if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL) 1231 dev = __dev_get_by_name(net, r.arp_dev);
1232 if (dev == NULL)
1215 goto out; 1233 goto out;
1216 1234
1217 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ 1235 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
@@ -1243,7 +1261,8 @@ out:
1243 return err; 1261 return err;
1244} 1262}
1245 1263
1246static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 1264static int arp_netdev_event(struct notifier_block *this, unsigned long event,
1265 void *ptr)
1247{ 1266{
1248 struct net_device *dev = ptr; 1267 struct net_device *dev = ptr;
1249 1268
@@ -1311,12 +1330,13 @@ static char *ax2asc2(ax25_address *a, char *buf)
1311 for (n = 0, s = buf; n < 6; n++) { 1330 for (n = 0, s = buf; n < 6; n++) {
1312 c = (a->ax25_call[n] >> 1) & 0x7F; 1331 c = (a->ax25_call[n] >> 1) & 0x7F;
1313 1332
1314 if (c != ' ') *s++ = c; 1333 if (c != ' ')
1334 *s++ = c;
1315 } 1335 }
1316 1336
1317 *s++ = '-'; 1337 *s++ = '-';
1318 1338 n = (a->ax25_call[6] >> 1) & 0x0F;
1319 if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) { 1339 if (n > 9) {
1320 *s++ = '1'; 1340 *s++ = '1';
1321 n -= 10; 1341 n -= 10;
1322 } 1342 }
@@ -1325,10 +1345,9 @@ static char *ax2asc2(ax25_address *a, char *buf)
1325 *s++ = '\0'; 1345 *s++ = '\0';
1326 1346
1327 if (*buf == '\0' || *buf == '-') 1347 if (*buf == '\0' || *buf == '-')
1328 return "*"; 1348 return "*";
1329 1349
1330 return buf; 1350 return buf;
1331
1332} 1351}
1333#endif /* CONFIG_AX25 */ 1352#endif /* CONFIG_AX25 */
1334 1353
@@ -1408,10 +1427,10 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
1408/* ------------------------------------------------------------------------ */ 1427/* ------------------------------------------------------------------------ */
1409 1428
1410static const struct seq_operations arp_seq_ops = { 1429static const struct seq_operations arp_seq_ops = {
1411 .start = arp_seq_start, 1430 .start = arp_seq_start,
1412 .next = neigh_seq_next, 1431 .next = neigh_seq_next,
1413 .stop = neigh_seq_stop, 1432 .stop = neigh_seq_stop,
1414 .show = arp_seq_show, 1433 .show = arp_seq_show,
1415}; 1434};
1416 1435
1417static int arp_seq_open(struct inode *inode, struct file *file) 1436static int arp_seq_open(struct inode *inode, struct file *file)
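For reference, the arp_ioctl() path reworked above is what a SIOCGARP request from userspace lands in; arp_pa must be AF_INET and, unlike SIOCSARP/SIOCDARP, the get variant needs no CAP_NET_ADMIN. A hypothetical caller, with an example interface name and address and minimal error handling:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if_arp.h>

int main(void)
{
        struct arpreq req;
        struct sockaddr_in *sin;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
                return 1;

        memset(&req, 0, sizeof(req));
        sin = (struct sockaddr_in *)&req.arp_pa;
        sin->sin_family = AF_INET;              /* anything else gets -EPFNOSUPPORT */
        inet_pton(AF_INET, "192.0.2.1", &sin->sin_addr);
        strncpy(req.arp_dev, "eth0", sizeof(req.arp_dev) - 1);

        if (ioctl(fd, SIOCGARP, &req) == 0) {
                unsigned char *ha = (unsigned char *)req.arp_ha.sa_data;
                printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
                       ha[0], ha[1], ha[2], ha[3], ha[4], ha[5]);
        } else {
                perror("SIOCGARP");
        }
        close(fd);
        return 0;
}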
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 721a8a37b45c..174be6caa5c8 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,6 +73,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
73 inet->inet_id = jiffies; 73 inet->inet_id = jiffies;
74 74
75 sk_dst_set(sk, &rt->dst); 75 sk_dst_set(sk, &rt->dst);
76 return(0); 76 return 0;
77} 77}
78EXPORT_SYMBOL(ip4_datagram_connect); 78EXPORT_SYMBOL(ip4_datagram_connect);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index da14c49284f4..c2ff48fa18c7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -209,7 +209,7 @@ static void inetdev_destroy(struct in_device *in_dev)
209 inet_free_ifa(ifa); 209 inet_free_ifa(ifa);
210 } 210 }
211 211
212 dev->ip_ptr = NULL; 212 rcu_assign_pointer(dev->ip_ptr, NULL);
213 213
214 devinet_sysctl_unregister(in_dev); 214 devinet_sysctl_unregister(in_dev);
215 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 215 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
@@ -1059,7 +1059,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1059 switch (event) { 1059 switch (event) {
1060 case NETDEV_REGISTER: 1060 case NETDEV_REGISTER:
1061 printk(KERN_DEBUG "inetdev_event: bug\n"); 1061 printk(KERN_DEBUG "inetdev_event: bug\n");
1062 dev->ip_ptr = NULL; 1062 rcu_assign_pointer(dev->ip_ptr, NULL);
1063 break; 1063 break;
1064 case NETDEV_UP: 1064 case NETDEV_UP:
1065 if (!inetdev_valid_mtu(dev->mtu)) 1065 if (!inetdev_valid_mtu(dev->mtu))
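The two devinet.c hunks replace plain NULL stores to dev->ip_ptr with rcu_assign_pointer(), so the clearing store carries the ordering that lockless readers of the pointer rely on. A generic kernel-style sketch of that publish/read pairing; the struct and names are illustrative, not taken from devinet.c:

#include <linux/rcupdate.h>

struct cfg {
        int value;
};

static struct cfg __rcu *active_cfg;

/* Writer side: publish or clear the pointer with the required barriers. */
static void cfg_clear(void)
{
        rcu_assign_pointer(active_cfg, NULL);
        synchronize_rcu();      /* readers still using the old object finish here */
}

/* Reader side: sample the pointer inside an RCU read-side section. */
static int cfg_peek(void)
{
        struct cfg *c;
        int v = -1;

        rcu_read_lock();
        c = rcu_dereference(active_cfg);
        if (c)
                v = c->value;
        rcu_read_unlock();
        return v;
}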
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7d02a9f999fa..4a69a957872b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -147,35 +147,40 @@ static void fib_flush(struct net *net)
147 rt_cache_flush(net, -1); 147 rt_cache_flush(net, -1);
148} 148}
149 149
150/* 150/**
151 * Find the first device with a given source address. 151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU
152 */ 157 */
153 158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154struct net_device * ip_dev_find(struct net *net, __be32 addr)
155{ 159{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 160 struct flowi fl = {
157 struct fib_result res; 161 .nl_u = {
162 .ip4_u = {
163 .daddr = addr
164 }
165 },
166 .flags = FLOWI_FLAG_MATCH_ANY_IIF
167 };
168 struct fib_result res = { 0 };
158 struct net_device *dev = NULL; 169 struct net_device *dev = NULL;
159 struct fib_table *local_table;
160 170
161#ifdef CONFIG_IP_MULTIPLE_TABLES 171 if (fib_lookup(net, &fl, &res))
162 res.r = NULL;
163#endif
164
165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
167 return NULL; 172 return NULL;
168 if (res.type != RTN_LOCAL) 173 if (res.type != RTN_LOCAL)
169 goto out; 174 goto out;
170 dev = FIB_RES_DEV(res); 175 dev = FIB_RES_DEV(res);
171 176
172 if (dev) 177 if (dev && devref)
173 dev_hold(dev); 178 dev_hold(dev);
174out: 179out:
175 fib_res_put(&res); 180 fib_res_put(&res);
176 return dev; 181 return dev;
177} 182}
178EXPORT_SYMBOL(ip_dev_find); 183EXPORT_SYMBOL(__ip_dev_find);
179 184
180/* 185/*
181 * Find address type as if only "dev" was present in the system. If 186 * Find address type as if only "dev" was present in the system. If
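A hypothetical caller of the new helper, assuming its declaration is exported through linux/inetdevice.h as part of the same series. With devref=false no reference is taken, so the call must sit inside an RCU read-side section; with devref=true the caller owns a reference and has to drop it with dev_put():

#include <linux/inetdevice.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>

/* Returns true if @addr is configured as a local address in @net,
 * without taking a device reference. */
static bool addr_is_local(struct net *net, __be32 addr)
{
        bool local;

        rcu_read_lock();
        local = __ip_dev_find(net, addr, false) != NULL;  /* no dev_hold() performed */
        rcu_read_unlock();

        return local;
}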
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4a8e370862bc..a96e5ec211a0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -186,9 +186,7 @@ static inline struct tnode *node_parent_rcu(struct node *node)
186{ 186{
187 struct tnode *ret = node_parent(node); 187 struct tnode *ret = node_parent(node);
188 188
189 return rcu_dereference_check(ret, 189 return rcu_dereference_rtnl(ret);
190 rcu_read_lock_held() ||
191 lockdep_rtnl_is_held());
192} 190}
193 191
194/* Same as rcu_assign_pointer 192/* Same as rcu_assign_pointer
@@ -211,9 +209,7 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
211{ 209{
212 struct node *ret = tnode_get_child(tn, i); 210 struct node *ret = tnode_get_child(tn, i);
213 211
214 return rcu_dereference_check(ret, 212 return rcu_dereference_rtnl(ret);
215 rcu_read_lock_held() ||
216 lockdep_rtnl_is_held());
217} 213}
218 214
219static inline int tnode_child_length(const struct tnode *tn) 215static inline int tnode_child_length(const struct tnode *tn)
@@ -459,8 +455,8 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
459 tn->empty_children = 1<<bits; 455 tn->empty_children = 1<<bits;
460 } 456 }
461 457
462 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), 458 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
463 (unsigned long) (sizeof(struct node) << bits)); 459 sizeof(struct node) << bits);
464 return tn; 460 return tn;
465} 461}
466 462
@@ -609,11 +605,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
609 605
610 /* Keep root node larger */ 606 /* Keep root node larger */
611 607
612 if (!node_parent((struct node*) tn)) { 608 if (!node_parent((struct node *)tn)) {
613 inflate_threshold_use = inflate_threshold_root; 609 inflate_threshold_use = inflate_threshold_root;
614 halve_threshold_use = halve_threshold_root; 610 halve_threshold_use = halve_threshold_root;
615 } 611 } else {
616 else {
617 inflate_threshold_use = inflate_threshold; 612 inflate_threshold_use = inflate_threshold;
618 halve_threshold_use = halve_threshold; 613 halve_threshold_use = halve_threshold;
619 } 614 }
@@ -639,7 +634,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
639 check_tnode(tn); 634 check_tnode(tn);
640 635
641 /* Return if at least one inflate is run */ 636 /* Return if at least one inflate is run */
642 if( max_work != MAX_WORK) 637 if (max_work != MAX_WORK)
643 return (struct node *) tn; 638 return (struct node *) tn;
644 639
645 /* 640 /*
@@ -966,9 +961,7 @@ fib_find_node(struct trie *t, u32 key)
966 struct node *n; 961 struct node *n;
967 962
968 pos = 0; 963 pos = 0;
969 n = rcu_dereference_check(t->trie, 964 n = rcu_dereference_rtnl(t->trie);
970 rcu_read_lock_held() ||
971 lockdep_rtnl_is_held());
972 965
973 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 966 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
974 tn = (struct tnode *) n; 967 tn = (struct tnode *) n;
@@ -1748,16 +1741,14 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1748 1741
1749 /* Node empty, walk back up to parent */ 1742 /* Node empty, walk back up to parent */
1750 c = (struct node *) p; 1743 c = (struct node *) p;
1751 } while ( (p = node_parent_rcu(c)) != NULL); 1744 } while ((p = node_parent_rcu(c)) != NULL);
1752 1745
1753 return NULL; /* Root of trie */ 1746 return NULL; /* Root of trie */
1754} 1747}
1755 1748
1756static struct leaf *trie_firstleaf(struct trie *t) 1749static struct leaf *trie_firstleaf(struct trie *t)
1757{ 1750{
1758 struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, 1751 struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
1759 rcu_read_lock_held() ||
1760 lockdep_rtnl_is_held());
1761 1752
1762 if (!n) 1753 if (!n)
1763 return NULL; 1754 return NULL;
@@ -2043,14 +2034,14 @@ struct fib_trie_iter {
2043 struct seq_net_private p; 2034 struct seq_net_private p;
2044 struct fib_table *tb; 2035 struct fib_table *tb;
2045 struct tnode *tnode; 2036 struct tnode *tnode;
2046 unsigned index; 2037 unsigned int index;
2047 unsigned depth; 2038 unsigned int depth;
2048}; 2039};
2049 2040
2050static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 2041static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2051{ 2042{
2052 struct tnode *tn = iter->tnode; 2043 struct tnode *tn = iter->tnode;
2053 unsigned cindex = iter->index; 2044 unsigned int cindex = iter->index;
2054 struct tnode *p; 2045 struct tnode *p;
2055 2046
2056 /* A single entry routing table */ 2047 /* A single entry routing table */
@@ -2159,7 +2150,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2159 */ 2150 */
2160static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 2151static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2161{ 2152{
2162 unsigned i, max, pointers, bytes, avdepth; 2153 unsigned int i, max, pointers, bytes, avdepth;
2163 2154
2164 if (stat->leaves) 2155 if (stat->leaves)
2165 avdepth = stat->totdepth*100 / stat->leaves; 2156 avdepth = stat->totdepth*100 / stat->leaves;
@@ -2356,7 +2347,8 @@ static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2356 2347
2357static void seq_indent(struct seq_file *seq, int n) 2348static void seq_indent(struct seq_file *seq, int n)
2358{ 2349{
2359 while (n-- > 0) seq_puts(seq, " "); 2350 while (n-- > 0)
2351 seq_puts(seq, " ");
2360} 2352}
2361 2353
2362static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2354static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
@@ -2388,7 +2380,7 @@ static const char *const rtn_type_names[__RTN_MAX] = {
2388 [RTN_XRESOLVE] = "XRESOLVE", 2380 [RTN_XRESOLVE] = "XRESOLVE",
2389}; 2381};
2390 2382
2391static inline const char *rtn_type(char *buf, size_t len, unsigned t) 2383static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2392{ 2384{
2393 if (t < __RTN_MAX && rtn_type_names[t]) 2385 if (t < __RTN_MAX && rtn_type_names[t])
2394 return rtn_type_names[t]; 2386 return rtn_type_names[t];
@@ -2544,13 +2536,12 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
2544 rcu_read_unlock(); 2536 rcu_read_unlock();
2545} 2537}
2546 2538
2547static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2539static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2548{ 2540{
2549 static unsigned type2flags[RTN_MAX + 1] = { 2541 unsigned int flags = 0;
2550 [7] = RTF_REJECT, [8] = RTF_REJECT,
2551 };
2552 unsigned flags = type2flags[type];
2553 2542
2543 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2544 flags = RTF_REJECT;
2554 if (fi && fi->fib_nh->nh_gw) 2545 if (fi && fi->fib_nh->nh_gw)
2555 flags |= RTF_GATEWAY; 2546 flags |= RTF_GATEWAY;
2556 if (mask == htonl(0xFFFFFFFF)) 2547 if (mask == htonl(0xFFFFFFFF))
@@ -2562,7 +2553,7 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2562/* 2553/*
2563 * This outputs /proc/net/route. 2554 * This outputs /proc/net/route.
2564 * The format of the file is not supposed to be changed 2555 * The format of the file is not supposed to be changed
2565 * and needs to be same as fib_hash output to avoid breaking 2556 * and needs to be same as fib_hash output to avoid breaking
2566 * legacy utilities 2557 * legacy utilities
2567 */ 2558 */
2568static int fib_route_seq_show(struct seq_file *seq, void *v) 2559static int fib_route_seq_show(struct seq_file *seq, void *v)
@@ -2587,7 +2578,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2587 2578
2588 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2579 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2589 const struct fib_info *fi = fa->fa_info; 2580 const struct fib_info *fi = fa->fa_info;
2590 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2581 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2591 int len; 2582 int len;
2592 2583
2593 if (fa->fa_type == RTN_BROADCAST 2584 if (fa->fa_type == RTN_BROADCAST
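The repeated rcu_dereference_check(..., rcu_read_lock_held() || lockdep_rtnl_is_held()) calls collapse into rcu_dereference_rtnl(), a helper from include/linux/rtnetlink.h. Its definition at this point is roughly the following, so the fib_trie hunks are a purely notational cleanup:

/* Paraphrased from include/linux/rtnetlink.h: valid when the pointer is
 * protected by either RCU or the RTNL mutex. */
#define rcu_dereference_rtnl(p)                                 \
        rcu_dereference_check(p, rcu_read_lock_held() ||        \
                                 lockdep_rtnl_is_held())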
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
new file mode 100644
index 000000000000..caea6885fdbd
--- /dev/null
+++ b/net/ipv4/gre.c
@@ -0,0 +1,151 @@
1/*
2 * GRE over IPv4 demultiplexer driver
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/kmod.h>
16#include <linux/skbuff.h>
17#include <linux/in.h>
18#include <linux/netdevice.h>
19#include <linux/version.h>
20#include <linux/spinlock.h>
21#include <net/protocol.h>
22#include <net/gre.h>
23
24
25static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
26static DEFINE_SPINLOCK(gre_proto_lock);
27
28int gre_add_protocol(const struct gre_protocol *proto, u8 version)
29{
30 if (version >= GREPROTO_MAX)
31 goto err_out;
32
33 spin_lock(&gre_proto_lock);
34 if (gre_proto[version])
35 goto err_out_unlock;
36
37 rcu_assign_pointer(gre_proto[version], proto);
38 spin_unlock(&gre_proto_lock);
39 return 0;
40
41err_out_unlock:
42 spin_unlock(&gre_proto_lock);
43err_out:
44 return -1;
45}
46EXPORT_SYMBOL_GPL(gre_add_protocol);
47
48int gre_del_protocol(const struct gre_protocol *proto, u8 version)
49{
50 if (version >= GREPROTO_MAX)
51 goto err_out;
52
53 spin_lock(&gre_proto_lock);
54 if (gre_proto[version] != proto)
55 goto err_out_unlock;
56 rcu_assign_pointer(gre_proto[version], NULL);
57 spin_unlock(&gre_proto_lock);
58 synchronize_rcu();
59 return 0;
60
61err_out_unlock:
62 spin_unlock(&gre_proto_lock);
63err_out:
64 return -1;
65}
66EXPORT_SYMBOL_GPL(gre_del_protocol);
67
68static int gre_rcv(struct sk_buff *skb)
69{
70 const struct gre_protocol *proto;
71 u8 ver;
72 int ret;
73
74 if (!pskb_may_pull(skb, 12))
75 goto drop;
76
77 ver = skb->data[1]&0x7f;
78 if (ver >= GREPROTO_MAX)
79 goto drop;
80
81 rcu_read_lock();
82 proto = rcu_dereference(gre_proto[ver]);
83 if (!proto || !proto->handler)
84 goto drop_unlock;
85 ret = proto->handler(skb);
86 rcu_read_unlock();
87 return ret;
88
89drop_unlock:
90 rcu_read_unlock();
91drop:
92 kfree_skb(skb);
93 return NET_RX_DROP;
94}
95
96static void gre_err(struct sk_buff *skb, u32 info)
97{
98 const struct gre_protocol *proto;
99 u8 ver;
100
101 if (!pskb_may_pull(skb, 12))
102 goto drop;
103
104 ver = skb->data[1]&0x7f;
105 if (ver >= GREPROTO_MAX)
106 goto drop;
107
108 rcu_read_lock();
109 proto = rcu_dereference(gre_proto[ver]);
110 if (!proto || !proto->err_handler)
111 goto drop_unlock;
112 proto->err_handler(skb, info);
113 rcu_read_unlock();
114 return;
115
116drop_unlock:
117 rcu_read_unlock();
118drop:
119 kfree_skb(skb);
120}
121
122static const struct net_protocol net_gre_protocol = {
123 .handler = gre_rcv,
124 .err_handler = gre_err,
125 .netns_ok = 1,
126};
127
128static int __init gre_init(void)
129{
130 pr_info("GRE over IPv4 demultiplexor driver");
131
132 if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
133 pr_err("gre: can't add protocol\n");
134 return -EAGAIN;
135 }
136
137 return 0;
138}
139
140static void __exit gre_exit(void)
141{
142 inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
143}
144
145module_init(gre_init);
146module_exit(gre_exit);
147
148MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
149MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
150MODULE_LICENSE("GPL");
151
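A hypothetical module registering against the demultiplexer above, claiming GRE version 1 (the slot the PPTP work targets; ip_gre itself takes GREPROTO_CISCO further down). Only struct gre_protocol, gre_add_protocol() and gre_del_protocol() come from the new net/gre.h; the handler names and the GREPROTO_PPTP constant are assumptions about that header:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/gre.h>

static int my_gre1_rcv(struct sk_buff *skb)
{
        /* A real handler would decapsulate; just consume the packet here. */
        kfree_skb(skb);
        return 0;
}

static void my_gre1_err(struct sk_buff *skb, u32 info)
{
        /* ICMP errors for GRE version 1 would be handled here. */
}

static const struct gre_protocol my_gre1_proto = {
        .handler        = my_gre1_rcv,
        .err_handler    = my_gre1_err,
};

static int __init my_gre1_init(void)
{
        if (gre_add_protocol(&my_gre1_proto, GREPROTO_PPTP) < 0)
                return -EAGAIN;         /* slot already taken or version out of range */
        return 0;
}

static void __exit my_gre1_exit(void)
{
        gre_del_protocol(&my_gre1_proto, GREPROTO_PPTP);
}

module_init(my_gre1_init);
module_exit(my_gre1_exit);
MODULE_LICENSE("GPL");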
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a0d847c7cba5..96bc7f9475a3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
379 inet->tos = ip_hdr(skb)->tos; 379 inet->tos = ip_hdr(skb)->tos;
380 daddr = ipc.addr = rt->rt_src; 380 daddr = ipc.addr = rt->rt_src;
381 ipc.opt = NULL; 381 ipc.opt = NULL;
382 ipc.shtx.flags = 0; 382 ipc.tx_flags = 0;
383 if (icmp_param->replyopts.optlen) { 383 if (icmp_param->replyopts.optlen) {
384 ipc.opt = &icmp_param->replyopts; 384 ipc.opt = &icmp_param->replyopts;
385 if (ipc.opt->srr) 385 if (ipc.opt->srr)
@@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
538 inet_sk(sk)->tos = tos; 538 inet_sk(sk)->tos = tos;
539 ipc.addr = iph->saddr; 539 ipc.addr = iph->saddr;
540 ipc.opt = &icmp_param.replyopts; 540 ipc.opt = &icmp_param.replyopts;
541 ipc.shtx.flags = 0; 541 ipc.tx_flags = 0;
542 542
543 { 543 {
544 struct flowi fl = { 544 struct flowi fl = {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e5fa2ddce320..ba8042665849 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -425,7 +425,7 @@ static int inet_diag_bc_run(const void *bc, int len,
425 bc += op->no; 425 bc += op->no;
426 } 426 }
427 } 427 }
428 return (len == 0); 428 return len == 0;
429} 429}
430 430
431static int valid_cc(const void *bc, int len, int cc) 431static int valid_cc(const void *bc, int len, int cc)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b7c41654dde5..168440834ade 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -116,11 +116,11 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
116 struct ip4_create_arg *arg = a; 116 struct ip4_create_arg *arg = a;
117 117
118 qp = container_of(q, struct ipq, q); 118 qp = container_of(q, struct ipq, q);
119 return (qp->id == arg->iph->id && 119 return qp->id == arg->iph->id &&
120 qp->saddr == arg->iph->saddr && 120 qp->saddr == arg->iph->saddr &&
121 qp->daddr == arg->iph->daddr && 121 qp->daddr == arg->iph->daddr &&
122 qp->protocol == arg->iph->protocol && 122 qp->protocol == arg->iph->protocol &&
123 qp->user == arg->user); 123 qp->user == arg->user;
124} 124}
125 125
126/* Memory Tracking Functions. */ 126/* Memory Tracking Functions. */
@@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
542 /* If the first fragment is fragmented itself, we split 542 /* If the first fragment is fragmented itself, we split
543 * it to two chunks: the first with data and paged part 543 * it to two chunks: the first with data and paged part
544 * and the second, holding only fragments. */ 544 * and the second, holding only fragments. */
545 if (skb_has_frags(head)) { 545 if (skb_has_frag_list(head)) {
546 struct sk_buff *clone; 546 struct sk_buff *clone;
547 int i, plen = 0; 547 int i, plen = 0;
548 548
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 35c93e8b6a46..fbe2c473a06a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
45#include <net/netns/generic.h> 45#include <net/netns/generic.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/gre.h>
47 48
48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 49#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -63,13 +64,13 @@
63 We cannot track such dead loops during route installation, 64 We cannot track such dead loops during route installation,
 64 it is an infeasible task. The most general solution would be 65 it is an infeasible task. The most general solution would be
65 to keep skb->encapsulation counter (sort of local ttl), 66 to keep skb->encapsulation counter (sort of local ttl),
66 and silently drop packet when it expires. It is the best 67 and silently drop packet when it expires. It is a good
 67 solution, but it supposes maintaining a new variable in ALL 68 solution, but it supposes maintaining a new variable in ALL
68 skb, even if no tunneling is used. 69 skb, even if no tunneling is used.
69 70
70 Current solution: HARD_TX_LOCK lock breaks dead loops. 71 Current solution: xmit_recursion breaks dead loops. This is a percpu
71 72 counter, since when we enter the first ndo_xmit(), cpu migration is
72 73 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
73 74
74 2. Networking dead loops would not kill routers, but would really 75 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case, 76 kill network. IP hop limit plays role of "t->recursion" in this case,
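The updated comment refers to the per-cpu recursion counter in net/core/dev.c rather than HARD_TX_LOCK. A rough sketch of that mechanism; the counter name follows the comment, while the limit value and the wrapper function are illustrative:

#include <linux/netdevice.h>
#include <linux/percpu.h>

#define RECURSION_LIMIT 3               /* illustrative value */

static DEFINE_PER_CPU(int, xmit_recursion);

static netdev_tx_t xmit_one(struct sk_buff *skb, struct net_device *dev)
{
        netdev_tx_t rc = NETDEV_TX_BUSY;

        /* A tunnel routed over itself re-enters here; refuse to recurse
         * past the limit instead of looping until the stack is exhausted. */
        if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
                return rc;

        __this_cpu_inc(xmit_recursion);
        rc = dev->netdev_ops->ndo_start_xmit(skb, dev);
        __this_cpu_dec(xmit_recursion);

        return rc;
}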
@@ -128,7 +129,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev);
128 129
129static int ipgre_net_id __read_mostly; 130static int ipgre_net_id __read_mostly;
130struct ipgre_net { 131struct ipgre_net {
131 struct ip_tunnel *tunnels[4][HASH_SIZE]; 132 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
132 133
133 struct net_device *fb_tunnel_dev; 134 struct net_device *fb_tunnel_dev;
134}; 135};
@@ -158,13 +159,40 @@ struct ipgre_net {
158#define tunnels_l tunnels[1] 159#define tunnels_l tunnels[1]
159#define tunnels_wc tunnels[0] 160#define tunnels_wc tunnels[0]
160/* 161/*
161 * Locking : hash tables are protected by RCU and a spinlock 162 * Locking : hash tables are protected by RCU and RTNL
162 */ 163 */
163static DEFINE_SPINLOCK(ipgre_lock);
164 164
165#define for_each_ip_tunnel_rcu(start) \ 165#define for_each_ip_tunnel_rcu(start) \
166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
167 167
168/* often modified stats are per cpu, other are shared (netdev->stats) */
169struct pcpu_tstats {
170 unsigned long rx_packets;
171 unsigned long rx_bytes;
172 unsigned long tx_packets;
173 unsigned long tx_bytes;
174};
175
176static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
177{
178 struct pcpu_tstats sum = { 0 };
179 int i;
180
181 for_each_possible_cpu(i) {
182 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
183
184 sum.rx_packets += tstats->rx_packets;
185 sum.rx_bytes += tstats->rx_bytes;
186 sum.tx_packets += tstats->tx_packets;
187 sum.tx_bytes += tstats->tx_bytes;
188 }
189 dev->stats.rx_packets = sum.rx_packets;
190 dev->stats.rx_bytes = sum.rx_bytes;
191 dev->stats.tx_packets = sum.tx_packets;
192 dev->stats.tx_bytes = sum.tx_bytes;
193 return &dev->stats;
194}
195
168/* Given src, dst and key, find appropriate for input tunnel. */ 196/* Given src, dst and key, find appropriate for input tunnel. */
169 197
170static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, 198static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
@@ -173,8 +201,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
173{ 201{
174 struct net *net = dev_net(dev); 202 struct net *net = dev_net(dev);
175 int link = dev->ifindex; 203 int link = dev->ifindex;
176 unsigned h0 = HASH(remote); 204 unsigned int h0 = HASH(remote);
177 unsigned h1 = HASH(key); 205 unsigned int h1 = HASH(key);
178 struct ip_tunnel *t, *cand = NULL; 206 struct ip_tunnel *t, *cand = NULL;
179 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 207 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 208 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
@@ -289,13 +317,13 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
289 return NULL; 317 return NULL;
290} 318}
291 319
292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, 320static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms) 321 struct ip_tunnel_parm *parms)
294{ 322{
295 __be32 remote = parms->iph.daddr; 323 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr; 324 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key; 325 __be32 key = parms->i_key;
298 unsigned h = HASH(key); 326 unsigned int h = HASH(key);
299 int prio = 0; 327 int prio = 0;
300 328
301 if (local) 329 if (local)
@@ -308,7 +336,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
308 return &ign->tunnels[prio][h]; 336 return &ign->tunnels[prio][h];
309} 337}
310 338
311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, 339static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
312 struct ip_tunnel *t) 340 struct ip_tunnel *t)
313{ 341{
314 return __ipgre_bucket(ign, &t->parms); 342 return __ipgre_bucket(ign, &t->parms);
@@ -316,23 +344,22 @@ static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
316 344
317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) 345static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
318{ 346{
319 struct ip_tunnel **tp = ipgre_bucket(ign, t); 347 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
320 348
321 spin_lock_bh(&ipgre_lock); 349 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
322 t->next = *tp;
323 rcu_assign_pointer(*tp, t); 350 rcu_assign_pointer(*tp, t);
324 spin_unlock_bh(&ipgre_lock);
325} 351}
326 352
327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 353static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
328{ 354{
329 struct ip_tunnel **tp; 355 struct ip_tunnel __rcu **tp;
330 356 struct ip_tunnel *iter;
331 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 357
332 if (t == *tp) { 358 for (tp = ipgre_bucket(ign, t);
333 spin_lock_bh(&ipgre_lock); 359 (iter = rtnl_dereference(*tp)) != NULL;
334 *tp = t->next; 360 tp = &iter->next) {
335 spin_unlock_bh(&ipgre_lock); 361 if (t == iter) {
362 rcu_assign_pointer(*tp, t->next);
336 break; 363 break;
337 } 364 }
338 } 365 }
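The link/unlink rewrite drops the ipgre_lock spinlock: writers are already serialized by RTNL, so rtnl_dereference() is enough on their side, and rcu_assign_pointer() publishes each update for the lockless readers walking the chain via for_each_ip_tunnel_rcu(). A generic sketch of that pattern with illustrative types:

#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>

struct chain_node {
        struct chain_node __rcu *next;
        int                     key;
};

static struct chain_node __rcu *chain_head;

/* Writer side, caller holds RTNL: no extra lock needed. */
static void chain_push(struct chain_node *n)
{
        rcu_assign_pointer(n->next, rtnl_dereference(chain_head));
        rcu_assign_pointer(chain_head, n);
}

/* Reader side, caller holds rcu_read_lock(). */
static struct chain_node *chain_find(int key)
{
        struct chain_node *n;

        for (n = rcu_dereference(chain_head); n; n = rcu_dereference(n->next))
                if (n->key == key)
                        return n;
        return NULL;
}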
@@ -346,10 +373,13 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
346 __be32 local = parms->iph.saddr; 373 __be32 local = parms->iph.saddr;
347 __be32 key = parms->i_key; 374 __be32 key = parms->i_key;
348 int link = parms->link; 375 int link = parms->link;
349 struct ip_tunnel *t, **tp; 376 struct ip_tunnel *t;
377 struct ip_tunnel __rcu **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 378 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351 379
352 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) 380 for (tp = __ipgre_bucket(ign, parms);
381 (t = rtnl_dereference(*tp)) != NULL;
382 tp = &t->next)
353 if (local == t->parms.iph.saddr && 383 if (local == t->parms.iph.saddr &&
354 remote == t->parms.iph.daddr && 384 remote == t->parms.iph.daddr &&
355 key == t->parms.i_key && 385 key == t->parms.i_key &&
@@ -360,7 +390,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
360 return t; 390 return t;
361} 391}
362 392
363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, 393static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
364 struct ip_tunnel_parm *parms, int create) 394 struct ip_tunnel_parm *parms, int create)
365{ 395{
366 struct ip_tunnel *t, *nt; 396 struct ip_tunnel *t, *nt;
@@ -582,7 +612,7 @@ static int ipgre_rcv(struct sk_buff *skb)
582 if ((tunnel = ipgre_tunnel_lookup(skb->dev, 612 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
583 iph->saddr, iph->daddr, key, 613 iph->saddr, iph->daddr, key,
584 gre_proto))) { 614 gre_proto))) {
585 struct net_device_stats *stats = &tunnel->dev->stats; 615 struct pcpu_tstats *tstats;
586 616
587 secpath_reset(skb); 617 secpath_reset(skb);
588 618
@@ -606,22 +636,22 @@ static int ipgre_rcv(struct sk_buff *skb)
606 /* Looped back packet, drop it! */ 636 /* Looped back packet, drop it! */
607 if (skb_rtable(skb)->fl.iif == 0) 637 if (skb_rtable(skb)->fl.iif == 0)
608 goto drop; 638 goto drop;
609 stats->multicast++; 639 tunnel->dev->stats.multicast++;
610 skb->pkt_type = PACKET_BROADCAST; 640 skb->pkt_type = PACKET_BROADCAST;
611 } 641 }
612#endif 642#endif
613 643
614 if (((flags&GRE_CSUM) && csum) || 644 if (((flags&GRE_CSUM) && csum) ||
615 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 645 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
616 stats->rx_crc_errors++; 646 tunnel->dev->stats.rx_crc_errors++;
617 stats->rx_errors++; 647 tunnel->dev->stats.rx_errors++;
618 goto drop; 648 goto drop;
619 } 649 }
620 if (tunnel->parms.i_flags&GRE_SEQ) { 650 if (tunnel->parms.i_flags&GRE_SEQ) {
621 if (!(flags&GRE_SEQ) || 651 if (!(flags&GRE_SEQ) ||
622 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 652 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
623 stats->rx_fifo_errors++; 653 tunnel->dev->stats.rx_fifo_errors++;
624 stats->rx_errors++; 654 tunnel->dev->stats.rx_errors++;
625 goto drop; 655 goto drop;
626 } 656 }
627 tunnel->i_seqno = seqno + 1; 657 tunnel->i_seqno = seqno + 1;
@@ -630,8 +660,8 @@ static int ipgre_rcv(struct sk_buff *skb)
630 /* Warning: All skb pointers will be invalidated! */ 660 /* Warning: All skb pointers will be invalidated! */
631 if (tunnel->dev->type == ARPHRD_ETHER) { 661 if (tunnel->dev->type == ARPHRD_ETHER) {
632 if (!pskb_may_pull(skb, ETH_HLEN)) { 662 if (!pskb_may_pull(skb, ETH_HLEN)) {
633 stats->rx_length_errors++; 663 tunnel->dev->stats.rx_length_errors++;
634 stats->rx_errors++; 664 tunnel->dev->stats.rx_errors++;
635 goto drop; 665 goto drop;
636 } 666 }
637 667
@@ -640,14 +670,20 @@ static int ipgre_rcv(struct sk_buff *skb)
640 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 670 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
641 } 671 }
642 672
643 skb_tunnel_rx(skb, tunnel->dev); 673 tstats = this_cpu_ptr(tunnel->dev->tstats);
674 tstats->rx_packets++;
675 tstats->rx_bytes += skb->len;
676
677 __skb_tunnel_rx(skb, tunnel->dev);
644 678
645 skb_reset_network_header(skb); 679 skb_reset_network_header(skb);
646 ipgre_ecn_decapsulate(iph, skb); 680 ipgre_ecn_decapsulate(iph, skb);
647 681
648 netif_rx(skb); 682 if (netif_rx(skb) == NET_RX_DROP)
683 tunnel->dev->stats.rx_dropped++;
684
649 rcu_read_unlock(); 685 rcu_read_unlock();
650 return(0); 686 return 0;
651 } 687 }
652 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 688 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
653 689
@@ -655,20 +691,19 @@ drop:
655 rcu_read_unlock(); 691 rcu_read_unlock();
656drop_nolock: 692drop_nolock:
657 kfree_skb(skb); 693 kfree_skb(skb);
658 return(0); 694 return 0;
659} 695}
660 696
661static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 697static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
662{ 698{
663 struct ip_tunnel *tunnel = netdev_priv(dev); 699 struct ip_tunnel *tunnel = netdev_priv(dev);
664 struct net_device_stats *stats = &dev->stats; 700 struct pcpu_tstats *tstats;
665 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
666 struct iphdr *old_iph = ip_hdr(skb); 701 struct iphdr *old_iph = ip_hdr(skb);
667 struct iphdr *tiph; 702 struct iphdr *tiph;
668 u8 tos; 703 u8 tos;
669 __be16 df; 704 __be16 df;
670 struct rtable *rt; /* Route to the other host */ 705 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */ 706 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */ 707 struct iphdr *iph; /* Our new IP header */
673 unsigned int max_headroom; /* The extra header space needed */ 708 unsigned int max_headroom; /* The extra header space needed */
674 int gre_hlen; 709 int gre_hlen;
@@ -690,7 +725,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
690 /* NBMA tunnel */ 725 /* NBMA tunnel */
691 726
692 if (skb_dst(skb) == NULL) { 727 if (skb_dst(skb) == NULL) {
693 stats->tx_fifo_errors++; 728 dev->stats.tx_fifo_errors++;
694 goto tx_error; 729 goto tx_error;
695 } 730 }
696 731
@@ -736,14 +771,20 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
736 } 771 }
737 772
738 { 773 {
739 struct flowi fl = { .oif = tunnel->parms.link, 774 struct flowi fl = {
740 .nl_u = { .ip4_u = 775 .oif = tunnel->parms.link,
741 { .daddr = dst, 776 .nl_u = {
742 .saddr = tiph->saddr, 777 .ip4_u = {
743 .tos = RT_TOS(tos) } }, 778 .daddr = dst,
744 .proto = IPPROTO_GRE }; 779 .saddr = tiph->saddr,
780 .tos = RT_TOS(tos)
781 }
782 },
783 .proto = IPPROTO_GRE
784 }
785;
745 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 786 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
746 stats->tx_carrier_errors++; 787 dev->stats.tx_carrier_errors++;
747 goto tx_error; 788 goto tx_error;
748 } 789 }
749 } 790 }
@@ -751,7 +792,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
751 792
752 if (tdev == dev) { 793 if (tdev == dev) {
753 ip_rt_put(rt); 794 ip_rt_put(rt);
754 stats->collisions++; 795 dev->stats.collisions++;
755 goto tx_error; 796 goto tx_error;
756 } 797 }
757 798
@@ -814,7 +855,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
814 dev->needed_headroom = max_headroom; 855 dev->needed_headroom = max_headroom;
815 if (!new_skb) { 856 if (!new_skb) {
816 ip_rt_put(rt); 857 ip_rt_put(rt);
817 txq->tx_dropped++; 858 dev->stats.tx_dropped++;
818 dev_kfree_skb(skb); 859 dev_kfree_skb(skb);
819 return NETDEV_TX_OK; 860 return NETDEV_TX_OK;
820 } 861 }
@@ -881,15 +922,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
881 } 922 }
882 923
883 nf_reset(skb); 924 nf_reset(skb);
884 925 tstats = this_cpu_ptr(dev->tstats);
885 IPTUNNEL_XMIT(); 926 __IPTUNNEL_XMIT(tstats, &dev->stats);
886 return NETDEV_TX_OK; 927 return NETDEV_TX_OK;
887 928
888tx_error_icmp: 929tx_error_icmp:
889 dst_link_failure(skb); 930 dst_link_failure(skb);
890 931
891tx_error: 932tx_error:
892 stats->tx_errors++; 933 dev->stats.tx_errors++;
893 dev_kfree_skb(skb); 934 dev_kfree_skb(skb);
894 return NETDEV_TX_OK; 935 return NETDEV_TX_OK;
895} 936}
@@ -909,13 +950,19 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
909 /* Guess output device to choose reasonable mtu and needed_headroom */ 950 /* Guess output device to choose reasonable mtu and needed_headroom */
910 951
911 if (iph->daddr) { 952 if (iph->daddr) {
912 struct flowi fl = { .oif = tunnel->parms.link, 953 struct flowi fl = {
913 .nl_u = { .ip4_u = 954 .oif = tunnel->parms.link,
914 { .daddr = iph->daddr, 955 .nl_u = {
915 .saddr = iph->saddr, 956 .ip4_u = {
916 .tos = RT_TOS(iph->tos) } }, 957 .daddr = iph->daddr,
917 .proto = IPPROTO_GRE }; 958 .saddr = iph->saddr,
959 .tos = RT_TOS(iph->tos)
960 }
961 },
962 .proto = IPPROTO_GRE
963 };
918 struct rtable *rt; 964 struct rtable *rt;
965
919 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 966 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
920 tdev = rt->dst.dev; 967 tdev = rt->dst.dev;
921 ip_rt_put(rt); 968 ip_rt_put(rt);
@@ -1012,7 +1059,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1012 break; 1059 break;
1013 } 1060 }
1014 } else { 1061 } else {
1015 unsigned nflags = 0; 1062 unsigned int nflags = 0;
1016 1063
1017 t = netdev_priv(dev); 1064 t = netdev_priv(dev);
1018 1065
@@ -1125,7 +1172,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1125 1172
1126static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 1173static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127 unsigned short type, 1174 unsigned short type,
1128 const void *daddr, const void *saddr, unsigned len) 1175 const void *daddr, const void *saddr, unsigned int len)
1129{ 1176{
1130 struct ip_tunnel *t = netdev_priv(dev); 1177 struct ip_tunnel *t = netdev_priv(dev);
1131 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1178 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
@@ -1167,13 +1214,19 @@ static int ipgre_open(struct net_device *dev)
1167 struct ip_tunnel *t = netdev_priv(dev); 1214 struct ip_tunnel *t = netdev_priv(dev);
1168 1215
1169 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1216 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1170 struct flowi fl = { .oif = t->parms.link, 1217 struct flowi fl = {
1171 .nl_u = { .ip4_u = 1218 .oif = t->parms.link,
1172 { .daddr = t->parms.iph.daddr, 1219 .nl_u = {
1173 .saddr = t->parms.iph.saddr, 1220 .ip4_u = {
1174 .tos = RT_TOS(t->parms.iph.tos) } }, 1221 .daddr = t->parms.iph.daddr,
1175 .proto = IPPROTO_GRE }; 1222 .saddr = t->parms.iph.saddr,
1223 .tos = RT_TOS(t->parms.iph.tos)
1224 }
1225 },
1226 .proto = IPPROTO_GRE
1227 };
1176 struct rtable *rt; 1228 struct rtable *rt;
1229
1177 if (ip_route_output_key(dev_net(dev), &rt, &fl)) 1230 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1178 return -EADDRNOTAVAIL; 1231 return -EADDRNOTAVAIL;
1179 dev = rt->dst.dev; 1232 dev = rt->dst.dev;
@@ -1213,12 +1266,19 @@ static const struct net_device_ops ipgre_netdev_ops = {
1213 .ndo_start_xmit = ipgre_tunnel_xmit, 1266 .ndo_start_xmit = ipgre_tunnel_xmit,
1214 .ndo_do_ioctl = ipgre_tunnel_ioctl, 1267 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1215 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1268 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1269 .ndo_get_stats = ipgre_get_stats,
1216}; 1270};
1217 1271
1272static void ipgre_dev_free(struct net_device *dev)
1273{
1274 free_percpu(dev->tstats);
1275 free_netdev(dev);
1276}
1277
1218static void ipgre_tunnel_setup(struct net_device *dev) 1278static void ipgre_tunnel_setup(struct net_device *dev)
1219{ 1279{
1220 dev->netdev_ops = &ipgre_netdev_ops; 1280 dev->netdev_ops = &ipgre_netdev_ops;
1221 dev->destructor = free_netdev; 1281 dev->destructor = ipgre_dev_free;
1222 1282
1223 dev->type = ARPHRD_IPGRE; 1283 dev->type = ARPHRD_IPGRE;
1224 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1284 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
@@ -1256,6 +1316,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
1256 } else 1316 } else
1257 dev->header_ops = &ipgre_header_ops; 1317 dev->header_ops = &ipgre_header_ops;
1258 1318
1319 dev->tstats = alloc_percpu(struct pcpu_tstats);
1320 if (!dev->tstats)
1321 return -ENOMEM;
1322
1259 return 0; 1323 return 0;
1260} 1324}
1261 1325
@@ -1274,14 +1338,13 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1274 tunnel->hlen = sizeof(struct iphdr) + 4; 1338 tunnel->hlen = sizeof(struct iphdr) + 4;
1275 1339
1276 dev_hold(dev); 1340 dev_hold(dev);
1277 ign->tunnels_wc[0] = tunnel; 1341 rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
1278} 1342}
1279 1343
1280 1344
1281static const struct net_protocol ipgre_protocol = { 1345static const struct gre_protocol ipgre_protocol = {
1282 .handler = ipgre_rcv, 1346 .handler = ipgre_rcv,
1283 .err_handler = ipgre_err, 1347 .err_handler = ipgre_err,
1284 .netns_ok = 1,
1285}; 1348};
1286 1349
1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) 1350static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
@@ -1291,11 +1354,13 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1291 for (prio = 0; prio < 4; prio++) { 1354 for (prio = 0; prio < 4; prio++) {
1292 int h; 1355 int h;
1293 for (h = 0; h < HASH_SIZE; h++) { 1356 for (h = 0; h < HASH_SIZE; h++) {
1294 struct ip_tunnel *t = ign->tunnels[prio][h]; 1357 struct ip_tunnel *t;
1358
1359 t = rtnl_dereference(ign->tunnels[prio][h]);
1295 1360
1296 while (t != NULL) { 1361 while (t != NULL) {
1297 unregister_netdevice_queue(t->dev, head); 1362 unregister_netdevice_queue(t->dev, head);
1298 t = t->next; 1363 t = rtnl_dereference(t->next);
1299 } 1364 }
1300 } 1365 }
1301 } 1366 }
@@ -1441,6 +1506,10 @@ static int ipgre_tap_init(struct net_device *dev)
1441 1506
1442 ipgre_tunnel_bind_dev(dev); 1507 ipgre_tunnel_bind_dev(dev);
1443 1508
1509 dev->tstats = alloc_percpu(struct pcpu_tstats);
1510 if (!dev->tstats)
1511 return -ENOMEM;
1512
1444 return 0; 1513 return 0;
1445} 1514}
1446 1515
@@ -1451,6 +1520,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
1451 .ndo_set_mac_address = eth_mac_addr, 1520 .ndo_set_mac_address = eth_mac_addr,
1452 .ndo_validate_addr = eth_validate_addr, 1521 .ndo_validate_addr = eth_validate_addr,
1453 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1522 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1523 .ndo_get_stats = ipgre_get_stats,
1454}; 1524};
1455 1525
1456static void ipgre_tap_setup(struct net_device *dev) 1526static void ipgre_tap_setup(struct net_device *dev)
@@ -1459,7 +1529,7 @@ static void ipgre_tap_setup(struct net_device *dev)
1459 ether_setup(dev); 1529 ether_setup(dev);
1460 1530
1461 dev->netdev_ops = &ipgre_tap_netdev_ops; 1531 dev->netdev_ops = &ipgre_tap_netdev_ops;
1462 dev->destructor = free_netdev; 1532 dev->destructor = ipgre_dev_free;
1463 1533
1464 dev->iflink = 0; 1534 dev->iflink = 0;
1465 dev->features |= NETIF_F_NETNS_LOCAL; 1535 dev->features |= NETIF_F_NETNS_LOCAL;
@@ -1487,6 +1557,10 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nla
1487 if (!tb[IFLA_MTU]) 1557 if (!tb[IFLA_MTU])
1488 dev->mtu = mtu; 1558 dev->mtu = mtu;
1489 1559
1560 /* Can use a lockless transmit, unless we generate output sequences */
1561 if (!(nt->parms.o_flags & GRE_SEQ))
1562 dev->features |= NETIF_F_LLTX;
1563
1490 err = register_netdevice(dev); 1564 err = register_netdevice(dev);
1491 if (err) 1565 if (err)
1492 goto out; 1566 goto out;
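
The NETIF_F_LLTX hunk above is conditional because lockless transmit skips the per-queue tx lock, which is only safe while ndo_start_xmit() touches no shared writable tunnel state; GRE output sequencing keeps a per-tunnel counter, so sequenced tunnels retain the lock. A minimal sketch of that decision, assuming GRE_SEQ comes from linux/if_tunnel.h and that the shared state in question is the tunnel's output seqno as in ip_gre:

#include <linux/netdevice.h>
#include <linux/if_tunnel.h>	/* GRE_SEQ (assumed location) */

/* Sketch only: enable lockless xmit when the xmit path is free of shared
 * writable state. With GRE_SEQ set, the GRE xmit path bumps a per-tunnel
 * sequence counter, so the default per-queue tx lock is kept.
 */
static void sketch_set_lltx(struct net_device *dev, __be16 o_flags)
{
	if (!(o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;
}
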
@@ -1522,7 +1596,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1522 t = nt; 1596 t = nt;
1523 1597
1524 if (dev->type != ARPHRD_ETHER) { 1598 if (dev->type != ARPHRD_ETHER) {
1525 unsigned nflags = 0; 1599 unsigned int nflags = 0;
1526 1600
1527 if (ipv4_is_multicast(p.iph.daddr)) 1601 if (ipv4_is_multicast(p.iph.daddr))
1528 nflags = IFF_BROADCAST; 1602 nflags = IFF_BROADCAST;
@@ -1663,7 +1737,7 @@ static int __init ipgre_init(void)
1663 if (err < 0) 1737 if (err < 0)
1664 return err; 1738 return err;
1665 1739
1666 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); 1740 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1667 if (err < 0) { 1741 if (err < 0) {
1668 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1742 printk(KERN_INFO "ipgre init: can't add protocol\n");
1669 goto add_proto_failed; 1743 goto add_proto_failed;
@@ -1683,7 +1757,7 @@ out:
1683tap_ops_failed: 1757tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops); 1758 rtnl_link_unregister(&ipgre_link_ops);
1685rtnl_link_failed: 1759rtnl_link_failed:
1686 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1760 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1687add_proto_failed: 1761add_proto_failed:
1688 unregister_pernet_device(&ipgre_net_ops); 1762 unregister_pernet_device(&ipgre_net_ops);
1689 goto out; 1763 goto out;
@@ -1693,7 +1767,7 @@ static void __exit ipgre_fini(void)
1693{ 1767{
1694 rtnl_link_unregister(&ipgre_tap_ops); 1768 rtnl_link_unregister(&ipgre_tap_ops);
1695 rtnl_link_unregister(&ipgre_link_ops); 1769 rtnl_link_unregister(&ipgre_link_ops);
1696 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1770 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1697 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1771 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1698 unregister_pernet_device(&ipgre_net_ops); 1772 unregister_pernet_device(&ipgre_net_ops);
1699} 1773}
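
With the hunks above, ip_gre no longer claims IPPROTO_GRE through inet_add_protocol(); it registers with the shared GRE demultiplexer (the new net/ipv4/gre.c in the diffstat) under the GREPROTO_CISCO version slot. A minimal sketch of what a consumer of that demux looks like; the header location and the handler prototypes (mirroring ipgre_rcv()/ipgre_err()) are assumptions, only the member and function names are taken from the hunks above:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/gre.h>	/* assumed location of struct gre_protocol, GREPROTO_CISCO */

/* Sketch only: a GRE version handler plugged into the new demultiplexer. */
static int sketch_gre_rcv(struct sk_buff *skb)
{
	kfree_skb(skb);		/* a real handler would decapsulate here */
	return 0;
}

static void sketch_gre_err(struct sk_buff *skb, u32 info)
{
	/* react to ICMP errors reported against this GRE version */
}

static const struct gre_protocol sketch_gre_proto = {
	.handler	= sketch_gre_rcv,
	.err_handler	= sketch_gre_err,
};

static int __init sketch_gre_init(void)
{
	/* one handler slot per GRE version, as in ipgre_init() above */
	return gre_add_protocol(&sketch_gre_proto, GREPROTO_CISCO);
}

static void __exit sketch_gre_exit(void)
{
	if (gre_del_protocol(&sketch_gre_proto, GREPROTO_CISCO) < 0)
		pr_info("sketch: can't remove GRE protocol\n");
}

module_init(sketch_gre_init);
module_exit(sketch_gre_exit);
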
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ba9836c488ed..1906fa35860c 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -466,7 +466,7 @@ error:
466 } 466 }
467 return -EINVAL; 467 return -EINVAL;
468} 468}
469 469EXPORT_SYMBOL(ip_options_compile);
470 470
471/* 471/*
472 * Undo all the changes done by ip_options_compile(). 472 * Undo all the changes done by ip_options_compile().
@@ -646,3 +646,4 @@ int ip_options_rcv_srr(struct sk_buff *skb)
646 } 646 }
647 return 0; 647 return 0;
648} 648}
649EXPORT_SYMBOL(ip_options_rcv_srr);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7649d7750075..439d2a34ee44 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
487 * LATER: this step can be merged to real generation of fragments, 487 * LATER: this step can be merged to real generation of fragments,
488 * we can switch to copy when see the first bad fragment. 488 * we can switch to copy when see the first bad fragment.
489 */ 489 */
490 if (skb_has_frags(skb)) { 490 if (skb_has_frag_list(skb)) {
491 struct sk_buff *frag, *frag2; 491 struct sk_buff *frag, *frag2;
492 int first_len = skb_pagelen(skb); 492 int first_len = skb_pagelen(skb);
493 493
@@ -844,10 +844,9 @@ int ip_append_data(struct sock *sk,
844 inet->cork.length = 0; 844 inet->cork.length = 0;
845 sk->sk_sndmsg_page = NULL; 845 sk->sk_sndmsg_page = NULL;
846 sk->sk_sndmsg_off = 0; 846 sk->sk_sndmsg_off = 0;
847 if ((exthdrlen = rt->dst.header_len) != 0) { 847 exthdrlen = rt->dst.header_len;
848 length += exthdrlen; 848 length += exthdrlen;
849 transhdrlen += exthdrlen; 849 transhdrlen += exthdrlen;
850 }
851 } else { 850 } else {
852 rt = (struct rtable *)inet->cork.dst; 851 rt = (struct rtable *)inet->cork.dst;
853 if (inet->cork.flags & IPCORK_OPT) 852 if (inet->cork.flags & IPCORK_OPT)
@@ -934,16 +933,19 @@ alloc_new_skb:
934 !(rt->dst.dev->features&NETIF_F_SG)) 933 !(rt->dst.dev->features&NETIF_F_SG))
935 alloclen = mtu; 934 alloclen = mtu;
936 else 935 else
937 alloclen = datalen + fragheaderlen; 936 alloclen = fraglen;
938 937
939 /* The last fragment gets additional space at tail. 938 /* The last fragment gets additional space at tail.
940 * Note, with MSG_MORE we overallocate on fragments, 939 * Note, with MSG_MORE we overallocate on fragments,
941 * because we have no idea what fragment will be 940 * because we have no idea what fragment will be
942 * the last. 941 * the last.
943 */ 942 */
944 if (datalen == length + fraggap) 943 if (datalen == length + fraggap) {
945 alloclen += rt->dst.trailer_len; 944 alloclen += rt->dst.trailer_len;
946 945 /* make sure mtu is not reached */
946 if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
947 datalen -= ALIGN(rt->dst.trailer_len, 8);
948 }
947 if (transhdrlen) { 949 if (transhdrlen) {
948 skb = sock_alloc_send_skb(sk, 950 skb = sock_alloc_send_skb(sk,
949 alloclen + hh_len + 15, 951 alloclen + hh_len + 15,
@@ -960,7 +962,7 @@ alloc_new_skb:
960 else 962 else
961 /* only the initial fragment is 963 /* only the initial fragment is
962 time stamped */ 964 time stamped */
963 ipc->shtx.flags = 0; 965 ipc->tx_flags = 0;
964 } 966 }
965 if (skb == NULL) 967 if (skb == NULL)
966 goto error; 968 goto error;
@@ -971,7 +973,7 @@ alloc_new_skb:
971 skb->ip_summed = csummode; 973 skb->ip_summed = csummode;
972 skb->csum = 0; 974 skb->csum = 0;
973 skb_reserve(skb, hh_len); 975 skb_reserve(skb, hh_len);
974 *skb_tx(skb) = ipc->shtx; 976 skb_shinfo(skb)->tx_flags = ipc->tx_flags;
975 977
976 /* 978 /*
977 * Find where to start putting bytes. 979 * Find where to start putting bytes.
@@ -1391,7 +1393,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1391 1393
1392 daddr = ipc.addr = rt->rt_src; 1394 daddr = ipc.addr = rt->rt_src;
1393 ipc.opt = NULL; 1395 ipc.opt = NULL;
1394 ipc.shtx.flags = 0; 1396 ipc.tx_flags = 0;
1395 1397
1396 if (replyopts.opt.optlen) { 1398 if (replyopts.opt.optlen) {
1397 ipc.opt = &replyopts.opt; 1399 ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ec036731a70b..6ad46c28ede2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -122,31 +122,59 @@
122 122
123static int ipip_net_id __read_mostly; 123static int ipip_net_id __read_mostly;
124struct ipip_net { 124struct ipip_net {
125 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 125 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
126 struct ip_tunnel *tunnels_r[HASH_SIZE]; 126 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
127 struct ip_tunnel *tunnels_l[HASH_SIZE]; 127 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
128 struct ip_tunnel *tunnels_wc[1]; 128 struct ip_tunnel __rcu *tunnels_wc[1];
129 struct ip_tunnel **tunnels[4]; 129 struct ip_tunnel __rcu **tunnels[4];
130 130
131 struct net_device *fb_tunnel_dev; 131 struct net_device *fb_tunnel_dev;
132}; 132};
133 133
134static void ipip_tunnel_init(struct net_device *dev); 134static int ipip_tunnel_init(struct net_device *dev);
135static void ipip_tunnel_setup(struct net_device *dev); 135static void ipip_tunnel_setup(struct net_device *dev);
136static void ipip_dev_free(struct net_device *dev);
136 137
137/* 138/*
138 * Locking : hash tables are protected by RCU and a spinlock 139 * Locking : hash tables are protected by RCU and RTNL
139 */ 140 */
140static DEFINE_SPINLOCK(ipip_lock);
141 141
142#define for_each_ip_tunnel_rcu(start) \ 142#define for_each_ip_tunnel_rcu(start) \
143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
144 144
145/* often modified stats are per cpu, other are shared (netdev->stats) */
146struct pcpu_tstats {
147 unsigned long rx_packets;
148 unsigned long rx_bytes;
149 unsigned long tx_packets;
150 unsigned long tx_bytes;
151};
152
153static struct net_device_stats *ipip_get_stats(struct net_device *dev)
154{
155 struct pcpu_tstats sum = { 0 };
156 int i;
157
158 for_each_possible_cpu(i) {
159 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
160
161 sum.rx_packets += tstats->rx_packets;
162 sum.rx_bytes += tstats->rx_bytes;
163 sum.tx_packets += tstats->tx_packets;
164 sum.tx_bytes += tstats->tx_bytes;
165 }
166 dev->stats.rx_packets = sum.rx_packets;
167 dev->stats.rx_bytes = sum.rx_bytes;
168 dev->stats.tx_packets = sum.tx_packets;
169 dev->stats.tx_bytes = sum.tx_bytes;
170 return &dev->stats;
171}
172
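
The same per-CPU accounting scheme is applied to ipip here as to ip_gre above: hot counters live in alloc_percpu() storage hung off dev->tstats, the fast path bumps them through this_cpu_ptr(), ndo_get_stats folds them back into dev->stats, and the destructor releases the area. A condensed sketch of that lifecycle, reusing the struct pcpu_tstats defined in this hunk; the helper names are hypothetical:

#include <linux/netdevice.h>
#include <linux/percpu.h>

/* Sketch of the per-CPU stats lifecycle the ipip/ip_gre hunks adopt. */
static int sketch_tunnel_init(struct net_device *dev)
{
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	return dev->tstats ? 0 : -ENOMEM;
}

static void sketch_rx_account(struct net_device *dev, unsigned int len)
{
	/* RX runs in softirq context, so this CPU's counters can be
	 * bumped without a lock or atomic op, as in ipip_rcv() below.
	 */
	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);

	tstats->rx_packets++;
	tstats->rx_bytes += len;
}

static void sketch_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);	/* paired with the alloc in init */
	free_netdev(dev);
}
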
145static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, 173static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
146 __be32 remote, __be32 local) 174 __be32 remote, __be32 local)
147{ 175{
148 unsigned h0 = HASH(remote); 176 unsigned int h0 = HASH(remote);
149 unsigned h1 = HASH(local); 177 unsigned int h1 = HASH(local);
150 struct ip_tunnel *t; 178 struct ip_tunnel *t;
151 struct ipip_net *ipn = net_generic(net, ipip_net_id); 179 struct ipip_net *ipn = net_generic(net, ipip_net_id);
152 180
@@ -169,12 +197,12 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
169 return NULL; 197 return NULL;
170} 198}
171 199
172static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, 200static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
173 struct ip_tunnel_parm *parms) 201 struct ip_tunnel_parm *parms)
174{ 202{
175 __be32 remote = parms->iph.daddr; 203 __be32 remote = parms->iph.daddr;
176 __be32 local = parms->iph.saddr; 204 __be32 local = parms->iph.saddr;
177 unsigned h = 0; 205 unsigned int h = 0;
178 int prio = 0; 206 int prio = 0;
179 207
180 if (remote) { 208 if (remote) {
@@ -188,7 +216,7 @@ static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
188 return &ipn->tunnels[prio][h]; 216 return &ipn->tunnels[prio][h];
189} 217}
190 218
191static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, 219static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
192 struct ip_tunnel *t) 220 struct ip_tunnel *t)
193{ 221{
194 return __ipip_bucket(ipn, &t->parms); 222 return __ipip_bucket(ipn, &t->parms);
@@ -196,13 +224,14 @@ static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
196 224
197static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) 225static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
198{ 226{
199 struct ip_tunnel **tp; 227 struct ip_tunnel __rcu **tp;
200 228 struct ip_tunnel *iter;
201 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { 229
202 if (t == *tp) { 230 for (tp = ipip_bucket(ipn, t);
203 spin_lock_bh(&ipip_lock); 231 (iter = rtnl_dereference(*tp)) != NULL;
204 *tp = t->next; 232 tp = &iter->next) {
205 spin_unlock_bh(&ipip_lock); 233 if (t == iter) {
234 rcu_assign_pointer(*tp, t->next);
206 break; 235 break;
207 } 236 }
208 } 237 }
@@ -210,12 +239,10 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
210 239
211static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) 240static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
212{ 241{
213 struct ip_tunnel **tp = ipip_bucket(ipn, t); 242 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
214 243
215 spin_lock_bh(&ipip_lock); 244 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
216 t->next = *tp;
217 rcu_assign_pointer(*tp, t); 245 rcu_assign_pointer(*tp, t);
218 spin_unlock_bh(&ipip_lock);
219} 246}
220 247
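
Dropping ipip_lock works because updates and lookups are now split: writers always run under RTNL and use rtnl_dereference()/rcu_assign_pointer(), while the receive path walks the chains under rcu_read_lock() with rcu_dereference(), as ipip_tunnel_link() and for_each_ip_tunnel_rcu() above show. A stripped-down sketch of the two sides, assuming ip_tunnel's next pointer is __rcu-annotated elsewhere in this series:

#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <net/ipip.h>		/* struct ip_tunnel (assumed location) */

/* Sketch of the writer/reader split that replaces ipip_lock. */
static void sketch_chain_link(struct ip_tunnel __rcu **head, struct ip_tunnel *t)
{
	ASSERT_RTNL();
	/* t->next must be valid before t is published to readers */
	rcu_assign_pointer(t->next, rtnl_dereference(*head));
	rcu_assign_pointer(*head, t);
}

static struct ip_tunnel *sketch_chain_find(struct ip_tunnel __rcu *head,
					   __be32 daddr)
{
	struct ip_tunnel *t;

	/* caller holds rcu_read_lock(), as ipip_rcv() does */
	for (t = rcu_dereference(head); t; t = rcu_dereference(t->next))
		if (t->parms.iph.daddr == daddr)
			return t;
	return NULL;
}
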
221static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 248static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -223,12 +250,15 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
223{ 250{
224 __be32 remote = parms->iph.daddr; 251 __be32 remote = parms->iph.daddr;
225 __be32 local = parms->iph.saddr; 252 __be32 local = parms->iph.saddr;
226 struct ip_tunnel *t, **tp, *nt; 253 struct ip_tunnel *t, *nt;
254 struct ip_tunnel __rcu **tp;
227 struct net_device *dev; 255 struct net_device *dev;
228 char name[IFNAMSIZ]; 256 char name[IFNAMSIZ];
229 struct ipip_net *ipn = net_generic(net, ipip_net_id); 257 struct ipip_net *ipn = net_generic(net, ipip_net_id);
230 258
231 for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { 259 for (tp = __ipip_bucket(ipn, parms);
260 (t = rtnl_dereference(*tp)) != NULL;
261 tp = &t->next) {
232 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 262 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
233 return t; 263 return t;
234 } 264 }
@@ -238,7 +268,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
238 if (parms->name[0]) 268 if (parms->name[0])
239 strlcpy(name, parms->name, IFNAMSIZ); 269 strlcpy(name, parms->name, IFNAMSIZ);
240 else 270 else
241 sprintf(name, "tunl%%d"); 271 strcpy(name, "tunl%d");
242 272
243 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); 273 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
244 if (dev == NULL) 274 if (dev == NULL)
@@ -254,7 +284,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
254 nt = netdev_priv(dev); 284 nt = netdev_priv(dev);
255 nt->parms = *parms; 285 nt->parms = *parms;
256 286
257 ipip_tunnel_init(dev); 287 if (ipip_tunnel_init(dev) < 0)
288 goto failed_free;
258 289
259 if (register_netdevice(dev) < 0) 290 if (register_netdevice(dev) < 0)
260 goto failed_free; 291 goto failed_free;
@@ -264,20 +295,19 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
264 return nt; 295 return nt;
265 296
266failed_free: 297failed_free:
267 free_netdev(dev); 298 ipip_dev_free(dev);
268 return NULL; 299 return NULL;
269} 300}
270 301
302/* called with RTNL */
271static void ipip_tunnel_uninit(struct net_device *dev) 303static void ipip_tunnel_uninit(struct net_device *dev)
272{ 304{
273 struct net *net = dev_net(dev); 305 struct net *net = dev_net(dev);
274 struct ipip_net *ipn = net_generic(net, ipip_net_id); 306 struct ipip_net *ipn = net_generic(net, ipip_net_id);
275 307
276 if (dev == ipn->fb_tunnel_dev) { 308 if (dev == ipn->fb_tunnel_dev)
277 spin_lock_bh(&ipip_lock); 309 rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
278 ipn->tunnels_wc[0] = NULL; 310 else
279 spin_unlock_bh(&ipip_lock);
280 } else
281 ipip_tunnel_unlink(ipn, netdev_priv(dev)); 311 ipip_tunnel_unlink(ipn, netdev_priv(dev));
282 dev_put(dev); 312 dev_put(dev);
283} 313}
@@ -359,8 +389,10 @@ static int ipip_rcv(struct sk_buff *skb)
359 const struct iphdr *iph = ip_hdr(skb); 389 const struct iphdr *iph = ip_hdr(skb);
360 390
361 rcu_read_lock(); 391 rcu_read_lock();
362 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), 392 tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
363 iph->saddr, iph->daddr)) != NULL) { 393 if (tunnel != NULL) {
394 struct pcpu_tstats *tstats;
395
364 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 396 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
365 rcu_read_unlock(); 397 rcu_read_unlock();
366 kfree_skb(skb); 398 kfree_skb(skb);
@@ -374,10 +406,17 @@ static int ipip_rcv(struct sk_buff *skb)
374 skb->protocol = htons(ETH_P_IP); 406 skb->protocol = htons(ETH_P_IP);
375 skb->pkt_type = PACKET_HOST; 407 skb->pkt_type = PACKET_HOST;
376 408
377 skb_tunnel_rx(skb, tunnel->dev); 409 tstats = this_cpu_ptr(tunnel->dev->tstats);
410 tstats->rx_packets++;
411 tstats->rx_bytes += skb->len;
412
413 __skb_tunnel_rx(skb, tunnel->dev);
378 414
379 ipip_ecn_decapsulate(iph, skb); 415 ipip_ecn_decapsulate(iph, skb);
380 netif_rx(skb); 416
417 if (netif_rx(skb) == NET_RX_DROP)
418 tunnel->dev->stats.rx_dropped++;
419
381 rcu_read_unlock(); 420 rcu_read_unlock();
382 return 0; 421 return 0;
383 } 422 }
@@ -394,13 +433,12 @@ static int ipip_rcv(struct sk_buff *skb)
394static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 433static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
395{ 434{
396 struct ip_tunnel *tunnel = netdev_priv(dev); 435 struct ip_tunnel *tunnel = netdev_priv(dev);
397 struct net_device_stats *stats = &dev->stats; 436 struct pcpu_tstats *tstats;
398 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
399 struct iphdr *tiph = &tunnel->parms.iph; 437 struct iphdr *tiph = &tunnel->parms.iph;
400 u8 tos = tunnel->parms.iph.tos; 438 u8 tos = tunnel->parms.iph.tos;
401 __be16 df = tiph->frag_off; 439 __be16 df = tiph->frag_off;
402 struct rtable *rt; /* Route to the other host */ 440 struct rtable *rt; /* Route to the other host */
403 struct net_device *tdev; /* Device to other host */ 441 struct net_device *tdev; /* Device to other host */
404 struct iphdr *old_iph = ip_hdr(skb); 442 struct iphdr *old_iph = ip_hdr(skb);
405 struct iphdr *iph; /* Our new IP header */ 443 struct iphdr *iph; /* Our new IP header */
406 unsigned int max_headroom; /* The extra header space needed */ 444 unsigned int max_headroom; /* The extra header space needed */
@@ -410,13 +448,13 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
410 if (skb->protocol != htons(ETH_P_IP)) 448 if (skb->protocol != htons(ETH_P_IP))
411 goto tx_error; 449 goto tx_error;
412 450
413 if (tos&1) 451 if (tos & 1)
414 tos = old_iph->tos; 452 tos = old_iph->tos;
415 453
416 if (!dst) { 454 if (!dst) {
417 /* NBMA tunnel */ 455 /* NBMA tunnel */
418 if ((rt = skb_rtable(skb)) == NULL) { 456 if ((rt = skb_rtable(skb)) == NULL) {
419 stats->tx_fifo_errors++; 457 dev->stats.tx_fifo_errors++;
420 goto tx_error; 458 goto tx_error;
421 } 459 }
422 if ((dst = rt->rt_gateway) == 0) 460 if ((dst = rt->rt_gateway) == 0)
@@ -424,14 +462,20 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
424 } 462 }
425 463
426 { 464 {
427 struct flowi fl = { .oif = tunnel->parms.link, 465 struct flowi fl = {
428 .nl_u = { .ip4_u = 466 .oif = tunnel->parms.link,
429 { .daddr = dst, 467 .nl_u = {
430 .saddr = tiph->saddr, 468 .ip4_u = {
431 .tos = RT_TOS(tos) } }, 469 .daddr = dst,
432 .proto = IPPROTO_IPIP }; 470 .saddr = tiph->saddr,
471 .tos = RT_TOS(tos)
472 }
473 },
474 .proto = IPPROTO_IPIP
475 };
476
433 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 477 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
434 stats->tx_carrier_errors++; 478 dev->stats.tx_carrier_errors++;
435 goto tx_error_icmp; 479 goto tx_error_icmp;
436 } 480 }
437 } 481 }
@@ -439,7 +483,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
439 483
440 if (tdev == dev) { 484 if (tdev == dev) {
441 ip_rt_put(rt); 485 ip_rt_put(rt);
442 stats->collisions++; 486 dev->stats.collisions++;
443 goto tx_error; 487 goto tx_error;
444 } 488 }
445 489
@@ -449,7 +493,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
449 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 493 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
450 494
451 if (mtu < 68) { 495 if (mtu < 68) {
452 stats->collisions++; 496 dev->stats.collisions++;
453 ip_rt_put(rt); 497 ip_rt_put(rt);
454 goto tx_error; 498 goto tx_error;
455 } 499 }
@@ -485,7 +529,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
485 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 529 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
486 if (!new_skb) { 530 if (!new_skb) {
487 ip_rt_put(rt); 531 ip_rt_put(rt);
488 txq->tx_dropped++; 532 dev->stats.tx_dropped++;
489 dev_kfree_skb(skb); 533 dev_kfree_skb(skb);
490 return NETDEV_TX_OK; 534 return NETDEV_TX_OK;
491 } 535 }
@@ -522,14 +566,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
522 iph->ttl = old_iph->ttl; 566 iph->ttl = old_iph->ttl;
523 567
524 nf_reset(skb); 568 nf_reset(skb);
525 569 tstats = this_cpu_ptr(dev->tstats);
526 IPTUNNEL_XMIT(); 570 __IPTUNNEL_XMIT(tstats, &dev->stats);
527 return NETDEV_TX_OK; 571 return NETDEV_TX_OK;
528 572
529tx_error_icmp: 573tx_error_icmp:
530 dst_link_failure(skb); 574 dst_link_failure(skb);
531tx_error: 575tx_error:
532 stats->tx_errors++; 576 dev->stats.tx_errors++;
533 dev_kfree_skb(skb); 577 dev_kfree_skb(skb);
534 return NETDEV_TX_OK; 578 return NETDEV_TX_OK;
535} 579}
@@ -544,13 +588,19 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
544 iph = &tunnel->parms.iph; 588 iph = &tunnel->parms.iph;
545 589
546 if (iph->daddr) { 590 if (iph->daddr) {
547 struct flowi fl = { .oif = tunnel->parms.link, 591 struct flowi fl = {
548 .nl_u = { .ip4_u = 592 .oif = tunnel->parms.link,
549 { .daddr = iph->daddr, 593 .nl_u = {
550 .saddr = iph->saddr, 594 .ip4_u = {
551 .tos = RT_TOS(iph->tos) } }, 595 .daddr = iph->daddr,
552 .proto = IPPROTO_IPIP }; 596 .saddr = iph->saddr,
597 .tos = RT_TOS(iph->tos)
598 }
599 },
600 .proto = IPPROTO_IPIP
601 };
553 struct rtable *rt; 602 struct rtable *rt;
603
554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 604 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
555 tdev = rt->dst.dev; 605 tdev = rt->dst.dev;
556 ip_rt_put(rt); 606 ip_rt_put(rt);
@@ -696,13 +746,19 @@ static const struct net_device_ops ipip_netdev_ops = {
696 .ndo_start_xmit = ipip_tunnel_xmit, 746 .ndo_start_xmit = ipip_tunnel_xmit,
697 .ndo_do_ioctl = ipip_tunnel_ioctl, 747 .ndo_do_ioctl = ipip_tunnel_ioctl,
698 .ndo_change_mtu = ipip_tunnel_change_mtu, 748 .ndo_change_mtu = ipip_tunnel_change_mtu,
699 749 .ndo_get_stats = ipip_get_stats,
700}; 750};
701 751
752static void ipip_dev_free(struct net_device *dev)
753{
754 free_percpu(dev->tstats);
755 free_netdev(dev);
756}
757
702static void ipip_tunnel_setup(struct net_device *dev) 758static void ipip_tunnel_setup(struct net_device *dev)
703{ 759{
704 dev->netdev_ops = &ipip_netdev_ops; 760 dev->netdev_ops = &ipip_netdev_ops;
705 dev->destructor = free_netdev; 761 dev->destructor = ipip_dev_free;
706 762
707 dev->type = ARPHRD_TUNNEL; 763 dev->type = ARPHRD_TUNNEL;
708 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 764 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -711,10 +767,11 @@ static void ipip_tunnel_setup(struct net_device *dev)
711 dev->iflink = 0; 767 dev->iflink = 0;
712 dev->addr_len = 4; 768 dev->addr_len = 4;
713 dev->features |= NETIF_F_NETNS_LOCAL; 769 dev->features |= NETIF_F_NETNS_LOCAL;
770 dev->features |= NETIF_F_LLTX;
714 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 771 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
715} 772}
716 773
717static void ipip_tunnel_init(struct net_device *dev) 774static int ipip_tunnel_init(struct net_device *dev)
718{ 775{
719 struct ip_tunnel *tunnel = netdev_priv(dev); 776 struct ip_tunnel *tunnel = netdev_priv(dev);
720 777
@@ -725,9 +782,15 @@ static void ipip_tunnel_init(struct net_device *dev)
725 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 782 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
726 783
727 ipip_tunnel_bind_dev(dev); 784 ipip_tunnel_bind_dev(dev);
785
786 dev->tstats = alloc_percpu(struct pcpu_tstats);
787 if (!dev->tstats)
788 return -ENOMEM;
789
790 return 0;
728} 791}
729 792
730static void __net_init ipip_fb_tunnel_init(struct net_device *dev) 793static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
731{ 794{
732 struct ip_tunnel *tunnel = netdev_priv(dev); 795 struct ip_tunnel *tunnel = netdev_priv(dev);
733 struct iphdr *iph = &tunnel->parms.iph; 796 struct iphdr *iph = &tunnel->parms.iph;
@@ -740,11 +803,16 @@ static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
740 iph->protocol = IPPROTO_IPIP; 803 iph->protocol = IPPROTO_IPIP;
741 iph->ihl = 5; 804 iph->ihl = 5;
742 805
806 dev->tstats = alloc_percpu(struct pcpu_tstats);
807 if (!dev->tstats)
808 return -ENOMEM;
809
743 dev_hold(dev); 810 dev_hold(dev);
744 ipn->tunnels_wc[0] = tunnel; 811 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
812 return 0;
745} 813}
746 814
747static struct xfrm_tunnel ipip_handler = { 815static struct xfrm_tunnel ipip_handler __read_mostly = {
748 .handler = ipip_rcv, 816 .handler = ipip_rcv,
749 .err_handler = ipip_err, 817 .err_handler = ipip_err,
750 .priority = 1, 818 .priority = 1,
@@ -760,11 +828,12 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
760 for (prio = 1; prio < 4; prio++) { 828 for (prio = 1; prio < 4; prio++) {
761 int h; 829 int h;
762 for (h = 0; h < HASH_SIZE; h++) { 830 for (h = 0; h < HASH_SIZE; h++) {
763 struct ip_tunnel *t = ipn->tunnels[prio][h]; 831 struct ip_tunnel *t;
764 832
833 t = rtnl_dereference(ipn->tunnels[prio][h]);
765 while (t != NULL) { 834 while (t != NULL) {
766 unregister_netdevice_queue(t->dev, head); 835 unregister_netdevice_queue(t->dev, head);
767 t = t->next; 836 t = rtnl_dereference(t->next);
768 } 837 }
769 } 838 }
770 } 839 }
@@ -789,7 +858,9 @@ static int __net_init ipip_init_net(struct net *net)
789 } 858 }
790 dev_net_set(ipn->fb_tunnel_dev, net); 859 dev_net_set(ipn->fb_tunnel_dev, net);
791 860
792 ipip_fb_tunnel_init(ipn->fb_tunnel_dev); 861 err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
862 if (err)
863 goto err_reg_dev;
793 864
794 if ((err = register_netdev(ipn->fb_tunnel_dev))) 865 if ((err = register_netdev(ipn->fb_tunnel_dev)))
795 goto err_reg_dev; 866 goto err_reg_dev;
@@ -797,7 +868,7 @@ static int __net_init ipip_init_net(struct net *net)
797 return 0; 868 return 0;
798 869
799err_reg_dev: 870err_reg_dev:
800 free_netdev(ipn->fb_tunnel_dev); 871 ipip_dev_free(ipn->fb_tunnel_dev);
801err_alloc_dev: 872err_alloc_dev:
802 /* nothing */ 873 /* nothing */
803 return err; 874 return err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 179fcab866fc..86dd5691af46 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -75,7 +75,7 @@ struct mr_table {
75 struct net *net; 75 struct net *net;
76#endif 76#endif
77 u32 id; 77 u32 id;
78 struct sock *mroute_sk; 78 struct sock __rcu *mroute_sk;
79 struct timer_list ipmr_expire_timer; 79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue; 80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES]; 81 struct list_head mfc_cache_array[MFC_LINES];
@@ -98,7 +98,7 @@ struct ipmr_result {
98}; 98};
99 99
100/* Big lock, protecting vif table, mrt cache and mroute socket state. 100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock. 101 * Note that the changes are semaphored via rtnl_lock.
102 */ 102 */
103 103
104static DEFINE_RWLOCK(mrt_lock); 104static DEFINE_RWLOCK(mrt_lock);
@@ -113,11 +113,11 @@ static DEFINE_RWLOCK(mrt_lock);
113static DEFINE_SPINLOCK(mfc_unres_lock); 113static DEFINE_SPINLOCK(mfc_unres_lock);
114 114
115/* We return to original Alan's scheme. Hash table of resolved 115/* We return to original Alan's scheme. Hash table of resolved
116 entries is changed only in process context and protected 116 * entries is changed only in process context and protected
117 with weak lock mrt_lock. Queue of unresolved entries is protected 117 * with weak lock mrt_lock. Queue of unresolved entries is protected
118 with strong spinlock mfc_unres_lock. 118 * with strong spinlock mfc_unres_lock.
119 119 *
120 In this case data path is free of exclusive locks at all. 120 * In this case data path is free of exclusive locks at all.
121 */ 121 */
122 122
123static struct kmem_cache *mrt_cachep __read_mostly; 123static struct kmem_cache *mrt_cachep __read_mostly;
@@ -396,9 +396,9 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
396 set_fs(KERNEL_DS); 396 set_fs(KERNEL_DS);
397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398 set_fs(oldfs); 398 set_fs(oldfs);
399 } else 399 } else {
400 err = -EOPNOTSUPP; 400 err = -EOPNOTSUPP;
401 401 }
402 dev = NULL; 402 dev = NULL;
403 403
404 if (err == 0 && 404 if (err == 0 &&
@@ -495,7 +495,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
495 dev->iflink = 0; 495 dev->iflink = 0;
496 496
497 rcu_read_lock(); 497 rcu_read_lock();
498 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { 498 in_dev = __in_dev_get_rcu(dev);
499 if (!in_dev) {
499 rcu_read_unlock(); 500 rcu_read_unlock();
500 goto failure; 501 goto failure;
501 } 502 }
@@ -552,9 +553,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
552 mrt->mroute_reg_vif_num = -1; 553 mrt->mroute_reg_vif_num = -1;
553#endif 554#endif
554 555
555 if (vifi+1 == mrt->maxvif) { 556 if (vifi + 1 == mrt->maxvif) {
556 int tmp; 557 int tmp;
557 for (tmp=vifi-1; tmp>=0; tmp--) { 558
559 for (tmp = vifi - 1; tmp >= 0; tmp--) {
558 if (VIF_EXISTS(mrt, tmp)) 560 if (VIF_EXISTS(mrt, tmp))
559 break; 561 break;
560 } 562 }
@@ -565,25 +567,33 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
565 567
566 dev_set_allmulti(dev, -1); 568 dev_set_allmulti(dev, -1);
567 569
568 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { 570 in_dev = __in_dev_get_rtnl(dev);
571 if (in_dev) {
569 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 572 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
570 ip_rt_multicast_event(in_dev); 573 ip_rt_multicast_event(in_dev);
571 } 574 }
572 575
573 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) 576 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
574 unregister_netdevice_queue(dev, head); 577 unregister_netdevice_queue(dev, head);
575 578
576 dev_put(dev); 579 dev_put(dev);
577 return 0; 580 return 0;
578} 581}
579 582
580static inline void ipmr_cache_free(struct mfc_cache *c) 583static void ipmr_cache_free_rcu(struct rcu_head *head)
581{ 584{
585 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
586
582 kmem_cache_free(mrt_cachep, c); 587 kmem_cache_free(mrt_cachep, c);
583} 588}
584 589
590static inline void ipmr_cache_free(struct mfc_cache *c)
591{
592 call_rcu(&c->rcu, ipmr_cache_free_rcu);
593}
594
585/* Destroy an unresolved cache entry, killing queued skbs 595/* Destroy an unresolved cache entry, killing queued skbs
586 and reporting error to netlink readers. 596 * and reporting error to netlink readers.
587 */ 597 */
588 598
589static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 599static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
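
ipmr_cache_free() now defers the kmem_cache_free() through call_rcu(), so the lockless lookup introduced later in this patch (list_for_each_entry_rcu() under rcu_read_lock()) can race with list_del_rcu() without ever touching reclaimed memory. A rough reader/writer sketch, assuming struct mfc_cache carries the rcu_head this change relies on and reusing the file's mrt_cachep slab:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/mroute.h>	/* struct mfc_cache (assumed location) */

/* Sketch of the deferred free ipmr switches to: unlink under RTNL, free
 * only after a grace period.
 */
static void sketch_mfc_free_rcu(struct rcu_head *head)
{
	struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);

	kmem_cache_free(mrt_cachep, c);
}

static void sketch_mfc_delete(struct mfc_cache *c)
{
	list_del_rcu(&c->list);			/* readers may still reach c */
	call_rcu(&c->rcu, sketch_mfc_free_rcu);	/* reclaimed after a grace period */
}

/* reader side, shaped like ipmr_cache_find() below */
static struct mfc_cache *sketch_mfc_find(struct list_head *chain, __be32 origin)
{
	struct mfc_cache *c;

	list_for_each_entry_rcu(c, chain, list)
		if (c->mfc_origin == origin)
			return c;	/* stable until rcu_read_unlock() */
	return NULL;
}
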
@@ -605,8 +615,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
605 memset(&e->msg, 0, sizeof(e->msg)); 615 memset(&e->msg, 0, sizeof(e->msg));
606 616
607 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 617 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
608 } else 618 } else {
609 kfree_skb(skb); 619 kfree_skb(skb);
620 }
610 } 621 }
611 622
612 ipmr_cache_free(c); 623 ipmr_cache_free(c);
@@ -724,13 +735,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
724 case 0: 735 case 0:
725 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 736 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
726 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 737 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
727 if (dev && dev->ip_ptr == NULL) { 738 if (dev && __in_dev_get_rtnl(dev) == NULL) {
728 dev_put(dev); 739 dev_put(dev);
729 return -EADDRNOTAVAIL; 740 return -EADDRNOTAVAIL;
730 } 741 }
731 } else 742 } else {
732 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 743 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
733 744 }
734 if (!dev) 745 if (!dev)
735 return -EADDRNOTAVAIL; 746 return -EADDRNOTAVAIL;
736 err = dev_set_allmulti(dev, 1); 747 err = dev_set_allmulti(dev, 1);
@@ -743,16 +754,16 @@ static int vif_add(struct net *net, struct mr_table *mrt,
743 return -EINVAL; 754 return -EINVAL;
744 } 755 }
745 756
746 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { 757 in_dev = __in_dev_get_rtnl(dev);
758 if (!in_dev) {
747 dev_put(dev); 759 dev_put(dev);
748 return -EADDRNOTAVAIL; 760 return -EADDRNOTAVAIL;
749 } 761 }
750 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 762 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
751 ip_rt_multicast_event(in_dev); 763 ip_rt_multicast_event(in_dev);
752 764
753 /* 765 /* Fill in the VIF structures */
754 * Fill in the VIF structures 766
755 */
756 v->rate_limit = vifc->vifc_rate_limit; 767 v->rate_limit = vifc->vifc_rate_limit;
757 v->local = vifc->vifc_lcl_addr.s_addr; 768 v->local = vifc->vifc_lcl_addr.s_addr;
758 v->remote = vifc->vifc_rmt_addr.s_addr; 769 v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -765,14 +776,14 @@ static int vif_add(struct net *net, struct mr_table *mrt,
765 v->pkt_in = 0; 776 v->pkt_in = 0;
766 v->pkt_out = 0; 777 v->pkt_out = 0;
767 v->link = dev->ifindex; 778 v->link = dev->ifindex;
768 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 779 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
769 v->link = dev->iflink; 780 v->link = dev->iflink;
770 781
771 /* And finish update writing critical data */ 782 /* And finish update writing critical data */
772 write_lock_bh(&mrt_lock); 783 write_lock_bh(&mrt_lock);
773 v->dev = dev; 784 v->dev = dev;
774#ifdef CONFIG_IP_PIMSM 785#ifdef CONFIG_IP_PIMSM
775 if (v->flags&VIFF_REGISTER) 786 if (v->flags & VIFF_REGISTER)
776 mrt->mroute_reg_vif_num = vifi; 787 mrt->mroute_reg_vif_num = vifi;
777#endif 788#endif
778 if (vifi+1 > mrt->maxvif) 789 if (vifi+1 > mrt->maxvif)
@@ -781,6 +792,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
781 return 0; 792 return 0;
782} 793}
783 794
795/* called with rcu_read_lock() */
784static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 796static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
785 __be32 origin, 797 __be32 origin,
786 __be32 mcastgrp) 798 __be32 mcastgrp)
@@ -788,7 +800,7 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
788 int line = MFC_HASH(mcastgrp, origin); 800 int line = MFC_HASH(mcastgrp, origin);
789 struct mfc_cache *c; 801 struct mfc_cache *c;
790 802
791 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { 803 list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
792 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) 804 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
793 return c; 805 return c;
794 } 806 }
@@ -801,19 +813,20 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
801static struct mfc_cache *ipmr_cache_alloc(void) 813static struct mfc_cache *ipmr_cache_alloc(void)
802{ 814{
803 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 815 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
804 if (c == NULL) 816
805 return NULL; 817 if (c)
806 c->mfc_un.res.minvif = MAXVIFS; 818 c->mfc_un.res.minvif = MAXVIFS;
807 return c; 819 return c;
808} 820}
809 821
810static struct mfc_cache *ipmr_cache_alloc_unres(void) 822static struct mfc_cache *ipmr_cache_alloc_unres(void)
811{ 823{
812 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 824 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
813 if (c == NULL) 825
814 return NULL; 826 if (c) {
815 skb_queue_head_init(&c->mfc_un.unres.unresolved); 827 skb_queue_head_init(&c->mfc_un.unres.unresolved);
816 c->mfc_un.unres.expires = jiffies + 10*HZ; 828 c->mfc_un.unres.expires = jiffies + 10*HZ;
829 }
817 return c; 830 return c;
818} 831}
819 832
@@ -827,17 +840,15 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
827 struct sk_buff *skb; 840 struct sk_buff *skb;
828 struct nlmsgerr *e; 841 struct nlmsgerr *e;
829 842
830 /* 843 /* Play the pending entries through our router */
831 * Play the pending entries through our router
832 */
833 844
834 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 845 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
835 if (ip_hdr(skb)->version == 0) { 846 if (ip_hdr(skb)->version == 0) {
836 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 847 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
837 848
838 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 849 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
839 nlh->nlmsg_len = (skb_tail_pointer(skb) - 850 nlh->nlmsg_len = skb_tail_pointer(skb) -
840 (u8 *)nlh); 851 (u8 *)nlh;
841 } else { 852 } else {
842 nlh->nlmsg_type = NLMSG_ERROR; 853 nlh->nlmsg_type = NLMSG_ERROR;
843 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 854 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -848,8 +859,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
848 } 859 }
849 860
850 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 861 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
851 } else 862 } else {
852 ip_mr_forward(net, mrt, skb, c, 0); 863 ip_mr_forward(net, mrt, skb, c, 0);
864 }
853 } 865 }
854} 866}
855 867
@@ -867,6 +879,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
867 const int ihl = ip_hdrlen(pkt); 879 const int ihl = ip_hdrlen(pkt);
868 struct igmphdr *igmp; 880 struct igmphdr *igmp;
869 struct igmpmsg *msg; 881 struct igmpmsg *msg;
882 struct sock *mroute_sk;
870 int ret; 883 int ret;
871 884
872#ifdef CONFIG_IP_PIMSM 885#ifdef CONFIG_IP_PIMSM
@@ -882,9 +895,9 @@ static int ipmr_cache_report(struct mr_table *mrt,
882#ifdef CONFIG_IP_PIMSM 895#ifdef CONFIG_IP_PIMSM
883 if (assert == IGMPMSG_WHOLEPKT) { 896 if (assert == IGMPMSG_WHOLEPKT) {
884 /* Ugly, but we have no choice with this interface. 897 /* Ugly, but we have no choice with this interface.
885 Duplicate old header, fix ihl, length etc. 898 * Duplicate old header, fix ihl, length etc.
886 And all this only to mangle msg->im_msgtype and 899 * And all this only to mangle msg->im_msgtype and
887 to set msg->im_mbz to "mbz" :-) 900 * to set msg->im_mbz to "mbz" :-)
888 */ 901 */
889 skb_push(skb, sizeof(struct iphdr)); 902 skb_push(skb, sizeof(struct iphdr));
890 skb_reset_network_header(skb); 903 skb_reset_network_header(skb);
@@ -901,39 +914,38 @@ static int ipmr_cache_report(struct mr_table *mrt,
901#endif 914#endif
902 { 915 {
903 916
904 /* 917 /* Copy the IP header */
905 * Copy the IP header
906 */
907 918
908 skb->network_header = skb->tail; 919 skb->network_header = skb->tail;
909 skb_put(skb, ihl); 920 skb_put(skb, ihl);
910 skb_copy_to_linear_data(skb, pkt->data, ihl); 921 skb_copy_to_linear_data(skb, pkt->data, ihl);
911 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ 922 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
912 msg = (struct igmpmsg *)skb_network_header(skb); 923 msg = (struct igmpmsg *)skb_network_header(skb);
913 msg->im_vif = vifi; 924 msg->im_vif = vifi;
914 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 925 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
915 926
916 /* 927 /* Add our header */
917 * Add our header
918 */
919 928
920 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); 929 igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
921 igmp->type = 930 igmp->type =
922 msg->im_msgtype = assert; 931 msg->im_msgtype = assert;
923 igmp->code = 0; 932 igmp->code = 0;
924 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 933 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
925 skb->transport_header = skb->network_header; 934 skb->transport_header = skb->network_header;
926 } 935 }
927 936
928 if (mrt->mroute_sk == NULL) { 937 rcu_read_lock();
938 mroute_sk = rcu_dereference(mrt->mroute_sk);
939 if (mroute_sk == NULL) {
940 rcu_read_unlock();
929 kfree_skb(skb); 941 kfree_skb(skb);
930 return -EINVAL; 942 return -EINVAL;
931 } 943 }
932 944
933 /* 945 /* Deliver to mrouted */
934 * Deliver to mrouted 946
935 */ 947 ret = sock_queue_rcv_skb(mroute_sk, skb);
936 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); 948 rcu_read_unlock();
937 if (ret < 0) { 949 if (ret < 0) {
938 if (net_ratelimit()) 950 if (net_ratelimit())
939 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 951 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
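
mroute_sk is now published and cleared with rcu_assign_pointer() under RTNL (see the setsockopt and mrtsock_destruct hunks below), so ipmr_cache_report() only needs rcu_read_lock() plus rcu_dereference() to deliver to the daemon. A minimal sketch of both sides, using the file-local struct mr_table defined at the top of this diff; the helper names are hypothetical:

#include <linux/rtnetlink.h>
#include <net/sock.h>

/* Sketch of the access rules for mrt->mroute_sk after this change: the
 * control path owns it under RTNL, the report path only reads it under
 * rcu_read_lock().
 */
static int sketch_report(struct mr_table *mrt, struct sk_buff *skb)
{
	struct sock *sk;
	int ret = -EINVAL;

	rcu_read_lock();
	sk = rcu_dereference(mrt->mroute_sk);
	if (sk)
		ret = sock_queue_rcv_skb(sk, skb);
	rcu_read_unlock();

	if (ret < 0)
		kfree_skb(skb);	/* not queued, same as ipmr_cache_report() */
	return ret;
}

static void sketch_set_mroute_sk(struct mr_table *mrt, struct sock *sk)
{
	ASSERT_RTNL();		/* MRT_INIT/MRT_DONE run under rtnl_lock() */
	rcu_assign_pointer(mrt->mroute_sk, sk);
}
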
@@ -965,9 +977,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
965 } 977 }
966 978
967 if (!found) { 979 if (!found) {
968 /* 980 /* Create a new entry if allowable */
969 * Create a new entry if allowable
970 */
971 981
972 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 982 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
973 (c = ipmr_cache_alloc_unres()) == NULL) { 983 (c = ipmr_cache_alloc_unres()) == NULL) {
@@ -977,16 +987,14 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
977 return -ENOBUFS; 987 return -ENOBUFS;
978 } 988 }
979 989
980 /* 990 /* Fill in the new cache entry */
981 * Fill in the new cache entry 991
982 */
983 c->mfc_parent = -1; 992 c->mfc_parent = -1;
984 c->mfc_origin = iph->saddr; 993 c->mfc_origin = iph->saddr;
985 c->mfc_mcastgrp = iph->daddr; 994 c->mfc_mcastgrp = iph->daddr;
986 995
987 /* 996 /* Reflect first query at mrouted. */
988 * Reflect first query at mrouted. 997
989 */
990 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 998 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
991 if (err < 0) { 999 if (err < 0) {
992 /* If the report failed throw the cache entry 1000 /* If the report failed throw the cache entry
@@ -1006,10 +1014,9 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1006 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1014 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1007 } 1015 }
1008 1016
1009 /* 1017 /* See if we can append the packet */
1010 * See if we can append the packet 1018
1011 */ 1019 if (c->mfc_un.unres.unresolved.qlen > 3) {
1012 if (c->mfc_un.unres.unresolved.qlen>3) {
1013 kfree_skb(skb); 1020 kfree_skb(skb);
1014 err = -ENOBUFS; 1021 err = -ENOBUFS;
1015 } else { 1022 } else {
@@ -1035,9 +1042,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1035 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { 1042 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1036 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1043 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1037 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1044 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1038 write_lock_bh(&mrt_lock); 1045 list_del_rcu(&c->list);
1039 list_del(&c->list);
1040 write_unlock_bh(&mrt_lock);
1041 1046
1042 ipmr_cache_free(c); 1047 ipmr_cache_free(c);
1043 return 0; 1048 return 0;
@@ -1090,9 +1095,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1090 if (!mrtsock) 1095 if (!mrtsock)
1091 c->mfc_flags |= MFC_STATIC; 1096 c->mfc_flags |= MFC_STATIC;
1092 1097
1093 write_lock_bh(&mrt_lock); 1098 list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
1094 list_add(&c->list, &mrt->mfc_cache_array[line]);
1095 write_unlock_bh(&mrt_lock);
1096 1099
1097 /* 1100 /*
1098 * Check to see if we resolved a queued list. If so we 1101 * Check to see if we resolved a queued list. If so we
@@ -1130,26 +1133,21 @@ static void mroute_clean_tables(struct mr_table *mrt)
1130 LIST_HEAD(list); 1133 LIST_HEAD(list);
1131 struct mfc_cache *c, *next; 1134 struct mfc_cache *c, *next;
1132 1135
1133 /* 1136 /* Shut down all active vif entries */
1134 * Shut down all active vif entries 1137
1135 */
1136 for (i = 0; i < mrt->maxvif; i++) { 1138 for (i = 0; i < mrt->maxvif; i++) {
1137 if (!(mrt->vif_table[i].flags&VIFF_STATIC)) 1139 if (!(mrt->vif_table[i].flags & VIFF_STATIC))
1138 vif_delete(mrt, i, 0, &list); 1140 vif_delete(mrt, i, 0, &list);
1139 } 1141 }
1140 unregister_netdevice_many(&list); 1142 unregister_netdevice_many(&list);
1141 1143
1142 /* 1144 /* Wipe the cache */
1143 * Wipe the cache 1145
1144 */
1145 for (i = 0; i < MFC_LINES; i++) { 1146 for (i = 0; i < MFC_LINES; i++) {
1146 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { 1147 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1147 if (c->mfc_flags&MFC_STATIC) 1148 if (c->mfc_flags & MFC_STATIC)
1148 continue; 1149 continue;
1149 write_lock_bh(&mrt_lock); 1150 list_del_rcu(&c->list);
1150 list_del(&c->list);
1151 write_unlock_bh(&mrt_lock);
1152
1153 ipmr_cache_free(c); 1151 ipmr_cache_free(c);
1154 } 1152 }
1155 } 1153 }
@@ -1164,6 +1162,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
1164 } 1162 }
1165} 1163}
1166 1164
1165/* called from ip_ra_control(), before an RCU grace period,
1166 * we dont need to call synchronize_rcu() here
1167 */
1167static void mrtsock_destruct(struct sock *sk) 1168static void mrtsock_destruct(struct sock *sk)
1168{ 1169{
1169 struct net *net = sock_net(sk); 1170 struct net *net = sock_net(sk);
@@ -1171,13 +1172,9 @@ static void mrtsock_destruct(struct sock *sk)
1171 1172
1172 rtnl_lock(); 1173 rtnl_lock();
1173 ipmr_for_each_table(mrt, net) { 1174 ipmr_for_each_table(mrt, net) {
1174 if (sk == mrt->mroute_sk) { 1175 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1175 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1176 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1176 1177 rcu_assign_pointer(mrt->mroute_sk, NULL);
1177 write_lock_bh(&mrt_lock);
1178 mrt->mroute_sk = NULL;
1179 write_unlock_bh(&mrt_lock);
1180
1181 mroute_clean_tables(mrt); 1178 mroute_clean_tables(mrt);
1182 } 1179 }
1183 } 1180 }
@@ -1204,7 +1201,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1204 return -ENOENT; 1201 return -ENOENT;
1205 1202
1206 if (optname != MRT_INIT) { 1203 if (optname != MRT_INIT) {
1207 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) 1204 if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
1205 !capable(CAP_NET_ADMIN))
1208 return -EACCES; 1206 return -EACCES;
1209 } 1207 }
1210 1208
@@ -1217,23 +1215,20 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1217 return -ENOPROTOOPT; 1215 return -ENOPROTOOPT;
1218 1216
1219 rtnl_lock(); 1217 rtnl_lock();
1220 if (mrt->mroute_sk) { 1218 if (rtnl_dereference(mrt->mroute_sk)) {
1221 rtnl_unlock(); 1219 rtnl_unlock();
1222 return -EADDRINUSE; 1220 return -EADDRINUSE;
1223 } 1221 }
1224 1222
1225 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1223 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1226 if (ret == 0) { 1224 if (ret == 0) {
1227 write_lock_bh(&mrt_lock); 1225 rcu_assign_pointer(mrt->mroute_sk, sk);
1228 mrt->mroute_sk = sk;
1229 write_unlock_bh(&mrt_lock);
1230
1231 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1226 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1232 } 1227 }
1233 rtnl_unlock(); 1228 rtnl_unlock();
1234 return ret; 1229 return ret;
1235 case MRT_DONE: 1230 case MRT_DONE:
1236 if (sk != mrt->mroute_sk) 1231 if (sk != rcu_dereference_raw(mrt->mroute_sk))
1237 return -EACCES; 1232 return -EACCES;
1238 return ip_ra_control(sk, 0, NULL); 1233 return ip_ra_control(sk, 0, NULL);
1239 case MRT_ADD_VIF: 1234 case MRT_ADD_VIF:
@@ -1246,7 +1241,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1246 return -ENFILE; 1241 return -ENFILE;
1247 rtnl_lock(); 1242 rtnl_lock();
1248 if (optname == MRT_ADD_VIF) { 1243 if (optname == MRT_ADD_VIF) {
1249 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); 1244 ret = vif_add(net, mrt, &vif,
1245 sk == rtnl_dereference(mrt->mroute_sk));
1250 } else { 1246 } else {
1251 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1247 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1252 } 1248 }
@@ -1267,7 +1263,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1267 if (optname == MRT_DEL_MFC) 1263 if (optname == MRT_DEL_MFC)
1268 ret = ipmr_mfc_delete(mrt, &mfc); 1264 ret = ipmr_mfc_delete(mrt, &mfc);
1269 else 1265 else
1270 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); 1266 ret = ipmr_mfc_add(net, mrt, &mfc,
1267 sk == rtnl_dereference(mrt->mroute_sk));
1271 rtnl_unlock(); 1268 rtnl_unlock();
1272 return ret; 1269 return ret;
1273 /* 1270 /*
@@ -1276,7 +1273,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1276 case MRT_ASSERT: 1273 case MRT_ASSERT:
1277 { 1274 {
1278 int v; 1275 int v;
1279 if (get_user(v,(int __user *)optval)) 1276 if (get_user(v, (int __user *)optval))
1280 return -EFAULT; 1277 return -EFAULT;
1281 mrt->mroute_do_assert = (v) ? 1 : 0; 1278 mrt->mroute_do_assert = (v) ? 1 : 0;
1282 return 0; 1279 return 0;
@@ -1286,7 +1283,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1286 { 1283 {
1287 int v; 1284 int v;
1288 1285
1289 if (get_user(v,(int __user *)optval)) 1286 if (get_user(v, (int __user *)optval))
1290 return -EFAULT; 1287 return -EFAULT;
1291 v = (v) ? 1 : 0; 1288 v = (v) ? 1 : 0;
1292 1289
@@ -1309,14 +1306,16 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1309 return -EINVAL; 1306 return -EINVAL;
1310 if (get_user(v, (u32 __user *)optval)) 1307 if (get_user(v, (u32 __user *)optval))
1311 return -EFAULT; 1308 return -EFAULT;
1312 if (sk == mrt->mroute_sk)
1313 return -EBUSY;
1314 1309
1315 rtnl_lock(); 1310 rtnl_lock();
1316 ret = 0; 1311 ret = 0;
1317 if (!ipmr_new_table(net, v)) 1312 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1318 ret = -ENOMEM; 1313 ret = -EBUSY;
1319 raw_sk(sk)->ipmr_table = v; 1314 } else {
1315 if (!ipmr_new_table(net, v))
1316 ret = -ENOMEM;
1317 raw_sk(sk)->ipmr_table = v;
1318 }
1320 rtnl_unlock(); 1319 rtnl_unlock();
1321 return ret; 1320 return ret;
1322 } 1321 }
@@ -1347,9 +1346,9 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1347 1346
1348 if (optname != MRT_VERSION && 1347 if (optname != MRT_VERSION &&
1349#ifdef CONFIG_IP_PIMSM 1348#ifdef CONFIG_IP_PIMSM
1350 optname!=MRT_PIM && 1349 optname != MRT_PIM &&
1351#endif 1350#endif
1352 optname!=MRT_ASSERT) 1351 optname != MRT_ASSERT)
1353 return -ENOPROTOOPT; 1352 return -ENOPROTOOPT;
1354 1353
1355 if (get_user(olr, optlen)) 1354 if (get_user(olr, optlen))
@@ -1416,19 +1415,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1416 if (copy_from_user(&sr, arg, sizeof(sr))) 1415 if (copy_from_user(&sr, arg, sizeof(sr)))
1417 return -EFAULT; 1416 return -EFAULT;
1418 1417
1419 read_lock(&mrt_lock); 1418 rcu_read_lock();
1420 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1419 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1421 if (c) { 1420 if (c) {
1422 sr.pktcnt = c->mfc_un.res.pkt; 1421 sr.pktcnt = c->mfc_un.res.pkt;
1423 sr.bytecnt = c->mfc_un.res.bytes; 1422 sr.bytecnt = c->mfc_un.res.bytes;
1424 sr.wrong_if = c->mfc_un.res.wrong_if; 1423 sr.wrong_if = c->mfc_un.res.wrong_if;
1425 read_unlock(&mrt_lock); 1424 rcu_read_unlock();
1426 1425
1427 if (copy_to_user(arg, &sr, sizeof(sr))) 1426 if (copy_to_user(arg, &sr, sizeof(sr)))
1428 return -EFAULT; 1427 return -EFAULT;
1429 return 0; 1428 return 0;
1430 } 1429 }
1431 read_unlock(&mrt_lock); 1430 rcu_read_unlock();
1432 return -EADDRNOTAVAIL; 1431 return -EADDRNOTAVAIL;
1433 default: 1432 default:
1434 return -ENOIOCTLCMD; 1433 return -ENOIOCTLCMD;
@@ -1465,7 +1464,7 @@ static struct notifier_block ip_mr_notifier = {
1465}; 1464};
1466 1465
1467/* 1466/*
1468 * Encapsulate a packet by attaching a valid IPIP header to it. 1467 * Encapsulate a packet by attaching a valid IPIP header to it.
1469 * This avoids tunnel drivers and other mess and gives us the speed so 1468 * This avoids tunnel drivers and other mess and gives us the speed so
1470 * important for multicast video. 1469 * important for multicast video.
1471 */ 1470 */
@@ -1480,7 +1479,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1480 skb_reset_network_header(skb); 1479 skb_reset_network_header(skb);
1481 iph = ip_hdr(skb); 1480 iph = ip_hdr(skb);
1482 1481
1483 iph->version = 4; 1482 iph->version = 4;
1484 iph->tos = old_iph->tos; 1483 iph->tos = old_iph->tos;
1485 iph->ttl = old_iph->ttl; 1484 iph->ttl = old_iph->ttl;
1486 iph->frag_off = 0; 1485 iph->frag_off = 0;
@@ -1498,7 +1497,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1498 1497
1499static inline int ipmr_forward_finish(struct sk_buff *skb) 1498static inline int ipmr_forward_finish(struct sk_buff *skb)
1500{ 1499{
1501 struct ip_options * opt = &(IPCB(skb)->opt); 1500 struct ip_options *opt = &(IPCB(skb)->opt);
1502 1501
1503 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); 1502 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1504 1503
@@ -1535,22 +1534,34 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1535 } 1534 }
1536#endif 1535#endif
1537 1536
1538 if (vif->flags&VIFF_TUNNEL) { 1537 if (vif->flags & VIFF_TUNNEL) {
1539 struct flowi fl = { .oif = vif->link, 1538 struct flowi fl = {
1540 .nl_u = { .ip4_u = 1539 .oif = vif->link,
1541 { .daddr = vif->remote, 1540 .nl_u = {
1542 .saddr = vif->local, 1541 .ip4_u = {
1543 .tos = RT_TOS(iph->tos) } }, 1542 .daddr = vif->remote,
1544 .proto = IPPROTO_IPIP }; 1543 .saddr = vif->local,
1544 .tos = RT_TOS(iph->tos)
1545 }
1546 },
1547 .proto = IPPROTO_IPIP
1548 };
1549
1545 if (ip_route_output_key(net, &rt, &fl)) 1550 if (ip_route_output_key(net, &rt, &fl))
1546 goto out_free; 1551 goto out_free;
1547 encap = sizeof(struct iphdr); 1552 encap = sizeof(struct iphdr);
1548 } else { 1553 } else {
1549 struct flowi fl = { .oif = vif->link, 1554 struct flowi fl = {
1550 .nl_u = { .ip4_u = 1555 .oif = vif->link,
1551 { .daddr = iph->daddr, 1556 .nl_u = {
1552 .tos = RT_TOS(iph->tos) } }, 1557 .ip4_u = {
1553 .proto = IPPROTO_IPIP }; 1558 .daddr = iph->daddr,
1559 .tos = RT_TOS(iph->tos)
1560 }
1561 },
1562 .proto = IPPROTO_IPIP
1563 };
1564
1554 if (ip_route_output_key(net, &rt, &fl)) 1565 if (ip_route_output_key(net, &rt, &fl))
1555 goto out_free; 1566 goto out_free;
1556 } 1567 }
@@ -1559,8 +1570,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1559 1570
1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1571 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1561 /* Do not fragment multicasts. Alas, IPv4 does not 1572 /* Do not fragment multicasts. Alas, IPv4 does not
1562 allow to send ICMP, so that packets will disappear 1573 * allow to send ICMP, so that packets will disappear
1563 to blackhole. 1574 * to blackhole.
1564 */ 1575 */
1565 1576
1566 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 1577 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -1583,7 +1594,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1583 ip_decrease_ttl(ip_hdr(skb)); 1594 ip_decrease_ttl(ip_hdr(skb));
1584 1595
1585 /* FIXME: forward and output firewalls used to be called here. 1596 /* FIXME: forward and output firewalls used to be called here.
1586 * What do we do with netfilter? -- RR */ 1597 * What do we do with netfilter? -- RR
1598 */
1587 if (vif->flags & VIFF_TUNNEL) { 1599 if (vif->flags & VIFF_TUNNEL) {
1588 ip_encap(skb, vif->local, vif->remote); 1600 ip_encap(skb, vif->local, vif->remote);
1589 /* FIXME: extra output firewall step used to be here. --RR */ 1601 /* FIXME: extra output firewall step used to be here. --RR */
@@ -1644,15 +1656,15 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1644 1656
1645 if (skb_rtable(skb)->fl.iif == 0) { 1657 if (skb_rtable(skb)->fl.iif == 0) {
1646 /* It is our own packet, looped back. 1658 /* It is our own packet, looped back.
1647 Very complicated situation... 1659 * Very complicated situation...
1648 1660 *
1649 The best workaround until routing daemons will be 1661 * The best workaround until routing daemons will be
1650 fixed is not to redistribute packet, if it was 1662 * fixed is not to redistribute packet, if it was
1651 send through wrong interface. It means, that 1663 * send through wrong interface. It means, that
1652 multicast applications WILL NOT work for 1664 * multicast applications WILL NOT work for
1653 (S,G), which have default multicast route pointing 1665 * (S,G), which have default multicast route pointing
1654 to wrong oif. In any case, it is not a good 1666 * to wrong oif. In any case, it is not a good
1655 idea to use multicasting applications on router. 1667 * idea to use multicasting applications on router.
1656 */ 1668 */
1657 goto dont_forward; 1669 goto dont_forward;
1658 } 1670 }
@@ -1662,9 +1674,9 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1662 1674
1663 if (true_vifi >= 0 && mrt->mroute_do_assert && 1675 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1664 /* pimsm uses asserts, when switching from RPT to SPT, 1676 /* pimsm uses asserts, when switching from RPT to SPT,
1665 so that we cannot check that packet arrived on an oif. 1677 * so that we cannot check that packet arrived on an oif.
1666 It is bad, but otherwise we would need to move pretty 1678 * It is bad, but otherwise we would need to move pretty
1667 large chunk of pimd to kernel. Ough... --ANK 1679 * large chunk of pimd to kernel. Ough... --ANK
1668 */ 1680 */
1669 (mrt->mroute_do_pim || 1681 (mrt->mroute_do_pim ||
1670 cache->mfc_un.res.ttls[true_vifi] < 255) && 1682 cache->mfc_un.res.ttls[true_vifi] < 255) &&
@@ -1682,10 +1694,12 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1682 /* 1694 /*
1683 * Forward the frame 1695 * Forward the frame
1684 */ 1696 */
1685 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { 1697 for (ct = cache->mfc_un.res.maxvif - 1;
1698 ct >= cache->mfc_un.res.minvif; ct--) {
1686 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 1699 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1687 if (psend != -1) { 1700 if (psend != -1) {
1688 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1701 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1702
1689 if (skb2) 1703 if (skb2)
1690 ipmr_queue_xmit(net, mrt, skb2, cache, 1704 ipmr_queue_xmit(net, mrt, skb2, cache,
1691 psend); 1705 psend);
@@ -1696,6 +1710,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1696 if (psend != -1) { 1710 if (psend != -1) {
1697 if (local) { 1711 if (local) {
1698 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1712 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1713
1699 if (skb2) 1714 if (skb2)
1700 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1715 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1701 } else { 1716 } else {
@@ -1713,6 +1728,7 @@ dont_forward:
1713 1728
1714/* 1729/*
1715 * Multicast packets for forwarding arrive here 1730 * Multicast packets for forwarding arrive here
1731 * Called with rcu_read_lock();
1716 */ 1732 */
1717 1733
1718int ip_mr_input(struct sk_buff *skb) 1734int ip_mr_input(struct sk_buff *skb)
@@ -1724,9 +1740,9 @@ int ip_mr_input(struct sk_buff *skb)
1724 int err; 1740 int err;
1725 1741
1726 /* Packet is looped back after forward, it should not be 1742 /* Packet is looped back after forward, it should not be
1727 forwarded second time, but still can be delivered locally. 1743 * forwarded second time, but still can be delivered locally.
1728 */ 1744 */
1729 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1745 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1730 goto dont_forward; 1746 goto dont_forward;
1731 1747
1732 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1748 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
@@ -1736,28 +1752,28 @@ int ip_mr_input(struct sk_buff *skb)
1736 } 1752 }
1737 1753
1738 if (!local) { 1754 if (!local) {
1739 if (IPCB(skb)->opt.router_alert) { 1755 if (IPCB(skb)->opt.router_alert) {
1740 if (ip_call_ra_chain(skb)) 1756 if (ip_call_ra_chain(skb))
1741 return 0; 1757 return 0;
1742 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){ 1758 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1743 /* IGMPv1 (and broken IGMPv2 implementations sort of 1759 /* IGMPv1 (and broken IGMPv2 implementations sort of
1744 Cisco IOS <= 11.2(8)) do not put router alert 1760 * Cisco IOS <= 11.2(8)) do not put router alert
1745 option to IGMP packets destined to routable 1761 * option to IGMP packets destined to routable
1746 groups. It is very bad, because it means 1762 * groups. It is very bad, because it means
1747 that we can forward NO IGMP messages. 1763 * that we can forward NO IGMP messages.
1748 */ 1764 */
1749 read_lock(&mrt_lock); 1765 struct sock *mroute_sk;
1750 if (mrt->mroute_sk) { 1766
1751 nf_reset(skb); 1767 mroute_sk = rcu_dereference(mrt->mroute_sk);
1752 raw_rcv(mrt->mroute_sk, skb); 1768 if (mroute_sk) {
1753 read_unlock(&mrt_lock); 1769 nf_reset(skb);
1754 return 0; 1770 raw_rcv(mroute_sk, skb);
1755 } 1771 return 0;
1756 read_unlock(&mrt_lock); 1772 }
1757 } 1773 }
1758 } 1774 }
1759 1775
1760 read_lock(&mrt_lock); 1776 /* already under rcu_read_lock() */
1761 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1777 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1762 1778
1763 /* 1779 /*
@@ -1769,13 +1785,12 @@ int ip_mr_input(struct sk_buff *skb)
1769 if (local) { 1785 if (local) {
1770 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1786 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1771 ip_local_deliver(skb); 1787 ip_local_deliver(skb);
1772 if (skb2 == NULL) { 1788 if (skb2 == NULL)
1773 read_unlock(&mrt_lock);
1774 return -ENOBUFS; 1789 return -ENOBUFS;
1775 }
1776 skb = skb2; 1790 skb = skb2;
1777 } 1791 }
1778 1792
1793 read_lock(&mrt_lock);
1779 vif = ipmr_find_vif(mrt, skb->dev); 1794 vif = ipmr_find_vif(mrt, skb->dev);
1780 if (vif >= 0) { 1795 if (vif >= 0) {
1781 int err2 = ipmr_cache_unresolved(mrt, vif, skb); 1796 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
@@ -1788,8 +1803,8 @@ int ip_mr_input(struct sk_buff *skb)
1788 return -ENODEV; 1803 return -ENODEV;
1789 } 1804 }
1790 1805
1806 read_lock(&mrt_lock);
1791 ip_mr_forward(net, mrt, skb, cache, local); 1807 ip_mr_forward(net, mrt, skb, cache, local);
1792
1793 read_unlock(&mrt_lock); 1808 read_unlock(&mrt_lock);
1794 1809
1795 if (local) 1810 if (local)
@@ -1805,6 +1820,7 @@ dont_forward:
1805} 1820}
1806 1821
1807#ifdef CONFIG_IP_PIMSM 1822#ifdef CONFIG_IP_PIMSM
1823/* called with rcu_read_lock() */
1808static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 1824static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1809 unsigned int pimlen) 1825 unsigned int pimlen)
1810{ 1826{
@@ -1813,10 +1829,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1813 1829
1814 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1830 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1815 /* 1831 /*
1816 Check that: 1832 * Check that:
1817 a. packet is really destinted to a multicast group 1833 * a. packet is really sent to a multicast group
1818 b. packet is not a NULL-REGISTER 1834 * b. packet is not a NULL-REGISTER
1819 c. packet is not truncated 1835 * c. packet is not truncated
1820 */ 1836 */
1821 if (!ipv4_is_multicast(encap->daddr) || 1837 if (!ipv4_is_multicast(encap->daddr) ||
1822 encap->tot_len == 0 || 1838 encap->tot_len == 0 ||
@@ -1826,26 +1842,23 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1826 read_lock(&mrt_lock); 1842 read_lock(&mrt_lock);
1827 if (mrt->mroute_reg_vif_num >= 0) 1843 if (mrt->mroute_reg_vif_num >= 0)
1828 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 1844 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1829 if (reg_dev)
1830 dev_hold(reg_dev);
1831 read_unlock(&mrt_lock); 1845 read_unlock(&mrt_lock);
1832 1846
1833 if (reg_dev == NULL) 1847 if (reg_dev == NULL)
1834 return 1; 1848 return 1;
1835 1849
1836 skb->mac_header = skb->network_header; 1850 skb->mac_header = skb->network_header;
1837 skb_pull(skb, (u8*)encap - skb->data); 1851 skb_pull(skb, (u8 *)encap - skb->data);
1838 skb_reset_network_header(skb); 1852 skb_reset_network_header(skb);
1839 skb->protocol = htons(ETH_P_IP); 1853 skb->protocol = htons(ETH_P_IP);
1840 skb->ip_summed = 0; 1854 skb->ip_summed = CHECKSUM_NONE;
1841 skb->pkt_type = PACKET_HOST; 1855 skb->pkt_type = PACKET_HOST;
1842 1856
1843 skb_tunnel_rx(skb, reg_dev); 1857 skb_tunnel_rx(skb, reg_dev);
1844 1858
1845 netif_rx(skb); 1859 netif_rx(skb);
1846 dev_put(reg_dev);
1847 1860
1848 return 0; 1861 return NET_RX_SUCCESS;
1849} 1862}
1850#endif 1863#endif
1851 1864
@@ -1854,7 +1867,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1854 * Handle IGMP messages of PIMv1 1867 * Handle IGMP messages of PIMv1
1855 */ 1868 */
1856 1869
1857int pim_rcv_v1(struct sk_buff * skb) 1870int pim_rcv_v1(struct sk_buff *skb)
1858{ 1871{
1859 struct igmphdr *pim; 1872 struct igmphdr *pim;
1860 struct net *net = dev_net(skb->dev); 1873 struct net *net = dev_net(skb->dev);
@@ -1881,7 +1894,7 @@ drop:
1881#endif 1894#endif
1882 1895
1883#ifdef CONFIG_IP_PIMSM_V2 1896#ifdef CONFIG_IP_PIMSM_V2
1884static int pim_rcv(struct sk_buff * skb) 1897static int pim_rcv(struct sk_buff *skb)
1885{ 1898{
1886 struct pimreghdr *pim; 1899 struct pimreghdr *pim;
1887 struct net *net = dev_net(skb->dev); 1900 struct net *net = dev_net(skb->dev);
@@ -1891,8 +1904,8 @@ static int pim_rcv(struct sk_buff * skb)
1891 goto drop; 1904 goto drop;
1892 1905
1893 pim = (struct pimreghdr *)skb_transport_header(skb); 1906 pim = (struct pimreghdr *)skb_transport_header(skb);
1894 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 1907 if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
1895 (pim->flags&PIM_NULL_REGISTER) || 1908 (pim->flags & PIM_NULL_REGISTER) ||
1896 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 1909 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1897 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1910 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1898 goto drop; 1911 goto drop;
@@ -1958,28 +1971,33 @@ int ipmr_get_route(struct net *net,
1958 if (mrt == NULL) 1971 if (mrt == NULL)
1959 return -ENOENT; 1972 return -ENOENT;
1960 1973
1961 read_lock(&mrt_lock); 1974 rcu_read_lock();
1962 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); 1975 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1963 1976
1964 if (cache == NULL) { 1977 if (cache == NULL) {
1965 struct sk_buff *skb2; 1978 struct sk_buff *skb2;
1966 struct iphdr *iph; 1979 struct iphdr *iph;
1967 struct net_device *dev; 1980 struct net_device *dev;
1968 int vif; 1981 int vif = -1;
1969 1982
1970 if (nowait) { 1983 if (nowait) {
1971 read_unlock(&mrt_lock); 1984 rcu_read_unlock();
1972 return -EAGAIN; 1985 return -EAGAIN;
1973 } 1986 }
1974 1987
1975 dev = skb->dev; 1988 dev = skb->dev;
1976 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { 1989 read_lock(&mrt_lock);
1990 if (dev)
1991 vif = ipmr_find_vif(mrt, dev);
1992 if (vif < 0) {
1977 read_unlock(&mrt_lock); 1993 read_unlock(&mrt_lock);
1994 rcu_read_unlock();
1978 return -ENODEV; 1995 return -ENODEV;
1979 } 1996 }
1980 skb2 = skb_clone(skb, GFP_ATOMIC); 1997 skb2 = skb_clone(skb, GFP_ATOMIC);
1981 if (!skb2) { 1998 if (!skb2) {
1982 read_unlock(&mrt_lock); 1999 read_unlock(&mrt_lock);
2000 rcu_read_unlock();
1983 return -ENOMEM; 2001 return -ENOMEM;
1984 } 2002 }
1985 2003
@@ -1992,13 +2010,16 @@ int ipmr_get_route(struct net *net,
1992 iph->version = 0; 2010 iph->version = 0;
1993 err = ipmr_cache_unresolved(mrt, vif, skb2); 2011 err = ipmr_cache_unresolved(mrt, vif, skb2);
1994 read_unlock(&mrt_lock); 2012 read_unlock(&mrt_lock);
2013 rcu_read_unlock();
1995 return err; 2014 return err;
1996 } 2015 }
1997 2016
1998 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 2017 read_lock(&mrt_lock);
2018 if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1999 cache->mfc_flags |= MFC_NOTIFY; 2019 cache->mfc_flags |= MFC_NOTIFY;
2000 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2020 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
2001 read_unlock(&mrt_lock); 2021 read_unlock(&mrt_lock);
2022 rcu_read_unlock();
2002 return err; 2023 return err;
2003} 2024}
2004 2025
@@ -2050,14 +2071,14 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2050 s_h = cb->args[1]; 2071 s_h = cb->args[1];
2051 s_e = cb->args[2]; 2072 s_e = cb->args[2];
2052 2073
2053 read_lock(&mrt_lock); 2074 rcu_read_lock();
2054 ipmr_for_each_table(mrt, net) { 2075 ipmr_for_each_table(mrt, net) {
2055 if (t < s_t) 2076 if (t < s_t)
2056 goto next_table; 2077 goto next_table;
2057 if (t > s_t) 2078 if (t > s_t)
2058 s_h = 0; 2079 s_h = 0;
2059 for (h = s_h; h < MFC_LINES; h++) { 2080 for (h = s_h; h < MFC_LINES; h++) {
2060 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { 2081 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
2061 if (e < s_e) 2082 if (e < s_e)
2062 goto next_entry; 2083 goto next_entry;
2063 if (ipmr_fill_mroute(mrt, skb, 2084 if (ipmr_fill_mroute(mrt, skb,
@@ -2075,7 +2096,7 @@ next_table:
2075 t++; 2096 t++;
2076 } 2097 }
2077done: 2098done:
2078 read_unlock(&mrt_lock); 2099 rcu_read_unlock();
2079 2100
2080 cb->args[2] = e; 2101 cb->args[2] = e;
2081 cb->args[1] = h; 2102 cb->args[1] = h;
@@ -2086,7 +2107,8 @@ done:
2086 2107
2087#ifdef CONFIG_PROC_FS 2108#ifdef CONFIG_PROC_FS
2088/* 2109/*
2089 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 2110 * The /proc interfaces to multicast routing :
2111 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
2090 */ 2112 */
2091struct ipmr_vif_iter { 2113struct ipmr_vif_iter {
2092 struct seq_net_private p; 2114 struct seq_net_private p;
@@ -2208,14 +2230,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2208 struct mr_table *mrt = it->mrt; 2230 struct mr_table *mrt = it->mrt;
2209 struct mfc_cache *mfc; 2231 struct mfc_cache *mfc;
2210 2232
2211 read_lock(&mrt_lock); 2233 rcu_read_lock();
2212 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { 2234 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2213 it->cache = &mrt->mfc_cache_array[it->ct]; 2235 it->cache = &mrt->mfc_cache_array[it->ct];
2214 list_for_each_entry(mfc, it->cache, list) 2236 list_for_each_entry_rcu(mfc, it->cache, list)
2215 if (pos-- == 0) 2237 if (pos-- == 0)
2216 return mfc; 2238 return mfc;
2217 } 2239 }
2218 read_unlock(&mrt_lock); 2240 rcu_read_unlock();
2219 2241
2220 spin_lock_bh(&mfc_unres_lock); 2242 spin_lock_bh(&mfc_unres_lock);
2221 it->cache = &mrt->mfc_unres_queue; 2243 it->cache = &mrt->mfc_unres_queue;
@@ -2274,7 +2296,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2274 } 2296 }
2275 2297
2276 /* exhausted cache_array, show unresolved */ 2298 /* exhausted cache_array, show unresolved */
2277 read_unlock(&mrt_lock); 2299 rcu_read_unlock();
2278 it->cache = &mrt->mfc_unres_queue; 2300 it->cache = &mrt->mfc_unres_queue;
2279 it->ct = 0; 2301 it->ct = 0;
2280 2302
@@ -2282,7 +2304,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2282 if (!list_empty(it->cache)) 2304 if (!list_empty(it->cache))
2283 return list_first_entry(it->cache, struct mfc_cache, list); 2305 return list_first_entry(it->cache, struct mfc_cache, list);
2284 2306
2285 end_of_list: 2307end_of_list:
2286 spin_unlock_bh(&mfc_unres_lock); 2308 spin_unlock_bh(&mfc_unres_lock);
2287 it->cache = NULL; 2309 it->cache = NULL;
2288 2310
@@ -2297,7 +2319,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2297 if (it->cache == &mrt->mfc_unres_queue) 2319 if (it->cache == &mrt->mfc_unres_queue)
2298 spin_unlock_bh(&mfc_unres_lock); 2320 spin_unlock_bh(&mfc_unres_lock);
2299 else if (it->cache == &mrt->mfc_cache_array[it->ct]) 2321 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2300 read_unlock(&mrt_lock); 2322 rcu_read_unlock();
2301} 2323}
2302 2324
2303static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2325static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -2323,7 +2345,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2323 mfc->mfc_un.res.bytes, 2345 mfc->mfc_un.res.bytes,
2324 mfc->mfc_un.res.wrong_if); 2346 mfc->mfc_un.res.wrong_if);
2325 for (n = mfc->mfc_un.res.minvif; 2347 for (n = mfc->mfc_un.res.minvif;
2326 n < mfc->mfc_un.res.maxvif; n++ ) { 2348 n < mfc->mfc_un.res.maxvif; n++) {
2327 if (VIF_EXISTS(mrt, n) && 2349 if (VIF_EXISTS(mrt, n) &&
2328 mfc->mfc_un.res.ttls[n] < 255) 2350 mfc->mfc_un.res.ttls[n] < 255)
2329 seq_printf(seq, 2351 seq_printf(seq,
@@ -2421,7 +2443,7 @@ int __init ip_mr_init(void)
2421 2443
2422 mrt_cachep = kmem_cache_create("ip_mrt_cache", 2444 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2423 sizeof(struct mfc_cache), 2445 sizeof(struct mfc_cache),
2424 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 2446 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
2425 NULL); 2447 NULL);
2426 if (!mrt_cachep) 2448 if (!mrt_cachep)
2427 return -ENOMEM; 2449 return -ENOMEM;
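
The net/ipv4/ipmr.c hunks above convert the hot read paths from read_lock(&mrt_lock) to RCU: the mfc_cache_array hash chains are now walked with list_for_each_entry_rcu() under rcu_read_lock(), and mrt->mroute_sk is fetched with rcu_dereference(), while mrt_lock is retained only around the vif_table accesses. A condensed kernel-style sketch of the reader pattern follows; it is not a standalone program, it uses only primitives and struct mfc_cache fields visible in the surrounding code, and it assumes the writer side publishes entries with list_add_rcu() and frees them only after an RCU grace period (that half of the change is not fully visible in these hunks).

/* caller must hold rcu_read_lock() */
static struct mfc_cache *cache_find_rcu(struct list_head *chain,
                                        __be32 origin, __be32 mcastgrp)
{
        struct mfc_cache *c;

        list_for_each_entry_rcu(c, chain, list) {
                /* entries may be added or removed concurrently; RCU keeps
                 * the one being examined alive until rcu_read_unlock()
                 */
                if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
                        return c;
        }
        return NULL;
}

The pay-off is that ip_mr_input(), ipmr_get_route() and the /proc readers no longer serialize against each other on mrt_lock; writers must still serialize among themselves and defer frees until after a grace period.
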
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8f4f9a57f12..8b642f152468 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -72,7 +72,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
72 for (i = 0; i < len; i++) 72 for (i = 0; i < len; i++)
73 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; 73 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
74 74
75 return (ret != 0); 75 return ret != 0;
76} 76}
77 77
78/* 78/*
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 3a43cf36db87..1e26a4897655 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,6 +29,7 @@
29#include <net/netfilter/nf_conntrack.h> 29#include <net/netfilter/nf_conntrack.h>
30#include <net/net_namespace.h> 30#include <net/net_namespace.h>
31#include <net/checksum.h> 31#include <net/checksum.h>
32#include <net/ip.h>
32 33
33#define CLUSTERIP_VERSION "0.8" 34#define CLUSTERIP_VERSION "0.8"
34 35
@@ -231,24 +232,22 @@ clusterip_hashfn(const struct sk_buff *skb,
231{ 232{
232 const struct iphdr *iph = ip_hdr(skb); 233 const struct iphdr *iph = ip_hdr(skb);
233 unsigned long hashval; 234 unsigned long hashval;
234 u_int16_t sport, dport; 235 u_int16_t sport = 0, dport = 0;
235 const u_int16_t *ports; 236 int poff;
236 237
237 switch (iph->protocol) { 238 poff = proto_ports_offset(iph->protocol);
238 case IPPROTO_TCP: 239 if (poff >= 0) {
239 case IPPROTO_UDP: 240 const u_int16_t *ports;
240 case IPPROTO_UDPLITE: 241 u16 _ports[2];
241 case IPPROTO_SCTP: 242
242 case IPPROTO_DCCP: 243 ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
243 case IPPROTO_ICMP: 244 if (ports) {
244 ports = (const void *)iph+iph->ihl*4; 245 sport = ports[0];
245 sport = ports[0]; 246 dport = ports[1];
246 dport = ports[1]; 247 }
247 break; 248 } else {
248 default:
249 if (net_ratelimit()) 249 if (net_ratelimit())
250 pr_info("unknown protocol %u\n", iph->protocol); 250 pr_info("unknown protocol %u\n", iph->protocol);
251 sport = dport = 0;
252 } 251 }
253 252
254 switch (config->hash_mode) { 253 switch (config->hash_mode) {
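
The CLUSTERIP change replaces the open-coded protocol switch with proto_ports_offset() plus skb_header_pointer(), so the 16-bit port pair is copied into a local buffer and the hash no longer reads past the packet when the transport header is not in the linear skb area. A small userspace sketch of the same idea; port_offset() here is a hypothetical stand-in for the kernel helper and only covers TCP and UDP, where the source and destination ports are the first four bytes of the transport header:

#include <stdint.h>
#include <string.h>
#include <netinet/in.h>         /* IPPROTO_* */
#include <arpa/inet.h>          /* ntohs()   */

/* Hypothetical stand-in for proto_ports_offset(): byte offset of the
 * 16-bit src/dst port pair inside the L4 header, or -1 if none.
 */
static int port_offset(int proto)
{
        switch (proto) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
                return 0;       /* ports are the first four bytes */
        default:
                return -1;
        }
}

/* Extract sport/dport from a raw IPv4 packet; ihl is the IP header
 * length in 32-bit words, as in iph->ihl.
 */
static int get_ports(const uint8_t *pkt, size_t len, unsigned int ihl,
                     int proto, uint16_t *sport, uint16_t *dport)
{
        int poff = port_offset(proto);
        size_t off = ihl * 4 + (poff < 0 ? 0 : poff);
        uint16_t ports[2];

        if (poff < 0 || off + sizeof(ports) > len)
                return -1;      /* unknown protocol or truncated packet */
        memcpy(ports, pkt + off, sizeof(ports));
        *sport = ntohs(ports[0]);
        *dport = ntohs(ports[1]);
        return 0;
}

Bounds-checking the copy is the point of the change: the old code dereferenced (const void *)iph + iph->ihl*4 directly, which is only guaranteed safe when those bytes sit in the linear part of the skb.
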
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index f2d297351405..65699c24411c 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,8 +28,7 @@
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <net/protocol.h> 29#include <net/protocol.h>
30 30
31const struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; 31const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly;
32static DEFINE_SPINLOCK(inet_proto_lock);
33 32
34/* 33/*
35 * Add a protocol handler to the hash tables 34 * Add a protocol handler to the hash tables
@@ -37,20 +36,9 @@ static DEFINE_SPINLOCK(inet_proto_lock);
37 36
38int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) 37int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
39{ 38{
40 int hash, ret; 39 int hash = protocol & (MAX_INET_PROTOS - 1);
41 40
42 hash = protocol & (MAX_INET_PROTOS - 1); 41 return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1;
43
44 spin_lock_bh(&inet_proto_lock);
45 if (inet_protos[hash]) {
46 ret = -1;
47 } else {
48 inet_protos[hash] = prot;
49 ret = 0;
50 }
51 spin_unlock_bh(&inet_proto_lock);
52
53 return ret;
54} 42}
55EXPORT_SYMBOL(inet_add_protocol); 43EXPORT_SYMBOL(inet_add_protocol);
56 44
@@ -60,18 +48,9 @@ EXPORT_SYMBOL(inet_add_protocol);
60 48
61int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) 49int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
62{ 50{
63 int hash, ret; 51 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
64
65 hash = protocol & (MAX_INET_PROTOS - 1);
66 52
67 spin_lock_bh(&inet_proto_lock); 53 ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1;
68 if (inet_protos[hash] == prot) {
69 inet_protos[hash] = NULL;
70 ret = 0;
71 } else {
72 ret = -1;
73 }
74 spin_unlock_bh(&inet_proto_lock);
75 54
76 synchronize_net(); 55 synchronize_net();
77 56
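
With inet_proto_lock gone, inet_add_protocol() and inet_del_protocol() rely on a single cmpxchg() per slot: registration succeeds only if the slot is still NULL, removal only if the slot still holds the handler being removed, and the existing synchronize_net() then guarantees no packet is still using a removed handler. The same claim/release pattern in portable C11 atomics, as a minimal sketch (MAX_SLOTS and struct handler are placeholders, not kernel names):

#include <stdatomic.h>
#include <stddef.h>

#define MAX_SLOTS 256

struct handler;                         /* opaque for the sketch */
static _Atomic(struct handler *) slots[MAX_SLOTS];

/* Returns 0 on success, -1 if the slot was already taken. */
static int add_handler(struct handler *h, unsigned char proto)
{
        struct handler *expected = NULL;

        return atomic_compare_exchange_strong(&slots[proto & (MAX_SLOTS - 1)],
                                              &expected, h) ? 0 : -1;
}

/* Returns 0 on success, -1 if a different handler is installed. */
static int del_handler(struct handler *h, unsigned char proto)
{
        struct handler *expected = h;

        return atomic_compare_exchange_strong(&slots[proto & (MAX_SLOTS - 1)],
                                              &expected, NULL) ? 0 : -1;
}

Readers in this sketch would load the slot with atomic_load(); the kernel gets the equivalent guarantee from its RCU-protected readers plus the synchronize_net() call before a removed handler may go away.
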
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 009a7b2aa1ef..1f85ef289895 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
505 505
506 ipc.addr = inet->inet_saddr; 506 ipc.addr = inet->inet_saddr;
507 ipc.opt = NULL; 507 ipc.opt = NULL;
508 ipc.shtx.flags = 0; 508 ipc.tx_flags = 0;
509 ipc.oif = sk->sk_bound_dev_if; 509 ipc.oif = sk->sk_bound_dev_if;
510 510
511 if (msg->msg_controllen) { 511 if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ac6559cb54f9..04e0df82b88c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1107,6 +1107,7 @@ restart:
1107 * on the route gc list. 1107 * on the route gc list.
1108 */ 1108 */
1109 1109
1110 rt->dst.flags |= DST_NOCACHE;
1110 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1111 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1111 int err = arp_bind_neighbour(&rt->dst); 1112 int err = arp_bind_neighbour(&rt->dst);
1112 if (err) { 1113 if (err) {
@@ -1268,18 +1269,11 @@ skip_hashing:
1268 1269
1269void rt_bind_peer(struct rtable *rt, int create) 1270void rt_bind_peer(struct rtable *rt, int create)
1270{ 1271{
1271 static DEFINE_SPINLOCK(rt_peer_lock);
1272 struct inet_peer *peer; 1272 struct inet_peer *peer;
1273 1273
1274 peer = inet_getpeer(rt->rt_dst, create); 1274 peer = inet_getpeer(rt->rt_dst, create);
1275 1275
1276 spin_lock_bh(&rt_peer_lock); 1276 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1277 if (rt->peer == NULL) {
1278 rt->peer = peer;
1279 peer = NULL;
1280 }
1281 spin_unlock_bh(&rt_peer_lock);
1282 if (peer)
1283 inet_putpeer(peer); 1277 inet_putpeer(peer);
1284} 1278}
1285 1279
@@ -2365,9 +2359,8 @@ static int __mkroute_output(struct rtable **result,
2365 struct rtable *rth; 2359 struct rtable *rth;
2366 struct in_device *in_dev; 2360 struct in_device *in_dev;
2367 u32 tos = RT_FL_TOS(oldflp); 2361 u32 tos = RT_FL_TOS(oldflp);
2368 int err = 0;
2369 2362
2370 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK)) 2363 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
2371 return -EINVAL; 2364 return -EINVAL;
2372 2365
2373 if (fl->fl4_dst == htonl(0xFFFFFFFF)) 2366 if (fl->fl4_dst == htonl(0xFFFFFFFF))
@@ -2380,11 +2373,12 @@ static int __mkroute_output(struct rtable **result,
2380 if (dev_out->flags & IFF_LOOPBACK) 2373 if (dev_out->flags & IFF_LOOPBACK)
2381 flags |= RTCF_LOCAL; 2374 flags |= RTCF_LOCAL;
2382 2375
2383 /* get work reference to inet device */ 2376 rcu_read_lock();
2384 in_dev = in_dev_get(dev_out); 2377 in_dev = __in_dev_get_rcu(dev_out);
2385 if (!in_dev) 2378 if (!in_dev) {
2379 rcu_read_unlock();
2386 return -EINVAL; 2380 return -EINVAL;
2387 2381 }
2388 if (res->type == RTN_BROADCAST) { 2382 if (res->type == RTN_BROADCAST) {
2389 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2383 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2390 if (res->fi) { 2384 if (res->fi) {
@@ -2392,13 +2386,13 @@ static int __mkroute_output(struct rtable **result,
2392 res->fi = NULL; 2386 res->fi = NULL;
2393 } 2387 }
2394 } else if (res->type == RTN_MULTICAST) { 2388 } else if (res->type == RTN_MULTICAST) {
2395 flags |= RTCF_MULTICAST|RTCF_LOCAL; 2389 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2396 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 2390 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
2397 oldflp->proto)) 2391 oldflp->proto))
2398 flags &= ~RTCF_LOCAL; 2392 flags &= ~RTCF_LOCAL;
2399 /* If multicast route do not exist use 2393 /* If multicast route do not exist use
2400 default one, but do not gateway in this case. 2394 * default one, but do not gateway in this case.
2401 Yes, it is hack. 2395 * Yes, it is hack.
2402 */ 2396 */
2403 if (res->fi && res->prefixlen < 4) { 2397 if (res->fi && res->prefixlen < 4) {
2404 fib_info_put(res->fi); 2398 fib_info_put(res->fi);
@@ -2409,9 +2403,12 @@ static int __mkroute_output(struct rtable **result,
2409 2403
2410 rth = dst_alloc(&ipv4_dst_ops); 2404 rth = dst_alloc(&ipv4_dst_ops);
2411 if (!rth) { 2405 if (!rth) {
2412 err = -ENOBUFS; 2406 rcu_read_unlock();
2413 goto cleanup; 2407 return -ENOBUFS;
2414 } 2408 }
2409 in_dev_hold(in_dev);
2410 rcu_read_unlock();
2411 rth->idev = in_dev;
2415 2412
2416 atomic_set(&rth->dst.__refcnt, 1); 2413 atomic_set(&rth->dst.__refcnt, 1);
2417 rth->dst.flags= DST_HOST; 2414 rth->dst.flags= DST_HOST;
@@ -2432,7 +2429,6 @@ static int __mkroute_output(struct rtable **result,
2432 cache entry */ 2429 cache entry */
2433 rth->dst.dev = dev_out; 2430 rth->dst.dev = dev_out;
2434 dev_hold(dev_out); 2431 dev_hold(dev_out);
2435 rth->idev = in_dev_get(dev_out);
2436 rth->rt_gateway = fl->fl4_dst; 2432 rth->rt_gateway = fl->fl4_dst;
2437 rth->rt_spec_dst= fl->fl4_src; 2433 rth->rt_spec_dst= fl->fl4_src;
2438 2434
@@ -2467,13 +2463,8 @@ static int __mkroute_output(struct rtable **result,
2467 rt_set_nexthop(rth, res, 0); 2463 rt_set_nexthop(rth, res, 0);
2468 2464
2469 rth->rt_flags = flags; 2465 rth->rt_flags = flags;
2470
2471 *result = rth; 2466 *result = rth;
2472 cleanup: 2467 return 0;
2473 /* release work reference to inet device */
2474 in_dev_put(in_dev);
2475
2476 return err;
2477} 2468}
2478 2469
2479static int ip_mkroute_output(struct rtable **rp, 2470static int ip_mkroute_output(struct rtable **rp,
@@ -2497,6 +2488,7 @@ static int ip_mkroute_output(struct rtable **rp,
2497 2488
2498/* 2489/*
2499 * Major route resolver routine. 2490 * Major route resolver routine.
2491 * called with rcu_read_lock();
2500 */ 2492 */
2501 2493
2502static int ip_route_output_slow(struct net *net, struct rtable **rp, 2494static int ip_route_output_slow(struct net *net, struct rtable **rp,
@@ -2515,7 +2507,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2515 .iif = net->loopback_dev->ifindex, 2507 .iif = net->loopback_dev->ifindex,
2516 .oif = oldflp->oif }; 2508 .oif = oldflp->oif };
2517 struct fib_result res; 2509 struct fib_result res;
2518 unsigned flags = 0; 2510 unsigned int flags = 0;
2519 struct net_device *dev_out = NULL; 2511 struct net_device *dev_out = NULL;
2520 int free_res = 0; 2512 int free_res = 0;
2521 int err; 2513 int err;
@@ -2545,7 +2537,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2545 (ipv4_is_multicast(oldflp->fl4_dst) || 2537 (ipv4_is_multicast(oldflp->fl4_dst) ||
2546 oldflp->fl4_dst == htonl(0xFFFFFFFF))) { 2538 oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
2547 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2539 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2548 dev_out = ip_dev_find(net, oldflp->fl4_src); 2540 dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
2549 if (dev_out == NULL) 2541 if (dev_out == NULL)
2550 goto out; 2542 goto out;
2551 2543
@@ -2570,26 +2562,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2570 2562
2571 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { 2563 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
2572 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2564 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2573 dev_out = ip_dev_find(net, oldflp->fl4_src); 2565 if (!__ip_dev_find(net, oldflp->fl4_src, false))
2574 if (dev_out == NULL)
2575 goto out; 2566 goto out;
2576 dev_put(dev_out);
2577 dev_out = NULL;
2578 } 2567 }
2579 } 2568 }
2580 2569
2581 2570
2582 if (oldflp->oif) { 2571 if (oldflp->oif) {
2583 dev_out = dev_get_by_index(net, oldflp->oif); 2572 dev_out = dev_get_by_index_rcu(net, oldflp->oif);
2584 err = -ENODEV; 2573 err = -ENODEV;
2585 if (dev_out == NULL) 2574 if (dev_out == NULL)
2586 goto out; 2575 goto out;
2587 2576
2588 /* RACE: Check return value of inet_select_addr instead. */ 2577 /* RACE: Check return value of inet_select_addr instead. */
2589 if (__in_dev_get_rtnl(dev_out) == NULL) { 2578 if (rcu_dereference(dev_out->ip_ptr) == NULL)
2590 dev_put(dev_out);
2591 goto out; /* Wrong error code */ 2579 goto out; /* Wrong error code */
2592 }
2593 2580
2594 if (ipv4_is_local_multicast(oldflp->fl4_dst) || 2581 if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
2595 oldflp->fl4_dst == htonl(0xFFFFFFFF)) { 2582 oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
@@ -2612,10 +2599,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2612 fl.fl4_dst = fl.fl4_src; 2599 fl.fl4_dst = fl.fl4_src;
2613 if (!fl.fl4_dst) 2600 if (!fl.fl4_dst)
2614 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); 2601 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
2615 if (dev_out)
2616 dev_put(dev_out);
2617 dev_out = net->loopback_dev; 2602 dev_out = net->loopback_dev;
2618 dev_hold(dev_out);
2619 fl.oif = net->loopback_dev->ifindex; 2603 fl.oif = net->loopback_dev->ifindex;
2620 res.type = RTN_LOCAL; 2604 res.type = RTN_LOCAL;
2621 flags |= RTCF_LOCAL; 2605 flags |= RTCF_LOCAL;
@@ -2649,8 +2633,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2649 res.type = RTN_UNICAST; 2633 res.type = RTN_UNICAST;
2650 goto make_route; 2634 goto make_route;
2651 } 2635 }
2652 if (dev_out)
2653 dev_put(dev_out);
2654 err = -ENETUNREACH; 2636 err = -ENETUNREACH;
2655 goto out; 2637 goto out;
2656 } 2638 }
@@ -2659,10 +2641,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2659 if (res.type == RTN_LOCAL) { 2641 if (res.type == RTN_LOCAL) {
2660 if (!fl.fl4_src) 2642 if (!fl.fl4_src)
2661 fl.fl4_src = fl.fl4_dst; 2643 fl.fl4_src = fl.fl4_dst;
2662 if (dev_out)
2663 dev_put(dev_out);
2664 dev_out = net->loopback_dev; 2644 dev_out = net->loopback_dev;
2665 dev_hold(dev_out);
2666 fl.oif = dev_out->ifindex; 2645 fl.oif = dev_out->ifindex;
2667 if (res.fi) 2646 if (res.fi)
2668 fib_info_put(res.fi); 2647 fib_info_put(res.fi);
@@ -2682,28 +2661,23 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2682 if (!fl.fl4_src) 2661 if (!fl.fl4_src)
2683 fl.fl4_src = FIB_RES_PREFSRC(res); 2662 fl.fl4_src = FIB_RES_PREFSRC(res);
2684 2663
2685 if (dev_out)
2686 dev_put(dev_out);
2687 dev_out = FIB_RES_DEV(res); 2664 dev_out = FIB_RES_DEV(res);
2688 dev_hold(dev_out);
2689 fl.oif = dev_out->ifindex; 2665 fl.oif = dev_out->ifindex;
2690 2666
2691 2667
2692make_route: 2668make_route:
2693 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); 2669 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
2694 2670
2695
2696 if (free_res) 2671 if (free_res)
2697 fib_res_put(&res); 2672 fib_res_put(&res);
2698 if (dev_out)
2699 dev_put(dev_out);
2700out: return err; 2673out: return err;
2701} 2674}
2702 2675
2703int __ip_route_output_key(struct net *net, struct rtable **rp, 2676int __ip_route_output_key(struct net *net, struct rtable **rp,
2704 const struct flowi *flp) 2677 const struct flowi *flp)
2705{ 2678{
2706 unsigned hash; 2679 unsigned int hash;
2680 int res;
2707 struct rtable *rth; 2681 struct rtable *rth;
2708 2682
2709 if (!rt_caching(net)) 2683 if (!rt_caching(net))
@@ -2734,7 +2708,10 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2734 rcu_read_unlock_bh(); 2708 rcu_read_unlock_bh();
2735 2709
2736slow_output: 2710slow_output:
2737 return ip_route_output_slow(net, rp, flp); 2711 rcu_read_lock();
2712 res = ip_route_output_slow(net, rp, flp);
2713 rcu_read_unlock();
2714 return res;
2738} 2715}
2739EXPORT_SYMBOL_GPL(__ip_route_output_key); 2716EXPORT_SYMBOL_GPL(__ip_route_output_key);
2740 2717
@@ -2798,7 +2775,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2798 2775
2799 dst_release(&(*rp)->dst); 2776 dst_release(&(*rp)->dst);
2800 *rp = rt; 2777 *rp = rt;
2801 return (rt ? 0 : -ENOMEM); 2778 return rt ? 0 : -ENOMEM;
2802} 2779}
2803 2780
2804int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, 2781int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f115ea68a4ef..1664a0590bb8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2392,7 +2392,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2392 err = tp->af_specific->md5_parse(sk, optval, optlen); 2392 err = tp->af_specific->md5_parse(sk, optval, optlen);
2393 break; 2393 break;
2394#endif 2394#endif
2395 2395 case TCP_USER_TIMEOUT:
2396 /* Cap the max timeout in ms TCP will retry/retrans
2397 * before giving up and aborting (ETIMEDOUT) a connection.
2398 */
2399 icsk->icsk_user_timeout = msecs_to_jiffies(val);
2400 break;
2396 default: 2401 default:
2397 err = -ENOPROTOOPT; 2402 err = -ENOPROTOOPT;
2398 break; 2403 break;
@@ -2611,6 +2616,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2611 case TCP_THIN_DUPACK: 2616 case TCP_THIN_DUPACK:
2612 val = tp->thin_dupack; 2617 val = tp->thin_dupack;
2613 break; 2618 break;
2619
2620 case TCP_USER_TIMEOUT:
2621 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2622 break;
2614 default: 2623 default:
2615 return -ENOPROTOOPT; 2624 return -ENOPROTOOPT;
2616 } 2625 }
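
The new TCP_USER_TIMEOUT socket option caps, in milliseconds, how long transmitted data may stay unacknowledged before the connection is aborted with ETIMEDOUT, and the matching getsockopt() hunk reads the value back in the same unit. A minimal userspace usage sketch; if the installed headers predate the option the constant can be defined by hand (it is 18 on Linux):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_USER_TIMEOUT
#define TCP_USER_TIMEOUT 18
#endif

/* Abort the connection if data stays unacked for more than `ms` msec. */
static int set_user_timeout(int fd, unsigned int ms)
{
        if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &ms, sizeof(ms)) < 0) {
                perror("setsockopt(TCP_USER_TIMEOUT)");
                return -1;
        }
        return 0;
}

A value of 0 keeps the default behaviour, where the retransmission and keepalive limits alone decide when to give up, as the tcp_timer.c hunks further down show.
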
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b55f60f6fcbe..f6fdd727a23d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -182,7 +182,7 @@ static void tcp_incr_quickack(struct sock *sk)
182 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); 182 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
183} 183}
184 184
185void tcp_enter_quickack_mode(struct sock *sk) 185static void tcp_enter_quickack_mode(struct sock *sk)
186{ 186{
187 struct inet_connection_sock *icsk = inet_csk(sk); 187 struct inet_connection_sock *icsk = inet_csk(sk);
188 tcp_incr_quickack(sk); 188 tcp_incr_quickack(sk);
@@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk)
805 } 805 }
806} 806}
807 807
808/* Numbers are taken from RFC3390.
809 *
810 * John Heffner states:
811 *
812 * The RFC specifies a window of no more than 4380 bytes
813 * unless 2*MSS > 4380. Reading the pseudocode in the RFC
814 * is a bit misleading because they use a clamp at 4380 bytes
815 * rather than use a multiplier in the relevant range.
816 */
817__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) 808__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
818{ 809{
819 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); 810 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
820 811
821 if (!cwnd) { 812 if (!cwnd)
822 if (tp->mss_cache > 1460) 813 cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
823 cwnd = 2;
824 else
825 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
826 }
827 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 814 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
828} 815}
829 816
@@ -2314,7 +2301,7 @@ static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
2314 2301
2315static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) 2302static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
2316{ 2303{
2317 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); 2304 return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
2318} 2305}
2319 2306
2320static inline int tcp_head_timedout(struct sock *sk) 2307static inline int tcp_head_timedout(struct sock *sk)
@@ -3412,8 +3399,8 @@ static void tcp_ack_probe(struct sock *sk)
3412 3399
3413static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) 3400static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
3414{ 3401{
3415 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || 3402 return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
3416 inet_csk(sk)->icsk_ca_state != TCP_CA_Open); 3403 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
3417} 3404}
3418 3405
3419static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) 3406static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
@@ -3430,9 +3417,9 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
3430 const u32 ack, const u32 ack_seq, 3417 const u32 ack, const u32 ack_seq,
3431 const u32 nwin) 3418 const u32 nwin)
3432{ 3419{
3433 return (after(ack, tp->snd_una) || 3420 return after(ack, tp->snd_una) ||
3434 after(ack_seq, tp->snd_wl1) || 3421 after(ack_seq, tp->snd_wl1) ||
3435 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd)); 3422 (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
3436} 3423}
3437 3424
3438/* Update our send window. 3425/* Update our send window.
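
Both the deleted comment block and the new rfc3390_bytes_to_packets() call refer to RFC 3390, which caps the initial congestion window at min(4*MSS, max(2*MSS, 4380 bytes)); in whole segments that is usually quoted as 4 segments for MSS up to 1095 bytes, 3 segments up to 2190, and 2 above that. The helper itself is not shown in this hunk, so the function below is an independent restatement of the rule rather than the kernel's code:

#include <stdio.h>

/* RFC 3390 initial window, expressed in whole segments:
 * min(4*MSS, max(2*MSS, 4380 bytes)).
 */
static unsigned int rfc3390_packets(unsigned int mss)
{
        if (mss <= 1095)
                return 4;
        if (mss <= 2190)
                return 3;
        return 2;
}

int main(void)
{
        unsigned int mss[] = { 536, 1460, 4000 };
        unsigned int i;

        for (i = 0; i < sizeof(mss) / sizeof(mss[0]); i++)
                printf("MSS %4u -> initial cwnd %u segments\n",
                       mss[i], rfc3390_packets(mss[i]));
        return 0;       /* prints 4, 3 and 2 segments respectively */
}

The removed code used 1460 rather than 2190 as the upper threshold, so for MSS values between 1461 and 2190 the shared helper now allows one extra segment.
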
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 020766292bb0..a0232f3a358b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2571,7 +2571,6 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2571 2571
2572 return tcp_gro_receive(head, skb); 2572 return tcp_gro_receive(head, skb);
2573} 2573}
2574EXPORT_SYMBOL(tcp4_gro_receive);
2575 2574
2576int tcp4_gro_complete(struct sk_buff *skb) 2575int tcp4_gro_complete(struct sk_buff *skb)
2577{ 2576{
@@ -2584,7 +2583,6 @@ int tcp4_gro_complete(struct sk_buff *skb)
2584 2583
2585 return tcp_gro_complete(skb); 2584 return tcp_gro_complete(skb);
2586} 2585}
2587EXPORT_SYMBOL(tcp4_gro_complete);
2588 2586
2589struct proto tcp_prot = { 2587struct proto tcp_prot = {
2590 .name = "TCP", 2588 .name = "TCP",
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f25b56cb85cb..43cf901d7659 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -55,7 +55,7 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
55 return 1; 55 return 1;
56 if (after(end_seq, s_win) && before(seq, e_win)) 56 if (after(end_seq, s_win) && before(seq, e_win))
57 return 1; 57 return 1;
58 return (seq == e_win && seq == end_seq); 58 return seq == e_win && seq == end_seq;
59} 59}
60 60
61/* 61/*
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index de3bd8458588..05b1ecf36763 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -224,16 +224,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
224 } 224 }
225 } 225 }
226 226
227 /* Set initial window to value enough for senders, 227 /* Set initial window to value enough for senders, following RFC5681. */
228 * following RFC2414. Senders, not following this RFC,
229 * will be satisfied with 2.
230 */
231 if (mss > (1 << *rcv_wscale)) { 228 if (mss > (1 << *rcv_wscale)) {
232 int init_cwnd = 4; 229 int init_cwnd = rfc3390_bytes_to_packets(mss);
233 if (mss > 1460 * 3) 230
234 init_cwnd = 2;
235 else if (mss > 1460)
236 init_cwnd = 3;
237 /* when initializing use the value from init_rcv_wnd 231 /* when initializing use the value from init_rcv_wnd
238 * rather than the default from above 232 * rather than the default from above
239 */ 233 */
@@ -1376,9 +1370,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
1376 const struct sk_buff *skb, 1370 const struct sk_buff *skb,
1377 unsigned mss_now, int nonagle) 1371 unsigned mss_now, int nonagle)
1378{ 1372{
1379 return (skb->len < mss_now && 1373 return skb->len < mss_now &&
1380 ((nonagle & TCP_NAGLE_CORK) || 1374 ((nonagle & TCP_NAGLE_CORK) ||
1381 (!nonagle && tp->packets_out && tcp_minshall_check(tp)))); 1375 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1382} 1376}
1383 1377
1384/* Return non-zero if the Nagle test allows this packet to be 1378/* Return non-zero if the Nagle test allows this packet to be
@@ -1449,10 +1443,10 @@ int tcp_may_send_now(struct sock *sk)
1449 struct tcp_sock *tp = tcp_sk(sk); 1443 struct tcp_sock *tp = tcp_sk(sk);
1450 struct sk_buff *skb = tcp_send_head(sk); 1444 struct sk_buff *skb = tcp_send_head(sk);
1451 1445
1452 return (skb && 1446 return skb &&
1453 tcp_snd_test(sk, skb, tcp_current_mss(sk), 1447 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1454 (tcp_skb_is_last(sk, skb) ? 1448 (tcp_skb_is_last(sk, skb) ?
1455 tp->nonagle : TCP_NAGLE_PUSH))); 1449 tp->nonagle : TCP_NAGLE_PUSH));
1456} 1450}
1457 1451
1458/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet 1452/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -2429,6 +2423,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2429 __u8 rcv_wscale; 2423 __u8 rcv_wscale;
2430 /* Set this up on the first call only */ 2424 /* Set this up on the first call only */
2431 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); 2425 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2426
2427 /* limit the window selection if the user enforce a smaller rx buffer */
2428 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2429 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
2430 req->window_clamp = tcp_full_space(sk);
2431
2432 /* tcp_full_space because it is guaranteed to be the first packet */ 2432 /* tcp_full_space because it is guaranteed to be the first packet */
2433 tcp_select_initial_window(tcp_full_space(sk), 2433 tcp_select_initial_window(tcp_full_space(sk),
2434 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 2434 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -2555,6 +2555,11 @@ static void tcp_connect_init(struct sock *sk)
2555 2555
2556 tcp_initialize_rcv_mss(sk); 2556 tcp_initialize_rcv_mss(sk);
2557 2557
2558 /* limit the window selection if the user enforce a smaller rx buffer */
2559 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2560 (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
2561 tp->window_clamp = tcp_full_space(sk);
2562
2558 tcp_select_initial_window(tcp_full_space(sk), 2563 tcp_select_initial_window(tcp_full_space(sk),
2559 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), 2564 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2560 &tp->rcv_wnd, 2565 &tp->rcv_wnd,
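
The two added blocks clamp window_clamp to tcp_full_space() whenever SOCK_RCVBUF_LOCK is set, i.e. whenever the application has pinned the receive buffer with SO_RCVBUF, so the window offered in the SYN or SYN-ACK can never promise more than the locked buffer can hold; that is what the "limit the window selection if the user enforce a smaller rx buffer" comments refer to. A sketch of the userspace call that ends up exercising this path (the function name and the choice of size are illustrative, not from the patch):

#include <stdio.h>
#include <sys/socket.h>

/* Pinning the receive buffer before connect()/listen() makes the kernel
 * take it into account when choosing the initial window.
 */
static int pin_rcvbuf(int fd, int bytes)
{
        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bytes, sizeof(bytes)) < 0) {
                perror("setsockopt(SO_RCVBUF)");
                return -1;
        }
        return 0;
}
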
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 74c54b30600f..f3c8c6c019ae 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -140,10 +140,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
140 */ 140 */
141static bool retransmits_timed_out(struct sock *sk, 141static bool retransmits_timed_out(struct sock *sk,
142 unsigned int boundary, 142 unsigned int boundary,
143 unsigned int timeout,
143 bool syn_set) 144 bool syn_set)
144{ 145{
145 unsigned int timeout, linear_backoff_thresh; 146 unsigned int linear_backoff_thresh, start_ts;
146 unsigned int start_ts;
147 unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; 147 unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
148 148
149 if (!inet_csk(sk)->icsk_retransmits) 149 if (!inet_csk(sk)->icsk_retransmits)
@@ -154,14 +154,15 @@ static bool retransmits_timed_out(struct sock *sk,
154 else 154 else
155 start_ts = tcp_sk(sk)->retrans_stamp; 155 start_ts = tcp_sk(sk)->retrans_stamp;
156 156
157 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); 157 if (likely(timeout == 0)) {
158 158 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
159 if (boundary <= linear_backoff_thresh)
160 timeout = ((2 << boundary) - 1) * rto_base;
161 else
162 timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
163 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
164 159
160 if (boundary <= linear_backoff_thresh)
161 timeout = ((2 << boundary) - 1) * rto_base;
162 else
163 timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
164 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
165 }
165 return (tcp_time_stamp - start_ts) >= timeout; 166 return (tcp_time_stamp - start_ts) >= timeout;
166} 167}
167 168
@@ -178,7 +179,7 @@ static int tcp_write_timeout(struct sock *sk)
178 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 179 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
179 syn_set = 1; 180 syn_set = 1;
180 } else { 181 } else {
181 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { 182 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
182 /* Black hole detection */ 183 /* Black hole detection */
183 tcp_mtu_probing(icsk, sk); 184 tcp_mtu_probing(icsk, sk);
184 185
@@ -191,14 +192,15 @@ static int tcp_write_timeout(struct sock *sk)
191 192
192 retry_until = tcp_orphan_retries(sk, alive); 193 retry_until = tcp_orphan_retries(sk, alive);
193 do_reset = alive || 194 do_reset = alive ||
194 !retransmits_timed_out(sk, retry_until, 0); 195 !retransmits_timed_out(sk, retry_until, 0, 0);
195 196
196 if (tcp_out_of_resources(sk, do_reset)) 197 if (tcp_out_of_resources(sk, do_reset))
197 return 1; 198 return 1;
198 } 199 }
199 } 200 }
200 201
201 if (retransmits_timed_out(sk, retry_until, syn_set)) { 202 if (retransmits_timed_out(sk, retry_until,
203 syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
202 /* Has it gone just too far? */ 204 /* Has it gone just too far? */
203 tcp_write_err(sk); 205 tcp_write_err(sk);
204 return 1; 206 return 1;
@@ -440,7 +442,7 @@ out_reset_timer:
440 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 442 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
441 } 443 }
442 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 444 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
443 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) 445 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
444 __sk_dst_reset(sk); 446 __sk_dst_reset(sk);
445 447
446out:; 448out:;
@@ -560,7 +562,14 @@ static void tcp_keepalive_timer (unsigned long data)
560 elapsed = keepalive_time_elapsed(tp); 562 elapsed = keepalive_time_elapsed(tp);
561 563
562 if (elapsed >= keepalive_time_when(tp)) { 564 if (elapsed >= keepalive_time_when(tp)) {
563 if (icsk->icsk_probes_out >= keepalive_probes(tp)) { 565 /* If the TCP_USER_TIMEOUT option is enabled, use that
566 * to determine when to timeout instead.
567 */
568 if ((icsk->icsk_user_timeout != 0 &&
569 elapsed >= icsk->icsk_user_timeout &&
570 icsk->icsk_probes_out > 0) ||
571 (icsk->icsk_user_timeout == 0 &&
572 icsk->icsk_probes_out >= keepalive_probes(tp))) {
564 tcp_send_active_reset(sk, GFP_ATOMIC); 573 tcp_send_active_reset(sk, GFP_ATOMIC);
565 tcp_write_err(sk); 574 tcp_write_err(sk);
566 goto out; 575 goto out;
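
retransmits_timed_out() gains an explicit timeout argument so the new TCP_USER_TIMEOUT value can replace the boundary-derived limit; when the argument is 0 it still computes the classic exponential-backoff window: doubled RTOs until the doubling would pass TCP_RTO_MAX, then one TCP_RTO_MAX per further retry. A standalone sketch of that arithmetic, assuming the usual TCP_RTO_MIN of 200 ms and TCP_RTO_MAX of 120 s (both HZ-derived in the kernel) and the default tcp_retries1 = 3 and tcp_retries2 = 15:

#include <stdio.h>

static unsigned int ilog2_u(unsigned int v)     /* floor(log2(v)), v > 0 */
{
        unsigned int r = 0;

        while (v >>= 1)
                r++;
        return r;
}

/* Total time (ms) covered by `boundary` retransmissions, mirroring the
 * timeout == 0 branch above; rto_base and rto_max are in milliseconds.
 */
static unsigned int retrans_window_ms(unsigned int boundary,
                                      unsigned int rto_base,
                                      unsigned int rto_max)
{
        unsigned int thresh = ilog2_u(rto_max / rto_base);

        if (boundary <= thresh)
                return ((2u << boundary) - 1) * rto_base;
        return ((2u << thresh) - 1) * rto_base +
               (boundary - thresh) * rto_max;
}

int main(void)
{
        printf("%u ms\n", retrans_window_ms(3, 200, 120000));   /* ~3 s      */
        printf("%u ms\n", retrans_window_ms(15, 200, 120000));  /* ~924.6 s  */
        return 0;
}

With those defaults, MTU probing is triggered after roughly 3 seconds of failed retransmissions and the connection is finally aborted after roughly 15.4 minutes, unless TCP_USER_TIMEOUT shortens or lengthens that.
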
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 20151d6a6241..a534dda5456e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -80,7 +80,7 @@ static void tcp_westwood_init(struct sock *sk)
80 */ 80 */
81static inline u32 westwood_do_filter(u32 a, u32 b) 81static inline u32 westwood_do_filter(u32 a, u32 b)
82{ 82{
83 return (((7 * a) + b) >> 3); 83 return ((7 * a) + b) >> 3;
84} 84}
85 85
86static void westwood_filter(struct westwood *w, u32 delta) 86static void westwood_filter(struct westwood *w, u32 delta)
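
westwood_do_filter() is a fixed-point exponentially weighted moving average: the result keeps 7/8 of the previous value a and mixes in 1/8 of the new sample b. For example, with a previous bandwidth estimate a = 1000 and a sample b = 2000, ((7 * 1000) + 2000) >> 3 = 1125, so a single sample moves the estimate only an eighth of the way toward it; the cleanup above only drops the redundant parentheses around the return expression.
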
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 59186ca7808a..9a17bd2a0a37 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,8 +14,8 @@
14#include <net/protocol.h> 14#include <net/protocol.h>
15#include <net/xfrm.h> 15#include <net/xfrm.h>
16 16
17static struct xfrm_tunnel *tunnel4_handlers; 17static struct xfrm_tunnel *tunnel4_handlers __read_mostly;
18static struct xfrm_tunnel *tunnel64_handlers; 18static struct xfrm_tunnel *tunnel64_handlers __read_mostly;
19static DEFINE_MUTEX(tunnel4_mutex); 19static DEFINE_MUTEX(tunnel4_mutex);
20 20
21static inline struct xfrm_tunnel **fam_handlers(unsigned short family) 21static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
@@ -39,7 +39,7 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
39 } 39 }
40 40
41 handler->next = *pprev; 41 handler->next = *pprev;
42 *pprev = handler; 42 rcu_assign_pointer(*pprev, handler);
43 43
44 ret = 0; 44 ret = 0;
45 45
@@ -73,6 +73,11 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
73} 73}
74EXPORT_SYMBOL(xfrm4_tunnel_deregister); 74EXPORT_SYMBOL(xfrm4_tunnel_deregister);
75 75
76#define for_each_tunnel_rcu(head, handler) \
77 for (handler = rcu_dereference(head); \
78 handler != NULL; \
79 handler = rcu_dereference(handler->next)) \
80
76static int tunnel4_rcv(struct sk_buff *skb) 81static int tunnel4_rcv(struct sk_buff *skb)
77{ 82{
78 struct xfrm_tunnel *handler; 83 struct xfrm_tunnel *handler;
@@ -80,7 +85,7 @@ static int tunnel4_rcv(struct sk_buff *skb)
80 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 85 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
81 goto drop; 86 goto drop;
82 87
83 for (handler = tunnel4_handlers; handler; handler = handler->next) 88 for_each_tunnel_rcu(tunnel4_handlers, handler)
84 if (!handler->handler(skb)) 89 if (!handler->handler(skb))
85 return 0; 90 return 0;
86 91
@@ -99,7 +104,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
99 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 104 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
100 goto drop; 105 goto drop;
101 106
102 for (handler = tunnel64_handlers; handler; handler = handler->next) 107 for_each_tunnel_rcu(tunnel64_handlers, handler)
103 if (!handler->handler(skb)) 108 if (!handler->handler(skb))
104 return 0; 109 return 0;
105 110
@@ -115,7 +120,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
115{ 120{
116 struct xfrm_tunnel *handler; 121 struct xfrm_tunnel *handler;
117 122
118 for (handler = tunnel4_handlers; handler; handler = handler->next) 123 for_each_tunnel_rcu(tunnel4_handlers, handler)
119 if (!handler->err_handler(skb, info)) 124 if (!handler->err_handler(skb, info))
120 break; 125 break;
121} 126}
@@ -125,7 +130,7 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
125{ 130{
126 struct xfrm_tunnel *handler; 131 struct xfrm_tunnel *handler;
127 132
128 for (handler = tunnel64_handlers; handler; handler = handler->next) 133 for_each_tunnel_rcu(tunnel64_handlers, handler)
129 if (!handler->err_handler(skb, info)) 134 if (!handler->err_handler(skb, info))
130 break; 135 break;
131} 136}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fb23c2e63b52..b3f7e8cf18ac 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
797 return -EOPNOTSUPP; 797 return -EOPNOTSUPP;
798 798
799 ipc.opt = NULL; 799 ipc.opt = NULL;
800 ipc.shtx.flags = 0; 800 ipc.tx_flags = 0;
801 801
802 if (up->pending) { 802 if (up->pending) {
803 /* 803 /*
@@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
845 ipc.addr = inet->inet_saddr; 845 ipc.addr = inet->inet_saddr;
846 846
847 ipc.oif = sk->sk_bound_dev_if; 847 ipc.oif = sk->sk_bound_dev_if;
848 err = sock_tx_timestamp(msg, sk, &ipc.shtx); 848 err = sock_tx_timestamp(sk, &ipc.tx_flags);
849 if (err) 849 if (err)
850 return err; 850 return err;
851 if (msg->msg_controllen) { 851 if (msg->msg_controllen) {
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 41f5982d2087..82806455e859 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -58,14 +58,14 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
58 return -ENOENT; 58 return -ENOENT;
59} 59}
60 60
61static struct xfrm_tunnel xfrm_tunnel_handler = { 61static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
62 .handler = xfrm_tunnel_rcv, 62 .handler = xfrm_tunnel_rcv,
63 .err_handler = xfrm_tunnel_err, 63 .err_handler = xfrm_tunnel_err,
64 .priority = 2, 64 .priority = 2,
65}; 65};
66 66
67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
68static struct xfrm_tunnel xfrm64_tunnel_handler = { 68static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
69 .handler = xfrm_tunnel_rcv, 69 .handler = xfrm_tunnel_rcv,
70 .err_handler = xfrm_tunnel_err, 70 .err_handler = xfrm_tunnel_err,
71 .priority = 2, 71 .priority = 2,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 324fac3b6c16..8c88340278f5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -243,7 +243,7 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev)
243/* Check if a route is valid prefix route */ 243/* Check if a route is valid prefix route */
244static inline int addrconf_is_prefix_route(const struct rt6_info *rt) 244static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
245{ 245{
246 return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0); 246 return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
247} 247}
248 248
249static void addrconf_del_timer(struct inet6_ifaddr *ifp) 249static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -2964,7 +2964,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2964 start sending router solicitations. 2964 start sending router solicitations.
2965 */ 2965 */
2966 2966
2967 if (ifp->idev->cnf.forwarding == 0 && 2967 if ((ifp->idev->cnf.forwarding == 0 ||
2968 ifp->idev->cnf.forwarding == 2) &&
2968 ifp->idev->cnf.rtr_solicits > 0 && 2969 ifp->idev->cnf.rtr_solicits > 0 &&
2969 (dev->flags&IFF_LOOPBACK) == 0 && 2970 (dev->flags&IFF_LOOPBACK) == 0 &&
2970 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { 2971 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8175f802651b..c8993e5a337c 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -518,10 +518,9 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
518 518
519static inline int ip6addrlbl_msgsize(void) 519static inline int ip6addrlbl_msgsize(void)
520{ 520{
521 return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 521 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
522 + nla_total_size(16) /* IFAL_ADDRESS */ 522 + nla_total_size(16) /* IFAL_ADDRESS */
523 + nla_total_size(4) /* IFAL_LABEL */ 523 + nla_total_size(4); /* IFAL_LABEL */
524 );
525} 524}
526 525
527static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, 526static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
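
ip6addrlbl_msgsize() is simply the aligned ancillary header plus two attributes. With the uapi macros, nla_total_size(n) works out to NLA_ALIGN(NLA_HDRLEN + n), so the 16-byte IFAL_ADDRESS attribute costs 20 bytes and the 4-byte IFAL_LABEL costs 8. A quick userspace check of the arithmetic; it assumes <linux/if_addrlabel.h> is available for struct ifaddrlblmsg:

#include <stdio.h>
#include <linux/netlink.h>
#include <linux/if_addrlabel.h>

static int nla_total(int payload)
{
        return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
        int size = NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
                 + nla_total(16)        /* IFAL_ADDRESS */
                 + nla_total(4);        /* IFAL_LABEL   */

        printf("ip6addrlbl message payload: %d bytes\n", size);
        return 0;
}
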
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 56b9bf2516f4..60220985bb80 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -467,7 +467,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
467 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) 467 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
468 sin->sin6_scope_id = sk->sk_bound_dev_if; 468 sin->sin6_scope_id = sk->sk_bound_dev_if;
469 *uaddr_len = sizeof(*sin); 469 *uaddr_len = sizeof(*sin);
470 return(0); 470 return 0;
471} 471}
472 472
473EXPORT_SYMBOL(inet6_getname); 473EXPORT_SYMBOL(inet6_getname);
@@ -488,7 +488,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
488 case SIOCADDRT: 488 case SIOCADDRT:
489 case SIOCDELRT: 489 case SIOCDELRT:
490 490
491 return(ipv6_route_ioctl(net, cmd, (void __user *)arg)); 491 return ipv6_route_ioctl(net, cmd, (void __user *)arg);
492 492
493 case SIOCSIFADDR: 493 case SIOCSIFADDR:
494 return addrconf_add_ifaddr(net, (void __user *) arg); 494 return addrconf_add_ifaddr(net, (void __user *) arg);
@@ -502,7 +502,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
502 return sk->sk_prot->ioctl(sk, cmd, arg); 502 return sk->sk_prot->ioctl(sk, cmd, arg);
503 } 503 }
504 /*NOTREACHED*/ 504 /*NOTREACHED*/
505 return(0); 505 return 0;
506} 506}
507 507
508EXPORT_SYMBOL(inet6_ioctl); 508EXPORT_SYMBOL(inet6_ioctl);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index e1caa5d526c2..14ed0a955b56 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -13,12 +13,12 @@ int ipv6_ext_hdr(u8 nexthdr)
13 /* 13 /*
14 * find out if nexthdr is an extension header or a protocol 14 * find out if nexthdr is an extension header or a protocol
15 */ 15 */
16 return ( (nexthdr == NEXTHDR_HOP) || 16 return (nexthdr == NEXTHDR_HOP) ||
17 (nexthdr == NEXTHDR_ROUTING) || 17 (nexthdr == NEXTHDR_ROUTING) ||
18 (nexthdr == NEXTHDR_FRAGMENT) || 18 (nexthdr == NEXTHDR_FRAGMENT) ||
19 (nexthdr == NEXTHDR_AUTH) || 19 (nexthdr == NEXTHDR_AUTH) ||
20 (nexthdr == NEXTHDR_NONE) || 20 (nexthdr == NEXTHDR_NONE) ||
21 (nexthdr == NEXTHDR_DEST) ); 21 (nexthdr == NEXTHDR_DEST);
22} 22}
23 23
24/* 24/*
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 980912ed7a38..99157b4cd56e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -637,7 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
637 } 637 }
638 mtu -= hlen + sizeof(struct frag_hdr); 638 mtu -= hlen + sizeof(struct frag_hdr);
639 639
640 if (skb_has_frags(skb)) { 640 if (skb_has_frag_list(skb)) {
641 int first_len = skb_pagelen(skb); 641 int first_len = skb_pagelen(skb);
642 struct sk_buff *frag2; 642 struct sk_buff *frag2;
643 643
@@ -878,8 +878,8 @@ static inline int ip6_rt_check(struct rt6key *rt_key,
878 struct in6_addr *fl_addr, 878 struct in6_addr *fl_addr,
879 struct in6_addr *addr_cache) 879 struct in6_addr *addr_cache)
880{ 880{
881 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 881 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
882 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); 882 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
883} 883}
884 884
885static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 885static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0fd027f3f47e..8be3c452af90 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -75,7 +75,7 @@ MODULE_LICENSE("GPL");
75 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ 75 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
76 (HASH_SIZE - 1)) 76 (HASH_SIZE - 1))
77 77
78static void ip6_tnl_dev_init(struct net_device *dev); 78static int ip6_tnl_dev_init(struct net_device *dev);
79static void ip6_tnl_dev_setup(struct net_device *dev); 79static void ip6_tnl_dev_setup(struct net_device *dev);
80 80
81static int ip6_tnl_net_id __read_mostly; 81static int ip6_tnl_net_id __read_mostly;
@@ -83,15 +83,42 @@ struct ip6_tnl_net {
83 /* the IPv6 tunnel fallback device */ 83 /* the IPv6 tunnel fallback device */
84 struct net_device *fb_tnl_dev; 84 struct net_device *fb_tnl_dev;
85 /* lists for storing tunnels in use */ 85 /* lists for storing tunnels in use */
86 struct ip6_tnl *tnls_r_l[HASH_SIZE]; 86 struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
87 struct ip6_tnl *tnls_wc[1]; 87 struct ip6_tnl __rcu *tnls_wc[1];
88 struct ip6_tnl **tnls[2]; 88 struct ip6_tnl __rcu **tnls[2];
89}; 89};
90 90
91/* often modified stats are per cpu, other are shared (netdev->stats) */
92struct pcpu_tstats {
93 unsigned long rx_packets;
94 unsigned long rx_bytes;
95 unsigned long tx_packets;
96 unsigned long tx_bytes;
97};
98
99static struct net_device_stats *ip6_get_stats(struct net_device *dev)
100{
101 struct pcpu_tstats sum = { 0 };
102 int i;
103
104 for_each_possible_cpu(i) {
105 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
106
107 sum.rx_packets += tstats->rx_packets;
108 sum.rx_bytes += tstats->rx_bytes;
109 sum.tx_packets += tstats->tx_packets;
110 sum.tx_bytes += tstats->tx_bytes;
111 }
112 dev->stats.rx_packets = sum.rx_packets;
113 dev->stats.rx_bytes = sum.rx_bytes;
114 dev->stats.tx_packets = sum.tx_packets;
115 dev->stats.tx_bytes = sum.tx_bytes;
116 return &dev->stats;
117}
118
91/* 119/*
92 * Locking : hash tables are protected by RCU and a spinlock 120 * Locking : hash tables are protected by RCU and RTNL
93 */ 121 */
94static DEFINE_SPINLOCK(ip6_tnl_lock);
95 122
96static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 123static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
97{ 124{
@@ -138,8 +165,8 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
138static struct ip6_tnl * 165static struct ip6_tnl *
139ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) 166ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
140{ 167{
141 unsigned h0 = HASH(remote); 168 unsigned int h0 = HASH(remote);
142 unsigned h1 = HASH(local); 169 unsigned int h1 = HASH(local);
143 struct ip6_tnl *t; 170 struct ip6_tnl *t;
144 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 171 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
145 172
@@ -167,7 +194,7 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
167 * Return: head of IPv6 tunnel list 194 * Return: head of IPv6 tunnel list
168 **/ 195 **/
169 196
170static struct ip6_tnl ** 197static struct ip6_tnl __rcu **
171ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) 198ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
172{ 199{
173 struct in6_addr *remote = &p->raddr; 200 struct in6_addr *remote = &p->raddr;
@@ -190,12 +217,10 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
190static void 217static void
191ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 218ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
192{ 219{
193 struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); 220 struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
194 221
195 spin_lock_bh(&ip6_tnl_lock); 222 rcu_assign_pointer(t->next , rtnl_dereference(*tp));
196 t->next = *tp;
197 rcu_assign_pointer(*tp, t); 223 rcu_assign_pointer(*tp, t);
198 spin_unlock_bh(&ip6_tnl_lock);
199} 224}
200 225
201/** 226/**
@@ -206,18 +231,25 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
206static void 231static void
207ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 232ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
208{ 233{
209 struct ip6_tnl **tp; 234 struct ip6_tnl __rcu **tp;
210 235 struct ip6_tnl *iter;
211 for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { 236
212 if (t == *tp) { 237 for (tp = ip6_tnl_bucket(ip6n, &t->parms);
213 spin_lock_bh(&ip6_tnl_lock); 238 (iter = rtnl_dereference(*tp)) != NULL;
214 *tp = t->next; 239 tp = &iter->next) {
215 spin_unlock_bh(&ip6_tnl_lock); 240 if (t == iter) {
241 rcu_assign_pointer(*tp, t->next);
216 break; 242 break;
217 } 243 }
218 } 244 }
219} 245}
220 246
247static void ip6_dev_free(struct net_device *dev)
248{
249 free_percpu(dev->tstats);
250 free_netdev(dev);
251}
252
221/** 253/**
222 * ip6_tnl_create() - create a new tunnel 254 * ip6_tnl_create() - create a new tunnel
223 * @p: tunnel parameters 255 * @p: tunnel parameters
@@ -256,7 +288,9 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
256 288
257 t = netdev_priv(dev); 289 t = netdev_priv(dev);
258 t->parms = *p; 290 t->parms = *p;
259 ip6_tnl_dev_init(dev); 291 err = ip6_tnl_dev_init(dev);
292 if (err < 0)
293 goto failed_free;
260 294
261 if ((err = register_netdevice(dev)) < 0) 295 if ((err = register_netdevice(dev)) < 0)
262 goto failed_free; 296 goto failed_free;
@@ -266,7 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
266 return t; 300 return t;
267 301
268failed_free: 302failed_free:
269 free_netdev(dev); 303 ip6_dev_free(dev);
270failed: 304failed:
271 return NULL; 305 return NULL;
272} 306}
@@ -290,10 +324,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
290{ 324{
291 struct in6_addr *remote = &p->raddr; 325 struct in6_addr *remote = &p->raddr;
292 struct in6_addr *local = &p->laddr; 326 struct in6_addr *local = &p->laddr;
327 struct ip6_tnl __rcu **tp;
293 struct ip6_tnl *t; 328 struct ip6_tnl *t;
294 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 329 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
295 330
296 for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) { 331 for (tp = ip6_tnl_bucket(ip6n, p);
332 (t = rtnl_dereference(*tp)) != NULL;
333 tp = &t->next) {
297 if (ipv6_addr_equal(local, &t->parms.laddr) && 334 if (ipv6_addr_equal(local, &t->parms.laddr) &&
298 ipv6_addr_equal(remote, &t->parms.raddr)) 335 ipv6_addr_equal(remote, &t->parms.raddr))
299 return t; 336 return t;
@@ -318,13 +355,10 @@ ip6_tnl_dev_uninit(struct net_device *dev)
318 struct net *net = dev_net(dev); 355 struct net *net = dev_net(dev);
319 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 356 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
320 357
321 if (dev == ip6n->fb_tnl_dev) { 358 if (dev == ip6n->fb_tnl_dev)
322 spin_lock_bh(&ip6_tnl_lock); 359 rcu_assign_pointer(ip6n->tnls_wc[0], NULL);
323 ip6n->tnls_wc[0] = NULL; 360 else
324 spin_unlock_bh(&ip6_tnl_lock);
325 } else {
326 ip6_tnl_unlink(ip6n, t); 361 ip6_tnl_unlink(ip6n, t);
327 }
328 ip6_tnl_dst_reset(t); 362 ip6_tnl_dst_reset(t);
329 dev_put(dev); 363 dev_put(dev);
330} 364}
@@ -702,6 +736,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
702 736
703 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, 737 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
704 &ipv6h->daddr)) != NULL) { 738 &ipv6h->daddr)) != NULL) {
739 struct pcpu_tstats *tstats;
740
705 if (t->parms.proto != ipproto && t->parms.proto != 0) { 741 if (t->parms.proto != ipproto && t->parms.proto != 0) {
706 rcu_read_unlock(); 742 rcu_read_unlock();
707 goto discard; 743 goto discard;
@@ -724,10 +760,17 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
724 skb->pkt_type = PACKET_HOST; 760 skb->pkt_type = PACKET_HOST;
725 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 761 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
726 762
727 skb_tunnel_rx(skb, t->dev); 763 tstats = this_cpu_ptr(t->dev->tstats);
764 tstats->rx_packets++;
765 tstats->rx_bytes += skb->len;
766
767 __skb_tunnel_rx(skb, t->dev);
728 768
729 dscp_ecn_decapsulate(t, ipv6h, skb); 769 dscp_ecn_decapsulate(t, ipv6h, skb);
730 netif_rx(skb); 770
771 if (netif_rx(skb) == NET_RX_DROP)
772 t->dev->stats.rx_dropped++;
773
731 rcu_read_unlock(); 774 rcu_read_unlock();
732 return 0; 775 return 0;
733 } 776 }
@@ -934,8 +977,10 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
934 err = ip6_local_out(skb); 977 err = ip6_local_out(skb);
935 978
936 if (net_xmit_eval(err) == 0) { 979 if (net_xmit_eval(err) == 0) {
937 stats->tx_bytes += pkt_len; 980 struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);
938 stats->tx_packets++; 981
982 tstats->tx_bytes += pkt_len;
983 tstats->tx_packets++;
939 } else { 984 } else {
940 stats->tx_errors++; 985 stats->tx_errors++;
941 stats->tx_aborted_errors++; 986 stats->tx_aborted_errors++;
@@ -1300,12 +1345,14 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1300 1345
1301 1346
1302static const struct net_device_ops ip6_tnl_netdev_ops = { 1347static const struct net_device_ops ip6_tnl_netdev_ops = {
1303 .ndo_uninit = ip6_tnl_dev_uninit, 1348 .ndo_uninit = ip6_tnl_dev_uninit,
1304 .ndo_start_xmit = ip6_tnl_xmit, 1349 .ndo_start_xmit = ip6_tnl_xmit,
1305 .ndo_do_ioctl = ip6_tnl_ioctl, 1350 .ndo_do_ioctl = ip6_tnl_ioctl,
1306 .ndo_change_mtu = ip6_tnl_change_mtu, 1351 .ndo_change_mtu = ip6_tnl_change_mtu,
1352 .ndo_get_stats = ip6_get_stats,
1307}; 1353};
1308 1354
1355
1309/** 1356/**
1310 * ip6_tnl_dev_setup - setup virtual tunnel device 1357 * ip6_tnl_dev_setup - setup virtual tunnel device
1311 * @dev: virtual device associated with tunnel 1358 * @dev: virtual device associated with tunnel
@@ -1317,7 +1364,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
1317static void ip6_tnl_dev_setup(struct net_device *dev) 1364static void ip6_tnl_dev_setup(struct net_device *dev)
1318{ 1365{
1319 dev->netdev_ops = &ip6_tnl_netdev_ops; 1366 dev->netdev_ops = &ip6_tnl_netdev_ops;
1320 dev->destructor = free_netdev; 1367 dev->destructor = ip6_dev_free;
1321 1368
1322 dev->type = ARPHRD_TUNNEL6; 1369 dev->type = ARPHRD_TUNNEL6;
1323 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); 1370 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
@@ -1333,12 +1380,17 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
1333 * @dev: virtual device associated with tunnel 1380 * @dev: virtual device associated with tunnel
1334 **/ 1381 **/
1335 1382
1336static inline void 1383static inline int
1337ip6_tnl_dev_init_gen(struct net_device *dev) 1384ip6_tnl_dev_init_gen(struct net_device *dev)
1338{ 1385{
1339 struct ip6_tnl *t = netdev_priv(dev); 1386 struct ip6_tnl *t = netdev_priv(dev);
1387
1340 t->dev = dev; 1388 t->dev = dev;
1341 strcpy(t->parms.name, dev->name); 1389 strcpy(t->parms.name, dev->name);
1390 dev->tstats = alloc_percpu(struct pcpu_tstats);
1391 if (!dev->tstats)
1392 return -ENOMEM;
1393 return 0;
1342} 1394}
1343 1395
1344/** 1396/**
@@ -1346,11 +1398,15 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1346 * @dev: virtual device associated with tunnel 1398 * @dev: virtual device associated with tunnel
1347 **/ 1399 **/
1348 1400
1349static void ip6_tnl_dev_init(struct net_device *dev) 1401static int ip6_tnl_dev_init(struct net_device *dev)
1350{ 1402{
1351 struct ip6_tnl *t = netdev_priv(dev); 1403 struct ip6_tnl *t = netdev_priv(dev);
1352 ip6_tnl_dev_init_gen(dev); 1404 int err = ip6_tnl_dev_init_gen(dev);
1405
1406 if (err)
1407 return err;
1353 ip6_tnl_link_config(t); 1408 ip6_tnl_link_config(t);
1409 return 0;
1354} 1410}
1355 1411
1356/** 1412/**
@@ -1360,25 +1416,29 @@ static void ip6_tnl_dev_init(struct net_device *dev)
1360 * Return: 0 1416 * Return: 0
1361 **/ 1417 **/
1362 1418
1363static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev) 1419static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1364{ 1420{
1365 struct ip6_tnl *t = netdev_priv(dev); 1421 struct ip6_tnl *t = netdev_priv(dev);
1366 struct net *net = dev_net(dev); 1422 struct net *net = dev_net(dev);
1367 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1423 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1424 int err = ip6_tnl_dev_init_gen(dev);
1425
1426 if (err)
1427 return err;
1368 1428
1369 ip6_tnl_dev_init_gen(dev);
1370 t->parms.proto = IPPROTO_IPV6; 1429 t->parms.proto = IPPROTO_IPV6;
1371 dev_hold(dev); 1430 dev_hold(dev);
1372 ip6n->tnls_wc[0] = t; 1431 rcu_assign_pointer(ip6n->tnls_wc[0], t);
1432 return 0;
1373} 1433}
1374 1434
1375static struct xfrm6_tunnel ip4ip6_handler = { 1435static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
1376 .handler = ip4ip6_rcv, 1436 .handler = ip4ip6_rcv,
1377 .err_handler = ip4ip6_err, 1437 .err_handler = ip4ip6_err,
1378 .priority = 1, 1438 .priority = 1,
1379}; 1439};
1380 1440
1381static struct xfrm6_tunnel ip6ip6_handler = { 1441static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
1382 .handler = ip6ip6_rcv, 1442 .handler = ip6ip6_rcv,
1383 .err_handler = ip6ip6_err, 1443 .err_handler = ip6ip6_err,
1384 .priority = 1, 1444 .priority = 1,
@@ -1391,14 +1451,14 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1391 LIST_HEAD(list); 1451 LIST_HEAD(list);
1392 1452
1393 for (h = 0; h < HASH_SIZE; h++) { 1453 for (h = 0; h < HASH_SIZE; h++) {
1394 t = ip6n->tnls_r_l[h]; 1454 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1395 while (t != NULL) { 1455 while (t != NULL) {
1396 unregister_netdevice_queue(t->dev, &list); 1456 unregister_netdevice_queue(t->dev, &list);
1397 t = t->next; 1457 t = rtnl_dereference(t->next);
1398 } 1458 }
1399 } 1459 }
1400 1460
1401 t = ip6n->tnls_wc[0]; 1461 t = rtnl_dereference(ip6n->tnls_wc[0]);
1402 unregister_netdevice_queue(t->dev, &list); 1462 unregister_netdevice_queue(t->dev, &list);
1403 unregister_netdevice_many(&list); 1463 unregister_netdevice_many(&list);
1404} 1464}
@@ -1419,7 +1479,9 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1419 goto err_alloc_dev; 1479 goto err_alloc_dev;
1420 dev_net_set(ip6n->fb_tnl_dev, net); 1480 dev_net_set(ip6n->fb_tnl_dev, net);
1421 1481
1422 ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); 1482 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1483 if (err < 0)
1484 goto err_register;
1423 1485
1424 err = register_netdev(ip6n->fb_tnl_dev); 1486 err = register_netdev(ip6n->fb_tnl_dev);
1425 if (err < 0) 1487 if (err < 0)
@@ -1427,7 +1489,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1427 return 0; 1489 return 0;
1428 1490
1429err_register: 1491err_register:
1430 free_netdev(ip6n->fb_tnl_dev); 1492 ip6_dev_free(ip6n->fb_tnl_dev);
1431err_alloc_dev: 1493err_alloc_dev:
1432 return err; 1494 return err;
1433} 1495}
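
Note: the ip6_tunnel.c hunks above retire the ip6_tnl_lock spinlock. Hash-chain updates are now serialized by the RTNL, lookups run under RCU, and the hot rx/tx counters move into per-cpu storage that ip6_get_stats() folds back into dev->stats. A minimal standalone sketch of the link/unlink idiom under that locking model (struct and function names are illustrative, not taken from the patch):

	struct node {
		struct node __rcu *next;
	};

	/* Writer side: caller holds the RTNL, so rtnl_dereference() is legal. */
	static void node_link(struct node __rcu **head, struct node *n)
	{
		ASSERT_RTNL();
		rcu_assign_pointer(n->next, rtnl_dereference(*head));
		rcu_assign_pointer(*head, n);	/* publish to RCU readers */
	}

	static void node_unlink(struct node __rcu **head, struct node *n)
	{
		struct node __rcu **pp;
		struct node *iter;

		ASSERT_RTNL();
		for (pp = head; (iter = rtnl_dereference(*pp)) != NULL; pp = &iter->next)
			if (iter == n) {
				rcu_assign_pointer(*pp, n->next);
				break;
			}
		/* The caller must let a grace period elapse (synchronize_net()
		 * or an RCU-deferred free) before n is actually released. */
	}

Readers traverse the same chain with rcu_dereference() inside rcu_read_lock(), which is why the lookup paths above no longer take any spinlock.
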
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 66078dad7fe8..2640c9be589d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -666,7 +666,9 @@ static int pim6_rcv(struct sk_buff *skb)
666 666
667 skb_tunnel_rx(skb, reg_dev); 667 skb_tunnel_rx(skb, reg_dev);
668 668
669 netif_rx(skb); 669 if (netif_rx(skb) == NET_RX_DROP)
670 reg_dev->stats.rx_dropped++;
671
670 dev_put(reg_dev); 672 dev_put(reg_dev);
671 return 0; 673 return 0;
672 drop: 674 drop:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 58841c4ae947..b3dd844cd34f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -228,12 +228,12 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
228 do { 228 do {
229 cur = ((void *)cur) + (cur->nd_opt_len << 3); 229 cur = ((void *)cur) + (cur->nd_opt_len << 3);
230 } while(cur < end && cur->nd_opt_type != type); 230 } while(cur < end && cur->nd_opt_type != type);
231 return (cur <= end && cur->nd_opt_type == type ? cur : NULL); 231 return cur <= end && cur->nd_opt_type == type ? cur : NULL;
232} 232}
233 233
234static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) 234static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
235{ 235{
236 return (opt->nd_opt_type == ND_OPT_RDNSS); 236 return opt->nd_opt_type == ND_OPT_RDNSS;
237} 237}
238 238
239static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, 239static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
@@ -244,7 +244,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
244 do { 244 do {
245 cur = ((void *)cur) + (cur->nd_opt_len << 3); 245 cur = ((void *)cur) + (cur->nd_opt_len << 3);
246 } while(cur < end && !ndisc_is_useropt(cur)); 246 } while(cur < end && !ndisc_is_useropt(cur));
247 return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL); 247 return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
248} 248}
249 249
250static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, 250static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
@@ -319,7 +319,7 @@ static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
319 int prepad = ndisc_addr_option_pad(dev->type); 319 int prepad = ndisc_addr_option_pad(dev->type);
320 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) 320 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
321 return NULL; 321 return NULL;
322 return (lladdr + prepad); 322 return lladdr + prepad;
323} 323}
324 324
325int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir) 325int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
@@ -1105,6 +1105,18 @@ errout:
1105 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); 1105 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1106} 1106}
1107 1107
1108static inline int accept_ra(struct inet6_dev *in6_dev)
1109{
1110 /*
1111 * If forwarding is enabled, RA are not accepted unless the special
1112 * hybrid mode (accept_ra=2) is enabled.
1113 */
1114 if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
1115 return 0;
1116
1117 return in6_dev->cnf.accept_ra;
1118}
1119
1108static void ndisc_router_discovery(struct sk_buff *skb) 1120static void ndisc_router_discovery(struct sk_buff *skb)
1109{ 1121{
1110 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); 1122 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1158,8 +1170,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1158 return; 1170 return;
1159 } 1171 }
1160 1172
1161 /* skip route and link configuration on routers */ 1173 if (!accept_ra(in6_dev))
1162 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1163 goto skip_linkparms; 1174 goto skip_linkparms;
1164 1175
1165#ifdef CONFIG_IPV6_NDISC_NODETYPE 1176#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1309,8 +1320,7 @@ skip_linkparms:
1309 NEIGH_UPDATE_F_ISROUTER); 1320 NEIGH_UPDATE_F_ISROUTER);
1310 } 1321 }
1311 1322
1312 /* skip route and link configuration on routers */ 1323 if (!accept_ra(in6_dev))
1313 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1314 goto out; 1324 goto out;
1315 1325
1316#ifdef CONFIG_IPV6_ROUTE_INFO 1326#ifdef CONFIG_IPV6_ROUTE_INFO
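
Note: the new accept_ra() helper, together with the addrconf.c hunk at the start of this diff (forwarding == 0 or 2 still sends router solicitations after DAD), lets forwarding interfaces keep learning from RAs. A restatement of the resulting policy, derived only from the hunks shown here:

	/*
	 *   forwarding == 0 : RAs honoured whenever accept_ra != 0;
	 *                     router solicitations sent after DAD completes
	 *   forwarding != 0 : RAs honoured only in the accept_ra == 2 hybrid
	 *                     mode; solicitations sent only when forwarding == 2
	 */
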
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 8e754be92c24..6b331e9b5706 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -82,13 +82,13 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
82int 82int
83ip6t_ext_hdr(u8 nexthdr) 83ip6t_ext_hdr(u8 nexthdr)
84{ 84{
85 return ( (nexthdr == IPPROTO_HOPOPTS) || 85 return (nexthdr == IPPROTO_HOPOPTS) ||
86 (nexthdr == IPPROTO_ROUTING) || 86 (nexthdr == IPPROTO_ROUTING) ||
87 (nexthdr == IPPROTO_FRAGMENT) || 87 (nexthdr == IPPROTO_FRAGMENT) ||
88 (nexthdr == IPPROTO_ESP) || 88 (nexthdr == IPPROTO_ESP) ||
89 (nexthdr == IPPROTO_AH) || 89 (nexthdr == IPPROTO_AH) ||
90 (nexthdr == IPPROTO_NONE) || 90 (nexthdr == IPPROTO_NONE) ||
91 (nexthdr == IPPROTO_DSTOPTS) ); 91 (nexthdr == IPPROTO_DSTOPTS);
92} 92}
93 93
94/* Returns whether matches rule or not. */ 94/* Returns whether matches rule or not. */
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 578f3c1a16db..138a8b362706 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -363,7 +363,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
363 /* If the first fragment is fragmented itself, we split 363 /* If the first fragment is fragmented itself, we split
364 * it to two chunks: the first with data and paged part 364 * it to two chunks: the first with data and paged part
365 * and the second, holding only fragments. */ 365 * and the second, holding only fragments. */
366 if (skb_has_frags(head)) { 366 if (skb_has_frag_list(head)) {
367 struct sk_buff *clone; 367 struct sk_buff *clone;
368 int i, plen = 0; 368 int i, plen = 0;
369 369
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 1fa3468f0f32..9bb936ae2452 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,28 +25,14 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <net/protocol.h> 26#include <net/protocol.h>
27 27
28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; 28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly;
29static DEFINE_SPINLOCK(inet6_proto_lock);
30
31 29
32int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
33{ 31{
34 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 32 int hash = protocol & (MAX_INET_PROTOS - 1);
35
36 spin_lock_bh(&inet6_proto_lock);
37
38 if (inet6_protos[hash]) {
39 ret = -1;
40 } else {
41 inet6_protos[hash] = prot;
42 ret = 0;
43 }
44
45 spin_unlock_bh(&inet6_proto_lock);
46 33
47 return ret; 34 return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1;
48} 35}
49
50EXPORT_SYMBOL(inet6_add_protocol); 36EXPORT_SYMBOL(inet6_add_protocol);
51 37
52/* 38/*
@@ -57,20 +43,10 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
57{ 43{
58 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 44 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
59 45
60 spin_lock_bh(&inet6_proto_lock); 46 ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1;
61
62 if (inet6_protos[hash] != prot) {
63 ret = -1;
64 } else {
65 inet6_protos[hash] = NULL;
66 ret = 0;
67 }
68
69 spin_unlock_bh(&inet6_proto_lock);
70 47
71 synchronize_net(); 48 synchronize_net();
72 49
73 return ret; 50 return ret;
74} 51}
75
76EXPORT_SYMBOL(inet6_del_protocol); 52EXPORT_SYMBOL(inet6_del_protocol);
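
Note: the protocol.c rewrite drops inet6_proto_lock in favour of one atomic compare-and-swap per operation: a slot is claimed only while it is still NULL and released only while it still holds the expected handler, and deletion keeps the synchronize_net() call so receive paths that already sampled the pointer can finish. A generic sketch of the idiom (illustrative names, not part of the patch):

	/* Lock-free registration into a NULL-initialised slot. */
	static int slot_add(void **slot, void *handler)
	{
		return cmpxchg(slot, NULL, handler) == NULL ? 0 : -1;
	}

	static int slot_del(void **slot, void *handler)
	{
		int ret = cmpxchg(slot, handler, NULL) == handler ? 0 : -1;

		synchronize_net();	/* wait out readers using the old pointer */
		return ret;
	}
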
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e677937a07fc..45e6efb7f171 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -764,7 +764,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
764 return -EINVAL; 764 return -EINVAL;
765 765
766 if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 766 if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
767 return(-EAFNOSUPPORT); 767 return -EAFNOSUPPORT;
768 768
769 /* port is the proto value [0..255] carried in nexthdr */ 769 /* port is the proto value [0..255] carried in nexthdr */
770 proto = ntohs(sin6->sin6_port); 770 proto = ntohs(sin6->sin6_port);
@@ -772,10 +772,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
772 if (!proto) 772 if (!proto)
773 proto = inet->inet_num; 773 proto = inet->inet_num;
774 else if (proto != inet->inet_num) 774 else if (proto != inet->inet_num)
775 return(-EINVAL); 775 return -EINVAL;
776 776
777 if (proto > 255) 777 if (proto > 255)
778 return(-EINVAL); 778 return -EINVAL;
779 779
780 daddr = &sin6->sin6_addr; 780 daddr = &sin6->sin6_addr;
781 if (np->sndflow) { 781 if (np->sndflow) {
@@ -985,7 +985,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
985 /* You may get strange result with a positive odd offset; 985 /* You may get strange result with a positive odd offset;
986 RFC2292bis agrees with me. */ 986 RFC2292bis agrees with me. */
987 if (val > 0 && (val&1)) 987 if (val > 0 && (val&1))
988 return(-EINVAL); 988 return -EINVAL;
989 if (val < 0) { 989 if (val < 0) {
990 rp->checksum = 0; 990 rp->checksum = 0;
991 } else { 991 } else {
@@ -997,7 +997,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
997 break; 997 break;
998 998
999 default: 999 default:
1000 return(-ENOPROTOOPT); 1000 return -ENOPROTOOPT;
1001 } 1001 }
1002} 1002}
1003 1003
@@ -1190,7 +1190,7 @@ static int rawv6_init_sk(struct sock *sk)
1190 default: 1190 default:
1191 break; 1191 break;
1192 } 1192 }
1193 return(0); 1193 return 0;
1194} 1194}
1195 1195
1196struct proto rawv6_prot = { 1196struct proto rawv6_prot = {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 64cfef1b0a4c..c7ba3149633f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -458,7 +458,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
458 /* If the first fragment is fragmented itself, we split 458 /* If the first fragment is fragmented itself, we split
459 * it to two chunks: the first with data and paged part 459 * it to two chunks: the first with data and paged part
460 * and the second, holding only fragments. */ 460 * and the second, holding only fragments. */
461 if (skb_has_frags(head)) { 461 if (skb_has_frag_list(head)) {
462 struct sk_buff *clone; 462 struct sk_buff *clone;
463 int i, plen = 0; 463 int i, plen = 0;
464 464
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a275c6e1e25c..17e217933885 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -217,14 +217,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
217 217
218static __inline__ int rt6_check_expired(const struct rt6_info *rt) 218static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{ 219{
220 return (rt->rt6i_flags & RTF_EXPIRES && 220 return (rt->rt6i_flags & RTF_EXPIRES) &&
221 time_after(jiffies, rt->rt6i_expires)); 221 time_after(jiffies, rt->rt6i_expires);
222} 222}
223 223
224static inline int rt6_need_strict(struct in6_addr *daddr) 224static inline int rt6_need_strict(struct in6_addr *daddr)
225{ 225{
226 return (ipv6_addr_type(daddr) & 226 return ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); 227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
228} 228}
229 229
230/* 230/*
@@ -440,7 +440,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
440 __func__, match); 440 __func__, match);
441 441
442 net = dev_net(rt0->rt6i_dev); 442 net = dev_net(rt0->rt6i_dev);
443 return (match ? match : net->ipv6.ip6_null_entry); 443 return match ? match : net->ipv6.ip6_null_entry;
444} 444}
445 445
446#ifdef CONFIG_IPV6_ROUTE_INFO 446#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -859,7 +859,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
859 859
860 dst_release(*dstp); 860 dst_release(*dstp);
861 *dstp = new; 861 *dstp = new;
862 return (new ? 0 : -ENOMEM); 862 return new ? 0 : -ENOMEM;
863} 863}
864EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 864EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
865 865
@@ -1070,7 +1070,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1071out: 1071out:
1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1073 return (atomic_read(&ops->entries) > rt_max_size); 1073 return atomic_read(&ops->entries) > rt_max_size;
1074} 1074}
1075 1075
1076/* Clean host part of a prefix. Not necessary in radix tree, 1076/* Clean host part of a prefix. Not necessary in radix tree,
@@ -1169,6 +1169,8 @@ int ip6_route_add(struct fib6_config *cfg)
1169 1169
1170 if (addr_type & IPV6_ADDR_MULTICAST) 1170 if (addr_type & IPV6_ADDR_MULTICAST)
1171 rt->dst.input = ip6_mc_input; 1171 rt->dst.input = ip6_mc_input;
1172 else if (cfg->fc_flags & RTF_LOCAL)
1173 rt->dst.input = ip6_input;
1172 else 1174 else
1173 rt->dst.input = ip6_forward; 1175 rt->dst.input = ip6_forward;
1174 1176
@@ -1190,7 +1192,8 @@ int ip6_route_add(struct fib6_config *cfg)
1190 they would result in kernel looping; promote them to reject routes 1192 they would result in kernel looping; promote them to reject routes
1191 */ 1193 */
1192 if ((cfg->fc_flags & RTF_REJECT) || 1194 if ((cfg->fc_flags & RTF_REJECT) ||
1193 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1195 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1196 && !(cfg->fc_flags&RTF_LOCAL))) {
1194 /* hold loopback dev/idev if we haven't done so. */ 1197 /* hold loopback dev/idev if we haven't done so. */
1195 if (dev != net->loopback_dev) { 1198 if (dev != net->loopback_dev) {
1196 if (dev) { 1199 if (dev) {
@@ -2102,6 +2105,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2102 if (rtm->rtm_type == RTN_UNREACHABLE) 2105 if (rtm->rtm_type == RTN_UNREACHABLE)
2103 cfg->fc_flags |= RTF_REJECT; 2106 cfg->fc_flags |= RTF_REJECT;
2104 2107
2108 if (rtm->rtm_type == RTN_LOCAL)
2109 cfg->fc_flags |= RTF_LOCAL;
2110
2105 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2111 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2106 cfg->fc_nlinfo.nlh = nlh; 2112 cfg->fc_nlinfo.nlh = nlh;
2107 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2113 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2222,6 +2228,8 @@ static int rt6_fill_node(struct net *net,
2222 NLA_PUT_U32(skb, RTA_TABLE, table); 2228 NLA_PUT_U32(skb, RTA_TABLE, table);
2223 if (rt->rt6i_flags&RTF_REJECT) 2229 if (rt->rt6i_flags&RTF_REJECT)
2224 rtm->rtm_type = RTN_UNREACHABLE; 2230 rtm->rtm_type = RTN_UNREACHABLE;
2231 else if (rt->rt6i_flags&RTF_LOCAL)
2232 rtm->rtm_type = RTN_LOCAL;
2225 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2233 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2226 rtm->rtm_type = RTN_LOCAL; 2234 rtm->rtm_type = RTN_LOCAL;
2227 else 2235 else
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4699cd3c3118..d7701782b639 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -63,36 +63,63 @@
63#define HASH_SIZE 16 63#define HASH_SIZE 16
64#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 64#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
65 65
66static void ipip6_tunnel_init(struct net_device *dev); 66static int ipip6_tunnel_init(struct net_device *dev);
67static void ipip6_tunnel_setup(struct net_device *dev); 67static void ipip6_tunnel_setup(struct net_device *dev);
68static void ipip6_dev_free(struct net_device *dev);
68 69
69static int sit_net_id __read_mostly; 70static int sit_net_id __read_mostly;
70struct sit_net { 71struct sit_net {
71 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 72 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
72 struct ip_tunnel *tunnels_r[HASH_SIZE]; 73 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
73 struct ip_tunnel *tunnels_l[HASH_SIZE]; 74 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
74 struct ip_tunnel *tunnels_wc[1]; 75 struct ip_tunnel __rcu *tunnels_wc[1];
75 struct ip_tunnel **tunnels[4]; 76 struct ip_tunnel __rcu **tunnels[4];
76 77
77 struct net_device *fb_tunnel_dev; 78 struct net_device *fb_tunnel_dev;
78}; 79};
79 80
80/* 81/*
81 * Locking : hash tables are protected by RCU and a spinlock 82 * Locking : hash tables are protected by RCU and RTNL
82 */ 83 */
83static DEFINE_SPINLOCK(ipip6_lock);
84 84
85#define for_each_ip_tunnel_rcu(start) \ 85#define for_each_ip_tunnel_rcu(start) \
86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
87 87
88/* often modified stats are per cpu, other are shared (netdev->stats) */
89struct pcpu_tstats {
90 unsigned long rx_packets;
91 unsigned long rx_bytes;
92 unsigned long tx_packets;
93 unsigned long tx_bytes;
94};
95
96static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
97{
98 struct pcpu_tstats sum = { 0 };
99 int i;
100
101 for_each_possible_cpu(i) {
102 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
103
104 sum.rx_packets += tstats->rx_packets;
105 sum.rx_bytes += tstats->rx_bytes;
106 sum.tx_packets += tstats->tx_packets;
107 sum.tx_bytes += tstats->tx_bytes;
108 }
109 dev->stats.rx_packets = sum.rx_packets;
110 dev->stats.rx_bytes = sum.rx_bytes;
111 dev->stats.tx_packets = sum.tx_packets;
112 dev->stats.tx_bytes = sum.tx_bytes;
113 return &dev->stats;
114}
88/* 115/*
89 * Must be invoked with rcu_read_lock 116 * Must be invoked with rcu_read_lock
90 */ 117 */
91static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, 118static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
92 struct net_device *dev, __be32 remote, __be32 local) 119 struct net_device *dev, __be32 remote, __be32 local)
93{ 120{
94 unsigned h0 = HASH(remote); 121 unsigned int h0 = HASH(remote);
95 unsigned h1 = HASH(local); 122 unsigned int h1 = HASH(local);
96 struct ip_tunnel *t; 123 struct ip_tunnel *t;
97 struct sit_net *sitn = net_generic(net, sit_net_id); 124 struct sit_net *sitn = net_generic(net, sit_net_id);
98 125
@@ -121,12 +148,12 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
121 return NULL; 148 return NULL;
122} 149}
123 150
124static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, 151static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
125 struct ip_tunnel_parm *parms) 152 struct ip_tunnel_parm *parms)
126{ 153{
127 __be32 remote = parms->iph.daddr; 154 __be32 remote = parms->iph.daddr;
128 __be32 local = parms->iph.saddr; 155 __be32 local = parms->iph.saddr;
129 unsigned h = 0; 156 unsigned int h = 0;
130 int prio = 0; 157 int prio = 0;
131 158
132 if (remote) { 159 if (remote) {
@@ -140,7 +167,7 @@ static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn,
140 return &sitn->tunnels[prio][h]; 167 return &sitn->tunnels[prio][h];
141} 168}
142 169
143static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, 170static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
144 struct ip_tunnel *t) 171 struct ip_tunnel *t)
145{ 172{
146 return __ipip6_bucket(sitn, &t->parms); 173 return __ipip6_bucket(sitn, &t->parms);
@@ -148,13 +175,14 @@ static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn,
148 175
149static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) 176static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
150{ 177{
151 struct ip_tunnel **tp; 178 struct ip_tunnel __rcu **tp;
152 179 struct ip_tunnel *iter;
153 for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { 180
154 if (t == *tp) { 181 for (tp = ipip6_bucket(sitn, t);
155 spin_lock_bh(&ipip6_lock); 182 (iter = rtnl_dereference(*tp)) != NULL;
156 *tp = t->next; 183 tp = &iter->next) {
157 spin_unlock_bh(&ipip6_lock); 184 if (t == iter) {
185 rcu_assign_pointer(*tp, t->next);
158 break; 186 break;
159 } 187 }
160 } 188 }
@@ -162,12 +190,10 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
162 190
163static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) 191static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
164{ 192{
165 struct ip_tunnel **tp = ipip6_bucket(sitn, t); 193 struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
166 194
167 spin_lock_bh(&ipip6_lock); 195 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
168 t->next = *tp;
169 rcu_assign_pointer(*tp, t); 196 rcu_assign_pointer(*tp, t);
170 spin_unlock_bh(&ipip6_lock);
171} 197}
172 198
173static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) 199static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
@@ -187,17 +213,20 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
187#endif 213#endif
188} 214}
189 215
190static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, 216static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
191 struct ip_tunnel_parm *parms, int create) 217 struct ip_tunnel_parm *parms, int create)
192{ 218{
193 __be32 remote = parms->iph.daddr; 219 __be32 remote = parms->iph.daddr;
194 __be32 local = parms->iph.saddr; 220 __be32 local = parms->iph.saddr;
195 struct ip_tunnel *t, **tp, *nt; 221 struct ip_tunnel *t, *nt;
222 struct ip_tunnel __rcu **tp;
196 struct net_device *dev; 223 struct net_device *dev;
197 char name[IFNAMSIZ]; 224 char name[IFNAMSIZ];
198 struct sit_net *sitn = net_generic(net, sit_net_id); 225 struct sit_net *sitn = net_generic(net, sit_net_id);
199 226
200 for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) { 227 for (tp = __ipip6_bucket(sitn, parms);
228 (t = rtnl_dereference(*tp)) != NULL;
229 tp = &t->next) {
201 if (local == t->parms.iph.saddr && 230 if (local == t->parms.iph.saddr &&
202 remote == t->parms.iph.daddr && 231 remote == t->parms.iph.daddr &&
203 parms->link == t->parms.link) { 232 parms->link == t->parms.link) {
@@ -213,7 +242,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
213 if (parms->name[0]) 242 if (parms->name[0])
214 strlcpy(name, parms->name, IFNAMSIZ); 243 strlcpy(name, parms->name, IFNAMSIZ);
215 else 244 else
216 sprintf(name, "sit%%d"); 245 strcpy(name, "sit%d");
217 246
218 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); 247 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
219 if (dev == NULL) 248 if (dev == NULL)
@@ -229,7 +258,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
229 nt = netdev_priv(dev); 258 nt = netdev_priv(dev);
230 259
231 nt->parms = *parms; 260 nt->parms = *parms;
232 ipip6_tunnel_init(dev); 261 if (ipip6_tunnel_init(dev) < 0)
262 goto failed_free;
233 ipip6_tunnel_clone_6rd(dev, sitn); 263 ipip6_tunnel_clone_6rd(dev, sitn);
234 264
235 if (parms->i_flags & SIT_ISATAP) 265 if (parms->i_flags & SIT_ISATAP)
@@ -244,7 +274,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
244 return nt; 274 return nt;
245 275
246failed_free: 276failed_free:
247 free_netdev(dev); 277 ipip6_dev_free(dev);
248failed: 278failed:
249 return NULL; 279 return NULL;
250} 280}
@@ -340,7 +370,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
340 370
341 ASSERT_RTNL(); 371 ASSERT_RTNL();
342 372
343 for (p = t->prl; p; p = p->next) { 373 for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
344 if (p->addr == a->addr) { 374 if (p->addr == a->addr) {
345 if (chg) { 375 if (chg) {
346 p->flags = a->flags; 376 p->flags = a->flags;
@@ -451,15 +481,12 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
451 struct sit_net *sitn = net_generic(net, sit_net_id); 481 struct sit_net *sitn = net_generic(net, sit_net_id);
452 482
453 if (dev == sitn->fb_tunnel_dev) { 483 if (dev == sitn->fb_tunnel_dev) {
454 spin_lock_bh(&ipip6_lock); 484 rcu_assign_pointer(sitn->tunnels_wc[0], NULL);
455 sitn->tunnels_wc[0] = NULL;
456 spin_unlock_bh(&ipip6_lock);
457 dev_put(dev);
458 } else { 485 } else {
459 ipip6_tunnel_unlink(sitn, netdev_priv(dev)); 486 ipip6_tunnel_unlink(sitn, netdev_priv(dev));
460 ipip6_tunnel_del_prl(netdev_priv(dev), NULL); 487 ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
461 dev_put(dev);
462 } 488 }
489 dev_put(dev);
463} 490}
464 491
465 492
@@ -548,6 +575,8 @@ static int ipip6_rcv(struct sk_buff *skb)
548 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 575 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
549 iph->saddr, iph->daddr); 576 iph->saddr, iph->daddr);
550 if (tunnel != NULL) { 577 if (tunnel != NULL) {
578 struct pcpu_tstats *tstats;
579
551 secpath_reset(skb); 580 secpath_reset(skb);
552 skb->mac_header = skb->network_header; 581 skb->mac_header = skb->network_header;
553 skb_reset_network_header(skb); 582 skb_reset_network_header(skb);
@@ -563,10 +592,17 @@ static int ipip6_rcv(struct sk_buff *skb)
563 return 0; 592 return 0;
564 } 593 }
565 594
566 skb_tunnel_rx(skb, tunnel->dev); 595 tstats = this_cpu_ptr(tunnel->dev->tstats);
596 tstats->rx_packets++;
597 tstats->rx_bytes += skb->len;
598
599 __skb_tunnel_rx(skb, tunnel->dev);
567 600
568 ipip6_ecn_decapsulate(iph, skb); 601 ipip6_ecn_decapsulate(iph, skb);
569 netif_rx(skb); 602
603 if (netif_rx(skb) == NET_RX_DROP)
604 tunnel->dev->stats.rx_dropped++;
605
570 rcu_read_unlock(); 606 rcu_read_unlock();
571 return 0; 607 return 0;
572 } 608 }
@@ -590,7 +626,7 @@ __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
590#ifdef CONFIG_IPV6_SIT_6RD 626#ifdef CONFIG_IPV6_SIT_6RD
591 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, 627 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
592 tunnel->ip6rd.prefixlen)) { 628 tunnel->ip6rd.prefixlen)) {
593 unsigned pbw0, pbi0; 629 unsigned int pbw0, pbi0;
594 int pbi1; 630 int pbi1;
595 u32 d; 631 u32 d;
596 632
@@ -625,14 +661,13 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
625 struct net_device *dev) 661 struct net_device *dev)
626{ 662{
627 struct ip_tunnel *tunnel = netdev_priv(dev); 663 struct ip_tunnel *tunnel = netdev_priv(dev);
628 struct net_device_stats *stats = &dev->stats; 664 struct pcpu_tstats *tstats;
629 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
630 struct iphdr *tiph = &tunnel->parms.iph; 665 struct iphdr *tiph = &tunnel->parms.iph;
631 struct ipv6hdr *iph6 = ipv6_hdr(skb); 666 struct ipv6hdr *iph6 = ipv6_hdr(skb);
632 u8 tos = tunnel->parms.iph.tos; 667 u8 tos = tunnel->parms.iph.tos;
633 __be16 df = tiph->frag_off; 668 __be16 df = tiph->frag_off;
634 struct rtable *rt; /* Route to the other host */ 669 struct rtable *rt; /* Route to the other host */
635 struct net_device *tdev; /* Device to other host */ 670 struct net_device *tdev; /* Device to other host */
636 struct iphdr *iph; /* Our new IP header */ 671 struct iphdr *iph; /* Our new IP header */
637 unsigned int max_headroom; /* The extra header space needed */ 672 unsigned int max_headroom; /* The extra header space needed */
638 __be32 dst = tiph->daddr; 673 __be32 dst = tiph->daddr;
@@ -703,20 +738,20 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
703 .oif = tunnel->parms.link, 738 .oif = tunnel->parms.link,
704 .proto = IPPROTO_IPV6 }; 739 .proto = IPPROTO_IPV6 };
705 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 740 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
706 stats->tx_carrier_errors++; 741 dev->stats.tx_carrier_errors++;
707 goto tx_error_icmp; 742 goto tx_error_icmp;
708 } 743 }
709 } 744 }
710 if (rt->rt_type != RTN_UNICAST) { 745 if (rt->rt_type != RTN_UNICAST) {
711 ip_rt_put(rt); 746 ip_rt_put(rt);
712 stats->tx_carrier_errors++; 747 dev->stats.tx_carrier_errors++;
713 goto tx_error_icmp; 748 goto tx_error_icmp;
714 } 749 }
715 tdev = rt->dst.dev; 750 tdev = rt->dst.dev;
716 751
717 if (tdev == dev) { 752 if (tdev == dev) {
718 ip_rt_put(rt); 753 ip_rt_put(rt);
719 stats->collisions++; 754 dev->stats.collisions++;
720 goto tx_error; 755 goto tx_error;
721 } 756 }
722 757
@@ -724,7 +759,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
724 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 759 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
725 760
726 if (mtu < 68) { 761 if (mtu < 68) {
727 stats->collisions++; 762 dev->stats.collisions++;
728 ip_rt_put(rt); 763 ip_rt_put(rt);
729 goto tx_error; 764 goto tx_error;
730 } 765 }
@@ -763,7 +798,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
763 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 798 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
764 if (!new_skb) { 799 if (!new_skb) {
765 ip_rt_put(rt); 800 ip_rt_put(rt);
766 txq->tx_dropped++; 801 dev->stats.tx_dropped++;
767 dev_kfree_skb(skb); 802 dev_kfree_skb(skb);
768 return NETDEV_TX_OK; 803 return NETDEV_TX_OK;
769 } 804 }
@@ -799,14 +834,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
799 iph->ttl = iph6->hop_limit; 834 iph->ttl = iph6->hop_limit;
800 835
801 nf_reset(skb); 836 nf_reset(skb);
802 837 tstats = this_cpu_ptr(dev->tstats);
803 IPTUNNEL_XMIT(); 838 __IPTUNNEL_XMIT(tstats, &dev->stats);
804 return NETDEV_TX_OK; 839 return NETDEV_TX_OK;
805 840
806tx_error_icmp: 841tx_error_icmp:
807 dst_link_failure(skb); 842 dst_link_failure(skb);
808tx_error: 843tx_error:
809 stats->tx_errors++; 844 dev->stats.tx_errors++;
810 dev_kfree_skb(skb); 845 dev_kfree_skb(skb);
811 return NETDEV_TX_OK; 846 return NETDEV_TX_OK;
812} 847}
@@ -1083,12 +1118,19 @@ static const struct net_device_ops ipip6_netdev_ops = {
1083 .ndo_start_xmit = ipip6_tunnel_xmit, 1118 .ndo_start_xmit = ipip6_tunnel_xmit,
1084 .ndo_do_ioctl = ipip6_tunnel_ioctl, 1119 .ndo_do_ioctl = ipip6_tunnel_ioctl,
1085 .ndo_change_mtu = ipip6_tunnel_change_mtu, 1120 .ndo_change_mtu = ipip6_tunnel_change_mtu,
1121 .ndo_get_stats = ipip6_get_stats,
1086}; 1122};
1087 1123
1124static void ipip6_dev_free(struct net_device *dev)
1125{
1126 free_percpu(dev->tstats);
1127 free_netdev(dev);
1128}
1129
1088static void ipip6_tunnel_setup(struct net_device *dev) 1130static void ipip6_tunnel_setup(struct net_device *dev)
1089{ 1131{
1090 dev->netdev_ops = &ipip6_netdev_ops; 1132 dev->netdev_ops = &ipip6_netdev_ops;
1091 dev->destructor = free_netdev; 1133 dev->destructor = ipip6_dev_free;
1092 1134
1093 dev->type = ARPHRD_SIT; 1135 dev->type = ARPHRD_SIT;
1094 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 1136 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -1098,9 +1140,10 @@ static void ipip6_tunnel_setup(struct net_device *dev)
1098 dev->iflink = 0; 1140 dev->iflink = 0;
1099 dev->addr_len = 4; 1141 dev->addr_len = 4;
1100 dev->features |= NETIF_F_NETNS_LOCAL; 1142 dev->features |= NETIF_F_NETNS_LOCAL;
1143 dev->features |= NETIF_F_LLTX;
1101} 1144}
1102 1145
1103static void ipip6_tunnel_init(struct net_device *dev) 1146static int ipip6_tunnel_init(struct net_device *dev)
1104{ 1147{
1105 struct ip_tunnel *tunnel = netdev_priv(dev); 1148 struct ip_tunnel *tunnel = netdev_priv(dev);
1106 1149
@@ -1111,9 +1154,14 @@ static void ipip6_tunnel_init(struct net_device *dev)
1111 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 1154 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1112 1155
1113 ipip6_tunnel_bind_dev(dev); 1156 ipip6_tunnel_bind_dev(dev);
1157 dev->tstats = alloc_percpu(struct pcpu_tstats);
1158 if (!dev->tstats)
1159 return -ENOMEM;
1160
1161 return 0;
1114} 1162}
1115 1163
1116static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) 1164static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1117{ 1165{
1118 struct ip_tunnel *tunnel = netdev_priv(dev); 1166 struct ip_tunnel *tunnel = netdev_priv(dev);
1119 struct iphdr *iph = &tunnel->parms.iph; 1167 struct iphdr *iph = &tunnel->parms.iph;
@@ -1128,11 +1176,15 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1128 iph->ihl = 5; 1176 iph->ihl = 5;
1129 iph->ttl = 64; 1177 iph->ttl = 64;
1130 1178
1179 dev->tstats = alloc_percpu(struct pcpu_tstats);
1180 if (!dev->tstats)
1181 return -ENOMEM;
1131 dev_hold(dev); 1182 dev_hold(dev);
1132 sitn->tunnels_wc[0] = tunnel; 1183 sitn->tunnels_wc[0] = tunnel;
1184 return 0;
1133} 1185}
1134 1186
1135static struct xfrm_tunnel sit_handler = { 1187static struct xfrm_tunnel sit_handler __read_mostly = {
1136 .handler = ipip6_rcv, 1188 .handler = ipip6_rcv,
1137 .err_handler = ipip6_err, 1189 .err_handler = ipip6_err,
1138 .priority = 1, 1190 .priority = 1,
@@ -1173,7 +1225,10 @@ static int __net_init sit_init_net(struct net *net)
1173 } 1225 }
1174 dev_net_set(sitn->fb_tunnel_dev, net); 1226 dev_net_set(sitn->fb_tunnel_dev, net);
1175 1227
1176 ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); 1228 err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1229 if (err)
1230 goto err_dev_free;
1231
1177 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); 1232 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
1178 1233
1179 if ((err = register_netdev(sitn->fb_tunnel_dev))) 1234 if ((err = register_netdev(sitn->fb_tunnel_dev)))
@@ -1183,7 +1238,8 @@ static int __net_init sit_init_net(struct net *net)
1183 1238
1184err_reg_dev: 1239err_reg_dev:
1185 dev_put(sitn->fb_tunnel_dev); 1240 dev_put(sitn->fb_tunnel_dev);
1186 free_netdev(sitn->fb_tunnel_dev); 1241err_dev_free:
1242 ipip6_dev_free(sitn->fb_tunnel_dev);
1187err_alloc_dev: 1243err_alloc_dev:
1188 return err; 1244 return err;
1189} 1245}
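
Note: sit.c gets the same RCU-plus-RTNL conversion as ip6_tunnel.c and additionally sets NETIF_F_LLTX, so transmits run without the tx queue lock while both directions account into per-cpu counters that ipip6_get_stats() folds back into dev->stats. A stripped-down sketch of the per-packet accounting (hypothetical helper, same pcpu_tstats layout as in the hunks above):

	/* Each CPU touches only its own counters: no lock or atomics needed. */
	static void tnl_count_rx(struct net_device *dev, unsigned int len)
	{
		struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);

		tstats->rx_packets++;
		tstats->rx_bytes += len;
	}

The fold in ndo_get_stats may observe a counter mid-update and report a slightly stale value, which is the usual trade-off accepted for interface statistics.
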
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fe6d40418c0b..8d93f6d81979 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -139,7 +139,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
139 return -EINVAL; 139 return -EINVAL;
140 140
141 if (usin->sin6_family != AF_INET6) 141 if (usin->sin6_family != AF_INET6)
142 return(-EAFNOSUPPORT); 142 return -EAFNOSUPPORT;
143 143
144 memset(&fl, 0, sizeof(fl)); 144 memset(&fl, 0, sizeof(fl));
145 145
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index fc3c86a47452..d9864725d0c6 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,8 +30,8 @@
30#include <net/protocol.h> 30#include <net/protocol.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32 32
33static struct xfrm6_tunnel *tunnel6_handlers; 33static struct xfrm6_tunnel *tunnel6_handlers __read_mostly;
34static struct xfrm6_tunnel *tunnel46_handlers; 34static struct xfrm6_tunnel *tunnel46_handlers __read_mostly;
35static DEFINE_MUTEX(tunnel6_mutex); 35static DEFINE_MUTEX(tunnel6_mutex);
36 36
37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) 37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
@@ -51,7 +51,7 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
51 } 51 }
52 52
53 handler->next = *pprev; 53 handler->next = *pprev;
54 *pprev = handler; 54 rcu_assign_pointer(*pprev, handler);
55 55
56 ret = 0; 56 ret = 0;
57 57
@@ -88,6 +88,11 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
88 88
89EXPORT_SYMBOL(xfrm6_tunnel_deregister); 89EXPORT_SYMBOL(xfrm6_tunnel_deregister);
90 90
91#define for_each_tunnel_rcu(head, handler) \
92 for (handler = rcu_dereference(head); \
93 handler != NULL; \
94 handler = rcu_dereference(handler->next)) \
95
91static int tunnel6_rcv(struct sk_buff *skb) 96static int tunnel6_rcv(struct sk_buff *skb)
92{ 97{
93 struct xfrm6_tunnel *handler; 98 struct xfrm6_tunnel *handler;
@@ -95,7 +100,7 @@ static int tunnel6_rcv(struct sk_buff *skb)
95 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 100 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
96 goto drop; 101 goto drop;
97 102
98 for (handler = tunnel6_handlers; handler; handler = handler->next) 103 for_each_tunnel_rcu(tunnel6_handlers, handler)
99 if (!handler->handler(skb)) 104 if (!handler->handler(skb))
100 return 0; 105 return 0;
101 106
@@ -113,7 +118,7 @@ static int tunnel46_rcv(struct sk_buff *skb)
113 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 118 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
114 goto drop; 119 goto drop;
115 120
116 for (handler = tunnel46_handlers; handler; handler = handler->next) 121 for_each_tunnel_rcu(tunnel46_handlers, handler)
117 if (!handler->handler(skb)) 122 if (!handler->handler(skb))
118 return 0; 123 return 0;
119 124
@@ -129,7 +134,7 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
129{ 134{
130 struct xfrm6_tunnel *handler; 135 struct xfrm6_tunnel *handler;
131 136
132 for (handler = tunnel6_handlers; handler; handler = handler->next) 137 for_each_tunnel_rcu(tunnel6_handlers, handler)
133 if (!handler->err_handler(skb, opt, type, code, offset, info)) 138 if (!handler->err_handler(skb, opt, type, code, offset, info))
134 break; 139 break;
135} 140}
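
Note: tunnel6.c keeps tunnel6_mutex for registration but lets the receive and error paths walk the handler chains under RCU via the new for_each_tunnel_rcu() helper; the deregister path (not shown in this hunk) still has to let a grace period pass, e.g. via synchronize_net(), before a handler may be freed. A self-contained reader-side sketch, assuming the caller does not already hold an RCU read-side section (in the kernel receive path it typically does, further up the stack):

	static int dispatch(struct sk_buff *skb)
	{
		struct xfrm6_tunnel *handler;
		int ret = -1;

		rcu_read_lock();
		for_each_tunnel_rcu(tunnel6_handlers, handler)
			if (!handler->handler(skb)) {
				ret = 0;	/* first handler that accepted the skb */
				break;
			}
		rcu_read_unlock();
		return ret;
	}
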
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 6baeabbbca82..39676eac3a37 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); 199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
200 200
201 xfrm6_policy_afinfo.garbage_collect(net); 201 xfrm6_policy_afinfo.garbage_collect(net);
202 return (atomic_read(&ops->entries) > ops->gc_thresh * 2); 202 return atomic_read(&ops->entries) > ops->gc_thresh * 2;
203} 203}
204 204
205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) 205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 2ce3a8278f26..ac7584b946a5 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -317,13 +317,13 @@ static const struct xfrm_type xfrm6_tunnel_type = {
317 .output = xfrm6_tunnel_output, 317 .output = xfrm6_tunnel_output,
318}; 318};
319 319
320static struct xfrm6_tunnel xfrm6_tunnel_handler = { 320static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = {
321 .handler = xfrm6_tunnel_rcv, 321 .handler = xfrm6_tunnel_rcv,
322 .err_handler = xfrm6_tunnel_err, 322 .err_handler = xfrm6_tunnel_err,
323 .priority = 2, 323 .priority = 2,
324}; 324};
325 325
326static struct xfrm6_tunnel xfrm46_tunnel_handler = { 326static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = {
327 .handler = xfrm6_tunnel_rcv, 327 .handler = xfrm6_tunnel_rcv,
328 .err_handler = xfrm6_tunnel_err, 328 .err_handler = xfrm6_tunnel_err,
329 .priority = 2, 329 .priority = 2,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index fd55b5135de5..bf3635129b17 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -573,9 +573,9 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
573 /* Requested object/attribute doesn't exist */ 573 /* Requested object/attribute doesn't exist */
574 if((self->errno == IAS_CLASS_UNKNOWN) || 574 if((self->errno == IAS_CLASS_UNKNOWN) ||
575 (self->errno == IAS_ATTRIB_UNKNOWN)) 575 (self->errno == IAS_ATTRIB_UNKNOWN))
576 return (-EADDRNOTAVAIL); 576 return -EADDRNOTAVAIL;
577 else 577 else
578 return (-EHOSTUNREACH); 578 return -EHOSTUNREACH;
579 } 579 }
580 580
581 /* Get the remote TSAP selector */ 581 /* Get the remote TSAP selector */
@@ -663,7 +663,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
663 __func__, name); 663 __func__, name);
664 self->daddr = DEV_ADDR_ANY; 664 self->daddr = DEV_ADDR_ANY;
665 kfree(discoveries); 665 kfree(discoveries);
666 return(-ENOTUNIQ); 666 return -ENOTUNIQ;
667 } 667 }
668 /* First time we found that one, save it ! */ 668 /* First time we found that one, save it ! */
669 daddr = self->daddr; 669 daddr = self->daddr;
@@ -677,7 +677,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
677 IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __func__); 677 IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __func__);
678 self->daddr = DEV_ADDR_ANY; 678 self->daddr = DEV_ADDR_ANY;
679 kfree(discoveries); 679 kfree(discoveries);
680 return(-EHOSTUNREACH); 680 return -EHOSTUNREACH;
681 break; 681 break;
682 } 682 }
683 } 683 }
@@ -689,7 +689,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
689 IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n", 689 IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n",
690 __func__, name); 690 __func__, name);
691 self->daddr = DEV_ADDR_ANY; 691 self->daddr = DEV_ADDR_ANY;
692 return(-EADDRNOTAVAIL); 692 return -EADDRNOTAVAIL;
693 } 693 }
694 694
695 /* Revert back to discovered device & service */ 695 /* Revert back to discovered device & service */
@@ -2465,9 +2465,9 @@ bed:
2465 /* Requested object/attribute doesn't exist */ 2465 /* Requested object/attribute doesn't exist */
2466 if((self->errno == IAS_CLASS_UNKNOWN) || 2466 if((self->errno == IAS_CLASS_UNKNOWN) ||
2467 (self->errno == IAS_ATTRIB_UNKNOWN)) 2467 (self->errno == IAS_ATTRIB_UNKNOWN))
2468 return (-EADDRNOTAVAIL); 2468 return -EADDRNOTAVAIL;
2469 else 2469 else
2470 return (-EHOSTUNREACH); 2470 return -EHOSTUNREACH;
2471 } 2471 }
2472 2472
2473 /* Translate from internal to user structure */ 2473 /* Translate from internal to user structure */
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index c1c8ae939126..36c3f037f172 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -315,7 +315,7 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
315 315
316 /* Get the actual number of device in the buffer and return */ 316 /* Get the actual number of device in the buffer and return */
317 *pn = i; 317 *pn = i;
318 return(buffer); 318 return buffer;
319} 319}
320 320
321#ifdef CONFIG_PROC_FS 321#ifdef CONFIG_PROC_FS
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index faa82ca2dfdc..a39cca8331df 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -449,8 +449,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
449 } 449 }
450 450
451#ifdef SERIAL_DO_RESTART 451#ifdef SERIAL_DO_RESTART
452 return ((self->flags & ASYNC_HUP_NOTIFY) ? 452 return (self->flags & ASYNC_HUP_NOTIFY) ?
453 -EAGAIN : -ERESTARTSYS); 453 -EAGAIN : -ERESTARTSYS;
454#else 454#else
455 return -EAGAIN; 455 return -EAGAIN;
456#endif 456#endif
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 5bb8353105cc..8ee1ff6c742f 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev);
45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, 45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
46 struct net_device *dev); 46 struct net_device *dev);
47static void irlan_eth_set_multicast_list( struct net_device *dev); 47static void irlan_eth_set_multicast_list( struct net_device *dev);
48static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev);
49 48
50static const struct net_device_ops irlan_eth_netdev_ops = { 49static const struct net_device_ops irlan_eth_netdev_ops = {
51 .ndo_open = irlan_eth_open, 50 .ndo_open = irlan_eth_open,
52 .ndo_stop = irlan_eth_close, 51 .ndo_stop = irlan_eth_close,
53 .ndo_start_xmit = irlan_eth_xmit, 52 .ndo_start_xmit = irlan_eth_xmit,
54 .ndo_get_stats = irlan_eth_get_stats,
55 .ndo_set_multicast_list = irlan_eth_set_multicast_list, 53 .ndo_set_multicast_list = irlan_eth_set_multicast_list,
56 .ndo_change_mtu = eth_change_mtu, 54 .ndo_change_mtu = eth_change_mtu,
57 .ndo_validate_addr = eth_validate_addr, 55 .ndo_validate_addr = eth_validate_addr,
@@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
208 * tried :-) DB 206 * tried :-) DB
209 */ 207 */
210 /* irttp_data_request already free the packet */ 208 /* irttp_data_request already free the packet */
211 self->stats.tx_dropped++; 209 dev->stats.tx_dropped++;
212 } else { 210 } else {
213 self->stats.tx_packets++; 211 dev->stats.tx_packets++;
214 self->stats.tx_bytes += len; 212 dev->stats.tx_bytes += len;
215 } 213 }
216 214
217 return NETDEV_TX_OK; 215 return NETDEV_TX_OK;
@@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
226int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) 224int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
227{ 225{
228 struct irlan_cb *self = instance; 226 struct irlan_cb *self = instance;
227 struct net_device *dev = self->dev;
229 228
230 if (skb == NULL) { 229 if (skb == NULL) {
231 ++self->stats.rx_dropped; 230 dev->stats.rx_dropped++;
232 return 0; 231 return 0;
233 } 232 }
234 if (skb->len < ETH_HLEN) { 233 if (skb->len < ETH_HLEN) {
235 IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n", 234 IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n",
236 __func__, skb->len); 235 __func__, skb->len);
237 ++self->stats.rx_dropped; 236 dev->stats.rx_dropped++;
238 dev_kfree_skb(skb); 237 dev_kfree_skb(skb);
239 return 0; 238 return 0;
240 } 239 }
@@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
244 * might have been previously set by the low level IrDA network 243 * might have been previously set by the low level IrDA network
245 * device driver 244 * device driver
246 */ 245 */
247 skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */ 246 skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */
248 247
249 self->stats.rx_packets++; 248 dev->stats.rx_packets++;
250 self->stats.rx_bytes += skb->len; 249 dev->stats.rx_bytes += skb->len;
251 250
252 netif_rx(skb); /* Eat it! */ 251 netif_rx(skb); /* Eat it! */
253 252
@@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev)
348 else 347 else
349 irlan_set_broadcast_filter(self, FALSE); 348 irlan_set_broadcast_filter(self, FALSE);
350} 349}
351
352/*
353 * Function irlan_get_stats (dev)
354 *
355 * Get the current statistics for this device
356 *
357 */
358static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev)
359{
360 struct irlan_cb *self = netdev_priv(dev);
361
362 return &self->stats;
363}
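
The hunks above move the IrLAN counters from a driver-private net_device_stats copy into the net_device's built-in dev->stats block, which is why the .ndo_get_stats callback and irlan_eth_get_stats() can be deleted: the core falls back to dev->stats on its own. A minimal sketch of that pattern, with a hypothetical hw_transmit() standing in for irttp_data_request():

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical low-level transmit; consumes the skb either way. */
static int hw_transmit(struct sk_buff *skb)
{
	dev_kfree_skb(skb);
	return 0;
}

static netdev_tx_t sketch_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int len = skb->len;

	if (hw_transmit(skb) < 0) {
		/* skb already freed by hw_transmit() */
		dev->stats.tx_dropped++;
	} else {
		dev->stats.tx_packets++;
		dev->stats.tx_bytes += len;
	}
	return NETDEV_TX_OK;
}
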
diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c
index cbcb4eb54037..43f16040a6fe 100644
--- a/net/irda/irlan/irlan_event.c
+++ b/net/irda/irlan/irlan_event.c
@@ -24,7 +24,7 @@
24 24
25#include <net/irda/irlan_event.h> 25#include <net/irda/irlan_event.h>
26 26
27char *irlan_state[] = { 27const char * const irlan_state[] = {
28 "IRLAN_IDLE", 28 "IRLAN_IDLE",
29 "IRLAN_QUERY", 29 "IRLAN_QUERY",
30 "IRLAN_CONN", 30 "IRLAN_CONN",
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 0e7d8bde145d..6115a44c0a24 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -939,7 +939,7 @@ struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots)
939 } 939 }
940 940
941 /* Return current cached discovery log */ 941 /* Return current cached discovery log */
942 return(irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE)); 942 return irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE);
943} 943}
944EXPORT_SYMBOL(irlmp_get_discoveries); 944EXPORT_SYMBOL(irlmp_get_discoveries);
945 945
diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c
index 3750884094da..062e63b1c5c4 100644
--- a/net/irda/irlmp_frame.c
+++ b/net/irda/irlmp_frame.c
@@ -448,7 +448,7 @@ static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel,
448 (self->cache.slsap_sel == slsap_sel) && 448 (self->cache.slsap_sel == slsap_sel) &&
449 (self->cache.dlsap_sel == dlsap_sel)) 449 (self->cache.dlsap_sel == dlsap_sel))
450 { 450 {
451 return (self->cache.lsap); 451 return self->cache.lsap;
452 } 452 }
453#endif 453#endif
454 454
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index e98e40d76f4f..7f17a8020e8a 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -238,7 +238,7 @@ irnet_ias_to_tsap(irnet_socket * self,
238 DEXIT(IRDA_SR_TRACE, "\n"); 238 DEXIT(IRDA_SR_TRACE, "\n");
239 239
240 /* Return the TSAP */ 240 /* Return the TSAP */
241 return(dtsap_sel); 241 return dtsap_sel;
242} 242}
243 243
244/*------------------------------------------------------------------*/ 244/*------------------------------------------------------------------*/
@@ -301,7 +301,7 @@ irnet_connect_tsap(irnet_socket * self)
301 { 301 {
302 clear_bit(0, &self->ttp_connect); 302 clear_bit(0, &self->ttp_connect);
303 DERROR(IRDA_SR_ERROR, "connect aborted!\n"); 303 DERROR(IRDA_SR_ERROR, "connect aborted!\n");
304 return(err); 304 return err;
305 } 305 }
306 306
307 /* Connect to remote device */ 307 /* Connect to remote device */
@@ -312,7 +312,7 @@ irnet_connect_tsap(irnet_socket * self)
312 { 312 {
313 clear_bit(0, &self->ttp_connect); 313 clear_bit(0, &self->ttp_connect);
314 DERROR(IRDA_SR_ERROR, "connect aborted!\n"); 314 DERROR(IRDA_SR_ERROR, "connect aborted!\n");
315 return(err); 315 return err;
316 } 316 }
317 317
318 /* The above call is non-blocking. 318 /* The above call is non-blocking.
@@ -321,7 +321,7 @@ irnet_connect_tsap(irnet_socket * self)
321 * See you there ;-) */ 321 * See you there ;-) */
322 322
323 DEXIT(IRDA_SR_TRACE, "\n"); 323 DEXIT(IRDA_SR_TRACE, "\n");
324 return(err); 324 return err;
325} 325}
326 326
327/*------------------------------------------------------------------*/ 327/*------------------------------------------------------------------*/
@@ -362,10 +362,10 @@ irnet_discover_next_daddr(irnet_socket * self)
362 /* The above request is non-blocking. 362 /* The above request is non-blocking.
363 * After a while, IrDA will call us back in irnet_discovervalue_confirm() 363 * After a while, IrDA will call us back in irnet_discovervalue_confirm()
364 * We will then call irnet_ias_to_tsap() and come back here again... */ 364 * We will then call irnet_ias_to_tsap() and come back here again... */
365 return(0); 365 return 0;
366 } 366 }
367 else 367 else
368 return(1); 368 return 1;
369} 369}
370 370
371/*------------------------------------------------------------------*/ 371/*------------------------------------------------------------------*/
@@ -436,7 +436,7 @@ irnet_discover_daddr_and_lsap_sel(irnet_socket * self)
436 /* Follow me in irnet_discovervalue_confirm() */ 436 /* Follow me in irnet_discovervalue_confirm() */
437 437
438 DEXIT(IRDA_SR_TRACE, "\n"); 438 DEXIT(IRDA_SR_TRACE, "\n");
439 return(0); 439 return 0;
440} 440}
441 441
442/*------------------------------------------------------------------*/ 442/*------------------------------------------------------------------*/
@@ -485,7 +485,7 @@ irnet_dname_to_daddr(irnet_socket * self)
485 /* No luck ! */ 485 /* No luck ! */
486 DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname); 486 DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname);
487 kfree(discoveries); 487 kfree(discoveries);
488 return(-EADDRNOTAVAIL); 488 return -EADDRNOTAVAIL;
489} 489}
490 490
491 491
@@ -527,7 +527,7 @@ irda_irnet_create(irnet_socket * self)
527 INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect); 527 INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect);
528 528
529 DEXIT(IRDA_SOCK_TRACE, "\n"); 529 DEXIT(IRDA_SOCK_TRACE, "\n");
530 return(0); 530 return 0;
531} 531}
532 532
533/*------------------------------------------------------------------*/ 533/*------------------------------------------------------------------*/
@@ -601,7 +601,7 @@ irda_irnet_connect(irnet_socket * self)
601 * We will finish the connection procedure in irnet_connect_tsap(). 601 * We will finish the connection procedure in irnet_connect_tsap().
602 */ 602 */
603 DEXIT(IRDA_SOCK_TRACE, "\n"); 603 DEXIT(IRDA_SOCK_TRACE, "\n");
604 return(0); 604 return 0;
605} 605}
606 606
607/*------------------------------------------------------------------*/ 607/*------------------------------------------------------------------*/
@@ -733,7 +733,7 @@ irnet_daddr_to_dname(irnet_socket * self)
733 /* No luck ! */ 733 /* No luck ! */
734 DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr); 734 DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr);
735 kfree(discoveries); 735 kfree(discoveries);
736 return(-EADDRNOTAVAIL); 736 return -EADDRNOTAVAIL;
737} 737}
738 738
739/*------------------------------------------------------------------*/ 739/*------------------------------------------------------------------*/
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index dfe7b38dd4af..69f1fa64994e 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -166,7 +166,7 @@ irnet_ctrl_write(irnet_socket * ap,
166 } 166 }
167 167
168 /* Success : we have parsed all commands successfully */ 168 /* Success : we have parsed all commands successfully */
169 return(count); 169 return count;
170} 170}
171 171
172#ifdef INITIAL_DISCOVERY 172#ifdef INITIAL_DISCOVERY
@@ -300,7 +300,7 @@ irnet_ctrl_read(irnet_socket * ap,
300 } 300 }
301 301
302 DEXIT(CTRL_TRACE, "\n"); 302 DEXIT(CTRL_TRACE, "\n");
303 return(strlen(event)); 303 return strlen(event);
304 } 304 }
305#endif /* INITIAL_DISCOVERY */ 305#endif /* INITIAL_DISCOVERY */
306 306
@@ -409,7 +409,7 @@ irnet_ctrl_read(irnet_socket * ap,
409 } 409 }
410 410
411 DEXIT(CTRL_TRACE, "\n"); 411 DEXIT(CTRL_TRACE, "\n");
412 return(strlen(event)); 412 return strlen(event);
413} 413}
414 414
415/*------------------------------------------------------------------*/ 415/*------------------------------------------------------------------*/
@@ -623,7 +623,7 @@ dev_irnet_poll(struct file * file,
623 mask |= irnet_ctrl_poll(ap, file, wait); 623 mask |= irnet_ctrl_poll(ap, file, wait);
624 624
625 DEXIT(FS_TRACE, " - mask=0x%X\n", mask); 625 DEXIT(FS_TRACE, " - mask=0x%X\n", mask);
626 return(mask); 626 return mask;
627} 627}
628 628
629/*------------------------------------------------------------------*/ 629/*------------------------------------------------------------------*/
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
index b5df2418f90c..940225866da0 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/net/irda/irnet/irnet_ppp.h
@@ -103,7 +103,8 @@ static const struct file_operations irnet_device_fops =
103 .poll = dev_irnet_poll, 103 .poll = dev_irnet_poll,
104 .unlocked_ioctl = dev_irnet_ioctl, 104 .unlocked_ioctl = dev_irnet_ioctl,
105 .open = dev_irnet_open, 105 .open = dev_irnet_open,
106 .release = dev_irnet_close 106 .release = dev_irnet_close,
107 .llseek = noop_llseek,
107 /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */ 108 /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */
108}; 109};
109 110
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 43040e97c474..d87c22df6f1e 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -565,12 +565,12 @@ pfkey_proto2satype(uint16_t proto)
565 565
566static uint8_t pfkey_proto_to_xfrm(uint8_t proto) 566static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
567{ 567{
568 return (proto == IPSEC_PROTO_ANY ? 0 : proto); 568 return proto == IPSEC_PROTO_ANY ? 0 : proto;
569} 569}
570 570
571static uint8_t pfkey_proto_from_xfrm(uint8_t proto) 571static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
572{ 572{
573 return (proto ? proto : IPSEC_PROTO_ANY); 573 return proto ? proto : IPSEC_PROTO_ANY;
574} 574}
575 575
576static inline int pfkey_sockaddr_len(sa_family_t family) 576static inline int pfkey_sockaddr_len(sa_family_t family)
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 1ae697681bc7..8d9ce0accc98 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -144,7 +144,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
144 nf_reset(skb); 144 nf_reset(skb);
145 145
146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { 146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
147 dev->last_rx = jiffies;
148 dev->stats.rx_packets++; 147 dev->stats.rx_packets++;
149 dev->stats.rx_bytes += data_len; 148 dev->stats.rx_bytes += data_len;
150 } else 149 } else
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index ff954b3e94b6..39a21d0c61c4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1768,7 +1768,7 @@ static const struct proto_ops pppol2tp_ops = {
1768 .ioctl = pppox_ioctl, 1768 .ioctl = pppox_ioctl,
1769}; 1769};
1770 1770
1771static struct pppox_proto pppol2tp_proto = { 1771static const struct pppox_proto pppol2tp_proto = {
1772 .create = pppol2tp_create, 1772 .create = pppol2tp_create,
1773 .ioctl = pppol2tp_ioctl 1773 .ioctl = pppol2tp_ioctl
1774}; 1774};
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index a87cb3ba2df6..d2b03e0851ef 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -138,10 +138,8 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
138 struct crypto_cipher *tfm; 138 struct crypto_cipher *tfm;
139 139
140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
141 if (IS_ERR(tfm)) 141 if (!IS_ERR(tfm))
142 return NULL; 142 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
143
144 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
145 143
146 return tfm; 144 return tfm;
147} 145}
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index 3d097b3d7b62..b4d66cca76d6 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -119,10 +119,8 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
119 struct crypto_cipher *tfm; 119 struct crypto_cipher *tfm;
120 120
121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
122 if (IS_ERR(tfm)) 122 if (!IS_ERR(tfm))
123 return NULL; 123 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
124
125 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
126 124
127 return tfm; 125 return tfm;
128} 126}
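
With the two AES hunks above, the setup helpers stop flattening an allocation failure to NULL and instead return the ERR_PTR from crypto_alloc_cipher() unchanged, so callers distinguish errors with IS_ERR()/PTR_ERR(). A hedged caller-side sketch (function and parameter names are illustrative):

#include <linux/err.h>
#include <linux/crypto.h>

static int sketch_key_init(const u8 *key, unsigned int key_len,
			   struct crypto_cipher **out)
{
	struct crypto_cipher *tfm;

	tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);	/* e.g. -ENOMEM or -ENOENT */

	crypto_cipher_setkey(tfm, key, key_len);
	*out = tfm;
	return 0;
}
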
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 965b272499fd..58eab9e8e4ee 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -86,6 +86,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
86 tid, 0, reason); 86 tid, 0, reason);
87 87
88 del_timer_sync(&tid_rx->session_timer); 88 del_timer_sync(&tid_rx->session_timer);
89 del_timer_sync(&tid_rx->reorder_timer);
89 90
90 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); 91 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
91} 92}
@@ -120,6 +121,20 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
120 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); 121 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
121} 122}
122 123
124static void sta_rx_agg_reorder_timer_expired(unsigned long data)
125{
126 u8 *ptid = (u8 *)data;
127 u8 *timer_to_id = ptid - *ptid;
128 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
129 timer_to_tid[0]);
130
131 rcu_read_lock();
132 spin_lock(&sta->lock);
133 ieee80211_release_reorder_timeout(sta, *ptid);
134 spin_unlock(&sta->lock);
135 rcu_read_unlock();
136}
137
123static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, 138static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
124 u8 dialog_token, u16 status, u16 policy, 139 u8 dialog_token, u16 status, u16 policy,
125 u16 buf_size, u16 timeout) 140 u16 buf_size, u16 timeout)
@@ -251,11 +266,18 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
251 goto end; 266 goto end;
252 } 267 }
253 268
269 spin_lock_init(&tid_agg_rx->reorder_lock);
270
254 /* rx timer */ 271 /* rx timer */
255 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; 272 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired;
256 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; 273 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
257 init_timer(&tid_agg_rx->session_timer); 274 init_timer(&tid_agg_rx->session_timer);
258 275
276 /* rx reorder timer */
277 tid_agg_rx->reorder_timer.function = sta_rx_agg_reorder_timer_expired;
278 tid_agg_rx->reorder_timer.data = (unsigned long)&sta->timer_to_tid[tid];
279 init_timer(&tid_agg_rx->reorder_timer);
280
259 /* prepare reordering buffer */ 281 /* prepare reordering buffer */
260 tid_agg_rx->reorder_buf = 282 tid_agg_rx->reorder_buf =
261 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); 283 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC);
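
The new reorder timer reuses mac80211's timer-to-tid trick: the timer's data is &sta->timer_to_tid[tid], and because each slot of that array stores its own index, the callback can recover both the owning station entry and the TID from a single pointer. A simplified sketch of the recovery step (the structure here is a stand-in for struct sta_info):

#include <linux/kernel.h>

struct sta_sketch {
	u8 timer_to_tid[16];	/* initialised so timer_to_tid[i] == i */
};

static void reorder_timer_cb_sketch(unsigned long data)
{
	u8 *ptid = (u8 *)data;		/* &sta->timer_to_tid[tid] */
	u8 *base = ptid - *ptid;	/* step back to timer_to_tid[0] */
	struct sta_sketch *sta = container_of(base, struct sta_sketch,
					      timer_to_tid[0]);
	unsigned int tid = *ptid;

	/* release frames buffered for (sta, tid) here */
	(void)sta;
	(void)tid;
}
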
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 29ac8e1a509e..c981604b71e6 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -19,33 +19,6 @@
19#include "rate.h" 19#include "rate.h"
20#include "mesh.h" 20#include "mesh.h"
21 21
22static bool nl80211_type_check(enum nl80211_iftype type)
23{
24 switch (type) {
25 case NL80211_IFTYPE_ADHOC:
26 case NL80211_IFTYPE_STATION:
27 case NL80211_IFTYPE_MONITOR:
28#ifdef CONFIG_MAC80211_MESH
29 case NL80211_IFTYPE_MESH_POINT:
30#endif
31 case NL80211_IFTYPE_AP:
32 case NL80211_IFTYPE_AP_VLAN:
33 case NL80211_IFTYPE_WDS:
34 return true;
35 default:
36 return false;
37 }
38}
39
40static bool nl80211_params_check(enum nl80211_iftype type,
41 struct vif_params *params)
42{
43 if (!nl80211_type_check(type))
44 return false;
45
46 return true;
47}
48
49static int ieee80211_add_iface(struct wiphy *wiphy, char *name, 22static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
50 enum nl80211_iftype type, u32 *flags, 23 enum nl80211_iftype type, u32 *flags,
51 struct vif_params *params) 24 struct vif_params *params)
@@ -55,9 +28,6 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
55 struct ieee80211_sub_if_data *sdata; 28 struct ieee80211_sub_if_data *sdata;
56 int err; 29 int err;
57 30
58 if (!nl80211_params_check(type, params))
59 return -EINVAL;
60
61 err = ieee80211_if_add(local, name, &dev, type, params); 31 err = ieee80211_if_add(local, name, &dev, type, params);
62 if (err || type != NL80211_IFTYPE_MONITOR || !flags) 32 if (err || type != NL80211_IFTYPE_MONITOR || !flags)
63 return err; 33 return err;
@@ -82,12 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
82 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 52 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
83 int ret; 53 int ret;
84 54
85 if (ieee80211_sdata_running(sdata))
86 return -EBUSY;
87
88 if (!nl80211_params_check(type, params))
89 return -EINVAL;
90
91 ret = ieee80211_if_change_type(sdata, type); 55 ret = ieee80211_if_change_type(sdata, type);
92 if (ret) 56 if (ret)
93 return ret; 57 return ret;
@@ -114,44 +78,30 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
114 u8 key_idx, const u8 *mac_addr, 78 u8 key_idx, const u8 *mac_addr,
115 struct key_params *params) 79 struct key_params *params)
116{ 80{
117 struct ieee80211_sub_if_data *sdata; 81 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
118 struct sta_info *sta = NULL; 82 struct sta_info *sta = NULL;
119 enum ieee80211_key_alg alg;
120 struct ieee80211_key *key; 83 struct ieee80211_key *key;
121 int err; 84 int err;
122 85
123 if (!netif_running(dev)) 86 if (!ieee80211_sdata_running(sdata))
124 return -ENETDOWN; 87 return -ENETDOWN;
125 88
126 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 89 /* reject WEP and TKIP keys if WEP failed to initialize */
127
128 switch (params->cipher) { 90 switch (params->cipher) {
129 case WLAN_CIPHER_SUITE_WEP40: 91 case WLAN_CIPHER_SUITE_WEP40:
130 case WLAN_CIPHER_SUITE_WEP104:
131 alg = ALG_WEP;
132 break;
133 case WLAN_CIPHER_SUITE_TKIP: 92 case WLAN_CIPHER_SUITE_TKIP:
134 alg = ALG_TKIP; 93 case WLAN_CIPHER_SUITE_WEP104:
135 break; 94 if (IS_ERR(sdata->local->wep_tx_tfm))
136 case WLAN_CIPHER_SUITE_CCMP: 95 return -EINVAL;
137 alg = ALG_CCMP;
138 break;
139 case WLAN_CIPHER_SUITE_AES_CMAC:
140 alg = ALG_AES_CMAC;
141 break; 96 break;
142 default: 97 default:
143 return -EINVAL; 98 break;
144 } 99 }
145 100
146 /* reject WEP and TKIP keys if WEP failed to initialize */ 101 key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len,
147 if ((alg == ALG_WEP || alg == ALG_TKIP) && 102 params->key, params->seq_len, params->seq);
148 IS_ERR(sdata->local->wep_tx_tfm)) 103 if (IS_ERR(key))
149 return -EINVAL; 104 return PTR_ERR(key);
150
151 key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key,
152 params->seq_len, params->seq);
153 if (!key)
154 return -ENOMEM;
155 105
156 mutex_lock(&sdata->local->sta_mtx); 106 mutex_lock(&sdata->local->sta_mtx);
157 107
@@ -164,9 +114,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
164 } 114 }
165 } 115 }
166 116
167 ieee80211_key_link(key, sdata, sta); 117 err = ieee80211_key_link(key, sdata, sta);
118 if (err)
119 ieee80211_key_free(sdata->local, key);
168 120
169 err = 0;
170 out_unlock: 121 out_unlock:
171 mutex_unlock(&sdata->local->sta_mtx); 122 mutex_unlock(&sdata->local->sta_mtx);
172 123
@@ -247,10 +198,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
247 198
248 memset(&params, 0, sizeof(params)); 199 memset(&params, 0, sizeof(params));
249 200
250 switch (key->conf.alg) { 201 params.cipher = key->conf.cipher;
251 case ALG_TKIP:
252 params.cipher = WLAN_CIPHER_SUITE_TKIP;
253 202
203 switch (key->conf.cipher) {
204 case WLAN_CIPHER_SUITE_TKIP:
254 iv32 = key->u.tkip.tx.iv32; 205 iv32 = key->u.tkip.tx.iv32;
255 iv16 = key->u.tkip.tx.iv16; 206 iv16 = key->u.tkip.tx.iv16;
256 207
@@ -268,8 +219,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
268 params.seq = seq; 219 params.seq = seq;
269 params.seq_len = 6; 220 params.seq_len = 6;
270 break; 221 break;
271 case ALG_CCMP: 222 case WLAN_CIPHER_SUITE_CCMP:
272 params.cipher = WLAN_CIPHER_SUITE_CCMP;
273 seq[0] = key->u.ccmp.tx_pn[5]; 223 seq[0] = key->u.ccmp.tx_pn[5];
274 seq[1] = key->u.ccmp.tx_pn[4]; 224 seq[1] = key->u.ccmp.tx_pn[4];
275 seq[2] = key->u.ccmp.tx_pn[3]; 225 seq[2] = key->u.ccmp.tx_pn[3];
@@ -279,14 +229,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
279 params.seq = seq; 229 params.seq = seq;
280 params.seq_len = 6; 230 params.seq_len = 6;
281 break; 231 break;
282 case ALG_WEP: 232 case WLAN_CIPHER_SUITE_AES_CMAC:
283 if (key->conf.keylen == 5)
284 params.cipher = WLAN_CIPHER_SUITE_WEP40;
285 else
286 params.cipher = WLAN_CIPHER_SUITE_WEP104;
287 break;
288 case ALG_AES_CMAC:
289 params.cipher = WLAN_CIPHER_SUITE_AES_CMAC;
290 seq[0] = key->u.aes_cmac.tx_pn[5]; 233 seq[0] = key->u.aes_cmac.tx_pn[5];
291 seq[1] = key->u.aes_cmac.tx_pn[4]; 234 seq[1] = key->u.aes_cmac.tx_pn[4];
292 seq[2] = key->u.aes_cmac.tx_pn[3]; 235 seq[2] = key->u.aes_cmac.tx_pn[3];
@@ -634,6 +577,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
634 struct sta_info *sta, 577 struct sta_info *sta,
635 struct station_parameters *params) 578 struct station_parameters *params)
636{ 579{
580 unsigned long flags;
637 u32 rates; 581 u32 rates;
638 int i, j; 582 int i, j;
639 struct ieee80211_supported_band *sband; 583 struct ieee80211_supported_band *sband;
@@ -642,7 +586,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
642 586
643 sband = local->hw.wiphy->bands[local->oper_channel->band]; 587 sband = local->hw.wiphy->bands[local->oper_channel->band];
644 588
645 spin_lock_bh(&sta->lock); 589 spin_lock_irqsave(&sta->flaglock, flags);
646 mask = params->sta_flags_mask; 590 mask = params->sta_flags_mask;
647 set = params->sta_flags_set; 591 set = params->sta_flags_set;
648 592
@@ -669,7 +613,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
669 if (set & BIT(NL80211_STA_FLAG_MFP)) 613 if (set & BIT(NL80211_STA_FLAG_MFP))
670 sta->flags |= WLAN_STA_MFP; 614 sta->flags |= WLAN_STA_MFP;
671 } 615 }
672 spin_unlock_bh(&sta->lock); 616 spin_unlock_irqrestore(&sta->flaglock, flags);
673 617
674 /* 618 /*
675 * cfg80211 validates this (1-2007) and allows setting the AID 619 * cfg80211 validates this (1-2007) and allows setting the AID
@@ -1143,9 +1087,9 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
1143 p.uapsd = false; 1087 p.uapsd = false;
1144 1088
1145 if (drv_conf_tx(local, params->queue, &p)) { 1089 if (drv_conf_tx(local, params->queue, &p)) {
1146 printk(KERN_DEBUG "%s: failed to set TX queue " 1090 wiphy_debug(local->hw.wiphy,
1147 "parameters for queue %d\n", 1091 "failed to set TX queue parameters for queue %d\n",
1148 wiphy_name(local->hw.wiphy), params->queue); 1092 params->queue);
1149 return -EINVAL; 1093 return -EINVAL;
1150 } 1094 }
1151 1095
@@ -1207,15 +1151,26 @@ static int ieee80211_scan(struct wiphy *wiphy,
1207 struct net_device *dev, 1151 struct net_device *dev,
1208 struct cfg80211_scan_request *req) 1152 struct cfg80211_scan_request *req)
1209{ 1153{
1210 struct ieee80211_sub_if_data *sdata; 1154 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1211
1212 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1213 1155
1214 if (sdata->vif.type != NL80211_IFTYPE_STATION && 1156 switch (ieee80211_vif_type_p2p(&sdata->vif)) {
1215 sdata->vif.type != NL80211_IFTYPE_ADHOC && 1157 case NL80211_IFTYPE_STATION:
1216 sdata->vif.type != NL80211_IFTYPE_MESH_POINT && 1158 case NL80211_IFTYPE_ADHOC:
1217 (sdata->vif.type != NL80211_IFTYPE_AP || sdata->u.ap.beacon)) 1159 case NL80211_IFTYPE_MESH_POINT:
1160 case NL80211_IFTYPE_P2P_CLIENT:
1161 break;
1162 case NL80211_IFTYPE_P2P_GO:
1163 if (sdata->local->ops->hw_scan)
1164 break;
1165 /* FIXME: implement NoA while scanning in software */
1166 return -EOPNOTSUPP;
1167 case NL80211_IFTYPE_AP:
1168 if (sdata->u.ap.beacon)
1169 return -EOPNOTSUPP;
1170 break;
1171 default:
1218 return -EOPNOTSUPP; 1172 return -EOPNOTSUPP;
1173 }
1219 1174
1220 return ieee80211_request_scan(sdata, req); 1175 return ieee80211_request_scan(sdata, req);
1221} 1176}
@@ -1541,11 +1496,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
1541 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); 1496 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
1542} 1497}
1543 1498
1544static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, 1499static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
1545 struct ieee80211_channel *chan, 1500 struct ieee80211_channel *chan,
1546 enum nl80211_channel_type channel_type, 1501 enum nl80211_channel_type channel_type,
1547 bool channel_type_valid, 1502 bool channel_type_valid,
1548 const u8 *buf, size_t len, u64 *cookie) 1503 const u8 *buf, size_t len, u64 *cookie)
1549{ 1504{
1550 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1505 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1551 struct ieee80211_local *local = sdata->local; 1506 struct ieee80211_local *local = sdata->local;
@@ -1575,8 +1530,6 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1575 return -ENOLINK; 1530 return -ENOLINK;
1576 break; 1531 break;
1577 case NL80211_IFTYPE_STATION: 1532 case NL80211_IFTYPE_STATION:
1578 if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED))
1579 flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
1580 break; 1533 break;
1581 default: 1534 default:
1582 return -EOPNOTSUPP; 1535 return -EOPNOTSUPP;
@@ -1647,6 +1600,6 @@ struct cfg80211_ops mac80211_config_ops = {
1647 .set_bitrate_mask = ieee80211_set_bitrate_mask, 1600 .set_bitrate_mask = ieee80211_set_bitrate_mask,
1648 .remain_on_channel = ieee80211_remain_on_channel, 1601 .remain_on_channel = ieee80211_remain_on_channel,
1649 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, 1602 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
1650 .action = ieee80211_action, 1603 .mgmt_tx = ieee80211_mgmt_tx,
1651 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, 1604 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
1652}; 1605};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 32be11e4c4d9..5b24740fc0b0 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -11,7 +11,7 @@ __ieee80211_get_channel_mode(struct ieee80211_local *local,
11{ 11{
12 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
13 13
14 WARN_ON(!mutex_is_locked(&local->iflist_mtx)); 14 lockdep_assert_held(&local->iflist_mtx);
15 15
16 list_for_each_entry(sdata, &local->interfaces, list) { 16 list_for_each_entry(sdata, &local->interfaces, list) {
17 if (sdata == ignore) 17 if (sdata == ignore)
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index a694c593ff6a..ebd5b69f562e 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -85,13 +85,15 @@ static ssize_t tsf_write(struct file *file,
85 if (strncmp(buf, "reset", 5) == 0) { 85 if (strncmp(buf, "reset", 5) == 0) {
86 if (local->ops->reset_tsf) { 86 if (local->ops->reset_tsf) {
87 drv_reset_tsf(local); 87 drv_reset_tsf(local);
88 printk(KERN_INFO "%s: debugfs reset TSF\n", wiphy_name(local->hw.wiphy)); 88 wiphy_info(local->hw.wiphy, "debugfs reset TSF\n");
89 } 89 }
90 } else { 90 } else {
91 tsf = simple_strtoul(buf, NULL, 0); 91 tsf = simple_strtoul(buf, NULL, 0);
92 if (local->ops->set_tsf) { 92 if (local->ops->set_tsf) {
93 drv_set_tsf(local, tsf); 93 drv_set_tsf(local, tsf);
94 printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n", wiphy_name(local->hw.wiphy), tsf); 94 wiphy_info(local->hw.wiphy,
95 "debugfs set TSF to %#018llx\n", tsf);
96
95 } 97 }
96 } 98 }
97 99
@@ -366,7 +368,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
366 if (!phyd) 368 if (!phyd)
367 return; 369 return;
368 370
369 local->debugfs.stations = debugfs_create_dir("stations", phyd);
370 local->debugfs.keys = debugfs_create_dir("keys", phyd); 371 local->debugfs.keys = debugfs_create_dir("keys", phyd);
371 372
372 DEBUGFS_ADD(frequency); 373 DEBUGFS_ADD(frequency);
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index fa5e76e658ef..1647f8dc5cda 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -64,26 +64,13 @@ static ssize_t key_algorithm_read(struct file *file,
64 char __user *userbuf, 64 char __user *userbuf,
65 size_t count, loff_t *ppos) 65 size_t count, loff_t *ppos)
66{ 66{
67 char *alg; 67 char buf[15];
68 struct ieee80211_key *key = file->private_data; 68 struct ieee80211_key *key = file->private_data;
69 u32 c = key->conf.cipher;
69 70
70 switch (key->conf.alg) { 71 sprintf(buf, "%.2x-%.2x-%.2x:%d\n",
71 case ALG_WEP: 72 c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff);
72 alg = "WEP\n"; 73 return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
73 break;
74 case ALG_TKIP:
75 alg = "TKIP\n";
76 break;
77 case ALG_CCMP:
78 alg = "CCMP\n";
79 break;
80 case ALG_AES_CMAC:
81 alg = "AES-128-CMAC\n";
82 break;
83 default:
84 return 0;
85 }
86 return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg));
87} 74}
88KEY_OPS(algorithm); 75KEY_OPS(algorithm);
89 76
@@ -95,21 +82,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
95 int len; 82 int len;
96 struct ieee80211_key *key = file->private_data; 83 struct ieee80211_key *key = file->private_data;
97 84
98 switch (key->conf.alg) { 85 switch (key->conf.cipher) {
99 case ALG_WEP: 86 case WLAN_CIPHER_SUITE_WEP40:
87 case WLAN_CIPHER_SUITE_WEP104:
100 len = scnprintf(buf, sizeof(buf), "\n"); 88 len = scnprintf(buf, sizeof(buf), "\n");
101 break; 89 break;
102 case ALG_TKIP: 90 case WLAN_CIPHER_SUITE_TKIP:
103 len = scnprintf(buf, sizeof(buf), "%08x %04x\n", 91 len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
104 key->u.tkip.tx.iv32, 92 key->u.tkip.tx.iv32,
105 key->u.tkip.tx.iv16); 93 key->u.tkip.tx.iv16);
106 break; 94 break;
107 case ALG_CCMP: 95 case WLAN_CIPHER_SUITE_CCMP:
108 tpn = key->u.ccmp.tx_pn; 96 tpn = key->u.ccmp.tx_pn;
109 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 97 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
110 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); 98 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
111 break; 99 break;
112 case ALG_AES_CMAC: 100 case WLAN_CIPHER_SUITE_AES_CMAC:
113 tpn = key->u.aes_cmac.tx_pn; 101 tpn = key->u.aes_cmac.tx_pn;
114 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 102 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
115 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], 103 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4],
@@ -130,11 +118,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
130 int i, len; 118 int i, len;
131 const u8 *rpn; 119 const u8 *rpn;
132 120
133 switch (key->conf.alg) { 121 switch (key->conf.cipher) {
134 case ALG_WEP: 122 case WLAN_CIPHER_SUITE_WEP40:
123 case WLAN_CIPHER_SUITE_WEP104:
135 len = scnprintf(buf, sizeof(buf), "\n"); 124 len = scnprintf(buf, sizeof(buf), "\n");
136 break; 125 break;
137 case ALG_TKIP: 126 case WLAN_CIPHER_SUITE_TKIP:
138 for (i = 0; i < NUM_RX_DATA_QUEUES; i++) 127 for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
139 p += scnprintf(p, sizeof(buf)+buf-p, 128 p += scnprintf(p, sizeof(buf)+buf-p,
140 "%08x %04x\n", 129 "%08x %04x\n",
@@ -142,7 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
142 key->u.tkip.rx[i].iv16); 131 key->u.tkip.rx[i].iv16);
143 len = p - buf; 132 len = p - buf;
144 break; 133 break;
145 case ALG_CCMP: 134 case WLAN_CIPHER_SUITE_CCMP:
146 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { 135 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) {
147 rpn = key->u.ccmp.rx_pn[i]; 136 rpn = key->u.ccmp.rx_pn[i];
148 p += scnprintf(p, sizeof(buf)+buf-p, 137 p += scnprintf(p, sizeof(buf)+buf-p,
@@ -152,7 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
152 } 141 }
153 len = p - buf; 142 len = p - buf;
154 break; 143 break;
155 case ALG_AES_CMAC: 144 case WLAN_CIPHER_SUITE_AES_CMAC:
156 rpn = key->u.aes_cmac.rx_pn; 145 rpn = key->u.aes_cmac.rx_pn;
157 p += scnprintf(p, sizeof(buf)+buf-p, 146 p += scnprintf(p, sizeof(buf)+buf-p,
158 "%02x%02x%02x%02x%02x%02x\n", 147 "%02x%02x%02x%02x%02x%02x\n",
@@ -174,11 +163,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf,
174 char buf[20]; 163 char buf[20];
175 int len; 164 int len;
176 165
177 switch (key->conf.alg) { 166 switch (key->conf.cipher) {
178 case ALG_CCMP: 167 case WLAN_CIPHER_SUITE_CCMP:
179 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); 168 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
180 break; 169 break;
181 case ALG_AES_CMAC: 170 case WLAN_CIPHER_SUITE_AES_CMAC:
182 len = scnprintf(buf, sizeof(buf), "%u\n", 171 len = scnprintf(buf, sizeof(buf), "%u\n",
183 key->u.aes_cmac.replays); 172 key->u.aes_cmac.replays);
184 break; 173 break;
@@ -196,8 +185,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf,
196 char buf[20]; 185 char buf[20];
197 int len; 186 int len;
198 187
199 switch (key->conf.alg) { 188 switch (key->conf.cipher) {
200 case ALG_AES_CMAC: 189 case WLAN_CIPHER_SUITE_AES_CMAC:
201 len = scnprintf(buf, sizeof(buf), "%u\n", 190 len = scnprintf(buf, sizeof(buf), "%u\n",
202 key->u.aes_cmac.icverrors); 191 key->u.aes_cmac.icverrors);
203 break; 192 break;
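
Across these mac80211 hunks the internal ALG_* enum gives way to the 32-bit 802.11 cipher-suite selectors (vendor OUI in the top three bytes, suite type in the low byte), which is exactly what the reworked debugfs "algorithm" file prints. A tiny userspace illustration of that layout, assuming the standard CCMP selector value:

#include <stdio.h>
#include <stdint.h>

#define WLAN_CIPHER_SUITE_CCMP	0x000fac04u	/* 00-0f-ac : 4 */

int main(void)
{
	uint32_t c = WLAN_CIPHER_SUITE_CCMP;

	/* same format as the debugfs key "algorithm" file above */
	printf("%.2x-%.2x-%.2x:%d\n",
	       c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff);
	return 0;
}
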
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 20b2998fa0ed..3e12430591b7 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -409,6 +409,9 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
409 sprintf(buf, "netdev:%s", sdata->name); 409 sprintf(buf, "netdev:%s", sdata->name);
410 sdata->debugfs.dir = debugfs_create_dir(buf, 410 sdata->debugfs.dir = debugfs_create_dir(buf,
411 sdata->local->hw.wiphy->debugfsdir); 411 sdata->local->hw.wiphy->debugfsdir);
412 if (sdata->debugfs.dir)
413 sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
414 sdata->debugfs.dir);
412 add_files(sdata); 415 add_files(sdata);
413} 416}
414 417
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 76839d4dfaac..6b7ff9fb4604 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -300,7 +300,7 @@ STA_OPS(ht_capa);
300 300
301void ieee80211_sta_debugfs_add(struct sta_info *sta) 301void ieee80211_sta_debugfs_add(struct sta_info *sta)
302{ 302{
303 struct dentry *stations_dir = sta->local->debugfs.stations; 303 struct dentry *stations_dir = sta->sdata->debugfs.subdir_stations;
304 u8 mac[3*ETH_ALEN]; 304 u8 mac[3*ETH_ALEN];
305 305
306 sta->debugfs.add_has_run = true; 306 sta->debugfs.add_has_run = true;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 14123dce544b..16983825f8e8 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local,
54 return ret; 54 return ret;
55} 55}
56 56
57static inline int drv_change_interface(struct ieee80211_local *local,
58 struct ieee80211_sub_if_data *sdata,
59 enum nl80211_iftype type, bool p2p)
60{
61 int ret;
62
63 might_sleep();
64
65 trace_drv_change_interface(local, sdata, type, p2p);
66 ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
67 trace_drv_return_int(local, ret);
68 return ret;
69}
70
57static inline void drv_remove_interface(struct ieee80211_local *local, 71static inline void drv_remove_interface(struct ieee80211_local *local,
58 struct ieee80211_vif *vif) 72 struct ieee80211_vif *vif)
59{ 73{
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 5d5d2a974668..6831fb1641c8 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -25,12 +25,14 @@ static inline void trace_ ## name(proto) {}
25#define STA_PR_FMT " sta:%pM" 25#define STA_PR_FMT " sta:%pM"
26#define STA_PR_ARG __entry->sta_addr 26#define STA_PR_ARG __entry->sta_addr
27 27
28#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ 28#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \
29 __field(bool, p2p) \
29 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") 30 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
30#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ 31#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
32 __entry->p2p = sdata->vif.p2p; \
31 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") 33 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
32#define VIF_PR_FMT " vif:%s(%d)" 34#define VIF_PR_FMT " vif:%s(%d%s)"
33#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type 35#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
34 36
35/* 37/*
36 * Tracing for driver callbacks. 38 * Tracing for driver callbacks.
@@ -136,6 +138,34 @@ TRACE_EVENT(drv_add_interface,
136 ) 138 )
137); 139);
138 140
141TRACE_EVENT(drv_change_interface,
142 TP_PROTO(struct ieee80211_local *local,
143 struct ieee80211_sub_if_data *sdata,
144 enum nl80211_iftype type, bool p2p),
145
146 TP_ARGS(local, sdata, type, p2p),
147
148 TP_STRUCT__entry(
149 LOCAL_ENTRY
150 VIF_ENTRY
151 __field(u32, new_type)
152 __field(bool, new_p2p)
153 ),
154
155 TP_fast_assign(
156 LOCAL_ASSIGN;
157 VIF_ASSIGN;
158 __entry->new_type = type;
159 __entry->new_p2p = p2p;
160 ),
161
162 TP_printk(
163 LOCAL_PR_FMT VIF_PR_FMT " new type:%d%s",
164 LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type,
165 __entry->new_p2p ? "/p2p" : ""
166 )
167);
168
139TRACE_EVENT(drv_remove_interface, 169TRACE_EVENT(drv_remove_interface,
140 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), 170 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata),
141 171
@@ -336,7 +366,7 @@ TRACE_EVENT(drv_set_key,
336 LOCAL_ENTRY 366 LOCAL_ENTRY
337 VIF_ENTRY 367 VIF_ENTRY
338 STA_ENTRY 368 STA_ENTRY
339 __field(enum ieee80211_key_alg, alg) 369 __field(u32, cipher)
340 __field(u8, hw_key_idx) 370 __field(u8, hw_key_idx)
341 __field(u8, flags) 371 __field(u8, flags)
342 __field(s8, keyidx) 372 __field(s8, keyidx)
@@ -346,7 +376,7 @@ TRACE_EVENT(drv_set_key,
346 LOCAL_ASSIGN; 376 LOCAL_ASSIGN;
347 VIF_ASSIGN; 377 VIF_ASSIGN;
348 STA_ASSIGN; 378 STA_ASSIGN;
349 __entry->alg = key->alg; 379 __entry->cipher = key->cipher;
350 __entry->flags = key->flags; 380 __entry->flags = key->flags;
351 __entry->keyidx = key->keyidx; 381 __entry->keyidx = key->keyidx;
352 __entry->hw_key_idx = key->hw_key_idx; 382 __entry->hw_key_idx = key->hw_key_idx;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 9d101fb33861..11f74f5f7b2f 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -265,3 +265,31 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
265 265
266 return 0; 266 return 0;
267} 267}
268
269void ieee80211_request_smps_work(struct work_struct *work)
270{
271 struct ieee80211_sub_if_data *sdata =
272 container_of(work, struct ieee80211_sub_if_data,
273 u.mgd.request_smps_work);
274
275 mutex_lock(&sdata->u.mgd.mtx);
276 __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode);
277 mutex_unlock(&sdata->u.mgd.mtx);
278}
279
280void ieee80211_request_smps(struct ieee80211_vif *vif,
281 enum ieee80211_smps_mode smps_mode)
282{
283 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
284
285 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
286 return;
287
288 if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF))
289 smps_mode = IEEE80211_SMPS_AUTOMATIC;
290
291 ieee80211_queue_work(&sdata->local->hw,
292 &sdata->u.mgd.request_smps_work);
293}
294/* this might change ... don't want non-open drivers using it */
295EXPORT_SYMBOL_GPL(ieee80211_request_smps);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index c691780725a7..1a3aae54f0cf 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -427,8 +427,8 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
427 return NULL; 427 return NULL;
428 428
429#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 429#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
430 printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", 430 wiphy_debug(local->hw.wiphy, "Adding new IBSS station %pM (dev=%s)\n",
431 wiphy_name(local->hw.wiphy), addr, sdata->name); 431 addr, sdata->name);
432#endif 432#endif
433 433
434 sta = sta_info_alloc(sdata, addr, gfp); 434 sta = sta_info_alloc(sdata, addr, gfp);
@@ -920,12 +920,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); 920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN);
921 sdata->u.ibss.ssid_len = params->ssid_len; 921 sdata->u.ibss.ssid_len = params->ssid_len;
922 922
923 mutex_unlock(&sdata->u.ibss.mtx);
924
925 mutex_lock(&sdata->local->mtx);
923 ieee80211_recalc_idle(sdata->local); 926 ieee80211_recalc_idle(sdata->local);
927 mutex_unlock(&sdata->local->mtx);
924 928
925 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 929 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
926 930
927 mutex_unlock(&sdata->u.ibss.mtx);
928
929 return 0; 931 return 0;
930} 932}
931 933
@@ -980,7 +982,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
980 982
981 mutex_unlock(&sdata->u.ibss.mtx); 983 mutex_unlock(&sdata->u.ibss.mtx);
982 984
985 mutex_lock(&local->mtx);
983 ieee80211_recalc_idle(sdata->local); 986 ieee80211_recalc_idle(sdata->local);
987 mutex_unlock(&local->mtx);
984 988
985 return 0; 989 return 0;
986} 990}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 65e0ed6c2975..945fbf29719d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
50 * increased memory use (about 2 kB of RAM per entry). */ 50 * increased memory use (about 2 kB of RAM per entry). */
51#define IEEE80211_FRAGMENT_MAX 4 51#define IEEE80211_FRAGMENT_MAX 4
52 52
53/*
54 * Time after which we ignore scan results and no longer report/use
55 * them in any way.
56 */
57#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
58
59#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) 53#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024))
60 54
61#define IEEE80211_DEFAULT_UAPSD_QUEUES \ 55#define IEEE80211_DEFAULT_UAPSD_QUEUES \
@@ -165,12 +159,37 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
165#define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u) 159#define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u)
166#define RX_QUEUED ((__force ieee80211_rx_result) 3u) 160#define RX_QUEUED ((__force ieee80211_rx_result) 3u)
167 161
168#define IEEE80211_RX_IN_SCAN BIT(0) 162/**
169/* frame is destined to interface currently processed (incl. multicast frames) */ 163 * enum ieee80211_packet_rx_flags - packet RX flags
170#define IEEE80211_RX_RA_MATCH BIT(1) 164 * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed
171#define IEEE80211_RX_AMSDU BIT(2) 165 * (incl. multicast frames)
172#define IEEE80211_RX_FRAGMENTED BIT(3) 166 * @IEEE80211_RX_IN_SCAN: received while scanning
173/* only add flags here that do not change with subframes of an aMPDU */ 167 * @IEEE80211_RX_FRAGMENTED: fragmented frame
168 * @IEEE80211_RX_AMSDU: a-MSDU packet
169 * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed
170 *
171 * These are per-frame flags that are attached to a frame in the
172 * @rx_flags field of &struct ieee80211_rx_status.
173 */
174enum ieee80211_packet_rx_flags {
175 IEEE80211_RX_IN_SCAN = BIT(0),
176 IEEE80211_RX_RA_MATCH = BIT(1),
177 IEEE80211_RX_FRAGMENTED = BIT(2),
178 IEEE80211_RX_AMSDU = BIT(3),
179 IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4),
180};
181
182/**
183 * enum ieee80211_rx_flags - RX data flags
184 *
185 * @IEEE80211_RX_CMNTR: received on cooked monitor already
186 *
187 * These flags are used across handling multiple interfaces
188 * for a single frame.
189 */
190enum ieee80211_rx_flags {
191 IEEE80211_RX_CMNTR = BIT(0),
192};
174 193
175struct ieee80211_rx_data { 194struct ieee80211_rx_data {
176 struct sk_buff *skb; 195 struct sk_buff *skb;
@@ -343,7 +362,10 @@ struct ieee80211_if_managed {
343 unsigned long timers_running; /* used for quiesce/restart */ 362 unsigned long timers_running; /* used for quiesce/restart */
344 bool powersave; /* powersave requested for this iface */ 363 bool powersave; /* powersave requested for this iface */
345 enum ieee80211_smps_mode req_smps, /* requested smps mode */ 364 enum ieee80211_smps_mode req_smps, /* requested smps mode */
346 ap_smps; /* smps mode AP thinks we're in */ 365 ap_smps, /* smps mode AP thinks we're in */
366 driver_smps_mode; /* smps mode request */
367
368 struct work_struct request_smps_work;
347 369
348 unsigned int flags; 370 unsigned int flags;
349 371
@@ -371,6 +393,13 @@ struct ieee80211_if_managed {
371 int ave_beacon_signal; 393 int ave_beacon_signal;
372 394
373 /* 395 /*
396 * Number of Beacon frames used in ave_beacon_signal. This can be used
397 * to avoid generating less reliable cqm events that would be based
398 * only on couple of received frames.
399 */
400 unsigned int count_beacon_signal;
401
402 /*
374 * Last Beacon frame signal strength average (ave_beacon_signal / 16) 403 * Last Beacon frame signal strength average (ave_beacon_signal / 16)
375 * that triggered a cqm event. 0 indicates that no event has been 404 * that triggered a cqm event. 0 indicates that no event has been
376 * generated for the current association. 405 * generated for the current association.
@@ -474,6 +503,19 @@ enum ieee80211_sub_if_data_flags {
474 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), 503 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
475}; 504};
476 505
506/**
507 * enum ieee80211_sdata_state_bits - virtual interface state bits
508 * @SDATA_STATE_RUNNING: virtual interface is up & running; this
509 * mirrors netif_running() but is separate for interface type
510 * change handling while the interface is up
511 * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel
512 * mode, so queues are stopped
513 */
514enum ieee80211_sdata_state_bits {
515 SDATA_STATE_RUNNING,
516 SDATA_STATE_OFFCHANNEL,
517};
518
477struct ieee80211_sub_if_data { 519struct ieee80211_sub_if_data {
478 struct list_head list; 520 struct list_head list;
479 521
@@ -487,6 +529,8 @@ struct ieee80211_sub_if_data {
487 529
488 unsigned int flags; 530 unsigned int flags;
489 531
532 unsigned long state;
533
490 int drop_unencrypted; 534 int drop_unencrypted;
491 535
492 char name[IFNAMSIZ]; 536 char name[IFNAMSIZ];
@@ -497,6 +541,9 @@ struct ieee80211_sub_if_data {
497 */ 541 */
498 bool ht_opmode_valid; 542 bool ht_opmode_valid;
499 543
544 /* to detect idle changes */
545 bool old_idle;
546
500 /* Fragment table for host-based reassembly */ 547 /* Fragment table for host-based reassembly */
501 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; 548 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
502 unsigned int fragment_next; 549 unsigned int fragment_next;
@@ -508,6 +555,8 @@ struct ieee80211_sub_if_data {
508 struct ieee80211_key *default_mgmt_key; 555 struct ieee80211_key *default_mgmt_key;
509 556
510 u16 sequence_number; 557 u16 sequence_number;
558 __be16 control_port_protocol;
559 bool control_port_no_encrypt;
511 560
512 struct work_struct work; 561 struct work_struct work;
513 struct sk_buff_head skb_queue; 562 struct sk_buff_head skb_queue;
@@ -539,6 +588,7 @@ struct ieee80211_sub_if_data {
539#ifdef CONFIG_MAC80211_DEBUGFS 588#ifdef CONFIG_MAC80211_DEBUGFS
540 struct { 589 struct {
541 struct dentry *dir; 590 struct dentry *dir;
591 struct dentry *subdir_stations;
542 struct dentry *default_key; 592 struct dentry *default_key;
543 struct dentry *default_mgmt_key; 593 struct dentry *default_mgmt_key;
544 } debugfs; 594 } debugfs;
@@ -595,11 +645,17 @@ enum queue_stop_reason {
595 * determine if we are on the operating channel or not 645 * determine if we are on the operating channel or not
596 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, 646 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning,
597 * gets only set in conjunction with SCAN_SW_SCANNING 647 * gets only set in conjunction with SCAN_SW_SCANNING
648 * @SCAN_COMPLETED: Set for our scan work function when the driver reported
649 * that the scan completed.
650 * @SCAN_ABORTED: Set for our scan work function when the driver reported
651 * a scan complete for an aborted scan.
598 */ 652 */
599enum { 653enum {
600 SCAN_SW_SCANNING, 654 SCAN_SW_SCANNING,
601 SCAN_HW_SCANNING, 655 SCAN_HW_SCANNING,
602 SCAN_OFF_CHANNEL, 656 SCAN_OFF_CHANNEL,
657 SCAN_COMPLETED,
658 SCAN_ABORTED,
603}; 659};
604 660
605/** 661/**
@@ -634,7 +690,6 @@ struct ieee80211_local {
634 /* 690 /*
635 * work stuff, potentially off-channel (in the future) 691 * work stuff, potentially off-channel (in the future)
636 */ 692 */
637 struct mutex work_mtx;
638 struct list_head work_list; 693 struct list_head work_list;
639 struct timer_list work_timer; 694 struct timer_list work_timer;
640 struct work_struct work_work; 695 struct work_struct work_work;
@@ -656,6 +711,8 @@ struct ieee80211_local {
656 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll; 711 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll;
657 unsigned int filter_flags; /* FIF_* */ 712 unsigned int filter_flags; /* FIF_* */
658 713
714 bool wiphy_ciphers_allocated;
715
659 /* protects the aggregated multicast list and filter calls */ 716 /* protects the aggregated multicast list and filter calls */
660 spinlock_t filter_lock; 717 spinlock_t filter_lock;
661 718
@@ -746,9 +803,10 @@ struct ieee80211_local {
746 */ 803 */
747 struct mutex key_mtx; 804 struct mutex key_mtx;
748 805
806 /* mutex for scan and work locking */
807 struct mutex mtx;
749 808
750 /* Scanning and BSS list */ 809 /* Scanning and BSS list */
751 struct mutex scan_mtx;
752 unsigned long scanning; 810 unsigned long scanning;
753 struct cfg80211_ssid scan_ssid; 811 struct cfg80211_ssid scan_ssid;
754 struct cfg80211_scan_request *int_scan_req; 812 struct cfg80211_scan_request *int_scan_req;
@@ -866,10 +924,14 @@ struct ieee80211_local {
866#ifdef CONFIG_MAC80211_DEBUGFS 924#ifdef CONFIG_MAC80211_DEBUGFS
867 struct local_debugfsdentries { 925 struct local_debugfsdentries {
868 struct dentry *rcdir; 926 struct dentry *rcdir;
869 struct dentry *stations;
870 struct dentry *keys; 927 struct dentry *keys;
871 } debugfs; 928 } debugfs;
872#endif 929#endif
930
931 /* dummy netdev for use w/ NAPI */
932 struct net_device napi_dev;
933
934 struct napi_struct napi;
873}; 935};
874 936
875static inline struct ieee80211_sub_if_data * 937static inline struct ieee80211_sub_if_data *
@@ -1003,6 +1065,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
1003void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); 1065void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
1004void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, 1066void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1005 struct sk_buff *skb); 1067 struct sk_buff *skb);
1068void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata);
1069void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata);
1006 1070
1007/* IBSS code */ 1071/* IBSS code */
1008void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); 1072void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
@@ -1071,7 +1135,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local);
1071 1135
1072static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) 1136static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
1073{ 1137{
1074 return netif_running(sdata->dev); 1138 return test_bit(SDATA_STATE_RUNNING, &sdata->state);
1075} 1139}
1076 1140
1077/* tx handling */ 1141/* tx handling */
@@ -1105,6 +1169,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
1105int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, 1169int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
1106 enum ieee80211_smps_mode smps, const u8 *da, 1170 enum ieee80211_smps_mode smps, const u8 *da,
1107 const u8 *bssid); 1171 const u8 *bssid);
1172void ieee80211_request_smps_work(struct work_struct *work);
1108 1173
1109void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 1174void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1110 u16 initiator, u16 reason); 1175 u16 initiator, u16 reason);
@@ -1131,6 +1196,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
1131void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); 1196void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
1132void ieee80211_ba_session_work(struct work_struct *work); 1197void ieee80211_ba_session_work(struct work_struct *work);
1133void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); 1198void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
1199void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
1134 1200
1135/* Spectrum management */ 1201/* Spectrum management */
1136void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 1202void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1146,6 +1212,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw);
1146 1212
1147static inline int __ieee80211_resume(struct ieee80211_hw *hw) 1213static inline int __ieee80211_resume(struct ieee80211_hw *hw)
1148{ 1214{
1215 struct ieee80211_local *local = hw_to_local(hw);
1216
1217 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
1218 "%s: resume with hardware scan still in progress\n",
1219 wiphy_name(hw->wiphy));
1220
1149 return ieee80211_reconfig(hw_to_local(hw)); 1221 return ieee80211_reconfig(hw_to_local(hw));
1150} 1222}
1151#else 1223#else
@@ -1208,7 +1280,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
1208 const u8 *key, u8 key_len, u8 key_idx); 1280 const u8 *key, u8 key_len, u8 key_idx);
1209int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, 1281int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1210 const u8 *ie, size_t ie_len, 1282 const u8 *ie, size_t ie_len,
1211 enum ieee80211_band band); 1283 enum ieee80211_band band, u32 rate_mask,
1284 u8 channel);
1212void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, 1285void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1213 const u8 *ssid, size_t ssid_len, 1286 const u8 *ssid, size_t ssid_len,
1214 const u8 *ie, size_t ie_len); 1287 const u8 *ie, size_t ie_len);
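
Editor's note: the ieee80211_i.h hunk above replaces the netif_running() check in ieee80211_sdata_running() with an explicit SDATA_STATE_RUNNING bit that ieee80211_do_open() sets and ieee80211_do_stop() clears. A minimal userspace sketch of why an explicit bit helps (names and the flag value are illustrative stand-ins, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

enum { STATE_RUNNING = 1u << 0 };       /* hypothetical flag bit */

struct sub_if_data { unsigned int state; };

static bool sdata_running(const struct sub_if_data *s)
{
        return s->state & STATE_RUNNING;
}

int main(void)
{
        struct sub_if_data s = { 0 };

        s.state |= STATE_RUNNING;       /* do_open() sets the bit */
        printf("running: %d\n", sdata_running(&s));

        /* during an in-place type change the netdev stays up, but the
         * code can still report "not running" while it tears down */
        s.state &= ~STATE_RUNNING;      /* do_stop() clears it first */
        printf("running: %d\n", sdata_running(&s));
        return 0;
}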
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ebbe264e2b0b..66785739dad3 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -94,21 +94,14 @@ static inline int identical_mac_addr_allowed(int type1, int type2)
94 type2 == NL80211_IFTYPE_AP_VLAN)); 94 type2 == NL80211_IFTYPE_AP_VLAN));
95} 95}
96 96
97static int ieee80211_open(struct net_device *dev) 97static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
98 enum nl80211_iftype iftype)
98{ 99{
99 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
100 struct ieee80211_sub_if_data *nsdata;
101 struct ieee80211_local *local = sdata->local; 100 struct ieee80211_local *local = sdata->local;
102 struct sta_info *sta; 101 struct ieee80211_sub_if_data *nsdata;
103 u32 changed = 0; 102 struct net_device *dev = sdata->dev;
104 int res;
105 u32 hw_reconf_flags = 0;
106 u8 null_addr[ETH_ALEN] = {0};
107 103
108 /* fail early if user set an invalid address */ 104 ASSERT_RTNL();
109 if (compare_ether_addr(dev->dev_addr, null_addr) &&
110 !is_valid_ether_addr(dev->dev_addr))
111 return -EADDRNOTAVAIL;
112 105
113 /* we hold the RTNL here so can safely walk the list */ 106 /* we hold the RTNL here so can safely walk the list */
114 list_for_each_entry(nsdata, &local->interfaces, list) { 107 list_for_each_entry(nsdata, &local->interfaces, list) {
@@ -125,7 +118,7 @@ static int ieee80211_open(struct net_device *dev)
125 * belonging to the same hardware. Then, however, we're 118 * belonging to the same hardware. Then, however, we're
126 * faced with having to adopt two different TSF timers... 119 * faced with having to adopt two different TSF timers...
127 */ 120 */
128 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 121 if (iftype == NL80211_IFTYPE_ADHOC &&
129 nsdata->vif.type == NL80211_IFTYPE_ADHOC) 122 nsdata->vif.type == NL80211_IFTYPE_ADHOC)
130 return -EBUSY; 123 return -EBUSY;
131 124
@@ -139,19 +132,36 @@ static int ieee80211_open(struct net_device *dev)
139 /* 132 /*
140 * check whether it may have the same address 133 * check whether it may have the same address
141 */ 134 */
142 if (!identical_mac_addr_allowed(sdata->vif.type, 135 if (!identical_mac_addr_allowed(iftype,
143 nsdata->vif.type)) 136 nsdata->vif.type))
144 return -ENOTUNIQ; 137 return -ENOTUNIQ;
145 138
146 /* 139 /*
147 * can only add VLANs to enabled APs 140 * can only add VLANs to enabled APs
148 */ 141 */
149 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && 142 if (iftype == NL80211_IFTYPE_AP_VLAN &&
150 nsdata->vif.type == NL80211_IFTYPE_AP) 143 nsdata->vif.type == NL80211_IFTYPE_AP)
151 sdata->bss = &nsdata->u.ap; 144 sdata->bss = &nsdata->u.ap;
152 } 145 }
153 } 146 }
154 147
148 return 0;
149}
150
151/*
152 * NOTE: Be very careful when changing this function, it must NOT return
153 * an error on interface type changes that have been pre-checked, so most
154 * checks should be in ieee80211_check_concurrent_iface.
155 */
156static int ieee80211_do_open(struct net_device *dev, bool coming_up)
157{
158 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
159 struct ieee80211_local *local = sdata->local;
160 struct sta_info *sta;
161 u32 changed = 0;
162 int res;
163 u32 hw_reconf_flags = 0;
164
155 switch (sdata->vif.type) { 165 switch (sdata->vif.type) {
156 case NL80211_IFTYPE_WDS: 166 case NL80211_IFTYPE_WDS:
157 if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) 167 if (!is_valid_ether_addr(sdata->u.wds.remote_addr))
@@ -177,7 +187,9 @@ static int ieee80211_open(struct net_device *dev)
177 /* no special treatment */ 187 /* no special treatment */
178 break; 188 break;
179 case NL80211_IFTYPE_UNSPECIFIED: 189 case NL80211_IFTYPE_UNSPECIFIED:
180 case __NL80211_IFTYPE_AFTER_LAST: 190 case NUM_NL80211_IFTYPES:
191 case NL80211_IFTYPE_P2P_CLIENT:
192 case NL80211_IFTYPE_P2P_GO:
181 /* cannot happen */ 193 /* cannot happen */
182 WARN_ON(1); 194 WARN_ON(1);
183 break; 195 break;
@@ -187,39 +199,30 @@ static int ieee80211_open(struct net_device *dev)
187 res = drv_start(local); 199 res = drv_start(local);
188 if (res) 200 if (res)
189 goto err_del_bss; 201 goto err_del_bss;
202 if (local->ops->napi_poll)
203 napi_enable(&local->napi);
190 /* we're brought up, everything changes */ 204 /* we're brought up, everything changes */
191 hw_reconf_flags = ~0; 205 hw_reconf_flags = ~0;
192 ieee80211_led_radio(local, true); 206 ieee80211_led_radio(local, true);
193 } 207 }
194 208
195 /* 209 /*
196 * Check all interfaces and copy the hopefully now-present 210 * Copy the hopefully now-present MAC address to
197 * MAC address to those that have the special null one. 211 * this interface, if it has the special null one.
198 */ 212 */
199 list_for_each_entry(nsdata, &local->interfaces, list) { 213 if (is_zero_ether_addr(dev->dev_addr)) {
200 struct net_device *ndev = nsdata->dev; 214 memcpy(dev->dev_addr,
201 215 local->hw.wiphy->perm_addr,
202 /* 216 ETH_ALEN);
203 * No need to check running since we do not allow 217 memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN);
204 * it to start up with this invalid address. 218
205 */ 219 if (!is_valid_ether_addr(dev->dev_addr)) {
206 if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) { 220 if (!local->open_count)
207 memcpy(ndev->dev_addr, 221 drv_stop(local);
208 local->hw.wiphy->perm_addr, 222 return -EADDRNOTAVAIL;
209 ETH_ALEN);
210 memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN);
211 } 223 }
212 } 224 }
213 225
214 /*
215 * Validate the MAC address for this device.
216 */
217 if (!is_valid_ether_addr(dev->dev_addr)) {
218 if (!local->open_count)
219 drv_stop(local);
220 return -EADDRNOTAVAIL;
221 }
222
223 switch (sdata->vif.type) { 226 switch (sdata->vif.type) {
224 case NL80211_IFTYPE_AP_VLAN: 227 case NL80211_IFTYPE_AP_VLAN:
225 /* no need to tell driver */ 228 /* no need to tell driver */
@@ -253,9 +256,11 @@ static int ieee80211_open(struct net_device *dev)
253 netif_carrier_on(dev); 256 netif_carrier_on(dev);
254 break; 257 break;
255 default: 258 default:
256 res = drv_add_interface(local, &sdata->vif); 259 if (coming_up) {
257 if (res) 260 res = drv_add_interface(local, &sdata->vif);
258 goto err_stop; 261 if (res)
262 goto err_stop;
263 }
259 264
260 if (ieee80211_vif_is_mesh(&sdata->vif)) { 265 if (ieee80211_vif_is_mesh(&sdata->vif)) {
261 local->fif_other_bss++; 266 local->fif_other_bss++;
@@ -277,6 +282,8 @@ static int ieee80211_open(struct net_device *dev)
277 netif_carrier_on(dev); 282 netif_carrier_on(dev);
278 } 283 }
279 284
285 set_bit(SDATA_STATE_RUNNING, &sdata->state);
286
280 if (sdata->vif.type == NL80211_IFTYPE_WDS) { 287 if (sdata->vif.type == NL80211_IFTYPE_WDS) {
281 /* Create STA entry for the WDS peer */ 288 /* Create STA entry for the WDS peer */
282 sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, 289 sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
@@ -307,9 +314,13 @@ static int ieee80211_open(struct net_device *dev)
307 if (sdata->flags & IEEE80211_SDATA_PROMISC) 314 if (sdata->flags & IEEE80211_SDATA_PROMISC)
308 atomic_inc(&local->iff_promiscs); 315 atomic_inc(&local->iff_promiscs);
309 316
317 mutex_lock(&local->mtx);
310 hw_reconf_flags |= __ieee80211_recalc_idle(local); 318 hw_reconf_flags |= __ieee80211_recalc_idle(local);
319 mutex_unlock(&local->mtx);
320
321 if (coming_up)
322 local->open_count++;
311 323
312 local->open_count++;
313 if (hw_reconf_flags) { 324 if (hw_reconf_flags) {
314 ieee80211_hw_config(local, hw_reconf_flags); 325 ieee80211_hw_config(local, hw_reconf_flags);
315 /* 326 /*
@@ -334,22 +345,42 @@ static int ieee80211_open(struct net_device *dev)
334 sdata->bss = NULL; 345 sdata->bss = NULL;
335 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 346 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
336 list_del(&sdata->u.vlan.list); 347 list_del(&sdata->u.vlan.list);
348 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
337 return res; 349 return res;
338} 350}
339 351
340static int ieee80211_stop(struct net_device *dev) 352static int ieee80211_open(struct net_device *dev)
341{ 353{
342 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 354 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
355 int err;
356
357 /* fail early if user set an invalid address */
358 if (!is_zero_ether_addr(dev->dev_addr) &&
359 !is_valid_ether_addr(dev->dev_addr))
360 return -EADDRNOTAVAIL;
361
362 err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type);
363 if (err)
364 return err;
365
366 return ieee80211_do_open(dev, true);
367}
368
369static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
370 bool going_down)
371{
343 struct ieee80211_local *local = sdata->local; 372 struct ieee80211_local *local = sdata->local;
344 unsigned long flags; 373 unsigned long flags;
345 struct sk_buff *skb, *tmp; 374 struct sk_buff *skb, *tmp;
346 u32 hw_reconf_flags = 0; 375 u32 hw_reconf_flags = 0;
347 int i; 376 int i;
348 377
378 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
379
349 /* 380 /*
350 * Stop TX on this interface first. 381 * Stop TX on this interface first.
351 */ 382 */
352 netif_tx_stop_all_queues(dev); 383 netif_tx_stop_all_queues(sdata->dev);
353 384
354 /* 385 /*
355 * Purge work for this interface. 386 * Purge work for this interface.
@@ -366,12 +397,9 @@ static int ieee80211_stop(struct net_device *dev)
366 * (because if we remove a STA after ops->remove_interface() 397 * (because if we remove a STA after ops->remove_interface()
367 * the driver will have removed the vif info already!) 398 * the driver will have removed the vif info already!)
368 * 399 *
369 * We could relax this and only unlink the stations from the 400 * This is relevant only in AP, WDS and mesh modes, since in
370 * hash table and list but keep them on a per-sdata list that 401 * all other modes we've already removed all stations when
371 * will be inserted back again when the interface is brought 402 * disconnecting etc.
372 * up again, but I don't currently see a use case for that,
373 * except with WDS which gets a STA entry created when it is
374 * brought up.
375 */ 403 */
376 sta_info_flush(local, sdata); 404 sta_info_flush(local, sdata);
377 405
@@ -390,11 +418,12 @@ static int ieee80211_stop(struct net_device *dev)
390 if (sdata->vif.type == NL80211_IFTYPE_AP) 418 if (sdata->vif.type == NL80211_IFTYPE_AP)
391 local->fif_pspoll--; 419 local->fif_pspoll--;
392 420
393 netif_addr_lock_bh(dev); 421 netif_addr_lock_bh(sdata->dev);
394 spin_lock_bh(&local->filter_lock); 422 spin_lock_bh(&local->filter_lock);
395 __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); 423 __hw_addr_unsync(&local->mc_list, &sdata->dev->mc,
424 sdata->dev->addr_len);
396 spin_unlock_bh(&local->filter_lock); 425 spin_unlock_bh(&local->filter_lock);
397 netif_addr_unlock_bh(dev); 426 netif_addr_unlock_bh(sdata->dev);
398 427
399 ieee80211_configure_filter(local); 428 ieee80211_configure_filter(local);
400 429
@@ -406,11 +435,21 @@ static int ieee80211_stop(struct net_device *dev)
406 struct ieee80211_sub_if_data *vlan, *tmpsdata; 435 struct ieee80211_sub_if_data *vlan, *tmpsdata;
407 struct beacon_data *old_beacon = sdata->u.ap.beacon; 436 struct beacon_data *old_beacon = sdata->u.ap.beacon;
408 437
438 /* sdata_running will return false, so this will disable */
439 ieee80211_bss_info_change_notify(sdata,
440 BSS_CHANGED_BEACON_ENABLED);
441
409 /* remove beacon */ 442 /* remove beacon */
410 rcu_assign_pointer(sdata->u.ap.beacon, NULL); 443 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
411 synchronize_rcu(); 444 synchronize_rcu();
412 kfree(old_beacon); 445 kfree(old_beacon);
413 446
447 /* free all potentially still buffered bcast frames */
448 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
449 local->total_ps_buffered--;
450 dev_kfree_skb(skb);
451 }
452
414 /* down all dependent devices, that is VLANs */ 453 /* down all dependent devices, that is VLANs */
415 list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, 454 list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
416 u.vlan.list) 455 u.vlan.list)
@@ -418,7 +457,8 @@ static int ieee80211_stop(struct net_device *dev)
418 WARN_ON(!list_empty(&sdata->u.ap.vlans)); 457 WARN_ON(!list_empty(&sdata->u.ap.vlans));
419 } 458 }
420 459
421 local->open_count--; 460 if (going_down)
461 local->open_count--;
422 462
423 switch (sdata->vif.type) { 463 switch (sdata->vif.type) {
424 case NL80211_IFTYPE_AP_VLAN: 464 case NL80211_IFTYPE_AP_VLAN:
@@ -450,27 +490,6 @@ static int ieee80211_stop(struct net_device *dev)
450 490
451 ieee80211_configure_filter(local); 491 ieee80211_configure_filter(local);
452 break; 492 break;
453 case NL80211_IFTYPE_STATION:
454 del_timer_sync(&sdata->u.mgd.chswitch_timer);
455 del_timer_sync(&sdata->u.mgd.timer);
456 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
457 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
458 /*
459 * If any of the timers fired while we waited for it, it will
460 * have queued its work. Now the work will be running again
461 * but will not rearm the timer again because it checks
462 * whether the interface is running, which, at this point,
463 * it no longer is.
464 */
465 cancel_work_sync(&sdata->u.mgd.chswitch_work);
466 cancel_work_sync(&sdata->u.mgd.monitor_work);
467 cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work);
468
469 /* fall through */
470 case NL80211_IFTYPE_ADHOC:
471 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
472 del_timer_sync(&sdata->u.ibss.timer);
473 /* fall through */
474 case NL80211_IFTYPE_MESH_POINT: 493 case NL80211_IFTYPE_MESH_POINT:
475 if (ieee80211_vif_is_mesh(&sdata->vif)) { 494 if (ieee80211_vif_is_mesh(&sdata->vif)) {
476 /* other_bss and allmulti are always set on mesh 495 /* other_bss and allmulti are always set on mesh
@@ -498,27 +517,34 @@ static int ieee80211_stop(struct net_device *dev)
498 ieee80211_scan_cancel(local); 517 ieee80211_scan_cancel(local);
499 518
500 /* 519 /*
501 * Disable beaconing for AP and mesh, IBSS can't 520 * Disable beaconing here for mesh only, AP and IBSS
502 * still be joined to a network at this point. 521 * are already taken care of.
503 */ 522 */
504 if (sdata->vif.type == NL80211_IFTYPE_AP || 523 if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
505 sdata->vif.type == NL80211_IFTYPE_MESH_POINT) {
506 ieee80211_bss_info_change_notify(sdata, 524 ieee80211_bss_info_change_notify(sdata,
507 BSS_CHANGED_BEACON_ENABLED); 525 BSS_CHANGED_BEACON_ENABLED);
508 }
509 526
510 /* free all remaining keys, there shouldn't be any */ 527 /*
528 * Free all remaining keys, there shouldn't be any,
 529 * except maybe group keys in AP mode or WDS?
530 */
511 ieee80211_free_keys(sdata); 531 ieee80211_free_keys(sdata);
512 drv_remove_interface(local, &sdata->vif); 532
533 if (going_down)
534 drv_remove_interface(local, &sdata->vif);
513 } 535 }
514 536
515 sdata->bss = NULL; 537 sdata->bss = NULL;
516 538
539 mutex_lock(&local->mtx);
517 hw_reconf_flags |= __ieee80211_recalc_idle(local); 540 hw_reconf_flags |= __ieee80211_recalc_idle(local);
541 mutex_unlock(&local->mtx);
518 542
519 ieee80211_recalc_ps(local, -1); 543 ieee80211_recalc_ps(local, -1);
520 544
521 if (local->open_count == 0) { 545 if (local->open_count == 0) {
546 if (local->ops->napi_poll)
547 napi_disable(&local->napi);
522 ieee80211_clear_tx_pending(local); 548 ieee80211_clear_tx_pending(local);
523 ieee80211_stop_device(local); 549 ieee80211_stop_device(local);
524 550
@@ -541,6 +567,13 @@ static int ieee80211_stop(struct net_device *dev)
541 } 567 }
542 } 568 }
543 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 569 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
570}
571
572static int ieee80211_stop(struct net_device *dev)
573{
574 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
575
576 ieee80211_do_stop(sdata, true);
544 577
545 return 0; 578 return 0;
546} 579}
@@ -585,8 +618,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
585{ 618{
586 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 619 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
587 struct ieee80211_local *local = sdata->local; 620 struct ieee80211_local *local = sdata->local;
588 struct beacon_data *beacon;
589 struct sk_buff *skb;
590 int flushed; 621 int flushed;
591 int i; 622 int i;
592 623
@@ -599,37 +630,8 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
599 __skb_queue_purge(&sdata->fragments[i].skb_list); 630 __skb_queue_purge(&sdata->fragments[i].skb_list);
600 sdata->fragment_next = 0; 631 sdata->fragment_next = 0;
601 632
602 switch (sdata->vif.type) { 633 if (ieee80211_vif_is_mesh(&sdata->vif))
603 case NL80211_IFTYPE_AP: 634 mesh_rmc_free(sdata);
604 beacon = sdata->u.ap.beacon;
605 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
606 synchronize_rcu();
607 kfree(beacon);
608
609 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
610 local->total_ps_buffered--;
611 dev_kfree_skb(skb);
612 }
613
614 break;
615 case NL80211_IFTYPE_MESH_POINT:
616 if (ieee80211_vif_is_mesh(&sdata->vif))
617 mesh_rmc_free(sdata);
618 break;
619 case NL80211_IFTYPE_ADHOC:
620 if (WARN_ON(sdata->u.ibss.presp))
621 kfree_skb(sdata->u.ibss.presp);
622 break;
623 case NL80211_IFTYPE_STATION:
624 case NL80211_IFTYPE_WDS:
625 case NL80211_IFTYPE_AP_VLAN:
626 case NL80211_IFTYPE_MONITOR:
627 break;
628 case NL80211_IFTYPE_UNSPECIFIED:
629 case __NL80211_IFTYPE_AFTER_LAST:
630 BUG();
631 break;
632 }
633 635
634 flushed = sta_info_flush(local, sdata); 636 flushed = sta_info_flush(local, sdata);
635 WARN_ON(flushed); 637 WARN_ON(flushed);
@@ -844,9 +846,13 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
844 846
845 /* and set some type-dependent values */ 847 /* and set some type-dependent values */
846 sdata->vif.type = type; 848 sdata->vif.type = type;
849 sdata->vif.p2p = false;
847 sdata->dev->netdev_ops = &ieee80211_dataif_ops; 850 sdata->dev->netdev_ops = &ieee80211_dataif_ops;
848 sdata->wdev.iftype = type; 851 sdata->wdev.iftype = type;
849 852
853 sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
854 sdata->control_port_no_encrypt = false;
855
850 /* only monitor differs */ 856 /* only monitor differs */
851 sdata->dev->type = ARPHRD_ETHER; 857 sdata->dev->type = ARPHRD_ETHER;
852 858
@@ -854,10 +860,20 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
854 INIT_WORK(&sdata->work, ieee80211_iface_work); 860 INIT_WORK(&sdata->work, ieee80211_iface_work);
855 861
856 switch (type) { 862 switch (type) {
863 case NL80211_IFTYPE_P2P_GO:
864 type = NL80211_IFTYPE_AP;
865 sdata->vif.type = type;
866 sdata->vif.p2p = true;
867 /* fall through */
857 case NL80211_IFTYPE_AP: 868 case NL80211_IFTYPE_AP:
858 skb_queue_head_init(&sdata->u.ap.ps_bc_buf); 869 skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
859 INIT_LIST_HEAD(&sdata->u.ap.vlans); 870 INIT_LIST_HEAD(&sdata->u.ap.vlans);
860 break; 871 break;
872 case NL80211_IFTYPE_P2P_CLIENT:
873 type = NL80211_IFTYPE_STATION;
874 sdata->vif.type = type;
875 sdata->vif.p2p = true;
876 /* fall through */
861 case NL80211_IFTYPE_STATION: 877 case NL80211_IFTYPE_STATION:
862 ieee80211_sta_setup_sdata(sdata); 878 ieee80211_sta_setup_sdata(sdata);
863 break; 879 break;
@@ -878,7 +894,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
878 case NL80211_IFTYPE_AP_VLAN: 894 case NL80211_IFTYPE_AP_VLAN:
879 break; 895 break;
880 case NL80211_IFTYPE_UNSPECIFIED: 896 case NL80211_IFTYPE_UNSPECIFIED:
881 case __NL80211_IFTYPE_AFTER_LAST: 897 case NUM_NL80211_IFTYPES:
882 BUG(); 898 BUG();
883 break; 899 break;
884 } 900 }
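
Editor's note: the setup hunk above maps the new P2P interface types onto the existing internal types while recording a separate p2p flag. A hedged sketch of that mapping in plain C (enum values and names below are simplified stand-ins, not the nl80211 definitions):

#include <stdbool.h>
#include <stdio.h>

enum iftype { IFTYPE_STATION, IFTYPE_AP, IFTYPE_P2P_CLIENT, IFTYPE_P2P_GO };

struct vif { enum iftype type; bool p2p; };

static void setup_vif(struct vif *vif, enum iftype type)
{
        vif->p2p = false;

        switch (type) {
        case IFTYPE_P2P_GO:
                type = IFTYPE_AP;        /* a GO behaves like an AP internally */
                vif->p2p = true;
                break;
        case IFTYPE_P2P_CLIENT:
                type = IFTYPE_STATION;   /* a P2P client behaves like a station */
                vif->p2p = true;
                break;
        default:
                break;
        }
        vif->type = type;
}

int main(void)
{
        struct vif vif;

        setup_vif(&vif, IFTYPE_P2P_GO);
        printf("internal type=%d p2p=%d\n", vif.type, vif.p2p);
        return 0;
}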
@@ -886,12 +902,85 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
886 ieee80211_debugfs_add_netdev(sdata); 902 ieee80211_debugfs_add_netdev(sdata);
887} 903}
888 904
905static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
906 enum nl80211_iftype type)
907{
908 struct ieee80211_local *local = sdata->local;
909 int ret, err;
910 enum nl80211_iftype internal_type = type;
911 bool p2p = false;
912
913 ASSERT_RTNL();
914
915 if (!local->ops->change_interface)
916 return -EBUSY;
917
918 switch (sdata->vif.type) {
919 case NL80211_IFTYPE_AP:
920 case NL80211_IFTYPE_STATION:
921 case NL80211_IFTYPE_ADHOC:
922 /*
923 * Could maybe also all others here?
924 * Just not sure how that interacts
925 * with the RX/config path e.g. for
926 * mesh.
927 */
928 break;
929 default:
930 return -EBUSY;
931 }
932
933 switch (type) {
934 case NL80211_IFTYPE_AP:
935 case NL80211_IFTYPE_STATION:
936 case NL80211_IFTYPE_ADHOC:
937 /*
938 * Could probably support everything
939 * but WDS here (WDS do_open can fail
940 * under memory pressure, which this
941 * code isn't prepared to handle).
942 */
943 break;
944 case NL80211_IFTYPE_P2P_CLIENT:
945 p2p = true;
946 internal_type = NL80211_IFTYPE_STATION;
947 break;
948 case NL80211_IFTYPE_P2P_GO:
949 p2p = true;
950 internal_type = NL80211_IFTYPE_AP;
951 break;
952 default:
953 return -EBUSY;
954 }
955
956 ret = ieee80211_check_concurrent_iface(sdata, internal_type);
957 if (ret)
958 return ret;
959
960 ieee80211_do_stop(sdata, false);
961
962 ieee80211_teardown_sdata(sdata->dev);
963
964 ret = drv_change_interface(local, sdata, internal_type, p2p);
965 if (ret)
966 type = sdata->vif.type;
967
968 ieee80211_setup_sdata(sdata, type);
969
970 err = ieee80211_do_open(sdata->dev, false);
971 WARN(err, "type change: do_open returned %d", err);
972
973 return ret;
974}
975
889int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, 976int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
890 enum nl80211_iftype type) 977 enum nl80211_iftype type)
891{ 978{
979 int ret;
980
892 ASSERT_RTNL(); 981 ASSERT_RTNL();
893 982
894 if (type == sdata->vif.type) 983 if (type == ieee80211_vif_type_p2p(&sdata->vif))
895 return 0; 984 return 0;
896 985
897 /* Setting ad-hoc mode on non-IBSS channel is not supported. */ 986 /* Setting ad-hoc mode on non-IBSS channel is not supported. */
@@ -899,18 +988,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
899 type == NL80211_IFTYPE_ADHOC) 988 type == NL80211_IFTYPE_ADHOC)
900 return -EOPNOTSUPP; 989 return -EOPNOTSUPP;
901 990
902 /* 991 if (ieee80211_sdata_running(sdata)) {
903 * We could, here, on changes between IBSS/STA/MESH modes, 992 ret = ieee80211_runtime_change_iftype(sdata, type);
904 * invoke an MLME function instead that disassociates etc. 993 if (ret)
905 * and goes into the requested mode. 994 return ret;
906 */ 995 } else {
907 996 /* Purge and reset type-dependent state. */
908 if (ieee80211_sdata_running(sdata)) 997 ieee80211_teardown_sdata(sdata->dev);
909 return -EBUSY; 998 ieee80211_setup_sdata(sdata, type);
910 999 }
911 /* Purge and reset type-dependent state. */
912 ieee80211_teardown_sdata(sdata->dev);
913 ieee80211_setup_sdata(sdata, type);
914 1000
915 /* reset some values that shouldn't be kept across type changes */ 1001 /* reset some values that shouldn't be kept across type changes */
916 sdata->vif.bss_conf.basic_rates = 1002 sdata->vif.bss_conf.basic_rates =
@@ -1167,8 +1253,7 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local,
1167 return 0; 1253 return 0;
1168 1254
1169#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1255#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1170 printk(KERN_DEBUG "%s: device no longer idle - %s\n", 1256 wiphy_debug(local->hw.wiphy, "device no longer idle - %s\n", reason);
1171 wiphy_name(local->hw.wiphy), reason);
1172#endif 1257#endif
1173 1258
1174 local->hw.conf.flags &= ~IEEE80211_CONF_IDLE; 1259 local->hw.conf.flags &= ~IEEE80211_CONF_IDLE;
@@ -1181,8 +1266,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local)
1181 return 0; 1266 return 0;
1182 1267
1183#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1268#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1184 printk(KERN_DEBUG "%s: device now idle\n", 1269 wiphy_debug(local->hw.wiphy, "device now idle\n");
1185 wiphy_name(local->hw.wiphy));
1186#endif 1270#endif
1187 1271
1188 drv_flush(local, false); 1272 drv_flush(local, false);
@@ -1195,28 +1279,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
1195{ 1279{
1196 struct ieee80211_sub_if_data *sdata; 1280 struct ieee80211_sub_if_data *sdata;
1197 int count = 0; 1281 int count = 0;
1282 bool working = false, scanning = false;
1283 struct ieee80211_work *wk;
1198 1284
1199 if (!list_empty(&local->work_list)) 1285#ifdef CONFIG_PROVE_LOCKING
1200 return ieee80211_idle_off(local, "working"); 1286 WARN_ON(debug_locks && !lockdep_rtnl_is_held() &&
1201 1287 !lockdep_is_held(&local->iflist_mtx));
1202 if (local->scanning) 1288#endif
1203 return ieee80211_idle_off(local, "scanning"); 1289 lockdep_assert_held(&local->mtx);
1204 1290
1205 list_for_each_entry(sdata, &local->interfaces, list) { 1291 list_for_each_entry(sdata, &local->interfaces, list) {
1206 if (!ieee80211_sdata_running(sdata)) 1292 if (!ieee80211_sdata_running(sdata)) {
1293 sdata->vif.bss_conf.idle = true;
1207 continue; 1294 continue;
1295 }
1296
1297 sdata->old_idle = sdata->vif.bss_conf.idle;
1298
1208 /* do not count disabled managed interfaces */ 1299 /* do not count disabled managed interfaces */
1209 if (sdata->vif.type == NL80211_IFTYPE_STATION && 1300 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
1210 !sdata->u.mgd.associated) 1301 !sdata->u.mgd.associated) {
1302 sdata->vif.bss_conf.idle = true;
1211 continue; 1303 continue;
1304 }
1212 /* do not count unused IBSS interfaces */ 1305 /* do not count unused IBSS interfaces */
1213 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 1306 if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
1214 !sdata->u.ibss.ssid_len) 1307 !sdata->u.ibss.ssid_len) {
1308 sdata->vif.bss_conf.idle = true;
1215 continue; 1309 continue;
1310 }
1216 /* count everything else */ 1311 /* count everything else */
1217 count++; 1312 count++;
1218 } 1313 }
1219 1314
1315 list_for_each_entry(wk, &local->work_list, list) {
1316 working = true;
1317 wk->sdata->vif.bss_conf.idle = false;
1318 }
1319
1320 if (local->scan_sdata) {
1321 scanning = true;
1322 local->scan_sdata->vif.bss_conf.idle = false;
1323 }
1324
1325 list_for_each_entry(sdata, &local->interfaces, list) {
1326 if (sdata->old_idle == sdata->vif.bss_conf.idle)
1327 continue;
1328 if (!ieee80211_sdata_running(sdata))
1329 continue;
1330 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
1331 }
1332
1333 if (working)
1334 return ieee80211_idle_off(local, "working");
1335 if (scanning)
1336 return ieee80211_idle_off(local, "scanning");
1220 if (!count) 1337 if (!count)
1221 return ieee80211_idle_on(local); 1338 return ieee80211_idle_on(local);
1222 else 1339 else
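
Editor's note: the iface.c changes above split ieee80211_open()/ieee80211_stop() into ieee80211_do_open(dev, coming_up) and ieee80211_do_stop(sdata, going_down), so a runtime interface-type change can tear down and re-open without disturbing local->open_count or the driver's interface bookkeeping. A minimal sketch of the coming_up/going_down idea (names simplified, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

static int open_count;

static void do_open(const char *iface, bool coming_up)
{
        if (coming_up)
                open_count++;            /* only a real ifup counts */
        printf("%s opened, open_count=%d\n", iface, open_count);
}

static void do_stop(const char *iface, bool going_down)
{
        if (going_down)
                open_count--;            /* skipped for an in-place type change */
        printf("%s stopped, open_count=%d\n", iface, open_count);
}

int main(void)
{
        do_open("wlan0", true);          /* normal ifup */
        do_stop("wlan0", false);         /* type change: keep the count */
        do_open("wlan0", false);         /* re-open with the new type */
        do_stop("wlan0", true);          /* normal ifdown */
        return 0;
}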
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 1b9d87ed143a..6a63d1abd14d 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -49,7 +49,7 @@ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
49 49
50static void assert_key_lock(struct ieee80211_local *local) 50static void assert_key_lock(struct ieee80211_local *local)
51{ 51{
52 WARN_ON(!mutex_is_locked(&local->key_mtx)); 52 lockdep_assert_held(&local->key_mtx);
53} 53}
54 54
55static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) 55static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
@@ -60,7 +60,7 @@ static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
60 return NULL; 60 return NULL;
61} 61}
62 62
63static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) 63static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
64{ 64{
65 struct ieee80211_sub_if_data *sdata; 65 struct ieee80211_sub_if_data *sdata;
66 struct ieee80211_sta *sta; 66 struct ieee80211_sta *sta;
@@ -68,8 +68,10 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
68 68
69 might_sleep(); 69 might_sleep();
70 70
71 if (!key->local->ops->set_key) 71 if (!key->local->ops->set_key) {
72 return; 72 ret = -EOPNOTSUPP;
73 goto out_unsupported;
74 }
73 75
74 assert_key_lock(key->local); 76 assert_key_lock(key->local);
75 77
@@ -87,10 +89,27 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
87 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; 89 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
88 90
89 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) 91 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP)
90 printk(KERN_ERR "mac80211-%s: failed to set key " 92 wiphy_err(key->local->hw.wiphy,
91 "(%d, %pM) to hardware (%d)\n", 93 "failed to set key (%d, %pM) to hardware (%d)\n",
92 wiphy_name(key->local->hw.wiphy), 94 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
93 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); 95
96out_unsupported:
97 if (ret) {
98 switch (key->conf.cipher) {
99 case WLAN_CIPHER_SUITE_WEP40:
100 case WLAN_CIPHER_SUITE_WEP104:
101 case WLAN_CIPHER_SUITE_TKIP:
102 case WLAN_CIPHER_SUITE_CCMP:
103 case WLAN_CIPHER_SUITE_AES_CMAC:
104 /* all of these we can do in software */
105 ret = 0;
106 break;
107 default:
108 ret = -EINVAL;
109 }
110 }
111
112 return ret;
94} 113}
95 114
96static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) 115static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
@@ -121,10 +140,9 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
121 sta, &key->conf); 140 sta, &key->conf);
122 141
123 if (ret) 142 if (ret)
124 printk(KERN_ERR "mac80211-%s: failed to remove key " 143 wiphy_err(key->local->hw.wiphy,
125 "(%d, %pM) from hardware (%d)\n", 144 "failed to remove key (%d, %pM) from hardware (%d)\n",
126 wiphy_name(key->local->hw.wiphy), 145 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
127 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
128 146
129 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 147 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
130} 148}
@@ -227,20 +245,18 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
227 } 245 }
228} 246}
229 247
230struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, 248struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
231 int idx,
232 size_t key_len,
233 const u8 *key_data, 249 const u8 *key_data,
234 size_t seq_len, const u8 *seq) 250 size_t seq_len, const u8 *seq)
235{ 251{
236 struct ieee80211_key *key; 252 struct ieee80211_key *key;
237 int i, j; 253 int i, j, err;
238 254
239 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS); 255 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
240 256
241 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); 257 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
242 if (!key) 258 if (!key)
243 return NULL; 259 return ERR_PTR(-ENOMEM);
244 260
245 /* 261 /*
246 * Default to software encryption; we'll later upload the 262 * Default to software encryption; we'll later upload the
@@ -249,15 +265,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
249 key->conf.flags = 0; 265 key->conf.flags = 0;
250 key->flags = 0; 266 key->flags = 0;
251 267
252 key->conf.alg = alg; 268 key->conf.cipher = cipher;
253 key->conf.keyidx = idx; 269 key->conf.keyidx = idx;
254 key->conf.keylen = key_len; 270 key->conf.keylen = key_len;
255 switch (alg) { 271 switch (cipher) {
256 case ALG_WEP: 272 case WLAN_CIPHER_SUITE_WEP40:
273 case WLAN_CIPHER_SUITE_WEP104:
257 key->conf.iv_len = WEP_IV_LEN; 274 key->conf.iv_len = WEP_IV_LEN;
258 key->conf.icv_len = WEP_ICV_LEN; 275 key->conf.icv_len = WEP_ICV_LEN;
259 break; 276 break;
260 case ALG_TKIP: 277 case WLAN_CIPHER_SUITE_TKIP:
261 key->conf.iv_len = TKIP_IV_LEN; 278 key->conf.iv_len = TKIP_IV_LEN;
262 key->conf.icv_len = TKIP_ICV_LEN; 279 key->conf.icv_len = TKIP_ICV_LEN;
263 if (seq) { 280 if (seq) {
@@ -269,7 +286,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
269 } 286 }
270 } 287 }
271 break; 288 break;
272 case ALG_CCMP: 289 case WLAN_CIPHER_SUITE_CCMP:
273 key->conf.iv_len = CCMP_HDR_LEN; 290 key->conf.iv_len = CCMP_HDR_LEN;
274 key->conf.icv_len = CCMP_MIC_LEN; 291 key->conf.icv_len = CCMP_MIC_LEN;
275 if (seq) { 292 if (seq) {
@@ -278,42 +295,38 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
278 key->u.ccmp.rx_pn[i][j] = 295 key->u.ccmp.rx_pn[i][j] =
279 seq[CCMP_PN_LEN - j - 1]; 296 seq[CCMP_PN_LEN - j - 1];
280 } 297 }
281 break;
282 case ALG_AES_CMAC:
283 key->conf.iv_len = 0;
284 key->conf.icv_len = sizeof(struct ieee80211_mmie);
285 if (seq)
286 for (j = 0; j < 6; j++)
287 key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
288 break;
289 }
290 memcpy(key->conf.key, key_data, key_len);
291 INIT_LIST_HEAD(&key->list);
292
293 if (alg == ALG_CCMP) {
294 /* 298 /*
295 * Initialize AES key state here as an optimization so that 299 * Initialize AES key state here as an optimization so that
296 * it does not need to be initialized for every packet. 300 * it does not need to be initialized for every packet.
297 */ 301 */
298 key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); 302 key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data);
299 if (!key->u.ccmp.tfm) { 303 if (IS_ERR(key->u.ccmp.tfm)) {
304 err = PTR_ERR(key->u.ccmp.tfm);
300 kfree(key); 305 kfree(key);
301 return NULL; 306 key = ERR_PTR(err);
302 } 307 }
303 } 308 break;
304 309 case WLAN_CIPHER_SUITE_AES_CMAC:
305 if (alg == ALG_AES_CMAC) { 310 key->conf.iv_len = 0;
311 key->conf.icv_len = sizeof(struct ieee80211_mmie);
312 if (seq)
313 for (j = 0; j < 6; j++)
314 key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
306 /* 315 /*
307 * Initialize AES key state here as an optimization so that 316 * Initialize AES key state here as an optimization so that
308 * it does not need to be initialized for every packet. 317 * it does not need to be initialized for every packet.
309 */ 318 */
310 key->u.aes_cmac.tfm = 319 key->u.aes_cmac.tfm =
311 ieee80211_aes_cmac_key_setup(key_data); 320 ieee80211_aes_cmac_key_setup(key_data);
312 if (!key->u.aes_cmac.tfm) { 321 if (IS_ERR(key->u.aes_cmac.tfm)) {
322 err = PTR_ERR(key->u.aes_cmac.tfm);
313 kfree(key); 323 kfree(key);
314 return NULL; 324 key = ERR_PTR(err);
315 } 325 }
326 break;
316 } 327 }
328 memcpy(key->conf.key, key_data, key_len);
329 INIT_LIST_HEAD(&key->list);
317 330
318 return key; 331 return key;
319} 332}
@@ -326,9 +339,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
326 if (key->local) 339 if (key->local)
327 ieee80211_key_disable_hw_accel(key); 340 ieee80211_key_disable_hw_accel(key);
328 341
329 if (key->conf.alg == ALG_CCMP) 342 if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
330 ieee80211_aes_key_free(key->u.ccmp.tfm); 343 ieee80211_aes_key_free(key->u.ccmp.tfm);
331 if (key->conf.alg == ALG_AES_CMAC) 344 if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
332 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); 345 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
333 if (key->local) 346 if (key->local)
334 ieee80211_debugfs_key_remove(key); 347 ieee80211_debugfs_key_remove(key);
@@ -336,12 +349,12 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
336 kfree(key); 349 kfree(key);
337} 350}
338 351
339void ieee80211_key_link(struct ieee80211_key *key, 352int ieee80211_key_link(struct ieee80211_key *key,
340 struct ieee80211_sub_if_data *sdata, 353 struct ieee80211_sub_if_data *sdata,
341 struct sta_info *sta) 354 struct sta_info *sta)
342{ 355{
343 struct ieee80211_key *old_key; 356 struct ieee80211_key *old_key;
344 int idx; 357 int idx, ret;
345 358
346 BUG_ON(!sdata); 359 BUG_ON(!sdata);
347 BUG_ON(!key); 360 BUG_ON(!key);
@@ -396,9 +409,11 @@ void ieee80211_key_link(struct ieee80211_key *key,
396 409
397 ieee80211_debugfs_key_add(key); 410 ieee80211_debugfs_key_add(key);
398 411
399 ieee80211_key_enable_hw_accel(key); 412 ret = ieee80211_key_enable_hw_accel(key);
400 413
401 mutex_unlock(&sdata->local->key_mtx); 414 mutex_unlock(&sdata->local->key_mtx);
415
416 return ret;
402} 417}
403 418
404static void __ieee80211_key_free(struct ieee80211_key *key) 419static void __ieee80211_key_free(struct ieee80211_key *key)
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index b665bbb7a471..cb9a4a65cc68 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -123,18 +123,16 @@ struct ieee80211_key {
123 struct ieee80211_key_conf conf; 123 struct ieee80211_key_conf conf;
124}; 124};
125 125
126struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, 126struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
127 int idx,
128 size_t key_len,
129 const u8 *key_data, 127 const u8 *key_data,
130 size_t seq_len, const u8 *seq); 128 size_t seq_len, const u8 *seq);
131/* 129/*
132 * Insert a key into data structures (sdata, sta if necessary) 130 * Insert a key into data structures (sdata, sta if necessary)
133 * to make it used, free old key. 131 * to make it used, free old key.
134 */ 132 */
135void ieee80211_key_link(struct ieee80211_key *key, 133int __must_check ieee80211_key_link(struct ieee80211_key *key,
136 struct ieee80211_sub_if_data *sdata, 134 struct ieee80211_sub_if_data *sdata,
137 struct sta_info *sta); 135 struct sta_info *sta);
138void ieee80211_key_free(struct ieee80211_local *local, 136void ieee80211_key_free(struct ieee80211_local *local,
139 struct ieee80211_key *key); 137 struct ieee80211_key *key);
140void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); 138void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
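
Editor's note: the key.c/key.h changes above switch ieee80211_key_alloc() from returning NULL to returning ERR_PTR()-encoded errors, and make ieee80211_key_link() return an int. Below is a hedged userspace re-implementation of that ERR_PTR/IS_ERR convention for illustration only; the real helpers live in <linux/err.h> and key_alloc is a stand-in name:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
        return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

struct key { int idx; };

static struct key *key_alloc(int idx)
{
        struct key *k = malloc(sizeof(*k));

        if (!k)
                return ERR_PTR(-ENOMEM); /* caller learns the reason, not just NULL */
        k->idx = idx;
        return k;
}

int main(void)
{
        struct key *k = key_alloc(0);

        if (IS_ERR(k)) {
                fprintf(stderr, "alloc failed: %ld\n", PTR_ERR(k));
                return 1;
        }
        printf("key %d allocated\n", k->idx);
        free(k);
        return 0;
}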
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ded5c3843e06..db341a99c7c7 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -99,16 +99,19 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
99 int ret = 0; 99 int ret = 0;
100 int power; 100 int power;
101 enum nl80211_channel_type channel_type; 101 enum nl80211_channel_type channel_type;
102 u32 offchannel_flag;
102 103
103 might_sleep(); 104 might_sleep();
104 105
105 scan_chan = local->scan_channel; 106 scan_chan = local->scan_channel;
106 107
108 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
107 if (scan_chan) { 109 if (scan_chan) {
108 chan = scan_chan; 110 chan = scan_chan;
109 channel_type = NL80211_CHAN_NO_HT; 111 channel_type = NL80211_CHAN_NO_HT;
110 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; 112 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
111 } else if (local->tmp_channel) { 113 } else if (local->tmp_channel &&
114 local->oper_channel != local->tmp_channel) {
112 chan = scan_chan = local->tmp_channel; 115 chan = scan_chan = local->tmp_channel;
113 channel_type = local->tmp_channel_type; 116 channel_type = local->tmp_channel_type;
114 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; 117 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
@@ -117,8 +120,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
117 channel_type = local->_oper_channel_type; 120 channel_type = local->_oper_channel_type;
118 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; 121 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
119 } 122 }
123 offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
120 124
121 if (chan != local->hw.conf.channel || 125 if (offchannel_flag || chan != local->hw.conf.channel ||
122 channel_type != local->hw.conf.channel_type) { 126 channel_type != local->hw.conf.channel_type) {
123 local->hw.conf.channel = chan; 127 local->hw.conf.channel = chan;
124 local->hw.conf.channel_type = channel_type; 128 local->hw.conf.channel_type = channel_type;
@@ -302,7 +306,16 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
302 306
303 trace_api_restart_hw(local); 307 trace_api_restart_hw(local);
304 308
305 /* use this reason, __ieee80211_resume will unblock it */ 309 /* wait for scan work complete */
310 flush_workqueue(local->workqueue);
311
312 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
313 "%s called with hardware scan in progress\n", __func__);
314
315 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)))
316 ieee80211_scan_cancel(local);
317
318 /* use this reason, ieee80211_reconfig will unblock it */
306 ieee80211_stop_queues_by_reason(hw, 319 ieee80211_stop_queues_by_reason(hw,
307 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 320 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
308 321
@@ -336,9 +349,6 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
336 struct ieee80211_if_managed *ifmgd; 349 struct ieee80211_if_managed *ifmgd;
337 int c = 0; 350 int c = 0;
338 351
339 if (!netif_running(ndev))
340 return NOTIFY_DONE;
341
342 /* Make sure it's our interface that got changed */ 352 /* Make sure it's our interface that got changed */
343 if (!wdev) 353 if (!wdev)
344 return NOTIFY_DONE; 354 return NOTIFY_DONE;
@@ -349,11 +359,14 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
349 sdata = IEEE80211_DEV_TO_SUB_IF(ndev); 359 sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
350 bss_conf = &sdata->vif.bss_conf; 360 bss_conf = &sdata->vif.bss_conf;
351 361
362 if (!ieee80211_sdata_running(sdata))
363 return NOTIFY_DONE;
364
352 /* ARP filtering is only supported in managed mode */ 365 /* ARP filtering is only supported in managed mode */
353 if (sdata->vif.type != NL80211_IFTYPE_STATION) 366 if (sdata->vif.type != NL80211_IFTYPE_STATION)
354 return NOTIFY_DONE; 367 return NOTIFY_DONE;
355 368
356 idev = sdata->dev->ip_ptr; 369 idev = __in_dev_get_rtnl(sdata->dev);
357 if (!idev) 370 if (!idev)
358 return NOTIFY_DONE; 371 return NOTIFY_DONE;
359 372
@@ -390,6 +403,80 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
390} 403}
391#endif 404#endif
392 405
406static int ieee80211_napi_poll(struct napi_struct *napi, int budget)
407{
408 struct ieee80211_local *local =
409 container_of(napi, struct ieee80211_local, napi);
410
411 return local->ops->napi_poll(&local->hw, budget);
412}
413
414void ieee80211_napi_schedule(struct ieee80211_hw *hw)
415{
416 struct ieee80211_local *local = hw_to_local(hw);
417
418 napi_schedule(&local->napi);
419}
420EXPORT_SYMBOL(ieee80211_napi_schedule);
421
422void ieee80211_napi_complete(struct ieee80211_hw *hw)
423{
424 struct ieee80211_local *local = hw_to_local(hw);
425
426 napi_complete(&local->napi);
427}
428EXPORT_SYMBOL(ieee80211_napi_complete);
429
430/* There isn't a lot of sense in it, but you can transmit anything you like */
431static const struct ieee80211_txrx_stypes
432ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
433 [NL80211_IFTYPE_ADHOC] = {
434 .tx = 0xffff,
435 .rx = BIT(IEEE80211_STYPE_ACTION >> 4),
436 },
437 [NL80211_IFTYPE_STATION] = {
438 .tx = 0xffff,
439 .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
440 BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
441 },
442 [NL80211_IFTYPE_AP] = {
443 .tx = 0xffff,
444 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
445 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
446 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
447 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
448 BIT(IEEE80211_STYPE_AUTH >> 4) |
449 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
450 BIT(IEEE80211_STYPE_ACTION >> 4),
451 },
452 [NL80211_IFTYPE_AP_VLAN] = {
453 /* copy AP */
454 .tx = 0xffff,
455 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
456 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
457 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
458 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
459 BIT(IEEE80211_STYPE_AUTH >> 4) |
460 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
461 BIT(IEEE80211_STYPE_ACTION >> 4),
462 },
463 [NL80211_IFTYPE_P2P_CLIENT] = {
464 .tx = 0xffff,
465 .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
466 BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
467 },
468 [NL80211_IFTYPE_P2P_GO] = {
469 .tx = 0xffff,
470 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
471 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
472 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
473 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
474 BIT(IEEE80211_STYPE_AUTH >> 4) |
475 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
476 BIT(IEEE80211_STYPE_ACTION >> 4),
477 },
478};
479
393struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, 480struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
394 const struct ieee80211_ops *ops) 481 const struct ieee80211_ops *ops)
395{ 482{
@@ -419,6 +506,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
419 if (!wiphy) 506 if (!wiphy)
420 return NULL; 507 return NULL;
421 508
509 wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes;
510
422 wiphy->flags |= WIPHY_FLAG_NETNS_OK | 511 wiphy->flags |= WIPHY_FLAG_NETNS_OK |
423 WIPHY_FLAG_4ADDR_AP | 512 WIPHY_FLAG_4ADDR_AP |
424 WIPHY_FLAG_4ADDR_STATION; 513 WIPHY_FLAG_4ADDR_STATION;
@@ -455,7 +544,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
455 __hw_addr_init(&local->mc_list); 544 __hw_addr_init(&local->mc_list);
456 545
457 mutex_init(&local->iflist_mtx); 546 mutex_init(&local->iflist_mtx);
458 mutex_init(&local->scan_mtx); 547 mutex_init(&local->mtx);
459 548
460 mutex_init(&local->key_mtx); 549 mutex_init(&local->key_mtx);
461 spin_lock_init(&local->filter_lock); 550 spin_lock_init(&local->filter_lock);
@@ -494,6 +583,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
494 skb_queue_head_init(&local->skb_queue); 583 skb_queue_head_init(&local->skb_queue);
495 skb_queue_head_init(&local->skb_queue_unreliable); 584 skb_queue_head_init(&local->skb_queue_unreliable);
496 585
586 /* init dummy netdev for use w/ NAPI */
587 init_dummy_netdev(&local->napi_dev);
588
497 return local_to_hw(local); 589 return local_to_hw(local);
498} 590}
499EXPORT_SYMBOL(ieee80211_alloc_hw); 591EXPORT_SYMBOL(ieee80211_alloc_hw);
@@ -506,6 +598,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
506 int channels, max_bitrates; 598 int channels, max_bitrates;
507 bool supp_ht; 599 bool supp_ht;
508 static const u32 cipher_suites[] = { 600 static const u32 cipher_suites[] = {
601 /* keep WEP first, it may be removed below */
509 WLAN_CIPHER_SUITE_WEP40, 602 WLAN_CIPHER_SUITE_WEP40,
510 WLAN_CIPHER_SUITE_WEP104, 603 WLAN_CIPHER_SUITE_WEP104,
511 WLAN_CIPHER_SUITE_TKIP, 604 WLAN_CIPHER_SUITE_TKIP,
@@ -554,6 +647,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
554 /* mac80211 always supports monitor */ 647 /* mac80211 always supports monitor */
555 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); 648 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
556 649
650#ifndef CONFIG_MAC80211_MESH
651 /* mesh depends on Kconfig, but drivers should set it if they want */
652 local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT);
653#endif
654
655 /* mac80211 supports control port protocol changing */
656 local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
657
557 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 658 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
558 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; 659 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
559 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) 660 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
@@ -589,10 +690,41 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
589 if (local->hw.wiphy->max_scan_ie_len) 690 if (local->hw.wiphy->max_scan_ie_len)
590 local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; 691 local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
591 692
592 local->hw.wiphy->cipher_suites = cipher_suites; 693 /* Set up cipher suites unless driver already did */
593 local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); 694 if (!local->hw.wiphy->cipher_suites) {
594 if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) 695 local->hw.wiphy->cipher_suites = cipher_suites;
595 local->hw.wiphy->n_cipher_suites--; 696 local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
697 if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE))
698 local->hw.wiphy->n_cipher_suites--;
699 }
700 if (IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)) {
701 if (local->hw.wiphy->cipher_suites == cipher_suites) {
702 local->hw.wiphy->cipher_suites += 2;
703 local->hw.wiphy->n_cipher_suites -= 2;
704 } else {
705 u32 *suites;
706 int r, w = 0;
707
708 /* Filter out WEP */
709
710 suites = kmemdup(
711 local->hw.wiphy->cipher_suites,
712 sizeof(u32) * local->hw.wiphy->n_cipher_suites,
713 GFP_KERNEL);
714 if (!suites)
715 return -ENOMEM;
716 for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) {
717 u32 suite = local->hw.wiphy->cipher_suites[r];
718 if (suite == WLAN_CIPHER_SUITE_WEP40 ||
719 suite == WLAN_CIPHER_SUITE_WEP104)
720 continue;
721 suites[w++] = suite;
722 }
723 local->hw.wiphy->cipher_suites = suites;
724 local->hw.wiphy->n_cipher_suites = w;
725 local->wiphy_ciphers_allocated = true;
726 }
727 }
596 728
597 result = wiphy_register(local->hw.wiphy); 729 result = wiphy_register(local->hw.wiphy);
598 if (result < 0) 730 if (result < 0)
@@ -641,16 +773,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
641 773
642 result = ieee80211_wep_init(local); 774 result = ieee80211_wep_init(local);
643 if (result < 0) 775 if (result < 0)
644 printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", 776 wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
645 wiphy_name(local->hw.wiphy), result); 777 result);
646 778
647 rtnl_lock(); 779 rtnl_lock();
648 780
649 result = ieee80211_init_rate_ctrl_alg(local, 781 result = ieee80211_init_rate_ctrl_alg(local,
650 hw->rate_control_algorithm); 782 hw->rate_control_algorithm);
651 if (result < 0) { 783 if (result < 0) {
652 printk(KERN_DEBUG "%s: Failed to initialize rate control " 784 wiphy_debug(local->hw.wiphy,
653 "algorithm\n", wiphy_name(local->hw.wiphy)); 785 "Failed to initialize rate control algorithm\n");
654 goto fail_rate; 786 goto fail_rate;
655 } 787 }
656 788
@@ -659,8 +791,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
659 result = ieee80211_if_add(local, "wlan%d", NULL, 791 result = ieee80211_if_add(local, "wlan%d", NULL,
660 NL80211_IFTYPE_STATION, NULL); 792 NL80211_IFTYPE_STATION, NULL);
661 if (result) 793 if (result)
662 printk(KERN_WARNING "%s: Failed to add default virtual iface\n", 794 wiphy_warn(local->hw.wiphy,
663 wiphy_name(local->hw.wiphy)); 795 "Failed to add default virtual iface\n");
664 } 796 }
665 797
666 rtnl_unlock(); 798 rtnl_unlock();
@@ -683,6 +815,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
683 goto fail_ifa; 815 goto fail_ifa;
684#endif 816#endif
685 817
818 netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll,
819 local->hw.napi_weight);
820
686 return 0; 821 return 0;
687 822
688#ifdef CONFIG_INET 823#ifdef CONFIG_INET
@@ -703,6 +838,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
703 fail_workqueue: 838 fail_workqueue:
704 wiphy_unregister(local->hw.wiphy); 839 wiphy_unregister(local->hw.wiphy);
705 fail_wiphy_register: 840 fail_wiphy_register:
841 if (local->wiphy_ciphers_allocated)
842 kfree(local->hw.wiphy->cipher_suites);
706 kfree(local->int_scan_req); 843 kfree(local->int_scan_req);
707 return result; 844 return result;
708} 845}
@@ -738,6 +875,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
738 */ 875 */
739 del_timer_sync(&local->work_timer); 876 del_timer_sync(&local->work_timer);
740 877
878 cancel_work_sync(&local->restart_work);
741 cancel_work_sync(&local->reconfig_filter); 879 cancel_work_sync(&local->reconfig_filter);
742 880
743 ieee80211_clear_tx_pending(local); 881 ieee80211_clear_tx_pending(local);
@@ -746,8 +884,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
746 884
747 if (skb_queue_len(&local->skb_queue) || 885 if (skb_queue_len(&local->skb_queue) ||
748 skb_queue_len(&local->skb_queue_unreliable)) 886 skb_queue_len(&local->skb_queue_unreliable))
749 printk(KERN_WARNING "%s: skb_queue not empty\n", 887 wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
750 wiphy_name(local->hw.wiphy));
751 skb_queue_purge(&local->skb_queue); 888 skb_queue_purge(&local->skb_queue);
752 skb_queue_purge(&local->skb_queue_unreliable); 889 skb_queue_purge(&local->skb_queue_unreliable);
753 890
@@ -764,7 +901,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
764 struct ieee80211_local *local = hw_to_local(hw); 901 struct ieee80211_local *local = hw_to_local(hw);
765 902
766 mutex_destroy(&local->iflist_mtx); 903 mutex_destroy(&local->iflist_mtx);
767 mutex_destroy(&local->scan_mtx); 904 mutex_destroy(&local->mtx);
905
906 if (local->wiphy_ciphers_allocated)
907 kfree(local->hw.wiphy->cipher_suites);
768 908
769 wiphy_free(local->hw.wiphy); 909 wiphy_free(local->hw.wiphy);
770} 910}
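
Editor's note: in the ieee80211_hw_config() hunk above, offchannel_flag snapshots IEEE80211_CONF_OFFCHANNEL on entry and is XORed with the flag again after it may have been updated, so a channel reconfiguration is forced whenever the off-channel state flips even if the channel pointer is unchanged. A minimal sketch of that XOR transition check (the macro value here is illustrative):

#include <stdio.h>

#define CONF_OFFCHANNEL (1u << 0)

int main(void)
{
        unsigned int conf_flags = 0;
        unsigned int offchannel_flag;

        offchannel_flag = conf_flags & CONF_OFFCHANNEL;   /* snapshot on entry */
        conf_flags |= CONF_OFFCHANNEL;                    /* e.g. a scan started */
        offchannel_flag ^= conf_flags & CONF_OFFCHANNEL;  /* nonzero iff it changed */

        if (offchannel_flag)
                printf("off-channel state changed, reconfigure channel\n");
        return 0;
}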
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b6c163ac22da..77913a15f537 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -54,6 +54,12 @@
54 */ 54 */
55#define IEEE80211_SIGNAL_AVE_WEIGHT 3 55#define IEEE80211_SIGNAL_AVE_WEIGHT 3
56 56
57/*
58 * How many Beacon frames need to have been used in average signal strength
59 * before starting to indicate signal change events.
60 */
61#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
62
57#define TMR_RUNNING_TIMER 0 63#define TMR_RUNNING_TIMER 0
58#define TMR_RUNNING_CHANSW 1 64#define TMR_RUNNING_CHANSW 1
59 65
@@ -86,7 +92,7 @@ enum rx_mgmt_action {
86/* utils */ 92/* utils */
87static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) 93static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd)
88{ 94{
89 WARN_ON(!mutex_is_locked(&ifmgd->mtx)); 95 lockdep_assert_held(&ifmgd->mtx);
90} 96}
91 97
92/* 98/*
@@ -109,7 +115,7 @@ static void run_again(struct ieee80211_if_managed *ifmgd,
109 mod_timer(&ifmgd->timer, timeout); 115 mod_timer(&ifmgd->timer, timeout);
110} 116}
111 117
112static void mod_beacon_timer(struct ieee80211_sub_if_data *sdata) 118void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
113{ 119{
114 if (sdata->local->hw.flags & IEEE80211_HW_BEACON_FILTER) 120 if (sdata->local->hw.flags & IEEE80211_HW_BEACON_FILTER)
115 return; 121 return;
@@ -118,6 +124,19 @@ static void mod_beacon_timer(struct ieee80211_sub_if_data *sdata)
118 round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME)); 124 round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME));
119} 125}
120 126
127void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
128{
129 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
130
131 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
132 return;
133
134 mod_timer(&sdata->u.mgd.conn_mon_timer,
135 round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
136
137 ifmgd->probe_send_count = 0;
138}
139
121static int ecw2cw(int ecw) 140static int ecw2cw(int ecw)
122{ 141{
123 return (1 << ecw) - 1; 142 return (1 << ecw) - 1;
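For reference, ecw2cw() above converts the 802.11 ECW (exponent) field into an actual contention-window value, CW = 2^ECW - 1. A small standalone check of that arithmetic:

```c
#include <assert.h>
#include <stdio.h>

/* Same arithmetic as the ecw2cw() helper shown in the hunk above. */
static int ecw2cw(int ecw)
{
	return (1 << ecw) - 1;
}

int main(void)
{
	assert(ecw2cw(0) == 0);		/* minimum window */
	assert(ecw2cw(4) == 15);	/* a typical CWmin value */
	assert(ecw2cw(10) == 1023);	/* largest value the 4-bit-derived field allows */
	printf("ecw2cw(4) = %d\n", ecw2cw(4));
	return 0;
}
```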
@@ -778,16 +797,17 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
778 params.uapsd = uapsd; 797 params.uapsd = uapsd;
779 798
780#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 799#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
781 printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " 800 wiphy_debug(local->hw.wiphy,
782 "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", 801 "WMM queue=%d aci=%d acm=%d aifs=%d "
783 wiphy_name(local->hw.wiphy), queue, aci, acm, 802 "cWmin=%d cWmax=%d txop=%d uapsd=%d\n",
784 params.aifs, params.cw_min, params.cw_max, params.txop, 803 queue, aci, acm,
785 params.uapsd); 804 params.aifs, params.cw_min, params.cw_max,
805 params.txop, params.uapsd);
786#endif 806#endif
787 if (drv_conf_tx(local, queue, &params)) 807 if (drv_conf_tx(local, queue, &params))
788 printk(KERN_DEBUG "%s: failed to set TX queue " 808 wiphy_debug(local->hw.wiphy,
789 "parameters for queue %d\n", 809 "failed to set TX queue parameters for queue %d\n",
790 wiphy_name(local->hw.wiphy), queue); 810 queue);
791 } 811 }
792 812
793 /* enable WMM or activate new settings */ 813 /* enable WMM or activate new settings */
@@ -860,14 +880,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
860 sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | 880 sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
861 IEEE80211_STA_BEACON_POLL); 881 IEEE80211_STA_BEACON_POLL);
862 882
863 /*
864 * Always handle WMM once after association regardless
865 * of the first value the AP uses. Setting -1 here has
866 * that effect because the AP values is an unsigned
867 * 4-bit value.
868 */
869 sdata->u.mgd.wmm_last_param_set = -1;
870
871 ieee80211_led_assoc(local, 1); 883 ieee80211_led_assoc(local, 1);
872 884
873 if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) 885 if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD)
@@ -990,6 +1002,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
990 1002
991 if (remove_sta) 1003 if (remove_sta)
992 sta_info_destroy_addr(sdata, bssid); 1004 sta_info_destroy_addr(sdata, bssid);
1005
1006 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
1007 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
1008 del_timer_sync(&sdata->u.mgd.timer);
1009 del_timer_sync(&sdata->u.mgd.chswitch_timer);
993} 1010}
994 1011
995void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, 1012void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
@@ -1006,21 +1023,26 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1006 if (is_multicast_ether_addr(hdr->addr1)) 1023 if (is_multicast_ether_addr(hdr->addr1))
1007 return; 1024 return;
1008 1025
1009 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) 1026 ieee80211_sta_reset_conn_monitor(sdata);
1010 return;
1011
1012 mod_timer(&sdata->u.mgd.conn_mon_timer,
1013 round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
1014} 1027}
1015 1028
1016static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) 1029static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1017{ 1030{
1018 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1031 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1019 const u8 *ssid; 1032 const u8 *ssid;
1033 u8 *dst = ifmgd->associated->bssid;
1034 u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3);
1035
1036 /*
1037 * Try sending broadcast probe requests for the last three
1038 * probe requests after the first ones failed since some
1039 * buggy APs only support broadcast probe requests.
1040 */
1041 if (ifmgd->probe_send_count >= unicast_limit)
1042 dst = NULL;
1020 1043
1021 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); 1044 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
1022 ieee80211_send_probe_req(sdata, ifmgd->associated->bssid, 1045 ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0);
1023 ssid + 2, ssid[1], NULL, 0);
1024 1046
1025 ifmgd->probe_send_count++; 1047 ifmgd->probe_send_count++;
1026 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; 1048 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT;
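The probe-request hunk above sends the first probe requests to the AP's unicast address and falls back to broadcast (dst == NULL) for the last three tries, to cope with APs that only answer broadcast probes. A minimal sketch of that selection, assuming IEEE80211_MAX_PROBE_TRIES is 5 (the real constant is defined elsewhere in mac80211):

```c
#include <stdio.h>

#define IEEE80211_MAX_PROBE_TRIES 5	/* assumed value for illustration */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Mirrors the dst selection added above: NULL means a broadcast probe. */
static const unsigned char *probe_dst(const unsigned char *bssid,
				      int probe_send_count)
{
	int unicast_limit = MAX(1, IEEE80211_MAX_PROBE_TRIES - 3);

	return probe_send_count >= unicast_limit ? NULL : bssid;
}

int main(void)
{
	const unsigned char bssid[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	int i;

	for (i = 0; i < IEEE80211_MAX_PROBE_TRIES; i++)
		printf("try %d -> %s\n", i,
		       probe_dst(bssid, i) ? "unicast" : "broadcast");
	return 0;
}
```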
@@ -1103,8 +1125,11 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
1103 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); 1125 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
1104 1126
1105 ieee80211_set_disassoc(sdata, true); 1127 ieee80211_set_disassoc(sdata, true);
1106 ieee80211_recalc_idle(local);
1107 mutex_unlock(&ifmgd->mtx); 1128 mutex_unlock(&ifmgd->mtx);
1129
1130 mutex_lock(&local->mtx);
1131 ieee80211_recalc_idle(local);
1132 mutex_unlock(&local->mtx);
1108 /* 1133 /*
1109 * must be outside lock due to cfg80211, 1134 * must be outside lock due to cfg80211,
1110 * but that's not a problem. 1135 * but that's not a problem.
@@ -1173,7 +1198,9 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
1173 sdata->name, bssid, reason_code); 1198 sdata->name, bssid, reason_code);
1174 1199
1175 ieee80211_set_disassoc(sdata, true); 1200 ieee80211_set_disassoc(sdata, true);
1201 mutex_lock(&sdata->local->mtx);
1176 ieee80211_recalc_idle(sdata->local); 1202 ieee80211_recalc_idle(sdata->local);
1203 mutex_unlock(&sdata->local->mtx);
1177 1204
1178 return RX_MGMT_CFG80211_DEAUTH; 1205 return RX_MGMT_CFG80211_DEAUTH;
1179} 1206}
@@ -1203,7 +1230,9 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
1203 sdata->name, mgmt->sa, reason_code); 1230 sdata->name, mgmt->sa, reason_code);
1204 1231
1205 ieee80211_set_disassoc(sdata, true); 1232 ieee80211_set_disassoc(sdata, true);
1233 mutex_lock(&sdata->local->mtx);
1206 ieee80211_recalc_idle(sdata->local); 1234 ieee80211_recalc_idle(sdata->local);
1235 mutex_unlock(&sdata->local->mtx);
1207 return RX_MGMT_CFG80211_DISASSOC; 1236 return RX_MGMT_CFG80211_DISASSOC;
1208} 1237}
1209 1238
@@ -1330,6 +1359,14 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
1330 return false; 1359 return false;
1331 } 1360 }
1332 1361
1362 /*
1363 * Always handle WMM once after association regardless
1364 * of the first value the AP uses. Setting -1 here has
1365 * that effect because the AP values is an unsigned
1366 * 4-bit value.
1367 */
1368 ifmgd->wmm_last_param_set = -1;
1369
1333 if (elems.wmm_param) 1370 if (elems.wmm_param)
1334 ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, 1371 ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
1335 elems.wmm_param_len); 1372 elems.wmm_param_len);
@@ -1362,7 +1399,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
1362 * Also start the timer that will detect beacon loss. 1399 * Also start the timer that will detect beacon loss.
1363 */ 1400 */
1364 ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt); 1401 ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt);
1365 mod_beacon_timer(sdata); 1402 ieee80211_sta_reset_beacon_monitor(sdata);
1366 1403
1367 return true; 1404 return true;
1368} 1405}
@@ -1465,7 +1502,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
1465 * we have or will be receiving any beacons or data, so let's 1502 * we have or will be receiving any beacons or data, so let's
1466 * schedule the timers again, just in case. 1503 * schedule the timers again, just in case.
1467 */ 1504 */
1468 mod_beacon_timer(sdata); 1505 ieee80211_sta_reset_beacon_monitor(sdata);
1469 1506
1470 mod_timer(&ifmgd->conn_mon_timer, 1507 mod_timer(&ifmgd->conn_mon_timer,
1471 round_jiffies_up(jiffies + 1508 round_jiffies_up(jiffies +
@@ -1540,15 +1577,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1540 ifmgd->last_beacon_signal = rx_status->signal; 1577 ifmgd->last_beacon_signal = rx_status->signal;
1541 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) { 1578 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
1542 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE; 1579 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
1543 ifmgd->ave_beacon_signal = rx_status->signal; 1580 ifmgd->ave_beacon_signal = rx_status->signal * 16;
1544 ifmgd->last_cqm_event_signal = 0; 1581 ifmgd->last_cqm_event_signal = 0;
1582 ifmgd->count_beacon_signal = 1;
1545 } else { 1583 } else {
1546 ifmgd->ave_beacon_signal = 1584 ifmgd->ave_beacon_signal =
1547 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 + 1585 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
1548 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) * 1586 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
1549 ifmgd->ave_beacon_signal) / 16; 1587 ifmgd->ave_beacon_signal) / 16;
1588 ifmgd->count_beacon_signal++;
1550 } 1589 }
1551 if (bss_conf->cqm_rssi_thold && 1590 if (bss_conf->cqm_rssi_thold &&
1591 ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
1552 !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) { 1592 !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
1553 int sig = ifmgd->ave_beacon_signal / 16; 1593 int sig = ifmgd->ave_beacon_signal / 16;
1554 int last_event = ifmgd->last_cqm_event_signal; 1594 int last_event = ifmgd->last_cqm_event_signal;
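The beacon hunk above keeps ave_beacon_signal as a fixed-point value (scaled by 16) and updates it with an exponentially weighted moving average, only considering CQM events once IEEE80211_SIGNAL_AVE_MIN_COUNT beacons have contributed. A standalone sketch of that arithmetic with illustrative signal samples:

```c
#include <stdio.h>

/* Constants as used in the hunk above. */
#define IEEE80211_SIGNAL_AVE_WEIGHT	3
#define IEEE80211_SIGNAL_AVE_MIN_COUNT	4

int main(void)
{
	int samples[] = { -60, -60, -62, -70, -71, -69 };	/* dBm, illustrative */
	int ave = 0, count = 0;
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int sig = samples[i];

		if (count == 0) {
			/* first beacon after the RESET_SIGNAL_AVE flag: seed the average */
			ave = sig * 16;		/* stored with 4 fractional bits */
			count = 1;
		} else {
			ave = (IEEE80211_SIGNAL_AVE_WEIGHT * sig * 16 +
			       (16 - IEEE80211_SIGNAL_AVE_WEIGHT) * ave) / 16;
			count++;
		}

		/* CQM thresholds are only checked once enough beacons were averaged */
		printf("beacon %u: ave = %d dBm%s\n", i + 1, ave / 16,
		       count >= IEEE80211_SIGNAL_AVE_MIN_COUNT ? "" : " (still settling)");
	}
	return 0;
}
```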
@@ -1588,7 +1628,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1588 * Push the beacon loss detection into the future since 1628 * Push the beacon loss detection into the future since
1589 * we are processing a beacon from the AP just now. 1629 * we are processing a beacon from the AP just now.
1590 */ 1630 */
1591 mod_beacon_timer(sdata); 1631 ieee80211_sta_reset_beacon_monitor(sdata);
1592 1632
1593 ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); 1633 ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
1594 ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable, 1634 ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable,
@@ -1751,7 +1791,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1751 struct ieee80211_local *local = sdata->local; 1791 struct ieee80211_local *local = sdata->local;
1752 struct ieee80211_work *wk; 1792 struct ieee80211_work *wk;
1753 1793
1754 mutex_lock(&local->work_mtx); 1794 mutex_lock(&local->mtx);
1755 list_for_each_entry(wk, &local->work_list, list) { 1795 list_for_each_entry(wk, &local->work_list, list) {
1756 if (wk->sdata != sdata) 1796 if (wk->sdata != sdata)
1757 continue; 1797 continue;
@@ -1783,7 +1823,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1783 free_work(wk); 1823 free_work(wk);
1784 break; 1824 break;
1785 } 1825 }
1786 mutex_unlock(&local->work_mtx); 1826 mutex_unlock(&local->mtx);
1787 1827
1788 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); 1828 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len);
1789 } 1829 }
@@ -1840,8 +1880,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1840 " after %dms, disconnecting.\n", 1880 " after %dms, disconnecting.\n",
1841 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 1881 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
1842 ieee80211_set_disassoc(sdata, true); 1882 ieee80211_set_disassoc(sdata, true);
1843 ieee80211_recalc_idle(local);
1844 mutex_unlock(&ifmgd->mtx); 1883 mutex_unlock(&ifmgd->mtx);
1884 mutex_lock(&local->mtx);
1885 ieee80211_recalc_idle(local);
1886 mutex_unlock(&local->mtx);
1845 /* 1887 /*
1846 * must be outside lock due to cfg80211, 1888 * must be outside lock due to cfg80211,
1847 * but that's not a problem. 1889 * but that's not a problem.
@@ -1917,6 +1959,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
1917 * time -- the code here is properly synchronised. 1959 * time -- the code here is properly synchronised.
1918 */ 1960 */
1919 1961
1962 cancel_work_sync(&ifmgd->request_smps_work);
1963
1920 cancel_work_sync(&ifmgd->beacon_connection_loss_work); 1964 cancel_work_sync(&ifmgd->beacon_connection_loss_work);
1921 if (del_timer_sync(&ifmgd->timer)) 1965 if (del_timer_sync(&ifmgd->timer))
1922 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); 1966 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
@@ -1952,6 +1996,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
1952 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); 1996 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1953 INIT_WORK(&ifmgd->beacon_connection_loss_work, 1997 INIT_WORK(&ifmgd->beacon_connection_loss_work,
1954 ieee80211_beacon_connection_loss_work); 1998 ieee80211_beacon_connection_loss_work);
1999 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work);
1955 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 2000 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
1956 (unsigned long) sdata); 2001 (unsigned long) sdata);
1957 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 2002 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
@@ -2249,6 +2294,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2249 else 2294 else
2250 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT; 2295 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT;
2251 2296
2297 sdata->control_port_protocol = req->crypto.control_port_ethertype;
2298 sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt;
2299
2252 ieee80211_add_work(wk); 2300 ieee80211_add_work(wk);
2253 return 0; 2301 return 0;
2254} 2302}
@@ -2275,7 +2323,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2275 2323
2276 mutex_unlock(&ifmgd->mtx); 2324 mutex_unlock(&ifmgd->mtx);
2277 2325
2278 mutex_lock(&local->work_mtx); 2326 mutex_lock(&local->mtx);
2279 list_for_each_entry(wk, &local->work_list, list) { 2327 list_for_each_entry(wk, &local->work_list, list) {
2280 if (wk->sdata != sdata) 2328 if (wk->sdata != sdata)
2281 continue; 2329 continue;
@@ -2294,7 +2342,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2294 free_work(wk); 2342 free_work(wk);
2295 break; 2343 break;
2296 } 2344 }
2297 mutex_unlock(&local->work_mtx); 2345 mutex_unlock(&local->mtx);
2298 2346
2299 /* 2347 /*
2300 * If somebody requests authentication and we haven't 2348 * If somebody requests authentication and we haven't
@@ -2319,7 +2367,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2319 if (assoc_bss) 2367 if (assoc_bss)
2320 sta_info_destroy_addr(sdata, bssid); 2368 sta_info_destroy_addr(sdata, bssid);
2321 2369
2370 mutex_lock(&sdata->local->mtx);
2322 ieee80211_recalc_idle(sdata->local); 2371 ieee80211_recalc_idle(sdata->local);
2372 mutex_unlock(&sdata->local->mtx);
2323 2373
2324 return 0; 2374 return 0;
2325} 2375}
@@ -2357,7 +2407,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2357 cookie, !req->local_state_change); 2407 cookie, !req->local_state_change);
2358 sta_info_destroy_addr(sdata, bssid); 2408 sta_info_destroy_addr(sdata, bssid);
2359 2409
2410 mutex_lock(&sdata->local->mtx);
2360 ieee80211_recalc_idle(sdata->local); 2411 ieee80211_recalc_idle(sdata->local);
2412 mutex_unlock(&sdata->local->mtx);
2361 2413
2362 return 0; 2414 return 0;
2363} 2415}
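In the connection-loss and probe-timeout hunks above, ieee80211_recalc_idle() is now called after ifmgd->mtx has been released and under local->mtx instead. A userspace sketch of that ordering, using pthread mutexes as stand-ins for the kernel mutexes (names illustrative):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ifmgd_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t local_mtx = PTHREAD_MUTEX_INITIALIZER;

static void recalc_idle(void)
{
	/* placeholder for ieee80211_recalc_idle(local) */
	printf("recalculating idle state under local->mtx\n");
}

static void handle_connection_loss(void)
{
	pthread_mutex_lock(&ifmgd_mtx);
	/* ... disassociation work done under ifmgd->mtx ... */
	pthread_mutex_unlock(&ifmgd_mtx);

	/* only then is the device-level mutex taken, as in the hunks above */
	pthread_mutex_lock(&local_mtx);
	recalc_idle();
	pthread_mutex_unlock(&local_mtx);
}

int main(void)
{
	handle_connection_loss();
	return 0;
}
```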
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index c36b1911987a..4b564091e51d 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -22,12 +22,16 @@
22static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) 22static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
23{ 23{
24 struct ieee80211_local *local = sdata->local; 24 struct ieee80211_local *local = sdata->local;
25 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
25 26
26 local->offchannel_ps_enabled = false; 27 local->offchannel_ps_enabled = false;
27 28
28 /* FIXME: what to do when local->pspolling is true? */ 29 /* FIXME: what to do when local->pspolling is true? */
29 30
30 del_timer_sync(&local->dynamic_ps_timer); 31 del_timer_sync(&local->dynamic_ps_timer);
32 del_timer_sync(&ifmgd->bcn_mon_timer);
33 del_timer_sync(&ifmgd->conn_mon_timer);
34
31 cancel_work_sync(&local->dynamic_ps_enable_work); 35 cancel_work_sync(&local->dynamic_ps_enable_work);
32 36
33 if (local->hw.conf.flags & IEEE80211_CONF_PS) { 37 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
@@ -85,6 +89,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
85 mod_timer(&local->dynamic_ps_timer, jiffies + 89 mod_timer(&local->dynamic_ps_timer, jiffies +
86 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); 90 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
87 } 91 }
92
93 ieee80211_sta_reset_beacon_monitor(sdata);
94 ieee80211_sta_reset_conn_monitor(sdata);
88} 95}
89 96
90void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) 97void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
@@ -112,8 +119,10 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
112 * used from user space controlled off-channel operations. 119 * used from user space controlled off-channel operations.
113 */ 120 */
114 if (sdata->vif.type != NL80211_IFTYPE_STATION && 121 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
115 sdata->vif.type != NL80211_IFTYPE_MONITOR) 122 sdata->vif.type != NL80211_IFTYPE_MONITOR) {
123 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
116 netif_tx_stop_all_queues(sdata->dev); 124 netif_tx_stop_all_queues(sdata->dev);
125 }
117 } 126 }
118 mutex_unlock(&local->iflist_mtx); 127 mutex_unlock(&local->iflist_mtx);
119} 128}
@@ -131,6 +140,7 @@ void ieee80211_offchannel_stop_station(struct ieee80211_local *local)
131 continue; 140 continue;
132 141
133 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 142 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
143 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
134 netif_tx_stop_all_queues(sdata->dev); 144 netif_tx_stop_all_queues(sdata->dev);
135 if (sdata->u.mgd.associated) 145 if (sdata->u.mgd.associated)
136 ieee80211_offchannel_ps_enable(sdata); 146 ieee80211_offchannel_ps_enable(sdata);
@@ -155,8 +165,20 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
155 ieee80211_offchannel_ps_disable(sdata); 165 ieee80211_offchannel_ps_disable(sdata);
156 } 166 }
157 167
158 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) 168 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
169 clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
170 /*
171 * This may wake up queues even though the driver
172 * currently has them stopped. This is not very
173 * likely, since the driver won't have gotten any
174 * (or hardly any) new packets while we weren't
175 * on the right channel, and even if it happens
176 * it will at most lead to queueing up one more
177 * packet per queue in mac80211 rather than on
178 * the interface qdisc.
179 */
159 netif_tx_wake_all_queues(sdata->dev); 180 netif_tx_wake_all_queues(sdata->dev);
181 }
160 182
161 /* re-enable beaconing */ 183 /* re-enable beaconing */
162 if (enable_beaconing && 184 if (enable_beaconing &&
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d287fde0431d..ce671dfd238c 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -12,7 +12,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
12 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
13 struct sta_info *sta; 13 struct sta_info *sta;
14 14
15 ieee80211_scan_cancel(local); 15 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)))
16 ieee80211_scan_cancel(local);
16 17
17 ieee80211_stop_queues_by_reason(hw, 18 ieee80211_stop_queues_by_reason(hw,
18 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 19 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index be04d46110fe..b0cc385bf989 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -207,7 +207,7 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc)
207 207
208 fc = hdr->frame_control; 208 fc = hdr->frame_control;
209 209
210 return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); 210 return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc);
211} 211}
212 212
213static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) 213static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx)
@@ -368,8 +368,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
368 368
369 ref = rate_control_alloc(name, local); 369 ref = rate_control_alloc(name, local);
370 if (!ref) { 370 if (!ref) {
371 printk(KERN_WARNING "%s: Failed to select rate control " 371 wiphy_warn(local->hw.wiphy,
372 "algorithm\n", wiphy_name(local->hw.wiphy)); 372 "Failed to select rate control algorithm\n");
373 return -ENOENT; 373 return -ENOENT;
374 } 374 }
375 375
@@ -380,9 +380,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
380 sta_info_flush(local, NULL); 380 sta_info_flush(local, NULL);
381 } 381 }
382 382
383 printk(KERN_DEBUG "%s: Selected rate control " 383 wiphy_debug(local->hw.wiphy, "Selected rate control algorithm '%s'\n",
384 "algorithm '%s'\n", wiphy_name(local->hw.wiphy), 384 ref->ops->name);
385 ref->ops->name);
386 385
387 return 0; 386 return 0;
388} 387}
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index 47438b4a9af5..135f36fd4d5d 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -162,7 +162,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
162 file_info->next_entry = (file_info->next_entry + 1) % 162 file_info->next_entry = (file_info->next_entry + 1) %
163 RC_PID_EVENT_RING_SIZE; 163 RC_PID_EVENT_RING_SIZE;
164 164
165 /* Print information about the event. Note that userpace needs to 165 /* Print information about the event. Note that userspace needs to
166 * provide large enough buffers. */ 166 * provide large enough buffers. */
167 length = length < RC_PID_PRINT_BUF_SIZE ? 167 length = length < RC_PID_PRINT_BUF_SIZE ?
168 length : RC_PID_PRINT_BUF_SIZE; 168 length : RC_PID_PRINT_BUF_SIZE;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 28624282c5f3..0b0e83ebe3d5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -315,6 +315,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
315static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) 315static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
316{ 316{
317 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 317 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
318 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
318 int tid; 319 int tid;
319 320
320 /* does the frame have a qos control field? */ 321 /* does the frame have a qos control field? */
@@ -323,9 +324,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
323 /* frame has qos control */ 324 /* frame has qos control */
324 tid = *qc & IEEE80211_QOS_CTL_TID_MASK; 325 tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
325 if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) 326 if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
326 rx->flags |= IEEE80211_RX_AMSDU; 327 status->rx_flags |= IEEE80211_RX_AMSDU;
327 else
328 rx->flags &= ~IEEE80211_RX_AMSDU;
329 } else { 328 } else {
330 /* 329 /*
331 * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"): 330 * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"):
@@ -387,26 +386,25 @@ static ieee80211_rx_result debug_noinline
387ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) 386ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
388{ 387{
389 struct ieee80211_local *local = rx->local; 388 struct ieee80211_local *local = rx->local;
389 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
390 struct sk_buff *skb = rx->skb; 390 struct sk_buff *skb = rx->skb;
391 391
392 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning))) 392 if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN)))
393 return RX_CONTINUE;
394
395 if (test_bit(SCAN_HW_SCANNING, &local->scanning))
393 return ieee80211_scan_rx(rx->sdata, skb); 396 return ieee80211_scan_rx(rx->sdata, skb);
394 397
395 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning) && 398 if (test_bit(SCAN_SW_SCANNING, &local->scanning)) {
396 (rx->flags & IEEE80211_RX_IN_SCAN))) {
397 /* drop all the other packets during a software scan anyway */ 399 /* drop all the other packets during a software scan anyway */
398 if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED) 400 if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED)
399 dev_kfree_skb(skb); 401 dev_kfree_skb(skb);
400 return RX_QUEUED; 402 return RX_QUEUED;
401 } 403 }
402 404
403 if (unlikely(rx->flags & IEEE80211_RX_IN_SCAN)) { 405 /* scanning finished during invoking of handlers */
404 /* scanning finished during invoking of handlers */ 406 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan);
405 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); 407 return RX_DROP_UNUSABLE;
406 return RX_DROP_UNUSABLE;
407 }
408
409 return RX_CONTINUE;
410} 408}
411 409
412 410
@@ -538,20 +536,12 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
538 int index, 536 int index,
539 struct sk_buff_head *frames) 537 struct sk_buff_head *frames)
540{ 538{
541 struct ieee80211_supported_band *sband;
542 struct ieee80211_rate *rate = NULL;
543 struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; 539 struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
544 struct ieee80211_rx_status *status;
545 540
546 if (!skb) 541 if (!skb)
547 goto no_frame; 542 goto no_frame;
548 543
549 status = IEEE80211_SKB_RXCB(skb); 544 /* release the frame from the reorder ring buffer */
550
551 /* release the reordered frames to stack */
552 sband = hw->wiphy->bands[status->band];
553 if (!(status->flag & RX_FLAG_HT))
554 rate = &sband->bitrates[status->rate_idx];
555 tid_agg_rx->stored_mpdu_num--; 545 tid_agg_rx->stored_mpdu_num--;
556 tid_agg_rx->reorder_buf[index] = NULL; 546 tid_agg_rx->reorder_buf[index] = NULL;
557 __skb_queue_tail(frames, skb); 547 __skb_queue_tail(frames, skb);
@@ -580,9 +570,78 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
580 * frames that have not yet been received are assumed to be lost and the skb 570 * frames that have not yet been received are assumed to be lost and the skb
581 * can be released for processing. This may also release other skb's from the 571 * can be released for processing. This may also release other skb's from the
582 * reorder buffer if there are no additional gaps between the frames. 572 * reorder buffer if there are no additional gaps between the frames.
573 *
574 * Callers must hold tid_agg_rx->reorder_lock.
583 */ 575 */
584#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) 576#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10)
585 577
578static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
579 struct tid_ampdu_rx *tid_agg_rx,
580 struct sk_buff_head *frames)
581{
582 int index, j;
583
584 /* release the buffer until next missing frame */
585 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
586 tid_agg_rx->buf_size;
587 if (!tid_agg_rx->reorder_buf[index] &&
588 tid_agg_rx->stored_mpdu_num > 1) {
589 /*
590 * No buffers ready to be released, but check whether any
591 * frames in the reorder buffer have timed out.
592 */
593 int skipped = 1;
594 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
595 j = (j + 1) % tid_agg_rx->buf_size) {
596 if (!tid_agg_rx->reorder_buf[j]) {
597 skipped++;
598 continue;
599 }
600 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
601 HT_RX_REORDER_BUF_TIMEOUT))
602 goto set_release_timer;
603
604#ifdef CONFIG_MAC80211_HT_DEBUG
605 if (net_ratelimit())
606 wiphy_debug(hw->wiphy,
607 "release an RX reorder frame due to timeout on earlier frames\n");
608#endif
609 ieee80211_release_reorder_frame(hw, tid_agg_rx,
610 j, frames);
611
612 /*
613 * Increment the head seq# also for the skipped slots.
614 */
615 tid_agg_rx->head_seq_num =
616 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
617 skipped = 0;
618 }
619 } else while (tid_agg_rx->reorder_buf[index]) {
620 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
621 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
622 tid_agg_rx->buf_size;
623 }
624
625 if (tid_agg_rx->stored_mpdu_num) {
626 j = index = seq_sub(tid_agg_rx->head_seq_num,
627 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
628
629 for (; j != (index - 1) % tid_agg_rx->buf_size;
630 j = (j + 1) % tid_agg_rx->buf_size) {
631 if (tid_agg_rx->reorder_buf[j])
632 break;
633 }
634
635 set_release_timer:
636
637 mod_timer(&tid_agg_rx->reorder_timer,
638 tid_agg_rx->reorder_time[j] +
639 HT_RX_REORDER_BUF_TIMEOUT);
640 } else {
641 del_timer(&tid_agg_rx->reorder_timer);
642 }
643}
644
586/* 645/*
587 * As this function belongs to the RX path it must be under 646 * As this function belongs to the RX path it must be under
588 * rcu_read_lock protection. It returns false if the frame 647 * rcu_read_lock protection. It returns false if the frame
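ieee80211_sta_reorder_release() above indexes the reorder buffer via seq_sub(head_seq_num, ssn) % buf_size and advances head_seq_num under SEQ_MASK. The seq_* helpers are defined elsewhere in rx.c; assuming the usual 12-bit modular definitions, a standalone check of how the wrap-around arithmetic works:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed definitions of the 12-bit sequence-number helpers used by the
 * reorder code above (they live elsewhere in net/mac80211/rx.c). */
#define SEQ_MODULO 0x1000
#define SEQ_MASK   0xfff

static uint16_t seq_sub(uint16_t sq1, uint16_t sq2)
{
	return (sq1 - sq2) & SEQ_MASK;
}

static int seq_less(uint16_t sq1, uint16_t sq2)
{
	return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1);
}

int main(void)
{
	uint16_t ssn = 0xffe;		/* BA session start sequence number */
	uint16_t head_seq_num = 0x003;	/* head has wrapped past 0xfff */
	int buf_size = 64;

	/* slot index computed as in ieee80211_sta_reorder_release() */
	int index = seq_sub(head_seq_num, ssn) % buf_size;

	assert(index == 5);		/* 0x003 is 5 frames past 0xffe */
	assert(seq_less(0xffe, 0x003));	/* older despite the larger raw value */
	printf("index = %d\n", index);
	return 0;
}
```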
@@ -598,14 +657,16 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
598 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; 657 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
599 u16 head_seq_num, buf_size; 658 u16 head_seq_num, buf_size;
600 int index; 659 int index;
660 bool ret = true;
601 661
602 buf_size = tid_agg_rx->buf_size; 662 buf_size = tid_agg_rx->buf_size;
603 head_seq_num = tid_agg_rx->head_seq_num; 663 head_seq_num = tid_agg_rx->head_seq_num;
604 664
665 spin_lock(&tid_agg_rx->reorder_lock);
605 /* frame with out of date sequence number */ 666 /* frame with out of date sequence number */
606 if (seq_less(mpdu_seq_num, head_seq_num)) { 667 if (seq_less(mpdu_seq_num, head_seq_num)) {
607 dev_kfree_skb(skb); 668 dev_kfree_skb(skb);
608 return true; 669 goto out;
609 } 670 }
610 671
611 /* 672 /*
@@ -626,7 +687,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
626 /* check if we already stored this frame */ 687 /* check if we already stored this frame */
627 if (tid_agg_rx->reorder_buf[index]) { 688 if (tid_agg_rx->reorder_buf[index]) {
628 dev_kfree_skb(skb); 689 dev_kfree_skb(skb);
629 return true; 690 goto out;
630 } 691 }
631 692
632 /* 693 /*
@@ -636,58 +697,19 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
636 if (mpdu_seq_num == tid_agg_rx->head_seq_num && 697 if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
637 tid_agg_rx->stored_mpdu_num == 0) { 698 tid_agg_rx->stored_mpdu_num == 0) {
638 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); 699 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
639 return false; 700 ret = false;
701 goto out;
640 } 702 }
641 703
642 /* put the frame in the reordering buffer */ 704 /* put the frame in the reordering buffer */
643 tid_agg_rx->reorder_buf[index] = skb; 705 tid_agg_rx->reorder_buf[index] = skb;
644 tid_agg_rx->reorder_time[index] = jiffies; 706 tid_agg_rx->reorder_time[index] = jiffies;
645 tid_agg_rx->stored_mpdu_num++; 707 tid_agg_rx->stored_mpdu_num++;
646 /* release the buffer until next missing frame */ 708 ieee80211_sta_reorder_release(hw, tid_agg_rx, frames);
647 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
648 tid_agg_rx->buf_size;
649 if (!tid_agg_rx->reorder_buf[index] &&
650 tid_agg_rx->stored_mpdu_num > 1) {
651 /*
652 * No buffers ready to be released, but check whether any
653 * frames in the reorder buffer have timed out.
654 */
655 int j;
656 int skipped = 1;
657 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
658 j = (j + 1) % tid_agg_rx->buf_size) {
659 if (!tid_agg_rx->reorder_buf[j]) {
660 skipped++;
661 continue;
662 }
663 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
664 HT_RX_REORDER_BUF_TIMEOUT))
665 break;
666
667#ifdef CONFIG_MAC80211_HT_DEBUG
668 if (net_ratelimit())
669 printk(KERN_DEBUG "%s: release an RX reorder "
670 "frame due to timeout on earlier "
671 "frames\n",
672 wiphy_name(hw->wiphy));
673#endif
674 ieee80211_release_reorder_frame(hw, tid_agg_rx,
675 j, frames);
676 709
677 /* 710 out:
678 * Increment the head seq# also for the skipped slots. 711 spin_unlock(&tid_agg_rx->reorder_lock);
679 */ 712 return ret;
680 tid_agg_rx->head_seq_num =
681 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
682 skipped = 0;
683 }
684 } else while (tid_agg_rx->reorder_buf[index]) {
685 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
686 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
687 tid_agg_rx->buf_size;
688 }
689
690 return true;
691} 713}
692 714
693/* 715/*
@@ -761,13 +783,14 @@ static ieee80211_rx_result debug_noinline
761ieee80211_rx_h_check(struct ieee80211_rx_data *rx) 783ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
762{ 784{
763 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 785 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
786 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
764 787
765 /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ 788 /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */
766 if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { 789 if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) {
767 if (unlikely(ieee80211_has_retry(hdr->frame_control) && 790 if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
768 rx->sta->last_seq_ctrl[rx->queue] == 791 rx->sta->last_seq_ctrl[rx->queue] ==
769 hdr->seq_ctrl)) { 792 hdr->seq_ctrl)) {
770 if (rx->flags & IEEE80211_RX_RA_MATCH) { 793 if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
771 rx->local->dot11FrameDuplicateCount++; 794 rx->local->dot11FrameDuplicateCount++;
772 rx->sta->num_duplicates++; 795 rx->sta->num_duplicates++;
773 } 796 }
@@ -800,7 +823,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
800 if ((!ieee80211_has_fromds(hdr->frame_control) && 823 if ((!ieee80211_has_fromds(hdr->frame_control) &&
801 !ieee80211_has_tods(hdr->frame_control) && 824 !ieee80211_has_tods(hdr->frame_control) &&
802 ieee80211_is_data(hdr->frame_control)) || 825 ieee80211_is_data(hdr->frame_control)) ||
803 !(rx->flags & IEEE80211_RX_RA_MATCH)) { 826 !(status->rx_flags & IEEE80211_RX_RA_MATCH)) {
804 /* Drop IBSS frames and frames for other hosts 827 /* Drop IBSS frames and frames for other hosts
805 * silently. */ 828 * silently. */
806 return RX_DROP_MONITOR; 829 return RX_DROP_MONITOR;
@@ -857,7 +880,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
857 * No point in finding a key and decrypting if the frame is neither 880 * No point in finding a key and decrypting if the frame is neither
858 * addressed to us nor a multicast frame. 881 * addressed to us nor a multicast frame.
859 */ 882 */
860 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 883 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
861 return RX_CONTINUE; 884 return RX_CONTINUE;
862 885
863 /* start without a key */ 886 /* start without a key */
@@ -873,6 +896,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
873 896
874 if (!is_multicast_ether_addr(hdr->addr1) && stakey) { 897 if (!is_multicast_ether_addr(hdr->addr1) && stakey) {
875 rx->key = stakey; 898 rx->key = stakey;
899 if ((status->flag & RX_FLAG_DECRYPTED) &&
900 (status->flag & RX_FLAG_IV_STRIPPED))
901 return RX_CONTINUE;
876 /* Skip decryption if the frame is not protected. */ 902 /* Skip decryption if the frame is not protected. */
877 if (!ieee80211_has_protected(fc)) 903 if (!ieee80211_has_protected(fc))
878 return RX_CONTINUE; 904 return RX_CONTINUE;
@@ -935,7 +961,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
935 * pairwise or station-to-station keys, but for WEP we allow 961 * pairwise or station-to-station keys, but for WEP we allow
936 * using a key index as well. 962 * using a key index as well.
937 */ 963 */
938 if (rx->key && rx->key->conf.alg != ALG_WEP && 964 if (rx->key && rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
965 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
939 !is_multicast_ether_addr(hdr->addr1)) 966 !is_multicast_ether_addr(hdr->addr1))
940 rx->key = NULL; 967 rx->key = NULL;
941 } 968 }
@@ -951,8 +978,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
951 return RX_DROP_UNUSABLE; 978 return RX_DROP_UNUSABLE;
952 /* the hdr variable is invalid now! */ 979 /* the hdr variable is invalid now! */
953 980
954 switch (rx->key->conf.alg) { 981 switch (rx->key->conf.cipher) {
955 case ALG_WEP: 982 case WLAN_CIPHER_SUITE_WEP40:
983 case WLAN_CIPHER_SUITE_WEP104:
956 /* Check for weak IVs if possible */ 984 /* Check for weak IVs if possible */
957 if (rx->sta && ieee80211_is_data(fc) && 985 if (rx->sta && ieee80211_is_data(fc) &&
958 (!(status->flag & RX_FLAG_IV_STRIPPED) || 986 (!(status->flag & RX_FLAG_IV_STRIPPED) ||
@@ -962,15 +990,21 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
962 990
963 result = ieee80211_crypto_wep_decrypt(rx); 991 result = ieee80211_crypto_wep_decrypt(rx);
964 break; 992 break;
965 case ALG_TKIP: 993 case WLAN_CIPHER_SUITE_TKIP:
966 result = ieee80211_crypto_tkip_decrypt(rx); 994 result = ieee80211_crypto_tkip_decrypt(rx);
967 break; 995 break;
968 case ALG_CCMP: 996 case WLAN_CIPHER_SUITE_CCMP:
969 result = ieee80211_crypto_ccmp_decrypt(rx); 997 result = ieee80211_crypto_ccmp_decrypt(rx);
970 break; 998 break;
971 case ALG_AES_CMAC: 999 case WLAN_CIPHER_SUITE_AES_CMAC:
972 result = ieee80211_crypto_aes_cmac_decrypt(rx); 1000 result = ieee80211_crypto_aes_cmac_decrypt(rx);
973 break; 1001 break;
1002 default:
1003 /*
1004 * We can reach here only with HW-only algorithms
1005 * but why didn't it decrypt the frame?!
1006 */
1007 return RX_DROP_UNUSABLE;
974 } 1008 }
975 1009
976 /* either the frame has been decrypted or will be dropped */ 1010 /* either the frame has been decrypted or will be dropped */
@@ -1079,7 +1113,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1079 sta->last_rx = jiffies; 1113 sta->last_rx = jiffies;
1080 } 1114 }
1081 1115
1082 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1116 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1083 return RX_CONTINUE; 1117 return RX_CONTINUE;
1084 1118
1085 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) 1119 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION)
@@ -1236,6 +1270,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1236 unsigned int frag, seq; 1270 unsigned int frag, seq;
1237 struct ieee80211_fragment_entry *entry; 1271 struct ieee80211_fragment_entry *entry;
1238 struct sk_buff *skb; 1272 struct sk_buff *skb;
1273 struct ieee80211_rx_status *status;
1239 1274
1240 hdr = (struct ieee80211_hdr *)rx->skb->data; 1275 hdr = (struct ieee80211_hdr *)rx->skb->data;
1241 fc = hdr->frame_control; 1276 fc = hdr->frame_control;
@@ -1265,7 +1300,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1265 /* This is the first fragment of a new frame. */ 1300 /* This is the first fragment of a new frame. */
1266 entry = ieee80211_reassemble_add(rx->sdata, frag, seq, 1301 entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
1267 rx->queue, &(rx->skb)); 1302 rx->queue, &(rx->skb));
1268 if (rx->key && rx->key->conf.alg == ALG_CCMP && 1303 if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP &&
1269 ieee80211_has_protected(fc)) { 1304 ieee80211_has_protected(fc)) {
1270 int queue = ieee80211_is_mgmt(fc) ? 1305 int queue = ieee80211_is_mgmt(fc) ?
1271 NUM_RX_DATA_QUEUES : rx->queue; 1306 NUM_RX_DATA_QUEUES : rx->queue;
@@ -1294,7 +1329,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1294 int i; 1329 int i;
1295 u8 pn[CCMP_PN_LEN], *rpn; 1330 u8 pn[CCMP_PN_LEN], *rpn;
1296 int queue; 1331 int queue;
1297 if (!rx->key || rx->key->conf.alg != ALG_CCMP) 1332 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP)
1298 return RX_DROP_UNUSABLE; 1333 return RX_DROP_UNUSABLE;
1299 memcpy(pn, entry->last_pn, CCMP_PN_LEN); 1334 memcpy(pn, entry->last_pn, CCMP_PN_LEN);
1300 for (i = CCMP_PN_LEN - 1; i >= 0; i--) { 1335 for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -1335,7 +1370,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1335 } 1370 }
1336 1371
1337 /* Complete frame has been reassembled - process it now */ 1372 /* Complete frame has been reassembled - process it now */
1338 rx->flags |= IEEE80211_RX_FRAGMENTED; 1373 status = IEEE80211_SKB_RXCB(rx->skb);
1374 status->rx_flags |= IEEE80211_RX_FRAGMENTED;
1339 1375
1340 out: 1376 out:
1341 if (rx->sta) 1377 if (rx->sta)
@@ -1352,9 +1388,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
1352{ 1388{
1353 struct ieee80211_sub_if_data *sdata = rx->sdata; 1389 struct ieee80211_sub_if_data *sdata = rx->sdata;
1354 __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; 1390 __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control;
1391 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1355 1392
1356 if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || 1393 if (likely(!rx->sta || !ieee80211_is_pspoll(fc) ||
1357 !(rx->flags & IEEE80211_RX_RA_MATCH))) 1394 !(status->rx_flags & IEEE80211_RX_RA_MATCH)))
1358 return RX_CONTINUE; 1395 return RX_CONTINUE;
1359 1396
1360 if ((sdata->vif.type != NL80211_IFTYPE_AP) && 1397 if ((sdata->vif.type != NL80211_IFTYPE_AP) &&
@@ -1492,7 +1529,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
1492 * Allow EAPOL frames to us/the PAE group address regardless 1529 * Allow EAPOL frames to us/the PAE group address regardless
1493 * of whether the frame was encrypted or not. 1530 * of whether the frame was encrypted or not.
1494 */ 1531 */
1495 if (ehdr->h_proto == htons(ETH_P_PAE) && 1532 if (ehdr->h_proto == rx->sdata->control_port_protocol &&
1496 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 || 1533 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 ||
1497 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0)) 1534 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0))
1498 return true; 1535 return true;
@@ -1515,6 +1552,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1515 struct sk_buff *skb, *xmit_skb; 1552 struct sk_buff *skb, *xmit_skb;
1516 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; 1553 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1517 struct sta_info *dsta; 1554 struct sta_info *dsta;
1555 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1518 1556
1519 skb = rx->skb; 1557 skb = rx->skb;
1520 xmit_skb = NULL; 1558 xmit_skb = NULL;
@@ -1522,7 +1560,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1522 if ((sdata->vif.type == NL80211_IFTYPE_AP || 1560 if ((sdata->vif.type == NL80211_IFTYPE_AP ||
1523 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && 1561 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
1524 !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && 1562 !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
1525 (rx->flags & IEEE80211_RX_RA_MATCH) && 1563 (status->rx_flags & IEEE80211_RX_RA_MATCH) &&
1526 (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { 1564 (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
1527 if (is_multicast_ether_addr(ehdr->h_dest)) { 1565 if (is_multicast_ether_addr(ehdr->h_dest)) {
1528 /* 1566 /*
@@ -1599,6 +1637,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1599 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 1637 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
1600 __le16 fc = hdr->frame_control; 1638 __le16 fc = hdr->frame_control;
1601 struct sk_buff_head frame_list; 1639 struct sk_buff_head frame_list;
1640 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1602 1641
1603 if (unlikely(!ieee80211_is_data(fc))) 1642 if (unlikely(!ieee80211_is_data(fc)))
1604 return RX_CONTINUE; 1643 return RX_CONTINUE;
@@ -1606,7 +1645,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1606 if (unlikely(!ieee80211_is_data_present(fc))) 1645 if (unlikely(!ieee80211_is_data_present(fc)))
1607 return RX_DROP_MONITOR; 1646 return RX_DROP_MONITOR;
1608 1647
1609 if (!(rx->flags & IEEE80211_RX_AMSDU)) 1648 if (!(status->rx_flags & IEEE80211_RX_AMSDU))
1610 return RX_CONTINUE; 1649 return RX_CONTINUE;
1611 1650
1612 if (ieee80211_has_a4(hdr->frame_control) && 1651 if (ieee80211_has_a4(hdr->frame_control) &&
@@ -1657,6 +1696,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
1657 struct sk_buff *skb = rx->skb, *fwd_skb; 1696 struct sk_buff *skb = rx->skb, *fwd_skb;
1658 struct ieee80211_local *local = rx->local; 1697 struct ieee80211_local *local = rx->local;
1659 struct ieee80211_sub_if_data *sdata = rx->sdata; 1698 struct ieee80211_sub_if_data *sdata = rx->sdata;
1699 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
1660 1700
1661 hdr = (struct ieee80211_hdr *) skb->data; 1701 hdr = (struct ieee80211_hdr *) skb->data;
1662 hdrlen = ieee80211_hdrlen(hdr->frame_control); 1702 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -1702,7 +1742,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
1702 1742
1703 mesh_hdr->ttl--; 1743 mesh_hdr->ttl--;
1704 1744
1705 if (rx->flags & IEEE80211_RX_RA_MATCH) { 1745 if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
1706 if (!mesh_hdr->ttl) 1746 if (!mesh_hdr->ttl)
1707 IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, 1747 IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh,
1708 dropped_frames_ttl); 1748 dropped_frames_ttl);
@@ -1909,13 +1949,38 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
1909} 1949}
1910 1950
1911static ieee80211_rx_result debug_noinline 1951static ieee80211_rx_result debug_noinline
1952ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
1953{
1954 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1955 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1956
1957 /*
1958 * From here on, look only at management frames.
1959 * Data and control frames are already handled,
1960 * and unknown (reserved) frames are useless.
1961 */
1962 if (rx->skb->len < 24)
1963 return RX_DROP_MONITOR;
1964
1965 if (!ieee80211_is_mgmt(mgmt->frame_control))
1966 return RX_DROP_MONITOR;
1967
1968 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1969 return RX_DROP_MONITOR;
1970
1971 if (ieee80211_drop_unencrypted_mgmt(rx))
1972 return RX_DROP_UNUSABLE;
1973
1974 return RX_CONTINUE;
1975}
1976
1977static ieee80211_rx_result debug_noinline
1912ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 1978ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1913{ 1979{
1914 struct ieee80211_local *local = rx->local; 1980 struct ieee80211_local *local = rx->local;
1915 struct ieee80211_sub_if_data *sdata = rx->sdata; 1981 struct ieee80211_sub_if_data *sdata = rx->sdata;
1916 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; 1982 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1917 struct sk_buff *nskb; 1983 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1918 struct ieee80211_rx_status *status;
1919 int len = rx->skb->len; 1984 int len = rx->skb->len;
1920 1985
1921 if (!ieee80211_is_action(mgmt->frame_control)) 1986 if (!ieee80211_is_action(mgmt->frame_control))
@@ -1928,10 +1993,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1928 if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) 1993 if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC)
1929 return RX_DROP_UNUSABLE; 1994 return RX_DROP_UNUSABLE;
1930 1995
1931 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1996 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1932 return RX_DROP_UNUSABLE;
1933
1934 if (ieee80211_drop_unencrypted_mgmt(rx))
1935 return RX_DROP_UNUSABLE; 1997 return RX_DROP_UNUSABLE;
1936 1998
1937 switch (mgmt->u.action.category) { 1999 switch (mgmt->u.action.category) {
@@ -2024,17 +2086,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2024 goto queue; 2086 goto queue;
2025 } 2087 }
2026 2088
2089 return RX_CONTINUE;
2090
2027 invalid: 2091 invalid:
2028 /* 2092 status->rx_flags |= IEEE80211_RX_MALFORMED_ACTION_FRM;
2029 * For AP mode, hostapd is responsible for handling any action 2093 /* will return in the next handlers */
2030 * frames that we didn't handle, including returning unknown 2094 return RX_CONTINUE;
2031 * ones. For all other modes we will return them to the sender, 2095
2032 * setting the 0x80 bit in the action category, as required by 2096 handled:
2033 * 802.11-2007 7.3.1.11. 2097 if (rx->sta)
2034 */ 2098 rx->sta->rx_packets++;
2035 if (sdata->vif.type == NL80211_IFTYPE_AP || 2099 dev_kfree_skb(rx->skb);
2036 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 2100 return RX_QUEUED;
2037 return RX_DROP_MONITOR; 2101
2102 queue:
2103 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2104 skb_queue_tail(&sdata->skb_queue, rx->skb);
2105 ieee80211_queue_work(&local->hw, &sdata->work);
2106 if (rx->sta)
2107 rx->sta->rx_packets++;
2108 return RX_QUEUED;
2109}
2110
2111static ieee80211_rx_result debug_noinline
2112ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
2113{
2114 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
2115
2116 /* skip known-bad action frames and return them in the next handler */
2117 if (status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM)
2118 return RX_CONTINUE;
2038 2119
2039 /* 2120 /*
2040 * Getting here means the kernel doesn't know how to handle 2121 * Getting here means the kernel doesn't know how to handle
@@ -2042,12 +2123,46 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2042 * so userspace can register for those to know whether ones 2123 * so userspace can register for those to know whether ones
2043 * it transmitted were processed or returned. 2124 * it transmitted were processed or returned.
2044 */ 2125 */
2045 status = IEEE80211_SKB_RXCB(rx->skb);
2046 2126
2047 if (cfg80211_rx_action(rx->sdata->dev, status->freq, 2127 if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq,
2048 rx->skb->data, rx->skb->len, 2128 rx->skb->data, rx->skb->len,
2049 GFP_ATOMIC)) 2129 GFP_ATOMIC)) {
2050 goto handled; 2130 if (rx->sta)
2131 rx->sta->rx_packets++;
2132 dev_kfree_skb(rx->skb);
2133 return RX_QUEUED;
2134 }
2135
2136
2137 return RX_CONTINUE;
2138}
2139
2140static ieee80211_rx_result debug_noinline
2141ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
2142{
2143 struct ieee80211_local *local = rx->local;
2144 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
2145 struct sk_buff *nskb;
2146 struct ieee80211_sub_if_data *sdata = rx->sdata;
2147 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
2148
2149 if (!ieee80211_is_action(mgmt->frame_control))
2150 return RX_CONTINUE;
2151
2152 /*
2153 * For AP mode, hostapd is responsible for handling any action
2154 * frames that we didn't handle, including returning unknown
2155 * ones. For all other modes we will return them to the sender,
2156 * setting the 0x80 bit in the action category, as required by
2157 * 802.11-2007 7.3.1.11.
2158 * Newer versions of hostapd shall also use the management frame
2159 * registration mechanisms, but older ones still use cooked
2160 * monitor interfaces so push all frames there.
2161 */
2162 if (!(status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM) &&
2163 (sdata->vif.type == NL80211_IFTYPE_AP ||
2164 sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
2165 return RX_DROP_MONITOR;
2051 2166
2052 /* do not return rejected action frames */ 2167 /* do not return rejected action frames */
2053 if (mgmt->u.action.category & 0x80) 2168 if (mgmt->u.action.category & 0x80)
@@ -2066,20 +2181,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2066 2181
2067 ieee80211_tx_skb(rx->sdata, nskb); 2182 ieee80211_tx_skb(rx->sdata, nskb);
2068 } 2183 }
2069
2070 handled:
2071 if (rx->sta)
2072 rx->sta->rx_packets++;
2073 dev_kfree_skb(rx->skb); 2184 dev_kfree_skb(rx->skb);
2074 return RX_QUEUED; 2185 return RX_QUEUED;
2075
2076 queue:
2077 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2078 skb_queue_tail(&sdata->skb_queue, rx->skb);
2079 ieee80211_queue_work(&local->hw, &sdata->work);
2080 if (rx->sta)
2081 rx->sta->rx_packets++;
2082 return RX_QUEUED;
2083} 2186}
2084 2187
2085static ieee80211_rx_result debug_noinline 2188static ieee80211_rx_result debug_noinline
@@ -2090,15 +2193,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
2090 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; 2193 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
2091 __le16 stype; 2194 __le16 stype;
2092 2195
2093 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
2094 return RX_DROP_MONITOR;
2095
2096 if (rx->skb->len < 24)
2097 return RX_DROP_MONITOR;
2098
2099 if (ieee80211_drop_unencrypted_mgmt(rx))
2100 return RX_DROP_UNUSABLE;
2101
2102 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); 2196 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb);
2103 if (rxs != RX_CONTINUE) 2197 if (rxs != RX_CONTINUE)
2104 return rxs; 2198 return rxs;
@@ -2199,6 +2293,14 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2199 struct net_device *prev_dev = NULL; 2293 struct net_device *prev_dev = NULL;
2200 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2294 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2201 2295
2296 /*
2297 * If cooked monitor has been processed already, then
2298 * don't do it again. If not, set the flag.
2299 */
2300 if (rx->flags & IEEE80211_RX_CMNTR)
2301 goto out_free_skb;
2302 rx->flags |= IEEE80211_RX_CMNTR;
2303
2202 if (skb_headroom(skb) < sizeof(*rthdr) && 2304 if (skb_headroom(skb) < sizeof(*rthdr) &&
2203 pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) 2305 pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC))
2204 goto out_free_skb; 2306 goto out_free_skb;
@@ -2253,29 +2355,53 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2253 if (prev_dev) { 2355 if (prev_dev) {
2254 skb->dev = prev_dev; 2356 skb->dev = prev_dev;
2255 netif_receive_skb(skb); 2357 netif_receive_skb(skb);
2256 skb = NULL; 2358 return;
2257 } else 2359 }
2258 goto out_free_skb;
2259
2260 return;
2261 2360
2262 out_free_skb: 2361 out_free_skb:
2263 dev_kfree_skb(skb); 2362 dev_kfree_skb(skb);
2264} 2363}
2265 2364
2365static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
2366 ieee80211_rx_result res)
2367{
2368 switch (res) {
2369 case RX_DROP_MONITOR:
2370 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2371 if (rx->sta)
2372 rx->sta->rx_dropped++;
2373 /* fall through */
2374 case RX_CONTINUE: {
2375 struct ieee80211_rate *rate = NULL;
2376 struct ieee80211_supported_band *sband;
2377 struct ieee80211_rx_status *status;
2378
2379 status = IEEE80211_SKB_RXCB((rx->skb));
2380
2381 sband = rx->local->hw.wiphy->bands[status->band];
2382 if (!(status->flag & RX_FLAG_HT))
2383 rate = &sband->bitrates[status->rate_idx];
2384
2385 ieee80211_rx_cooked_monitor(rx, rate);
2386 break;
2387 }
2388 case RX_DROP_UNUSABLE:
2389 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2390 if (rx->sta)
2391 rx->sta->rx_dropped++;
2392 dev_kfree_skb(rx->skb);
2393 break;
2394 case RX_QUEUED:
2395 I802_DEBUG_INC(rx->sdata->local->rx_handlers_queued);
2396 break;
2397 }
2398}
2266 2399
2267static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, 2400static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
2268 struct ieee80211_rx_data *rx, 2401 struct sk_buff_head *frames)
2269 struct sk_buff *skb,
2270 struct ieee80211_rate *rate)
2271{ 2402{
2272 struct sk_buff_head reorder_release;
2273 ieee80211_rx_result res = RX_DROP_MONITOR; 2403 ieee80211_rx_result res = RX_DROP_MONITOR;
2274 2404 struct sk_buff *skb;
2275 __skb_queue_head_init(&reorder_release);
2276
2277 rx->skb = skb;
2278 rx->sdata = sdata;
2279 2405
2280#define CALL_RXH(rxh) \ 2406#define CALL_RXH(rxh) \
2281 do { \ 2407 do { \
@@ -2284,23 +2410,14 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2284 goto rxh_next; \ 2410 goto rxh_next; \
2285 } while (0); 2411 } while (0);
2286 2412
2287 /* 2413 while ((skb = __skb_dequeue(frames))) {
2288 * NB: the rxh_next label works even if we jump
2289 * to it from here because then the list will
2290 * be empty, which is a trivial check
2291 */
2292 CALL_RXH(ieee80211_rx_h_passive_scan)
2293 CALL_RXH(ieee80211_rx_h_check)
2294
2295 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2296
2297 while ((skb = __skb_dequeue(&reorder_release))) {
2298 /* 2414 /*
2299 * all the other fields are valid across frames 2415 * all the other fields are valid across frames
2300 * that belong to an aMPDU since they are on the 2416 * that belong to an aMPDU since they are on the
2301 * same TID from the same station 2417 * same TID from the same station
2302 */ 2418 */
2303 rx->skb = skb; 2419 rx->skb = skb;
2420 rx->flags = 0;
2304 2421
2305 CALL_RXH(ieee80211_rx_h_decrypt) 2422 CALL_RXH(ieee80211_rx_h_decrypt)
2306 CALL_RXH(ieee80211_rx_h_check_more_data) 2423 CALL_RXH(ieee80211_rx_h_check_more_data)
@@ -2312,50 +2429,92 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2312 CALL_RXH(ieee80211_rx_h_remove_qos_control) 2429 CALL_RXH(ieee80211_rx_h_remove_qos_control)
2313 CALL_RXH(ieee80211_rx_h_amsdu) 2430 CALL_RXH(ieee80211_rx_h_amsdu)
2314#ifdef CONFIG_MAC80211_MESH 2431#ifdef CONFIG_MAC80211_MESH
2315 if (ieee80211_vif_is_mesh(&sdata->vif)) 2432 if (ieee80211_vif_is_mesh(&rx->sdata->vif))
2316 CALL_RXH(ieee80211_rx_h_mesh_fwding); 2433 CALL_RXH(ieee80211_rx_h_mesh_fwding);
2317#endif 2434#endif
2318 CALL_RXH(ieee80211_rx_h_data) 2435 CALL_RXH(ieee80211_rx_h_data)
2319 2436
2320 /* special treatment -- needs the queue */ 2437 /* special treatment -- needs the queue */
2321 res = ieee80211_rx_h_ctrl(rx, &reorder_release); 2438 res = ieee80211_rx_h_ctrl(rx, frames);
2322 if (res != RX_CONTINUE) 2439 if (res != RX_CONTINUE)
2323 goto rxh_next; 2440 goto rxh_next;
2324 2441
2442 CALL_RXH(ieee80211_rx_h_mgmt_check)
2325 CALL_RXH(ieee80211_rx_h_action) 2443 CALL_RXH(ieee80211_rx_h_action)
2444 CALL_RXH(ieee80211_rx_h_userspace_mgmt)
2445 CALL_RXH(ieee80211_rx_h_action_return)
2326 CALL_RXH(ieee80211_rx_h_mgmt) 2446 CALL_RXH(ieee80211_rx_h_mgmt)
2327 2447
2448 rxh_next:
2449 ieee80211_rx_handlers_result(rx, res);
2450
2328#undef CALL_RXH 2451#undef CALL_RXH
2452 }
2453}
2454
2455static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
2456{
2457 struct sk_buff_head reorder_release;
2458 ieee80211_rx_result res = RX_DROP_MONITOR;
2459
2460 __skb_queue_head_init(&reorder_release);
2461
2462#define CALL_RXH(rxh) \
2463 do { \
2464 res = rxh(rx); \
2465 if (res != RX_CONTINUE) \
2466 goto rxh_next; \
2467 } while (0);
2468
2469 CALL_RXH(ieee80211_rx_h_passive_scan)
2470 CALL_RXH(ieee80211_rx_h_check)
2471
2472 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2473
2474 ieee80211_rx_handlers(rx, &reorder_release);
2475 return;
2329 2476
2330 rxh_next: 2477 rxh_next:
2331 switch (res) { 2478 ieee80211_rx_handlers_result(rx, res);
2332 case RX_DROP_MONITOR: 2479
2333 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2480#undef CALL_RXH
2334 if (rx->sta) 2481}
2335 rx->sta->rx_dropped++; 2482
2336 /* fall through */ 2483/*
2337 case RX_CONTINUE: 2484 * This function makes calls into the RX path. Therefore the
2338 ieee80211_rx_cooked_monitor(rx, rate); 2485 * caller must hold the sta_info->lock and everything has to
2339 break; 2486 * be under rcu_read_lock protection as well.
2340 case RX_DROP_UNUSABLE: 2487 */
2341 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2488void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
2342 if (rx->sta) 2489{
2343 rx->sta->rx_dropped++; 2490 struct sk_buff_head frames;
2344 dev_kfree_skb(rx->skb); 2491 struct ieee80211_rx_data rx = {
2345 break; 2492 .sta = sta,
2346 case RX_QUEUED: 2493 .sdata = sta->sdata,
2347 I802_DEBUG_INC(sdata->local->rx_handlers_queued); 2494 .local = sta->local,
2348 break; 2495 .queue = tid,
2349 } 2496 };
2350 } 2497 struct tid_ampdu_rx *tid_agg_rx;
2498
2499 tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
2500 if (!tid_agg_rx)
2501 return;
2502
2503 __skb_queue_head_init(&frames);
2504
2505 spin_lock(&tid_agg_rx->reorder_lock);
2506 ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx, &frames);
2507 spin_unlock(&tid_agg_rx->reorder_lock);
2508
2509 ieee80211_rx_handlers(&rx, &frames);
2351} 2510}
2352 2511
2353/* main receive path */ 2512/* main receive path */
2354 2513
2355static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, 2514static int prepare_for_handlers(struct ieee80211_rx_data *rx,
2356 struct ieee80211_rx_data *rx,
2357 struct ieee80211_hdr *hdr) 2515 struct ieee80211_hdr *hdr)
2358{ 2516{
2517 struct ieee80211_sub_if_data *sdata = rx->sdata;
2359 struct sk_buff *skb = rx->skb; 2518 struct sk_buff *skb = rx->skb;
2360 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2519 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2361 u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); 2520 u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
@@ -2369,7 +2528,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2369 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { 2528 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) {
2370 if (!(sdata->dev->flags & IFF_PROMISC)) 2529 if (!(sdata->dev->flags & IFF_PROMISC))
2371 return 0; 2530 return 0;
2372 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2531 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2373 } 2532 }
2374 break; 2533 break;
2375 case NL80211_IFTYPE_ADHOC: 2534 case NL80211_IFTYPE_ADHOC:
@@ -2379,15 +2538,15 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2379 return 1; 2538 return 1;
2380 } 2539 }
2381 else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { 2540 else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
2382 if (!(rx->flags & IEEE80211_RX_IN_SCAN)) 2541 if (!(status->rx_flags & IEEE80211_RX_IN_SCAN))
2383 return 0; 2542 return 0;
2384 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2543 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2385 } else if (!multicast && 2544 } else if (!multicast &&
2386 compare_ether_addr(sdata->vif.addr, 2545 compare_ether_addr(sdata->vif.addr,
2387 hdr->addr1) != 0) { 2546 hdr->addr1) != 0) {
2388 if (!(sdata->dev->flags & IFF_PROMISC)) 2547 if (!(sdata->dev->flags & IFF_PROMISC))
2389 return 0; 2548 return 0;
2390 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2549 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2391 } else if (!rx->sta) { 2550 } else if (!rx->sta) {
2392 int rate_idx; 2551 int rate_idx;
2393 if (status->flag & RX_FLAG_HT) 2552 if (status->flag & RX_FLAG_HT)
@@ -2405,7 +2564,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2405 if (!(sdata->dev->flags & IFF_PROMISC)) 2564 if (!(sdata->dev->flags & IFF_PROMISC))
2406 return 0; 2565 return 0;
2407 2566
2408 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2567 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2409 } 2568 }
2410 break; 2569 break;
2411 case NL80211_IFTYPE_AP_VLAN: 2570 case NL80211_IFTYPE_AP_VLAN:
@@ -2416,9 +2575,9 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2416 return 0; 2575 return 0;
2417 } else if (!ieee80211_bssid_match(bssid, 2576 } else if (!ieee80211_bssid_match(bssid,
2418 sdata->vif.addr)) { 2577 sdata->vif.addr)) {
2419 if (!(rx->flags & IEEE80211_RX_IN_SCAN)) 2578 if (!(status->rx_flags & IEEE80211_RX_IN_SCAN))
2420 return 0; 2579 return 0;
2421 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2580 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2422 } 2581 }
2423 break; 2582 break;
2424 case NL80211_IFTYPE_WDS: 2583 case NL80211_IFTYPE_WDS:
@@ -2427,9 +2586,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2427 if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) 2586 if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2))
2428 return 0; 2587 return 0;
2429 break; 2588 break;
2430 case NL80211_IFTYPE_MONITOR: 2589 default:
2431 case NL80211_IFTYPE_UNSPECIFIED:
2432 case __NL80211_IFTYPE_AFTER_LAST:
2433 /* should never get here */ 2590 /* should never get here */
2434 WARN_ON(1); 2591 WARN_ON(1);
2435 break; 2592 break;
@@ -2439,12 +2596,56 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2439} 2596}
2440 2597
2441/* 2598/*
 2599 * This function returns whether or not the SKB
 2600 * was destined for RX processing, which,
2601 * if consume is true, is equivalent to whether
2602 * or not the skb was consumed.
2603 */
2604static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
2605 struct sk_buff *skb, bool consume)
2606{
2607 struct ieee80211_local *local = rx->local;
2608 struct ieee80211_sub_if_data *sdata = rx->sdata;
2609 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2610 struct ieee80211_hdr *hdr = (void *)skb->data;
2611 int prepares;
2612
2613 rx->skb = skb;
2614 status->rx_flags |= IEEE80211_RX_RA_MATCH;
2615 prepares = prepare_for_handlers(rx, hdr);
2616
2617 if (!prepares)
2618 return false;
2619
2620 if (status->flag & RX_FLAG_MMIC_ERROR) {
2621 if (status->rx_flags & IEEE80211_RX_RA_MATCH)
2622 ieee80211_rx_michael_mic_report(hdr, rx);
2623 return false;
2624 }
2625
2626 if (!consume) {
2627 skb = skb_copy(skb, GFP_ATOMIC);
2628 if (!skb) {
2629 if (net_ratelimit())
2630 wiphy_debug(local->hw.wiphy,
2631 "failed to copy multicast frame for %s\n",
2632 sdata->name);
2633 return true;
2634 }
2635
2636 rx->skb = skb;
2637 }
2638
2639 ieee80211_invoke_rx_handlers(rx);
2640 return true;
2641}
2642
2643/*
2442 * This is the actual Rx frames handler. as it blongs to Rx path it must 2644 * This is the actual Rx frames handler. as it blongs to Rx path it must
2443 * be called with rcu_read_lock protection. 2645 * be called with rcu_read_lock protection.
2444 */ 2646 */
2445static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, 2647static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2446 struct sk_buff *skb, 2648 struct sk_buff *skb)
2447 struct ieee80211_rate *rate)
2448{ 2649{
2449 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2650 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2450 struct ieee80211_local *local = hw_to_local(hw); 2651 struct ieee80211_local *local = hw_to_local(hw);
@@ -2452,11 +2653,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2452 struct ieee80211_hdr *hdr; 2653 struct ieee80211_hdr *hdr;
2453 __le16 fc; 2654 __le16 fc;
2454 struct ieee80211_rx_data rx; 2655 struct ieee80211_rx_data rx;
2455 int prepares; 2656 struct ieee80211_sub_if_data *prev;
2456 struct ieee80211_sub_if_data *prev = NULL; 2657 struct sta_info *sta, *tmp, *prev_sta;
2457 struct sk_buff *skb_new;
2458 struct sta_info *sta, *tmp;
2459 bool found_sta = false;
2460 int err = 0; 2658 int err = 0;
2461 2659
2462 fc = ((struct ieee80211_hdr *)skb->data)->frame_control; 2660 fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
@@ -2469,7 +2667,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2469 2667
2470 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || 2668 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
2471 test_bit(SCAN_OFF_CHANNEL, &local->scanning))) 2669 test_bit(SCAN_OFF_CHANNEL, &local->scanning)))
2472 rx.flags |= IEEE80211_RX_IN_SCAN; 2670 status->rx_flags |= IEEE80211_RX_IN_SCAN;
2473 2671
2474 if (ieee80211_is_mgmt(fc)) 2672 if (ieee80211_is_mgmt(fc))
2475 err = skb_linearize(skb); 2673 err = skb_linearize(skb);
@@ -2486,91 +2684,67 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2486 ieee80211_verify_alignment(&rx); 2684 ieee80211_verify_alignment(&rx);
2487 2685
2488 if (ieee80211_is_data(fc)) { 2686 if (ieee80211_is_data(fc)) {
2687 prev_sta = NULL;
2688
2489 for_each_sta_info(local, hdr->addr2, sta, tmp) { 2689 for_each_sta_info(local, hdr->addr2, sta, tmp) {
2490 rx.sta = sta; 2690 if (!prev_sta) {
2491 found_sta = true; 2691 prev_sta = sta;
2492 rx.sdata = sta->sdata;
2493
2494 rx.flags |= IEEE80211_RX_RA_MATCH;
2495 prepares = prepare_for_handlers(rx.sdata, &rx, hdr);
2496 if (prepares) {
2497 if (status->flag & RX_FLAG_MMIC_ERROR) {
2498 if (rx.flags & IEEE80211_RX_RA_MATCH)
2499 ieee80211_rx_michael_mic_report(hdr, &rx);
2500 } else
2501 prev = rx.sdata;
2502 }
2503 }
2504 }
2505 if (!found_sta) {
2506 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
2507 if (!ieee80211_sdata_running(sdata))
2508 continue; 2692 continue;
2693 }
2509 2694
2510 if (sdata->vif.type == NL80211_IFTYPE_MONITOR || 2695 rx.sta = prev_sta;
2511 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 2696 rx.sdata = prev_sta->sdata;
2512 continue; 2697 ieee80211_prepare_and_rx_handle(&rx, skb, false);
2513 2698
2514 /* 2699 prev_sta = sta;
2515 * frame is destined for this interface, but if it's 2700 }
2516 * not also for the previous one we handle that after
2517 * the loop to avoid copying the SKB once too much
2518 */
2519 2701
2520 if (!prev) { 2702 if (prev_sta) {
2521 prev = sdata; 2703 rx.sta = prev_sta;
2522 continue; 2704 rx.sdata = prev_sta->sdata;
2523 }
2524 2705
2525 rx.sta = sta_info_get_bss(prev, hdr->addr2); 2706 if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
2707 return;
2708 }
2709 }
2526 2710
2527 rx.flags |= IEEE80211_RX_RA_MATCH; 2711 prev = NULL;
2528 prepares = prepare_for_handlers(prev, &rx, hdr);
2529 2712
2530 if (!prepares) 2713 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
2531 goto next; 2714 if (!ieee80211_sdata_running(sdata))
2715 continue;
2532 2716
2533 if (status->flag & RX_FLAG_MMIC_ERROR) { 2717 if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
2534 rx.sdata = prev; 2718 sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
2535 if (rx.flags & IEEE80211_RX_RA_MATCH) 2719 continue;
2536 ieee80211_rx_michael_mic_report(hdr,
2537 &rx);
2538 goto next;
2539 }
2540 2720
2541 /* 2721 /*
2542 * frame was destined for the previous interface 2722 * frame is destined for this interface, but if it's
2543 * so invoke RX handlers for it 2723 * not also for the previous one we handle that after
2544 */ 2724 * the loop to avoid copying the SKB once too much
2725 */
2545 2726
2546 skb_new = skb_copy(skb, GFP_ATOMIC); 2727 if (!prev) {
2547 if (!skb_new) {
2548 if (net_ratelimit())
2549 printk(KERN_DEBUG "%s: failed to copy "
2550 "multicast frame for %s\n",
2551 wiphy_name(local->hw.wiphy),
2552 prev->name);
2553 goto next;
2554 }
2555 ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate);
2556next:
2557 prev = sdata; 2728 prev = sdata;
2729 continue;
2558 } 2730 }
2559 2731
2560 if (prev) { 2732 rx.sta = sta_info_get_bss(prev, hdr->addr2);
2561 rx.sta = sta_info_get_bss(prev, hdr->addr2); 2733 rx.sdata = prev;
2734 ieee80211_prepare_and_rx_handle(&rx, skb, false);
2562 2735
2563 rx.flags |= IEEE80211_RX_RA_MATCH; 2736 prev = sdata;
2564 prepares = prepare_for_handlers(prev, &rx, hdr); 2737 }
2565 2738
2566 if (!prepares) 2739 if (prev) {
2567 prev = NULL; 2740 rx.sta = sta_info_get_bss(prev, hdr->addr2);
2568 } 2741 rx.sdata = prev;
2742
2743 if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
2744 return;
2569 } 2745 }
2570 if (prev) 2746
2571 ieee80211_invoke_rx_handlers(prev, &rx, skb, rate); 2747 dev_kfree_skb(skb);
2572 else
2573 dev_kfree_skb(skb);
2574} 2748}
2575 2749
2576/* 2750/*
@@ -2611,30 +2785,41 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
2611 if (WARN_ON(!local->started)) 2785 if (WARN_ON(!local->started))
2612 goto drop; 2786 goto drop;
2613 2787
2614 if (status->flag & RX_FLAG_HT) { 2788 if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) {
2615 /* 2789 /*
2616 * rate_idx is MCS index, which can be [0-76] as documented on: 2790 * Validate the rate, unless a PLCP error means that
2617 * 2791 * we probably can't have a valid rate here anyway.
2618 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
2619 *
2620 * Anything else would be some sort of driver or hardware error.
2621 * The driver should catch hardware errors.
2622 */ 2792 */
2623 if (WARN((status->rate_idx < 0 || 2793
2624 status->rate_idx > 76), 2794 if (status->flag & RX_FLAG_HT) {
2625 "Rate marked as an HT rate but passed " 2795 /*
2626 "status->rate_idx is not " 2796 * rate_idx is MCS index, which can be [0-76]
2627 "an MCS index [0-76]: %d (0x%02x)\n", 2797 * as documented on:
2628 status->rate_idx, 2798 *
2629 status->rate_idx)) 2799 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
2630 goto drop; 2800 *
2631 } else { 2801 * Anything else would be some sort of driver or
2632 if (WARN_ON(status->rate_idx < 0 || 2802 * hardware error. The driver should catch hardware
2633 status->rate_idx >= sband->n_bitrates)) 2803 * errors.
2634 goto drop; 2804 */
2635 rate = &sband->bitrates[status->rate_idx]; 2805 if (WARN((status->rate_idx < 0 ||
2806 status->rate_idx > 76),
2807 "Rate marked as an HT rate but passed "
2808 "status->rate_idx is not "
2809 "an MCS index [0-76]: %d (0x%02x)\n",
2810 status->rate_idx,
2811 status->rate_idx))
2812 goto drop;
2813 } else {
2814 if (WARN_ON(status->rate_idx < 0 ||
2815 status->rate_idx >= sband->n_bitrates))
2816 goto drop;
2817 rate = &sband->bitrates[status->rate_idx];
2818 }
2636 } 2819 }
2637 2820
2821 status->rx_flags = 0;
2822
2638 /* 2823 /*
2639 * key references and virtual interfaces are protected using RCU 2824 * key references and virtual interfaces are protected using RCU
2640 * and this requires that we are in a read-side RCU section during 2825 * and this requires that we are in a read-side RCU section during
@@ -2654,7 +2839,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
2654 return; 2839 return;
2655 } 2840 }
2656 2841
2657 __ieee80211_rx_handle_packet(hw, skb, rate); 2842 __ieee80211_rx_handle_packet(hw, skb);
2658 2843
2659 rcu_read_unlock(); 2844 rcu_read_unlock();
2660 2845
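
The rewritten __ieee80211_rx_handle_packet() above walks every matching station and interface and hands each one the frame through ieee80211_prepare_and_rx_handle(), copying the skb for every recipient except the last, which consumes the original. A minimal user-space sketch of that copy-for-all-but-the-last delivery pattern follows; every name in it (fan_out, buf_copy, deliver) is illustrative and not a mac80211 symbol. For n recipients it makes at most n-1 copies and never leaks the buffer, which is why the kernel code can simply return once a consuming call succeeds.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {                               /* stand-in for struct sk_buff */
        char data[32];
};

static struct buf *buf_copy(const struct buf *b)
{
        struct buf *c = malloc(sizeof(*c));

        if (c)
                memcpy(c, b, sizeof(*c));
        return c;
}

/* deliver() consumes (frees) the buffer it is handed */
static void deliver(const char *recipient, struct buf *b)
{
        printf("%s got \"%s\"\n", recipient, b->data);
        free(b);
}

static void fan_out(const char **recipients, int n, struct buf *b)
{
        int i;

        for (i = 0; i + 1 < n; i++) {
                struct buf *copy = buf_copy(b);

                if (copy)               /* on allocation failure, skip this one */
                        deliver(recipients[i], copy);
        }

        if (n > 0)
                deliver(recipients[n - 1], b);  /* last recipient consumes the original */
        else
                free(b);                        /* nobody matched */
}

int main(void)
{
        const char *ifaces[] = { "wlan0", "wlan1", "wlan2" };
        struct buf *b = malloc(sizeof(*b));

        if (!b)
                return 1;
        strcpy(b->data, "multicast frame");
        fan_out(ifaces, 3, b);
        return 0;
}
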
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 872d7b6ef6b3..5171a9581631 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -242,20 +242,19 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
242 local->hw_scan_req->n_channels = n_chans; 242 local->hw_scan_req->n_channels = n_chans;
243 243
244 ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie, 244 ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie,
245 req->ie, req->ie_len, band); 245 req->ie, req->ie_len, band, (u32) -1,
246 0);
246 local->hw_scan_req->ie_len = ielen; 247 local->hw_scan_req->ie_len = ielen;
247 248
248 return true; 249 return true;
249} 250}
250 251
251void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) 252static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
252{ 253{
253 struct ieee80211_local *local = hw_to_local(hw); 254 struct ieee80211_local *local = hw_to_local(hw);
254 bool was_hw_scan; 255 bool was_hw_scan;
255 256
256 trace_api_scan_completed(local, aborted); 257 mutex_lock(&local->mtx);
257
258 mutex_lock(&local->scan_mtx);
259 258
260 /* 259 /*
261 * It's ok to abort a not-yet-running scan (that 260 * It's ok to abort a not-yet-running scan (that
@@ -267,7 +266,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
267 aborted = true; 266 aborted = true;
268 267
269 if (WARN_ON(!local->scan_req)) { 268 if (WARN_ON(!local->scan_req)) {
270 mutex_unlock(&local->scan_mtx); 269 mutex_unlock(&local->mtx);
271 return; 270 return;
272 } 271 }
273 272
@@ -275,7 +274,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
275 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { 274 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) {
276 ieee80211_queue_delayed_work(&local->hw, 275 ieee80211_queue_delayed_work(&local->hw,
277 &local->scan_work, 0); 276 &local->scan_work, 0);
278 mutex_unlock(&local->scan_mtx); 277 mutex_unlock(&local->mtx);
279 return; 278 return;
280 } 279 }
281 280
@@ -291,7 +290,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
291 local->scan_channel = NULL; 290 local->scan_channel = NULL;
292 291
293 /* we only have to protect scan_req and hw/sw scan */ 292 /* we only have to protect scan_req and hw/sw scan */
294 mutex_unlock(&local->scan_mtx); 293 mutex_unlock(&local->mtx);
295 294
296 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 295 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
297 if (was_hw_scan) 296 if (was_hw_scan)
@@ -304,12 +303,26 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
304 ieee80211_offchannel_return(local, true); 303 ieee80211_offchannel_return(local, true);
305 304
306 done: 305 done:
306 mutex_lock(&local->mtx);
307 ieee80211_recalc_idle(local); 307 ieee80211_recalc_idle(local);
308 mutex_unlock(&local->mtx);
308 ieee80211_mlme_notify_scan_completed(local); 309 ieee80211_mlme_notify_scan_completed(local);
309 ieee80211_ibss_notify_scan_completed(local); 310 ieee80211_ibss_notify_scan_completed(local);
310 ieee80211_mesh_notify_scan_completed(local); 311 ieee80211_mesh_notify_scan_completed(local);
311 ieee80211_queue_work(&local->hw, &local->work_work); 312 ieee80211_queue_work(&local->hw, &local->work_work);
312} 313}
314
315void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
316{
317 struct ieee80211_local *local = hw_to_local(hw);
318
319 trace_api_scan_completed(local, aborted);
320
321 set_bit(SCAN_COMPLETED, &local->scanning);
322 if (aborted)
323 set_bit(SCAN_ABORTED, &local->scanning);
324 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
325}
313EXPORT_SYMBOL(ieee80211_scan_completed); 326EXPORT_SYMBOL(ieee80211_scan_completed);
314 327
315static int ieee80211_start_sw_scan(struct ieee80211_local *local) 328static int ieee80211_start_sw_scan(struct ieee80211_local *local)
@@ -447,7 +460,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
447 460
448 /* if no more bands/channels left, complete scan and advance to the idle state */ 461 /* if no more bands/channels left, complete scan and advance to the idle state */
449 if (local->scan_channel_idx >= local->scan_req->n_channels) { 462 if (local->scan_channel_idx >= local->scan_req->n_channels) {
450 ieee80211_scan_completed(&local->hw, false); 463 __ieee80211_scan_completed(&local->hw, false);
451 return 1; 464 return 1;
452 } 465 }
453 466
@@ -639,17 +652,25 @@ void ieee80211_scan_work(struct work_struct *work)
639 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 652 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
640 unsigned long next_delay = 0; 653 unsigned long next_delay = 0;
641 654
642 mutex_lock(&local->scan_mtx); 655 if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) {
656 bool aborted;
657
658 aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning);
659 __ieee80211_scan_completed(&local->hw, aborted);
660 return;
661 }
662
663 mutex_lock(&local->mtx);
643 if (!sdata || !local->scan_req) { 664 if (!sdata || !local->scan_req) {
644 mutex_unlock(&local->scan_mtx); 665 mutex_unlock(&local->mtx);
645 return; 666 return;
646 } 667 }
647 668
648 if (local->hw_scan_req) { 669 if (local->hw_scan_req) {
649 int rc = drv_hw_scan(local, sdata, local->hw_scan_req); 670 int rc = drv_hw_scan(local, sdata, local->hw_scan_req);
650 mutex_unlock(&local->scan_mtx); 671 mutex_unlock(&local->mtx);
651 if (rc) 672 if (rc)
652 ieee80211_scan_completed(&local->hw, true); 673 __ieee80211_scan_completed(&local->hw, true);
653 return; 674 return;
654 } 675 }
655 676
@@ -661,20 +682,20 @@ void ieee80211_scan_work(struct work_struct *work)
661 local->scan_sdata = NULL; 682 local->scan_sdata = NULL;
662 683
663 rc = __ieee80211_start_scan(sdata, req); 684 rc = __ieee80211_start_scan(sdata, req);
664 mutex_unlock(&local->scan_mtx); 685 mutex_unlock(&local->mtx);
665 686
666 if (rc) 687 if (rc)
667 ieee80211_scan_completed(&local->hw, true); 688 __ieee80211_scan_completed(&local->hw, true);
668 return; 689 return;
669 } 690 }
670 691
671 mutex_unlock(&local->scan_mtx); 692 mutex_unlock(&local->mtx);
672 693
673 /* 694 /*
674 * Avoid re-scheduling when the sdata is going away. 695 * Avoid re-scheduling when the sdata is going away.
675 */ 696 */
676 if (!ieee80211_sdata_running(sdata)) { 697 if (!ieee80211_sdata_running(sdata)) {
677 ieee80211_scan_completed(&local->hw, true); 698 __ieee80211_scan_completed(&local->hw, true);
678 return; 699 return;
679 } 700 }
680 701
@@ -711,9 +732,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
711{ 732{
712 int res; 733 int res;
713 734
714 mutex_lock(&sdata->local->scan_mtx); 735 mutex_lock(&sdata->local->mtx);
715 res = __ieee80211_start_scan(sdata, req); 736 res = __ieee80211_start_scan(sdata, req);
716 mutex_unlock(&sdata->local->scan_mtx); 737 mutex_unlock(&sdata->local->mtx);
717 738
718 return res; 739 return res;
719} 740}
@@ -726,7 +747,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
726 int ret = -EBUSY; 747 int ret = -EBUSY;
727 enum ieee80211_band band; 748 enum ieee80211_band band;
728 749
729 mutex_lock(&local->scan_mtx); 750 mutex_lock(&local->mtx);
730 751
731 /* busy scanning */ 752 /* busy scanning */
732 if (local->scan_req) 753 if (local->scan_req)
@@ -761,7 +782,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
761 782
762 ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req); 783 ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
763 unlock: 784 unlock:
764 mutex_unlock(&local->scan_mtx); 785 mutex_unlock(&local->mtx);
765 return ret; 786 return ret;
766} 787}
767 788
@@ -775,11 +796,11 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
775 * Only call this function when a scan can't be 796 * Only call this function when a scan can't be
776 * queued -- mostly at suspend under RTNL. 797 * queued -- mostly at suspend under RTNL.
777 */ 798 */
778 mutex_lock(&local->scan_mtx); 799 mutex_lock(&local->mtx);
779 abortscan = test_bit(SCAN_SW_SCANNING, &local->scanning) || 800 abortscan = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
780 (!local->scanning && local->scan_req); 801 (!local->scanning && local->scan_req);
781 mutex_unlock(&local->scan_mtx); 802 mutex_unlock(&local->mtx);
782 803
783 if (abortscan) 804 if (abortscan)
784 ieee80211_scan_completed(&local->hw, true); 805 __ieee80211_scan_completed(&local->hw, true);
785} 806}
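
The scan.c changes above stop ieee80211_scan_completed() from taking locks directly: it only sets the SCAN_COMPLETED/SCAN_ABORTED bits and queues the scan work, and __ieee80211_scan_completed() performs the mutex-protected completion from the worker. A rough user-space analogue of that defer-to-worker pattern follows; C11 atomics and pthreads stand in for set_bit() and the mac80211 workqueue, and all names are illustrative.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define FLAG_COMPLETED (1u << 0)
#define FLAG_ABORTED   (1u << 1)

static atomic_uint scan_flags;
static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* Safe to call from any context: records state, takes no locks. */
static void scan_completed(bool aborted)
{
        unsigned int bits = FLAG_COMPLETED | (aborted ? FLAG_ABORTED : 0);

        atomic_fetch_or(&scan_flags, bits);
        /* the kernel code queues local->scan_work here */
}

/* Worker context: may sleep and take the shared mutex. */
static void *scan_work(void *arg)
{
        unsigned int bits = atomic_exchange(&scan_flags, 0);

        if (bits & FLAG_COMPLETED) {
                pthread_mutex_lock(&big_lock);
                printf("finishing scan, aborted=%d\n",
                       !!(bits & FLAG_ABORTED));
                pthread_mutex_unlock(&big_lock);
        }
        return arg;
}

int main(void)
{
        pthread_t worker;

        scan_completed(true);   /* e.g. reported from a driver completion path */
        pthread_create(&worker, NULL, scan_work, NULL);
        pthread_join(worker, NULL);
        return 0;
}
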
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6d86f0c1ad04..ca2cba9cea87 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -125,7 +125,7 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
125 lockdep_is_held(&local->sta_mtx)); 125 lockdep_is_held(&local->sta_mtx));
126 while (sta) { 126 while (sta) {
127 if ((sta->sdata == sdata || 127 if ((sta->sdata == sdata ||
128 sta->sdata->bss == sdata->bss) && 128 (sta->sdata->bss && sta->sdata->bss == sdata->bss)) &&
129 memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) 129 memcmp(sta->sta.addr, addr, ETH_ALEN) == 0)
130 break; 130 break;
131 sta = rcu_dereference_check(sta->hnext, 131 sta = rcu_dereference_check(sta->hnext,
@@ -174,8 +174,7 @@ static void __sta_info_free(struct ieee80211_local *local,
174 } 174 }
175 175
176#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 176#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
177 printk(KERN_DEBUG "%s: Destroyed STA %pM\n", 177 wiphy_debug(local->hw.wiphy, "Destroyed STA %pM\n", sta->sta.addr);
178 wiphy_name(local->hw.wiphy), sta->sta.addr);
179#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 178#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
180 179
181 kfree(sta); 180 kfree(sta);
@@ -262,8 +261,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
262 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); 261 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
263 262
264#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 263#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
265 printk(KERN_DEBUG "%s: Allocated STA %pM\n", 264 wiphy_debug(local->hw.wiphy, "Allocated STA %pM\n", sta->sta.addr);
266 wiphy_name(local->hw.wiphy), sta->sta.addr);
267#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 265#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
268 266
269#ifdef CONFIG_MAC80211_MESH 267#ifdef CONFIG_MAC80211_MESH
@@ -282,7 +280,7 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
282 unsigned long flags; 280 unsigned long flags;
283 int err = 0; 281 int err = 0;
284 282
285 WARN_ON(!mutex_is_locked(&local->sta_mtx)); 283 lockdep_assert_held(&local->sta_mtx);
286 284
287 /* notify driver */ 285 /* notify driver */
288 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 286 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -300,8 +298,9 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
300 sta->uploaded = true; 298 sta->uploaded = true;
301#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 299#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
302 if (async) 300 if (async)
303 printk(KERN_DEBUG "%s: Finished adding IBSS STA %pM\n", 301 wiphy_debug(local->hw.wiphy,
304 wiphy_name(local->hw.wiphy), sta->sta.addr); 302 "Finished adding IBSS STA %pM\n",
303 sta->sta.addr);
305#endif 304#endif
306 } 305 }
307 306
@@ -411,8 +410,8 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
411 spin_unlock_irqrestore(&local->sta_lock, flags); 410 spin_unlock_irqrestore(&local->sta_lock, flags);
412 411
413#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 412#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
414 printk(KERN_DEBUG "%s: Added IBSS STA %pM\n", 413 wiphy_debug(local->hw.wiphy, "Added IBSS STA %pM\n",
415 wiphy_name(local->hw.wiphy), sta->sta.addr); 414 sta->sta.addr);
416#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 415#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
417 416
418 ieee80211_queue_work(&local->hw, &local->sta_finish_work); 417 ieee80211_queue_work(&local->hw, &local->sta_finish_work);
@@ -459,8 +458,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
459 } 458 }
460 459
461#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 460#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
462 printk(KERN_DEBUG "%s: Inserted STA %pM\n", 461 wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr);
463 wiphy_name(local->hw.wiphy), sta->sta.addr);
464#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 462#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
465 463
466 /* move reference to rcu-protected */ 464 /* move reference to rcu-protected */
@@ -690,8 +688,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
690#endif 688#endif
691 689
692#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 690#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
693 printk(KERN_DEBUG "%s: Removed STA %pM\n", 691 wiphy_debug(local->hw.wiphy, "Removed STA %pM\n", sta->sta.addr);
694 wiphy_name(local->hw.wiphy), sta->sta.addr);
695#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 692#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
696 cancel_work_sync(&sta->drv_unblock_wk); 693 cancel_work_sync(&sta->drv_unblock_wk);
697 694
@@ -841,13 +838,20 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
841 mutex_unlock(&local->sta_mtx); 838 mutex_unlock(&local->sta_mtx);
842} 839}
843 840
844struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, 841struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw,
845 const u8 *addr) 842 const u8 *addr,
843 const u8 *localaddr)
846{ 844{
847 struct sta_info *sta, *nxt; 845 struct sta_info *sta, *nxt;
848 846
849 /* Just return a random station ... first in list ... */ 847 /*
848 * Just return a random station if localaddr is NULL
849 * ... first in list.
850 */
850 for_each_sta_info(hw_to_local(hw), addr, sta, nxt) { 851 for_each_sta_info(hw_to_local(hw), addr, sta, nxt) {
852 if (localaddr &&
853 compare_ether_addr(sta->sdata->vif.addr, localaddr) != 0)
854 continue;
851 if (!sta->uploaded) 855 if (!sta->uploaded)
852 return NULL; 856 return NULL;
853 return &sta->sta; 857 return &sta->sta;
@@ -855,7 +859,7 @@ struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw,
855 859
856 return NULL; 860 return NULL;
857} 861}
858EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); 862EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_ifaddr);
859 863
860struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, 864struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif,
861 const u8 *addr) 865 const u8 *addr)
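
ieee80211_find_sta_by_hw() becomes ieee80211_find_sta_by_ifaddr() above: the lookup still matches on the peer address, but an optional local interface address narrows the result to one virtual interface. A self-contained sketch of that two-key lookup; the table, entry names and addresses below are made up for illustration.

#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

struct entry {
        unsigned char peer[ETH_ALEN];   /* station (remote) address */
        unsigned char local[ETH_ALEN];  /* owning interface address */
        const char *name;
};

static const struct entry *find_entry(const struct entry *tbl, int n,
                                      const unsigned char *peer,
                                      const unsigned char *local)
{
        int i;

        for (i = 0; i < n; i++) {
                if (memcmp(tbl[i].peer, peer, ETH_ALEN) != 0)
                        continue;
                /* a NULL local address means "any interface" */
                if (local && memcmp(tbl[i].local, local, ETH_ALEN) != 0)
                        continue;
                return &tbl[i];
        }
        return NULL;
}

int main(void)
{
        static const struct entry tbl[] = {
                { {1, 2, 3, 4, 5, 6}, {0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0x01}, "sta seen on wlan0" },
                { {1, 2, 3, 4, 5, 6}, {0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0x02}, "sta seen on wlan1" },
        };
        static const unsigned char peer[ETH_ALEN] = {1, 2, 3, 4, 5, 6};
        static const unsigned char wlan1[ETH_ALEN] = {0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0x02};
        const struct entry *e = find_entry(tbl, 2, peer, wlan1);

        printf("%s\n", e ? e->name : "not found");
        return 0;
}
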
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 54262e72376d..810c5ce98316 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -103,6 +103,7 @@ struct tid_ampdu_tx {
103 * @reorder_buf: buffer to reorder incoming aggregated MPDUs 103 * @reorder_buf: buffer to reorder incoming aggregated MPDUs
104 * @reorder_time: jiffies when skb was added 104 * @reorder_time: jiffies when skb was added
105 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) 105 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
106 * @reorder_timer: releases expired frames from the reorder buffer.
106 * @head_seq_num: head sequence number in reordering buffer. 107 * @head_seq_num: head sequence number in reordering buffer.
107 * @stored_mpdu_num: number of MPDUs in reordering buffer 108 * @stored_mpdu_num: number of MPDUs in reordering buffer
108 * @ssn: Starting Sequence Number expected to be aggregated. 109 * @ssn: Starting Sequence Number expected to be aggregated.
@@ -110,20 +111,25 @@ struct tid_ampdu_tx {
110 * @timeout: reset timer value (in TUs). 111 * @timeout: reset timer value (in TUs).
111 * @dialog_token: dialog token for aggregation session 112 * @dialog_token: dialog token for aggregation session
112 * @rcu_head: RCU head used for freeing this struct 113 * @rcu_head: RCU head used for freeing this struct
114 * @reorder_lock: serializes access to reorder buffer, see below.
113 * 115 *
114 * This structure is protected by RCU and the per-station 116 * This structure is protected by RCU and the per-station
115 * spinlock. Assignments to the array holding it must hold 117 * spinlock. Assignments to the array holding it must hold
116 * the spinlock, only the RX path can access it under RCU 118 * the spinlock.
117 * lock-free. The RX path, since it is single-threaded, 119 *
118 * can even modify the structure without locking since the 120 * The @reorder_lock is used to protect the variables and
119 * only other modifications to it are done when the struct 121 * arrays such as @reorder_buf, @reorder_time, @head_seq_num,
120 * can not yet or no longer be found by the RX path. 122 * @stored_mpdu_num and @reorder_time from being corrupted by
123 * concurrent access of the RX path and the expired frame
124 * release timer.
121 */ 125 */
122struct tid_ampdu_rx { 126struct tid_ampdu_rx {
123 struct rcu_head rcu_head; 127 struct rcu_head rcu_head;
128 spinlock_t reorder_lock;
124 struct sk_buff **reorder_buf; 129 struct sk_buff **reorder_buf;
125 unsigned long *reorder_time; 130 unsigned long *reorder_time;
126 struct timer_list session_timer; 131 struct timer_list session_timer;
132 struct timer_list reorder_timer;
127 u16 head_seq_num; 133 u16 head_seq_num;
128 u16 stored_mpdu_num; 134 u16 stored_mpdu_num;
129 u16 ssn; 135 u16 ssn;
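
The new @reorder_lock and @reorder_timer fields exist because the RX path is no longer the only releaser of buffered frames; the expiry timer can now flush the reorder buffer too, so both paths must update @head_seq_num, @stored_mpdu_num and the buffer arrays under one lock. A user-space analogue of that shared-release pattern follows; a pthread mutex stands in for the spinlock and all names are illustrative.

#include <pthread.h>
#include <stdio.h>

#define BUF_SLOTS 64

struct reorder_state {
        pthread_mutex_t lock;           /* kernel: spinlock_t reorder_lock */
        int head_seq_num;
        int stored;
        int buf[BUF_SLOTS];             /* non-zero = frame buffered in slot */
};

/* Called from both the receive path and the expiry timer. */
static void release_in_order(struct reorder_state *st)
{
        pthread_mutex_lock(&st->lock);
        while (st->stored && st->buf[st->head_seq_num % BUF_SLOTS]) {
                printf("release seq %d\n", st->head_seq_num);
                st->buf[st->head_seq_num % BUF_SLOTS] = 0;
                st->head_seq_num++;
                st->stored--;
        }
        pthread_mutex_unlock(&st->lock);
}

int main(void)
{
        static struct reorder_state st = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .head_seq_num = 5,
        };

        st.buf[5] = st.buf[6] = 1;
        st.stored = 2;
        release_in_order(&st);
        return 0;
}
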
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 10caec5ea8fa..dd85006c4fe8 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -58,6 +58,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
58 info->control.vif = &sta->sdata->vif; 58 info->control.vif = &sta->sdata->vif;
59 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING | 59 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING |
60 IEEE80211_TX_INTFL_RETRANSMISSION; 60 IEEE80211_TX_INTFL_RETRANSMISSION;
61 info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
61 62
62 sta->tx_filtered_count++; 63 sta->tx_filtered_count++;
63 64
@@ -114,11 +115,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
114 115
115#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 116#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
116 if (net_ratelimit()) 117 if (net_ratelimit())
117 printk(KERN_DEBUG "%s: dropped TX filtered frame, " 118 wiphy_debug(local->hw.wiphy,
118 "queue_len=%d PS=%d @%lu\n", 119 "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n",
119 wiphy_name(local->hw.wiphy), 120 skb_queue_len(&sta->tx_filtered),
120 skb_queue_len(&sta->tx_filtered), 121 !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
121 !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
122#endif 122#endif
123 dev_kfree_skb(skb); 123 dev_kfree_skb(skb);
124} 124}
@@ -296,7 +296,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
296 } 296 }
297 297
298 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) 298 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX)
299 cfg80211_action_tx_status( 299 cfg80211_mgmt_tx_status(
300 skb->dev, (unsigned long) skb, skb->data, skb->len, 300 skb->dev, (unsigned long) skb, skb->data, skb->len,
301 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); 301 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
302 302
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c54db966926b..e1733dcb58a7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -351,8 +351,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
351 351
352 local->total_ps_buffered = total; 352 local->total_ps_buffered = total;
353#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 353#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
354 printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", 354 wiphy_debug(local->hw.wiphy, "PS buffers full - purged %d frames\n",
355 wiphy_name(local->hw.wiphy), purged); 355 purged);
356#endif 356#endif
357} 357}
358 358
@@ -509,6 +509,18 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
509} 509}
510 510
511static ieee80211_tx_result debug_noinline 511static ieee80211_tx_result debug_noinline
512ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
513{
514 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
515
516 if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol &&
517 tx->sdata->control_port_no_encrypt))
518 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
519
520 return TX_CONTINUE;
521}
522
523static ieee80211_tx_result debug_noinline
512ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) 524ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
513{ 525{
514 struct ieee80211_key *key = NULL; 526 struct ieee80211_key *key = NULL;
@@ -527,7 +539,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
527 else if ((key = rcu_dereference(tx->sdata->default_key))) 539 else if ((key = rcu_dereference(tx->sdata->default_key)))
528 tx->key = key; 540 tx->key = key;
529 else if (tx->sdata->drop_unencrypted && 541 else if (tx->sdata->drop_unencrypted &&
530 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && 542 (tx->skb->protocol != tx->sdata->control_port_protocol) &&
531 !(info->flags & IEEE80211_TX_CTL_INJECTED) && 543 !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
532 (!ieee80211_is_robust_mgmt_frame(hdr) || 544 (!ieee80211_is_robust_mgmt_frame(hdr) ||
533 (ieee80211_is_action(hdr->frame_control) && 545 (ieee80211_is_action(hdr->frame_control) &&
@@ -543,15 +555,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
543 tx->key->tx_rx_count++; 555 tx->key->tx_rx_count++;
544 /* TODO: add threshold stuff again */ 556 /* TODO: add threshold stuff again */
545 557
546 switch (tx->key->conf.alg) { 558 switch (tx->key->conf.cipher) {
547 case ALG_WEP: 559 case WLAN_CIPHER_SUITE_WEP40:
560 case WLAN_CIPHER_SUITE_WEP104:
548 if (ieee80211_is_auth(hdr->frame_control)) 561 if (ieee80211_is_auth(hdr->frame_control))
549 break; 562 break;
550 case ALG_TKIP: 563 case WLAN_CIPHER_SUITE_TKIP:
551 if (!ieee80211_is_data_present(hdr->frame_control)) 564 if (!ieee80211_is_data_present(hdr->frame_control))
552 tx->key = NULL; 565 tx->key = NULL;
553 break; 566 break;
554 case ALG_CCMP: 567 case WLAN_CIPHER_SUITE_CCMP:
555 if (!ieee80211_is_data_present(hdr->frame_control) && 568 if (!ieee80211_is_data_present(hdr->frame_control) &&
556 !ieee80211_use_mfp(hdr->frame_control, tx->sta, 569 !ieee80211_use_mfp(hdr->frame_control, tx->sta,
557 tx->skb)) 570 tx->skb))
@@ -561,7 +574,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
561 IEEE80211_KEY_FLAG_SW_MGMT) && 574 IEEE80211_KEY_FLAG_SW_MGMT) &&
562 ieee80211_is_mgmt(hdr->frame_control); 575 ieee80211_is_mgmt(hdr->frame_control);
563 break; 576 break;
564 case ALG_AES_CMAC: 577 case WLAN_CIPHER_SUITE_AES_CMAC:
565 if (!ieee80211_is_mgmt(hdr->frame_control)) 578 if (!ieee80211_is_mgmt(hdr->frame_control))
566 tx->key = NULL; 579 tx->key = NULL;
567 break; 580 break;
@@ -946,22 +959,31 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
946static ieee80211_tx_result debug_noinline 959static ieee80211_tx_result debug_noinline
947ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) 960ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
948{ 961{
962 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
963
949 if (!tx->key) 964 if (!tx->key)
950 return TX_CONTINUE; 965 return TX_CONTINUE;
951 966
952 switch (tx->key->conf.alg) { 967 switch (tx->key->conf.cipher) {
953 case ALG_WEP: 968 case WLAN_CIPHER_SUITE_WEP40:
969 case WLAN_CIPHER_SUITE_WEP104:
954 return ieee80211_crypto_wep_encrypt(tx); 970 return ieee80211_crypto_wep_encrypt(tx);
955 case ALG_TKIP: 971 case WLAN_CIPHER_SUITE_TKIP:
956 return ieee80211_crypto_tkip_encrypt(tx); 972 return ieee80211_crypto_tkip_encrypt(tx);
957 case ALG_CCMP: 973 case WLAN_CIPHER_SUITE_CCMP:
958 return ieee80211_crypto_ccmp_encrypt(tx); 974 return ieee80211_crypto_ccmp_encrypt(tx);
959 case ALG_AES_CMAC: 975 case WLAN_CIPHER_SUITE_AES_CMAC:
960 return ieee80211_crypto_aes_cmac_encrypt(tx); 976 return ieee80211_crypto_aes_cmac_encrypt(tx);
977 default:
978 /* handle hw-only algorithm */
979 if (info->control.hw_key) {
980 ieee80211_tx_set_protected(tx);
981 return TX_CONTINUE;
982 }
983 break;
984
961 } 985 }
962 986
963 /* not reached */
964 WARN_ON(1);
965 return TX_DROP; 987 return TX_DROP;
966} 988}
967 989
@@ -1339,6 +1361,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1339 CALL_TXH(ieee80211_tx_h_dynamic_ps); 1361 CALL_TXH(ieee80211_tx_h_dynamic_ps);
1340 CALL_TXH(ieee80211_tx_h_check_assoc); 1362 CALL_TXH(ieee80211_tx_h_check_assoc);
1341 CALL_TXH(ieee80211_tx_h_ps_buf); 1363 CALL_TXH(ieee80211_tx_h_ps_buf);
1364 CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
1342 CALL_TXH(ieee80211_tx_h_select_key); 1365 CALL_TXH(ieee80211_tx_h_select_key);
1343 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) 1366 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
1344 CALL_TXH(ieee80211_tx_h_rate_ctrl); 1367 CALL_TXH(ieee80211_tx_h_rate_ctrl);
@@ -1511,8 +1534,8 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
1511 I802_DEBUG_INC(local->tx_expand_skb_head); 1534 I802_DEBUG_INC(local->tx_expand_skb_head);
1512 1535
1513 if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) { 1536 if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) {
1514 printk(KERN_DEBUG "%s: failed to reallocate TX buffer\n", 1537 wiphy_debug(local->hw.wiphy,
1515 wiphy_name(local->hw.wiphy)); 1538 "failed to reallocate TX buffer\n");
1516 return -ENOMEM; 1539 return -ENOMEM;
1517 } 1540 }
1518 1541
@@ -1586,6 +1609,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
1586 return; 1609 return;
1587 } 1610 }
1588 1611
1612 hdr = (struct ieee80211_hdr *) skb->data;
1589 info->control.vif = &sdata->vif; 1613 info->control.vif = &sdata->vif;
1590 1614
1591 if (ieee80211_vif_is_mesh(&sdata->vif) && 1615 if (ieee80211_vif_is_mesh(&sdata->vif) &&
@@ -1699,7 +1723,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1699 u16 ethertype, hdrlen, meshhdrlen = 0; 1723 u16 ethertype, hdrlen, meshhdrlen = 0;
1700 __le16 fc; 1724 __le16 fc;
1701 struct ieee80211_hdr hdr; 1725 struct ieee80211_hdr hdr;
1702 struct ieee80211s_hdr mesh_hdr; 1726 struct ieee80211s_hdr mesh_hdr __maybe_unused;
1703 const u8 *encaps_data; 1727 const u8 *encaps_data;
1704 int encaps_len, skip_header_bytes; 1728 int encaps_len, skip_header_bytes;
1705 int nh_pos, h_pos; 1729 int nh_pos, h_pos;
@@ -1816,7 +1840,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1816#endif 1840#endif
1817 case NL80211_IFTYPE_STATION: 1841 case NL80211_IFTYPE_STATION:
1818 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); 1842 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
1819 if (sdata->u.mgd.use_4addr && ethertype != ETH_P_PAE) { 1843 if (sdata->u.mgd.use_4addr &&
1844 cpu_to_be16(ethertype) != sdata->control_port_protocol) {
1820 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); 1845 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
1821 /* RA TA DA SA */ 1846 /* RA TA DA SA */
1822 memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); 1847 memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN);
@@ -1869,7 +1894,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1869 if (!ieee80211_vif_is_mesh(&sdata->vif) && 1894 if (!ieee80211_vif_is_mesh(&sdata->vif) &&
1870 unlikely(!is_multicast_ether_addr(hdr.addr1) && 1895 unlikely(!is_multicast_ether_addr(hdr.addr1) &&
1871 !(sta_flags & WLAN_STA_AUTHORIZED) && 1896 !(sta_flags & WLAN_STA_AUTHORIZED) &&
1872 !(ethertype == ETH_P_PAE && 1897 !(cpu_to_be16(ethertype) == sdata->control_port_protocol &&
1873 compare_ether_addr(sdata->vif.addr, 1898 compare_ether_addr(sdata->vif.addr,
1874 skb->data + ETH_ALEN) == 0))) { 1899 skb->data + ETH_ALEN) == 0))) {
1875#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1900#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -2068,8 +2093,7 @@ void ieee80211_tx_pending(unsigned long data)
2068 2093
2069 if (skb_queue_empty(&local->pending[i])) 2094 if (skb_queue_empty(&local->pending[i]))
2070 list_for_each_entry_rcu(sdata, &local->interfaces, list) 2095 list_for_each_entry_rcu(sdata, &local->interfaces, list)
2071 netif_tx_wake_queue( 2096 netif_wake_subqueue(sdata->dev, i);
2072 netdev_get_tx_queue(sdata->dev, i));
2073 } 2097 }
2074 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 2098 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
2075 2099
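
The TX-path changes above replace the private ALG_* enum with the standard IEEE 802.11 cipher suite selectors (OUI 00-0F-AC plus a type byte) and add a default branch so that a suite mac80211 does not implement in software can still be used when the hardware holds the key. A small standalone sketch of that dispatch; the constants mirror the public suite selectors and the helper names are illustrative.

#include <stdint.h>
#include <stdio.h>

#define SUITE(oui, id)          (((uint32_t)(oui) << 8) | (id))
#define CIPHER_SUITE_WEP40      SUITE(0x000fac, 1)
#define CIPHER_SUITE_TKIP       SUITE(0x000fac, 2)
#define CIPHER_SUITE_CCMP       SUITE(0x000fac, 4)
#define CIPHER_SUITE_WEP104     SUITE(0x000fac, 5)
#define CIPHER_SUITE_AES_CMAC   SUITE(0x000fac, 6)

static const char *describe(uint32_t cipher, int have_hw_key)
{
        switch (cipher) {
        case CIPHER_SUITE_WEP40:
        case CIPHER_SUITE_WEP104:
                return "WEP";
        case CIPHER_SUITE_TKIP:
                return "TKIP";
        case CIPHER_SUITE_CCMP:
                return "CCMP";
        case CIPHER_SUITE_AES_CMAC:
                return "BIP (management frame protection)";
        default:
                /* unknown to software: usable only with a hardware key */
                return have_hw_key ? "hardware-only cipher" : "unsupported";
        }
}

int main(void)
{
        printf("%s\n", describe(CIPHER_SUITE_CCMP, 0));
        printf("%s\n", describe(SUITE(0x000fac, 8), 1));   /* e.g. GCMP */
        return 0;
}
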
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 748387d45bc0..aba025d748e9 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -283,8 +283,11 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
283 283
284 if (skb_queue_empty(&local->pending[queue])) { 284 if (skb_queue_empty(&local->pending[queue])) {
285 rcu_read_lock(); 285 rcu_read_lock();
286 list_for_each_entry_rcu(sdata, &local->interfaces, list) 286 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
287 netif_tx_wake_queue(netdev_get_tx_queue(sdata->dev, queue)); 287 if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
288 continue;
289 netif_wake_subqueue(sdata->dev, queue);
290 }
288 rcu_read_unlock(); 291 rcu_read_unlock();
289 } else 292 } else
290 tasklet_schedule(&local->tx_pending_tasklet); 293 tasklet_schedule(&local->tx_pending_tasklet);
@@ -323,7 +326,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
323 326
324 rcu_read_lock(); 327 rcu_read_lock();
325 list_for_each_entry_rcu(sdata, &local->interfaces, list) 328 list_for_each_entry_rcu(sdata, &local->interfaces, list)
326 netif_tx_stop_queue(netdev_get_tx_queue(sdata->dev, queue)); 329 netif_stop_subqueue(sdata->dev, queue);
327 rcu_read_unlock(); 330 rcu_read_unlock();
328} 331}
329 332
@@ -471,16 +474,10 @@ void ieee80211_iterate_active_interfaces(
471 474
472 list_for_each_entry(sdata, &local->interfaces, list) { 475 list_for_each_entry(sdata, &local->interfaces, list) {
473 switch (sdata->vif.type) { 476 switch (sdata->vif.type) {
474 case __NL80211_IFTYPE_AFTER_LAST:
475 case NL80211_IFTYPE_UNSPECIFIED:
476 case NL80211_IFTYPE_MONITOR: 477 case NL80211_IFTYPE_MONITOR:
477 case NL80211_IFTYPE_AP_VLAN: 478 case NL80211_IFTYPE_AP_VLAN:
478 continue; 479 continue;
479 case NL80211_IFTYPE_AP: 480 default:
480 case NL80211_IFTYPE_STATION:
481 case NL80211_IFTYPE_ADHOC:
482 case NL80211_IFTYPE_WDS:
483 case NL80211_IFTYPE_MESH_POINT:
484 break; 481 break;
485 } 482 }
486 if (ieee80211_sdata_running(sdata)) 483 if (ieee80211_sdata_running(sdata))
@@ -505,16 +502,10 @@ void ieee80211_iterate_active_interfaces_atomic(
505 502
506 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 503 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
507 switch (sdata->vif.type) { 504 switch (sdata->vif.type) {
508 case __NL80211_IFTYPE_AFTER_LAST:
509 case NL80211_IFTYPE_UNSPECIFIED:
510 case NL80211_IFTYPE_MONITOR: 505 case NL80211_IFTYPE_MONITOR:
511 case NL80211_IFTYPE_AP_VLAN: 506 case NL80211_IFTYPE_AP_VLAN:
512 continue; 507 continue;
513 case NL80211_IFTYPE_AP: 508 default:
514 case NL80211_IFTYPE_STATION:
515 case NL80211_IFTYPE_ADHOC:
516 case NL80211_IFTYPE_WDS:
517 case NL80211_IFTYPE_MESH_POINT:
518 break; 509 break;
519 } 510 }
520 if (ieee80211_sdata_running(sdata)) 511 if (ieee80211_sdata_running(sdata))
@@ -904,26 +895,34 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
904 895
905int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, 896int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
906 const u8 *ie, size_t ie_len, 897 const u8 *ie, size_t ie_len,
907 enum ieee80211_band band) 898 enum ieee80211_band band, u32 rate_mask,
899 u8 channel)
908{ 900{
909 struct ieee80211_supported_band *sband; 901 struct ieee80211_supported_band *sband;
910 u8 *pos; 902 u8 *pos;
911 size_t offset = 0, noffset; 903 size_t offset = 0, noffset;
912 int supp_rates_len, i; 904 int supp_rates_len, i;
905 u8 rates[32];
906 int num_rates;
907 int ext_rates_len;
913 908
914 sband = local->hw.wiphy->bands[band]; 909 sband = local->hw.wiphy->bands[band];
915 910
916 pos = buffer; 911 pos = buffer;
917 912
918 supp_rates_len = min_t(int, sband->n_bitrates, 8); 913 num_rates = 0;
914 for (i = 0; i < sband->n_bitrates; i++) {
915 if ((BIT(i) & rate_mask) == 0)
916 continue; /* skip rate */
917 rates[num_rates++] = (u8) (sband->bitrates[i].bitrate / 5);
918 }
919
920 supp_rates_len = min_t(int, num_rates, 8);
919 921
920 *pos++ = WLAN_EID_SUPP_RATES; 922 *pos++ = WLAN_EID_SUPP_RATES;
921 *pos++ = supp_rates_len; 923 *pos++ = supp_rates_len;
922 924 memcpy(pos, rates, supp_rates_len);
923 for (i = 0; i < supp_rates_len; i++) { 925 pos += supp_rates_len;
924 int rate = sband->bitrates[i].bitrate;
925 *pos++ = (u8) (rate / 5);
926 }
927 926
928 /* insert "request information" if in custom IEs */ 927 /* insert "request information" if in custom IEs */
929 if (ie && ie_len) { 928 if (ie && ie_len) {
@@ -941,14 +940,18 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
941 offset = noffset; 940 offset = noffset;
942 } 941 }
943 942
944 if (sband->n_bitrates > i) { 943 ext_rates_len = num_rates - supp_rates_len;
944 if (ext_rates_len > 0) {
945 *pos++ = WLAN_EID_EXT_SUPP_RATES; 945 *pos++ = WLAN_EID_EXT_SUPP_RATES;
946 *pos++ = sband->n_bitrates - i; 946 *pos++ = ext_rates_len;
947 memcpy(pos, rates + supp_rates_len, ext_rates_len);
948 pos += ext_rates_len;
949 }
947 950
948 for (; i < sband->n_bitrates; i++) { 951 if (channel && sband->band == IEEE80211_BAND_2GHZ) {
949 int rate = sband->bitrates[i].bitrate; 952 *pos++ = WLAN_EID_DS_PARAMS;
950 *pos++ = (u8) (rate / 5); 953 *pos++ = 1;
951 } 954 *pos++ = channel;
952 } 955 }
953 956
954 /* insert custom IEs that go before HT */ 957 /* insert custom IEs that go before HT */
@@ -1017,6 +1020,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1017 struct ieee80211_mgmt *mgmt; 1020 struct ieee80211_mgmt *mgmt;
1018 size_t buf_len; 1021 size_t buf_len;
1019 u8 *buf; 1022 u8 *buf;
1023 u8 chan;
1020 1024
1021 /* FIXME: come up with a proper value */ 1025 /* FIXME: come up with a proper value */
1022 buf = kmalloc(200 + ie_len, GFP_KERNEL); 1026 buf = kmalloc(200 + ie_len, GFP_KERNEL);
@@ -1026,8 +1030,14 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1026 return; 1030 return;
1027 } 1031 }
1028 1032
1033 chan = ieee80211_frequency_to_channel(
1034 local->hw.conf.channel->center_freq);
1035
1029 buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, 1036 buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len,
1030 local->hw.conf.channel->band); 1037 local->hw.conf.channel->band,
1038 sdata->rc_rateidx_mask
1039 [local->hw.conf.channel->band],
1040 chan);
1031 1041
1032 skb = ieee80211_probereq_get(&local->hw, &sdata->vif, 1042 skb = ieee80211_probereq_get(&local->hw, &sdata->vif,
1033 ssid, ssid_len, 1043 ssid, ssid_len,
@@ -1189,7 +1199,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1189 /* ignore virtual */ 1199 /* ignore virtual */
1190 break; 1200 break;
1191 case NL80211_IFTYPE_UNSPECIFIED: 1201 case NL80211_IFTYPE_UNSPECIFIED:
1192 case __NL80211_IFTYPE_AFTER_LAST: 1202 case NUM_NL80211_IFTYPES:
1203 case NL80211_IFTYPE_P2P_CLIENT:
1204 case NL80211_IFTYPE_P2P_GO:
1193 WARN_ON(1); 1205 WARN_ON(1);
1194 break; 1206 break;
1195 } 1207 }
@@ -1293,9 +1305,9 @@ void ieee80211_recalc_smps(struct ieee80211_local *local,
1293 int count = 0; 1305 int count = 0;
1294 1306
1295 if (forsdata) 1307 if (forsdata)
1296 WARN_ON(!mutex_is_locked(&forsdata->u.mgd.mtx)); 1308 lockdep_assert_held(&forsdata->u.mgd.mtx);
1297 1309
1298 WARN_ON(!mutex_is_locked(&local->iflist_mtx)); 1310 lockdep_assert_held(&local->iflist_mtx);
1299 1311
1300 /* 1312 /*
1301 * This function could be improved to handle multiple 1313 * This function could be improved to handle multiple
@@ -1308,7 +1320,7 @@ void ieee80211_recalc_smps(struct ieee80211_local *local,
1308 */ 1320 */
1309 1321
1310 list_for_each_entry(sdata, &local->interfaces, list) { 1322 list_for_each_entry(sdata, &local->interfaces, list) {
1311 if (!netif_running(sdata->dev)) 1323 if (!ieee80211_sdata_running(sdata))
1312 continue; 1324 continue;
1313 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1325 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1314 goto set; 1326 goto set;
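
ieee80211_build_preq_ies() above now filters the bitrates through a caller-supplied rate mask and splits the result into the Supported Rates IE (at most 8 entries) plus an Extended Supported Rates IE for the remainder, optionally appending a DS Parameter Set with the channel. A self-contained sketch of the rate-IE split under those assumptions; element IDs 1 and 50 are the standard ones, and the function and variable names are illustrative.

#include <stdint.h>
#include <stdio.h>

#define EID_SUPP_RATES          1
#define EID_EXT_SUPP_RATES      50

/* bitrates are given in units of 100 kbit/s, IEs want units of 500 kbit/s */
static int build_rate_ies(uint8_t *pos, const int *bitrates, int n_bitrates,
                          uint32_t rate_mask)
{
        uint8_t rates[32];
        uint8_t *start = pos;
        int num = 0, supp, ext, i;

        for (i = 0; i < n_bitrates && num < 32; i++) {
                if (!(rate_mask & (1u << i)))
                        continue;               /* rate filtered out by the mask */
                rates[num++] = (uint8_t)(bitrates[i] / 5);
        }

        supp = num > 8 ? 8 : num;
        *pos++ = EID_SUPP_RATES;
        *pos++ = (uint8_t)supp;
        for (i = 0; i < supp; i++)
                *pos++ = rates[i];

        ext = num - supp;
        if (ext > 0) {
                *pos++ = EID_EXT_SUPP_RATES;
                *pos++ = (uint8_t)ext;
                for (i = 0; i < ext; i++)
                        *pos++ = rates[supp + i];
        }
        return (int)(pos - start);
}

int main(void)
{
        /* 802.11g rate set, 100 kbit/s units */
        static const int g_rates[] = { 10, 20, 55, 110, 60, 90, 120, 180,
                                       240, 360, 480, 540 };
        uint8_t buf[64];

        printf("built %d bytes of rate IEs\n",
               build_rate_ies(buf, g_rates, 12, 0xfffu));
        return 0;
}
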
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 9ebc8d8a1f5b..f27484c22b9f 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -240,7 +240,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
240 240
241 keyidx = skb->data[hdrlen + 3] >> 6; 241 keyidx = skb->data[hdrlen + 3] >> 6;
242 242
243 if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) 243 if (!key || keyidx != key->conf.keyidx)
244 return -1; 244 return -1;
245 245
246 klen = 3 + key->conf.keylen; 246 klen = 3 + key->conf.keylen;
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 81d4ad64184a..ae344d1ba056 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -43,7 +43,7 @@ enum work_action {
43/* utils */ 43/* utils */
44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local) 44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local)
45{ 45{
46 WARN_ON(!mutex_is_locked(&local->work_mtx)); 46 lockdep_assert_held(&local->mtx);
47} 47}
48 48
49/* 49/*
@@ -757,7 +757,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
757 mgmt = (struct ieee80211_mgmt *) skb->data; 757 mgmt = (struct ieee80211_mgmt *) skb->data;
758 fc = le16_to_cpu(mgmt->frame_control); 758 fc = le16_to_cpu(mgmt->frame_control);
759 759
760 mutex_lock(&local->work_mtx); 760 mutex_lock(&local->mtx);
761 761
762 list_for_each_entry(wk, &local->work_list, list) { 762 list_for_each_entry(wk, &local->work_list, list) {
763 const u8 *bssid = NULL; 763 const u8 *bssid = NULL;
@@ -833,7 +833,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
833 WARN(1, "unexpected: %d", rma); 833 WARN(1, "unexpected: %d", rma);
834 } 834 }
835 835
836 mutex_unlock(&local->work_mtx); 836 mutex_unlock(&local->mtx);
837 837
838 if (rma != WORK_ACT_DONE) 838 if (rma != WORK_ACT_DONE)
839 goto out; 839 goto out;
@@ -845,9 +845,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
845 case WORK_DONE_REQUEUE: 845 case WORK_DONE_REQUEUE:
846 synchronize_rcu(); 846 synchronize_rcu();
847 wk->started = false; /* restart */ 847 wk->started = false; /* restart */
848 mutex_lock(&local->work_mtx); 848 mutex_lock(&local->mtx);
849 list_add_tail(&wk->list, &local->work_list); 849 list_add_tail(&wk->list, &local->work_list);
850 mutex_unlock(&local->work_mtx); 850 mutex_unlock(&local->mtx);
851 } 851 }
852 852
853 out: 853 out:
@@ -888,9 +888,9 @@ static void ieee80211_work_work(struct work_struct *work)
888 while ((skb = skb_dequeue(&local->work_skb_queue))) 888 while ((skb = skb_dequeue(&local->work_skb_queue)))
889 ieee80211_work_rx_queued_mgmt(local, skb); 889 ieee80211_work_rx_queued_mgmt(local, skb);
890 890
891 ieee80211_recalc_idle(local); 891 mutex_lock(&local->mtx);
892 892
893 mutex_lock(&local->work_mtx); 893 ieee80211_recalc_idle(local);
894 894
895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
896 bool started = wk->started; 896 bool started = wk->started;
@@ -995,20 +995,16 @@ static void ieee80211_work_work(struct work_struct *work)
995 run_again(local, jiffies + HZ/2); 995 run_again(local, jiffies + HZ/2);
996 } 996 }
997 997
998 mutex_lock(&local->scan_mtx);
999
1000 if (list_empty(&local->work_list) && local->scan_req && 998 if (list_empty(&local->work_list) && local->scan_req &&
1001 !local->scanning) 999 !local->scanning)
1002 ieee80211_queue_delayed_work(&local->hw, 1000 ieee80211_queue_delayed_work(&local->hw,
1003 &local->scan_work, 1001 &local->scan_work,
1004 round_jiffies_relative(0)); 1002 round_jiffies_relative(0));
1005 1003
1006 mutex_unlock(&local->scan_mtx);
1007
1008 mutex_unlock(&local->work_mtx);
1009
1010 ieee80211_recalc_idle(local); 1004 ieee80211_recalc_idle(local);
1011 1005
1006 mutex_unlock(&local->mtx);
1007
1012 list_for_each_entry_safe(wk, tmp, &free_work, list) { 1008 list_for_each_entry_safe(wk, tmp, &free_work, list) {
1013 wk->done(wk, NULL); 1009 wk->done(wk, NULL);
1014 list_del(&wk->list); 1010 list_del(&wk->list);
@@ -1035,16 +1031,15 @@ void ieee80211_add_work(struct ieee80211_work *wk)
1035 wk->started = false; 1031 wk->started = false;
1036 1032
1037 local = wk->sdata->local; 1033 local = wk->sdata->local;
1038 mutex_lock(&local->work_mtx); 1034 mutex_lock(&local->mtx);
1039 list_add_tail(&wk->list, &local->work_list); 1035 list_add_tail(&wk->list, &local->work_list);
1040 mutex_unlock(&local->work_mtx); 1036 mutex_unlock(&local->mtx);
1041 1037
1042 ieee80211_queue_work(&local->hw, &local->work_work); 1038 ieee80211_queue_work(&local->hw, &local->work_work);
1043} 1039}
1044 1040
1045void ieee80211_work_init(struct ieee80211_local *local) 1041void ieee80211_work_init(struct ieee80211_local *local)
1046{ 1042{
1047 mutex_init(&local->work_mtx);
1048 INIT_LIST_HEAD(&local->work_list); 1043 INIT_LIST_HEAD(&local->work_list);
1049 setup_timer(&local->work_timer, ieee80211_work_timer, 1044 setup_timer(&local->work_timer, ieee80211_work_timer,
1050 (unsigned long)local); 1045 (unsigned long)local);
@@ -1057,7 +1052,7 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1057 struct ieee80211_local *local = sdata->local; 1052 struct ieee80211_local *local = sdata->local;
1058 struct ieee80211_work *wk; 1053 struct ieee80211_work *wk;
1059 1054
1060 mutex_lock(&local->work_mtx); 1055 mutex_lock(&local->mtx);
1061 list_for_each_entry(wk, &local->work_list, list) { 1056 list_for_each_entry(wk, &local->work_list, list) {
1062 if (wk->sdata != sdata) 1057 if (wk->sdata != sdata)
1063 continue; 1058 continue;
@@ -1065,19 +1060,19 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1065 wk->started = true; 1060 wk->started = true;
1066 wk->timeout = jiffies; 1061 wk->timeout = jiffies;
1067 } 1062 }
1068 mutex_unlock(&local->work_mtx); 1063 mutex_unlock(&local->mtx);
1069 1064
1070 /* run cleanups etc. */ 1065 /* run cleanups etc. */
1071 ieee80211_work_work(&local->work_work); 1066 ieee80211_work_work(&local->work_work);
1072 1067
1073 mutex_lock(&local->work_mtx); 1068 mutex_lock(&local->mtx);
1074 list_for_each_entry(wk, &local->work_list, list) { 1069 list_for_each_entry(wk, &local->work_list, list) {
1075 if (wk->sdata != sdata) 1070 if (wk->sdata != sdata)
1076 continue; 1071 continue;
1077 WARN_ON(1); 1072 WARN_ON(1);
1078 break; 1073 break;
1079 } 1074 }
1080 mutex_unlock(&local->work_mtx); 1075 mutex_unlock(&local->mtx);
1081} 1076}
1082 1077
1083ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, 1078ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
@@ -1163,7 +1158,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1163 struct ieee80211_work *wk, *tmp; 1158 struct ieee80211_work *wk, *tmp;
1164 bool found = false; 1159 bool found = false;
1165 1160
1166 mutex_lock(&local->work_mtx); 1161 mutex_lock(&local->mtx);
1167 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 1162 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
1168 if ((unsigned long) wk == cookie) { 1163 if ((unsigned long) wk == cookie) {
1169 wk->timeout = jiffies; 1164 wk->timeout = jiffies;
@@ -1171,7 +1166,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1171 break; 1166 break;
1172 } 1167 }
1173 } 1168 }
1174 mutex_unlock(&local->work_mtx); 1169 mutex_unlock(&local->mtx);
1175 1170
1176 if (!found) 1171 if (!found)
1177 return -ENOENT; 1172 return -ENOENT;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 8d59d27d887e..bee230d8fd11 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
36 int tail; 36 int tail;
37 37
38 hdr = (struct ieee80211_hdr *)skb->data; 38 hdr = (struct ieee80211_hdr *)skb->data;
39 if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || 39 if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
40 !ieee80211_is_data_present(hdr->frame_control)) 40 skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control))
41 return TX_CONTINUE; 41 return TX_CONTINUE;
42 42
43 hdrlen = ieee80211_hdrlen(hdr->frame_control); 43 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
94 if (status->flag & RX_FLAG_MMIC_STRIPPED) 94 if (status->flag & RX_FLAG_MMIC_STRIPPED)
95 return RX_CONTINUE; 95 return RX_CONTINUE;
96 96
97 if (!rx->key || rx->key->conf.alg != ALG_TKIP || 97 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
98 !ieee80211_has_protected(hdr->frame_control) || 98 !ieee80211_has_protected(hdr->frame_control) ||
99 !ieee80211_is_data_present(hdr->frame_control)) 99 !ieee80211_is_data_present(hdr->frame_control))
100 return RX_CONTINUE; 100 return RX_CONTINUE;
@@ -117,7 +117,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
117 key = &rx->key->conf.key[key_offset]; 117 key = &rx->key->conf.key[key_offset];
118 michael_mic(key, hdr, data, data_len, mic); 118 michael_mic(key, hdr, data, data_len, mic);
119 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { 119 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) {
120 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 120 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
121 return RX_DROP_UNUSABLE; 121 return RX_DROP_UNUSABLE;
122 122
123 mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, 123 mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx,
@@ -221,19 +221,13 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
221 if (!rx->sta || skb->len - hdrlen < 12) 221 if (!rx->sta || skb->len - hdrlen < 12)
222 return RX_DROP_UNUSABLE; 222 return RX_DROP_UNUSABLE;
223 223
224 if (status->flag & RX_FLAG_DECRYPTED) { 224 /*
225 if (status->flag & RX_FLAG_IV_STRIPPED) { 225 * Let TKIP code verify IV, but skip decryption.
226 /* 226 * In the case where hardware checks the IV as well,
227 * Hardware took care of all processing, including 227 * we don't even get here, see ieee80211_rx_h_decrypt()
228 * replay protection, and stripped the ICV/IV so 228 */
229 * we cannot do any checks here. 229 if (status->flag & RX_FLAG_DECRYPTED)
230 */
231 return RX_CONTINUE;
232 }
233
234 /* let TKIP code verify IV, but skip decryption */
235 hwaccel = 1; 230 hwaccel = 1;
236 }
237 231
238 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, 232 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
239 key, skb->data + hdrlen, 233 key, skb->data + hdrlen,
@@ -447,10 +441,6 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
447 if (!rx->sta || data_len < 0) 441 if (!rx->sta || data_len < 0)
448 return RX_DROP_UNUSABLE; 442 return RX_DROP_UNUSABLE;
449 443
450 if ((status->flag & RX_FLAG_DECRYPTED) &&
451 (status->flag & RX_FLAG_IV_STRIPPED))
452 return RX_CONTINUE;
453
454 ccmp_hdr2pn(pn, skb->data + hdrlen); 444 ccmp_hdr2pn(pn, skb->data + hdrlen);
455 445
456 queue = ieee80211_is_mgmt(hdr->frame_control) ? 446 queue = ieee80211_is_mgmt(hdr->frame_control) ?
@@ -564,10 +554,6 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
564 if (!ieee80211_is_mgmt(hdr->frame_control)) 554 if (!ieee80211_is_mgmt(hdr->frame_control))
565 return RX_CONTINUE; 555 return RX_CONTINUE;
566 556
567 if ((status->flag & RX_FLAG_DECRYPTED) &&
568 (status->flag & RX_FLAG_IV_STRIPPED))
569 return RX_CONTINUE;
570
571 if (skb->len < 24 + sizeof(*mmie)) 557 if (skb->len < 24 + sizeof(*mmie))
572 return RX_DROP_UNUSABLE; 558 return RX_DROP_UNUSABLE;
573 559
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4c2f89df5cce..0c043b6ce65e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -40,6 +40,7 @@
40#include <net/udp.h> 40#include <net/udp.h>
41#include <net/icmp.h> /* for icmp_send */ 41#include <net/icmp.h> /* for icmp_send */
42#include <net/route.h> 42#include <net/route.h>
43#include <net/ip6_checksum.h>
43 44
44#include <linux/netfilter.h> 45#include <linux/netfilter.h>
45#include <linux/netfilter_ipv4.h> 46#include <linux/netfilter_ipv4.h>
@@ -637,10 +638,12 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
637 } 638 }
638 639
639 /* And finally the ICMP checksum */ 640 /* And finally the ICMP checksum */
640 icmph->icmp6_cksum = 0; 641 icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
641 /* TODO IPv6: is this correct for ICMPv6? */ 642 skb->len - icmp_offset,
642 ip_vs_checksum_complete(skb, icmp_offset); 643 IPPROTO_ICMPV6, 0);
643 skb->ip_summed = CHECKSUM_UNNECESSARY; 644 skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset;
645 skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
646 skb->ip_summed = CHECKSUM_PARTIAL;
644 647
645 if (inout) 648 if (inout)
646 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 649 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
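The checksum fix above is worth spelling out: ICMPv6, unlike ICMPv4, is checksummed over an IPv6 pseudo-header, so the old full-packet recompute marked CHECKSUM_UNNECESSARY was wrong. The new code seeds icmp6_cksum with the inverted pseudo-header sum and describes the rest via csum_start/csum_offset so the stack or hardware folds in the payload. A condensed sketch of that setup, using the names from the hunk; the wrapper function itself is hypothetical:

	/* Sketch: prepare an ICMPv6 checksum for CHECKSUM_PARTIAL offload. */
	static void example_seed_icmpv6_csum(struct sk_buff *skb,
					     const struct ipv6hdr *iph,
					     struct icmp6hdr *icmph,
					     unsigned int icmp_offset)
	{
		icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
						      skb->len - icmp_offset,
						      IPPROTO_ICMPV6, 0);
		skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset;
		skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
		skb->ip_summed = CHECKSUM_PARTIAL;
	}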
@@ -1381,8 +1384,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1381 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1384 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1382 cp->protocol == IPPROTO_SCTP) { 1385 cp->protocol == IPPROTO_SCTP) {
1383 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1386 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1384 (atomic_read(&cp->in_pkts) % 1387 (pkts % sysctl_ip_vs_sync_threshold[1]
1385 sysctl_ip_vs_sync_threshold[1]
1386 == sysctl_ip_vs_sync_threshold[0])) || 1388 == sysctl_ip_vs_sync_threshold[0])) ||
1387 (cp->old_state != cp->state && 1389 (cp->old_state != cp->state &&
1388 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1390 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
@@ -1393,7 +1395,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1393 } 1395 }
1394 } 1396 }
1395 1397
1396 if (af == AF_INET && 1398 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1399 else if (af == AF_INET &&
1397 (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1400 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1398 (((cp->protocol != IPPROTO_TCP || 1401 (((cp->protocol != IPPROTO_TCP ||
1399 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1402 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 0f0c079c422a..ca8ec8c4f311 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -61,7 +61,7 @@ static DEFINE_RWLOCK(__ip_vs_svc_lock);
61static DEFINE_RWLOCK(__ip_vs_rs_lock); 61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62 62
63/* lock for state and timeout tables */ 63/* lock for state and timeout tables */
64static DEFINE_RWLOCK(__ip_vs_securetcp_lock); 64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65 65
66/* lock for drop entry handling */ 66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); 67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
@@ -204,7 +204,7 @@ static void update_defense_level(void)
204 spin_unlock(&__ip_vs_droppacket_lock); 204 spin_unlock(&__ip_vs_droppacket_lock);
205 205
206 /* secure_tcp */ 206 /* secure_tcp */
207 write_lock(&__ip_vs_securetcp_lock); 207 spin_lock(&ip_vs_securetcp_lock);
208 switch (sysctl_ip_vs_secure_tcp) { 208 switch (sysctl_ip_vs_secure_tcp) {
209 case 0: 209 case 0:
210 if (old_secure_tcp >= 2) 210 if (old_secure_tcp >= 2)
@@ -238,7 +238,7 @@ static void update_defense_level(void)
238 old_secure_tcp = sysctl_ip_vs_secure_tcp; 238 old_secure_tcp = sysctl_ip_vs_secure_tcp;
239 if (to_change >= 0) 239 if (to_change >= 0)
240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
241 write_unlock(&__ip_vs_securetcp_lock); 241 spin_unlock(&ip_vs_securetcp_lock);
242 242
243 local_bh_enable(); 243 local_bh_enable();
244} 244}
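update_defense_level() is the only path that ever takes this lock, and it always takes it for writing, so the rwlock bought nothing over a plain spinlock; the hunk above is a mechanical conversion. A minimal sketch of that kind of conversion (names illustrative):

	static DEFINE_SPINLOCK(example_lock);	/* was: static DEFINE_RWLOCK(...) */
	static int example_mode;

	static void example_set_mode(int mode)
	{
		spin_lock(&example_lock);	/* was: write_lock() */
		example_mode = mode;
		spin_unlock(&example_lock);	/* was: write_unlock() */
	}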
@@ -843,7 +843,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843 return -EINVAL; 843 return -EINVAL;
844 } 844 }
845 845
846 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); 846 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
847 if (dest == NULL) { 847 if (dest == NULL) {
848 pr_err("%s(): no memory.\n", __func__); 848 pr_err("%s(): no memory.\n", __func__);
849 return -ENOMEM; 849 return -ENOMEM;
@@ -1177,7 +1177,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1177 } 1177 }
1178#endif 1178#endif
1179 1179
1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); 1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1181 if (svc == NULL) { 1181 if (svc == NULL) {
1182 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1182 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1183 ret = -ENOMEM; 1183 ret = -ENOMEM;
@@ -2155,7 +2155,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2155 if (cmd != IP_VS_SO_SET_ADD 2155 if (cmd != IP_VS_SO_SET_ADD
2156 && (svc == NULL || svc->protocol != usvc.protocol)) { 2156 && (svc == NULL || svc->protocol != usvc.protocol)) {
2157 ret = -ESRCH; 2157 ret = -ESRCH;
2158 goto out_unlock; 2158 goto out_drop_service;
2159 } 2159 }
2160 2160
2161 switch (cmd) { 2161 switch (cmd) {
@@ -2189,6 +2189,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2189 ret = -EINVAL; 2189 ret = -EINVAL;
2190 } 2190 }
2191 2191
2192out_drop_service:
2192 if (svc) 2193 if (svc)
2193 ip_vs_service_put(svc); 2194 ip_vs_service_put(svc);
2194 2195
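Two small but useful patterns in the ip_vs_ctl.c hunks: the kzalloc() calls move from GFP_ATOMIC to GFP_KERNEL because these paths run in process context (configuration via setsockopt) and may sleep, and the new out_drop_service label makes the early -ESRCH exit still drop the service reference taken earlier. A minimal sketch of the allocation side, with the struct name from the hunk:

	/* Process context: a sleeping allocation is far less likely to fail
	 * than GFP_ATOMIC and puts no pressure on the atomic reserves. */
	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
	if (!dest)
		return -ENOMEM;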
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index bbc1ac795952..727e45b66953 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,7 +35,7 @@
35static LIST_HEAD(ip_vs_schedulers); 35static LIST_HEAD(ip_vs_schedulers);
36 36
37/* lock for service table */ 37/* lock for service table */
38static DEFINE_RWLOCK(__ip_vs_sched_lock); 38static DEFINE_SPINLOCK(ip_vs_sched_lock);
39 39
40 40
41/* 41/*
@@ -108,7 +108,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
108 108
109 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); 109 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
110 110
111 read_lock_bh(&__ip_vs_sched_lock); 111 spin_lock_bh(&ip_vs_sched_lock);
112 112
113 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 113 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
114 /* 114 /*
@@ -122,14 +122,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
122 } 122 }
123 if (strcmp(sched_name, sched->name)==0) { 123 if (strcmp(sched_name, sched->name)==0) {
124 /* HIT */ 124 /* HIT */
125 read_unlock_bh(&__ip_vs_sched_lock); 125 spin_unlock_bh(&ip_vs_sched_lock);
126 return sched; 126 return sched;
127 } 127 }
128 if (sched->module) 128 if (sched->module)
129 module_put(sched->module); 129 module_put(sched->module);
130 } 130 }
131 131
132 read_unlock_bh(&__ip_vs_sched_lock); 132 spin_unlock_bh(&ip_vs_sched_lock);
133 return NULL; 133 return NULL;
134} 134}
135 135
@@ -184,10 +184,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
184 /* increase the module use count */ 184 /* increase the module use count */
185 ip_vs_use_count_inc(); 185 ip_vs_use_count_inc();
186 186
187 write_lock_bh(&__ip_vs_sched_lock); 187 spin_lock_bh(&ip_vs_sched_lock);
188 188
189 if (!list_empty(&scheduler->n_list)) { 189 if (!list_empty(&scheduler->n_list)) {
190 write_unlock_bh(&__ip_vs_sched_lock); 190 spin_unlock_bh(&ip_vs_sched_lock);
191 ip_vs_use_count_dec(); 191 ip_vs_use_count_dec();
192 pr_err("%s(): [%s] scheduler already linked\n", 192 pr_err("%s(): [%s] scheduler already linked\n",
193 __func__, scheduler->name); 193 __func__, scheduler->name);
@@ -200,7 +200,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
200 */ 200 */
201 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 201 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
202 if (strcmp(scheduler->name, sched->name) == 0) { 202 if (strcmp(scheduler->name, sched->name) == 0) {
203 write_unlock_bh(&__ip_vs_sched_lock); 203 spin_unlock_bh(&ip_vs_sched_lock);
204 ip_vs_use_count_dec(); 204 ip_vs_use_count_dec();
205 pr_err("%s(): [%s] scheduler already existed " 205 pr_err("%s(): [%s] scheduler already existed "
206 "in the system\n", __func__, scheduler->name); 206 "in the system\n", __func__, scheduler->name);
@@ -211,7 +211,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
211 * Add it into the d-linked scheduler list 211 * Add it into the d-linked scheduler list
212 */ 212 */
213 list_add(&scheduler->n_list, &ip_vs_schedulers); 213 list_add(&scheduler->n_list, &ip_vs_schedulers);
214 write_unlock_bh(&__ip_vs_sched_lock); 214 spin_unlock_bh(&ip_vs_sched_lock);
215 215
216 pr_info("[%s] scheduler registered.\n", scheduler->name); 216 pr_info("[%s] scheduler registered.\n", scheduler->name);
217 217
@@ -229,9 +229,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
229 return -EINVAL; 229 return -EINVAL;
230 } 230 }
231 231
232 write_lock_bh(&__ip_vs_sched_lock); 232 spin_lock_bh(&ip_vs_sched_lock);
233 if (list_empty(&scheduler->n_list)) { 233 if (list_empty(&scheduler->n_list)) {
234 write_unlock_bh(&__ip_vs_sched_lock); 234 spin_unlock_bh(&ip_vs_sched_lock);
235 pr_err("%s(): [%s] scheduler is not in the list. failed\n", 235 pr_err("%s(): [%s] scheduler is not in the list. failed\n",
236 __func__, scheduler->name); 236 __func__, scheduler->name);
237 return -EINVAL; 237 return -EINVAL;
@@ -241,7 +241,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
241 * Remove it from the d-linked scheduler list 241 * Remove it from the d-linked scheduler list
242 */ 242 */
243 list_del(&scheduler->n_list); 243 list_del(&scheduler->n_list);
244 write_unlock_bh(&__ip_vs_sched_lock); 244 spin_unlock_bh(&ip_vs_sched_lock);
245 245
246 /* decrease the module use count */ 246 /* decrease the module use count */
247 ip_vs_use_count_dec(); 247 ip_vs_use_count_dec();
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b46a8390896d..9228ee0dc11a 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -448,6 +448,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
448{ 448{
449 __be16 _ports[2], *ports; 449 __be16 _ports[2], *ports;
450 u8 nexthdr; 450 u8 nexthdr;
451 int poff;
451 452
452 memset(dst, 0, sizeof(*dst)); 453 memset(dst, 0, sizeof(*dst));
453 454
@@ -492,19 +493,13 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
492 return 0; 493 return 0;
493 } 494 }
494 495
495 switch (nexthdr) { 496 poff = proto_ports_offset(nexthdr);
496 case IPPROTO_TCP: 497 if (poff >= 0) {
497 case IPPROTO_UDP: 498 ports = skb_header_pointer(skb, protoff + poff, sizeof(_ports),
498 case IPPROTO_UDPLITE:
499 case IPPROTO_SCTP:
500 case IPPROTO_DCCP:
501 ports = skb_header_pointer(skb, protoff, sizeof(_ports),
502 &_ports); 499 &_ports);
503 break; 500 } else {
504 default:
505 _ports[0] = _ports[1] = 0; 501 _ports[0] = _ports[1] = 0;
506 ports = _ports; 502 ports = _ports;
507 break;
508 } 503 }
509 if (!ports) 504 if (!ports)
510 return -1; 505 return -1;
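The hashlimit hunk replaces the per-protocol switch with proto_ports_offset(), which returns the offset of the 16-bit source/destination port pair for the port-carrying protocols (TCP, UDP, UDP-Lite, SCTP, DCCP) and a negative value otherwise. A hedged sketch of the lookup; the helper name and its arguments are illustrative, and the usual kernel headers are assumed:

	/* Return a pointer to {sport, dport}, or NULL for portless protocols. */
	static const __be16 *example_get_ports(const struct sk_buff *skb,
					       unsigned int protoff, u8 nexthdr,
					       __be16 buf[2])
	{
		int poff = proto_ports_offset(nexthdr);

		if (poff < 0)
			return NULL;
		/* Copies the ports out even if they straddle skb fragments. */
		return skb_header_pointer(skb, protoff + poff,
					  2 * sizeof(buf[0]), buf);
	}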
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9a17f28b1253..3616f27b9d46 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -488,7 +488,7 @@ retry:
488 skb->dev = dev; 488 skb->dev = dev;
489 skb->priority = sk->sk_priority; 489 skb->priority = sk->sk_priority;
490 skb->mark = sk->sk_mark; 490 skb->mark = sk->sk_mark;
491 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 491 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
492 if (err < 0) 492 if (err < 0)
493 goto out_unlock; 493 goto out_unlock;
494 494
@@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock,
1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); 1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1210 if (err) 1210 if (err)
1211 goto out_free; 1211 goto out_free;
1212 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 1212 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1213 if (err < 0) 1213 if (err < 0)
1214 goto out_free; 1214 goto out_free;
1215 1215
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 6ec7d55b1769..901956ada9c8 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -14,3 +14,14 @@ config PHONET
14 14
15 To compile this driver as a module, choose M here: the module 15 To compile this driver as a module, choose M here: the module
16 will be called phonet. If unsure, say N. 16 will be called phonet. If unsure, say N.
17
18config PHONET_PIPECTRLR
19 bool "Phonet Pipe Controller"
20 depends on PHONET
21 default N
22 help
23 The Pipe Controller implementation in Phonet stack to support Pipe
24 data with Nokia Slim modems like WG2.5 used on ST-Ericsson U8500
25 platform.
26
27 If unsure, say N.
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 73aee7f2fcdc..fd95beb72f5d 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -251,6 +251,16 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
251 else if (phonet_address_lookup(net, daddr) == 0) { 251 else if (phonet_address_lookup(net, daddr) == 0) {
252 dev = phonet_device_get(net); 252 dev = phonet_device_get(net);
253 skb->pkt_type = PACKET_LOOPBACK; 253 skb->pkt_type = PACKET_LOOPBACK;
254 } else if (pn_sockaddr_get_object(target) == 0) {
255 /* Resource routing (small race until phonet_rcv()) */
256 struct sock *sk = pn_find_sock_by_res(net,
257 target->spn_resource);
258 if (sk) {
259 sock_put(sk);
260 dev = phonet_device_get(net);
261 skb->pkt_type = PACKET_LOOPBACK;
262 } else
263 dev = phonet_route_output(net, daddr);
254 } else 264 } else
255 dev = phonet_route_output(net, daddr); 265 dev = phonet_route_output(net, daddr);
256 266
@@ -383,6 +393,13 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
383 goto out; 393 goto out;
384 } 394 }
385 395
396 /* resource routing */
397 if (pn_sockaddr_get_object(&sa) == 0) {
398 struct sock *sk = pn_find_sock_by_res(net, sa.spn_resource);
399 if (sk)
400 return sk_receive_skb(sk, skb, 0);
401 }
402
386 /* check if we are the destination */ 403 /* check if we are the destination */
387 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) { 404 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) {
388 /* Phonet packet input */ 405 /* Phonet packet input */
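The af_phonet.c hunks add routing by Phonet resource for packets addressed to object 0. Note the asymmetry: on the send side the lookup only decides whether to loop the packet back, so the reference from pn_find_sock_by_res() is dropped immediately, while on the receive side the reference is handed to sk_receive_skb(), which releases it after delivering the skb. The receive-side check, condensed from the hunk:

	if (pn_sockaddr_get_object(&sa) == 0) {
		struct sock *sk = pn_find_sock_by_res(net, sa.spn_resource);

		if (sk)		/* sk_receive_skb() drops the reference */
			return sk_receive_skb(sk, skb, 0);
	}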
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 1bd38db4fe1e..2f032381bd45 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -52,6 +52,19 @@ static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg)
52 answ = skb ? skb->len : 0; 52 answ = skb ? skb->len : 0;
53 release_sock(sk); 53 release_sock(sk);
54 return put_user(answ, (int __user *)arg); 54 return put_user(answ, (int __user *)arg);
55
56 case SIOCPNADDRESOURCE:
57 case SIOCPNDELRESOURCE: {
58 u32 res;
59 if (get_user(res, (u32 __user *)arg))
60 return -EFAULT;
61 if (res >= 256)
62 return -EINVAL;
63 if (cmd == SIOCPNADDRESOURCE)
64 return pn_sock_bind_res(sk, res);
65 else
66 return pn_sock_unbind_res(sk, res);
67 }
55 } 68 }
56 69
57 return -ENOIOCTLCMD; 70 return -ENOIOCTLCMD;
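SIOCPNADDRESOURCE and SIOCPNDELRESOURCE take a user-space u32 resource number below 256 and bind or unbind it on the socket. A hedged user-space sketch, assuming the ioctl numbers are exported through <linux/phonet.h> as this series intends; the helper name and error convention are purely illustrative:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/phonet.h>

	/* Claim a Phonet resource on an AF_PHONET datagram socket.
	 * Needs CAP_SYS_ADMIN; see pn_sock_bind_res() later in this patch. */
	static int claim_phonet_resource(int fd, uint32_t res)
	{
		if (res >= 256)
			return -1;	/* kernel would reject with -EINVAL */
		return ioctl(fd, SIOCPNADDRESOURCE, &res);
	}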
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 15003021f4f0..aa3d8700d213 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -88,6 +88,15 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb,
88 const struct pnpipehdr *oph = pnp_hdr(oskb); 88 const struct pnpipehdr *oph = pnp_hdr(oskb);
89 struct pnpipehdr *ph; 89 struct pnpipehdr *ph;
90 struct sk_buff *skb; 90 struct sk_buff *skb;
91#ifdef CONFIG_PHONET_PIPECTRLR
92 const struct phonethdr *hdr = pn_hdr(oskb);
93 struct sockaddr_pn spn = {
94 .spn_family = AF_PHONET,
95 .spn_resource = 0xD9,
96 .spn_dev = hdr->pn_sdev,
97 .spn_obj = hdr->pn_sobj,
98 };
99#endif
91 100
92 skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); 101 skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
93 if (!skb) 102 if (!skb)
@@ -105,10 +114,271 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb,
105 ph->pipe_handle = oph->pipe_handle; 114 ph->pipe_handle = oph->pipe_handle;
106 ph->error_code = code; 115 ph->error_code = code;
107 116
117#ifdef CONFIG_PHONET_PIPECTRLR
118 return pn_skb_send(sk, skb, &spn);
119#else
108 return pn_skb_send(sk, skb, &pipe_srv); 120 return pn_skb_send(sk, skb, &pipe_srv);
121#endif
109} 122}
110 123
111#define PAD 0x00 124#define PAD 0x00
125
126#ifdef CONFIG_PHONET_PIPECTRLR
127static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len)
128{
129 int i, j;
130 u8 base_fc, final_fc;
131
132 for (i = 0; i < len; i++) {
133 base_fc = host_fc[i];
134 for (j = 0; j < len; j++) {
135 if (remote_fc[j] == base_fc) {
136 final_fc = base_fc;
137 goto done;
138 }
139 }
140 }
141 return -EINVAL;
142
143done:
144 return final_fc;
145
146}
147
148static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb,
149 u8 *pref_rx_fc, u8 *req_tx_fc)
150{
151 struct pnpipehdr *hdr;
152 u8 n_sb;
153
154 if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
155 return -EINVAL;
156
157 hdr = pnp_hdr(skb);
158 n_sb = hdr->data[4];
159
160 __skb_pull(skb, sizeof(*hdr) + 4);
161 while (n_sb > 0) {
162 u8 type, buf[3], len = sizeof(buf);
163 u8 *data = pep_get_sb(skb, &type, &len, buf);
164
165 if (data == NULL)
166 return -EINVAL;
167
168 switch (type) {
169 case PN_PIPE_SB_REQUIRED_FC_TX:
170 if (len < 3 || (data[2] | data[3] | data[4]) > 3)
171 break;
172 req_tx_fc[0] = data[2];
173 req_tx_fc[1] = data[3];
174 req_tx_fc[2] = data[4];
175 break;
176
177 case PN_PIPE_SB_PREFERRED_FC_RX:
178 if (len < 3 || (data[2] | data[3] | data[4]) > 3)
179 break;
180 pref_rx_fc[0] = data[2];
181 pref_rx_fc[1] = data[3];
182 pref_rx_fc[2] = data[4];
183 break;
184
185 }
186 n_sb--;
187 }
188 return 0;
189}
190
191static int pipe_handler_send_req(struct sock *sk, u16 dobj, u8 utid,
192 u8 msg_id, u8 p_handle, gfp_t priority)
193{
194 int len;
195 struct pnpipehdr *ph;
196 struct sk_buff *skb;
197 struct sockaddr_pn spn = {
198 .spn_family = AF_PHONET,
199 .spn_resource = 0xD9,
200 .spn_dev = pn_dev(dobj),
201 .spn_obj = pn_obj(dobj),
202 };
203
204 static const u8 data[4] = {
205 PAD, PAD, PAD, PAD,
206 };
207
208 switch (msg_id) {
209 case PNS_PEP_CONNECT_REQ:
210 len = sizeof(data);
211 break;
212
213 case PNS_PEP_DISCONNECT_REQ:
214 case PNS_PEP_ENABLE_REQ:
215 case PNS_PEP_DISABLE_REQ:
216 len = 0;
217 break;
218
219 default:
220 return -EINVAL;
221 }
222
223 skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
224 if (!skb)
225 return -ENOMEM;
226 skb_set_owner_w(skb, sk);
227
228 skb_reserve(skb, MAX_PNPIPE_HEADER);
229 if (len) {
230 __skb_put(skb, len);
231 skb_copy_to_linear_data(skb, data, len);
232 }
233 __skb_push(skb, sizeof(*ph));
234 skb_reset_transport_header(skb);
235 ph = pnp_hdr(skb);
236 ph->utid = utid;
237 ph->message_id = msg_id;
238 ph->pipe_handle = p_handle;
239 ph->error_code = PN_PIPE_NO_ERROR;
240
241 return pn_skb_send(sk, skb, &spn);
242}
243
244static int pipe_handler_send_created_ind(struct sock *sk, u16 dobj,
245 u8 utid, u8 p_handle, u8 msg_id, u8 tx_fc, u8 rx_fc)
246{
247 int err_code;
248 struct pnpipehdr *ph;
249 struct sk_buff *skb;
250 struct sockaddr_pn spn = {
251 .spn_family = AF_PHONET,
252 .spn_resource = 0xD9,
253 .spn_dev = pn_dev(dobj),
254 .spn_obj = pn_obj(dobj),
255 };
256
257 static u8 data[4] = {
258 0x03, 0x04,
259 };
260 data[2] = tx_fc;
261 data[3] = rx_fc;
262
263 /*
264 * actually, below is number of sub-blocks and not error code.
265 * Pipe_created_ind message format does not have any
266 * error code field. However, the Phonet stack will always send
267 * an error code as part of pnpipehdr. So, use that err_code to
268 * specify the number of sub-blocks.
269 */
270 err_code = 0x01;
271
272 skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC);
273 if (!skb)
274 return -ENOMEM;
275 skb_set_owner_w(skb, sk);
276
277 skb_reserve(skb, MAX_PNPIPE_HEADER);
278 __skb_put(skb, sizeof(data));
279 skb_copy_to_linear_data(skb, data, sizeof(data));
280 __skb_push(skb, sizeof(*ph));
281 skb_reset_transport_header(skb);
282 ph = pnp_hdr(skb);
283 ph->utid = utid;
284 ph->message_id = msg_id;
285 ph->pipe_handle = p_handle;
286 ph->error_code = err_code;
287
288 return pn_skb_send(sk, skb, &spn);
289}
290
291static int pipe_handler_send_ind(struct sock *sk, u16 dobj, u8 utid,
292 u8 p_handle, u8 msg_id)
293{
294 int err_code;
295 struct pnpipehdr *ph;
296 struct sk_buff *skb;
297 struct sockaddr_pn spn = {
298 .spn_family = AF_PHONET,
299 .spn_resource = 0xD9,
300 .spn_dev = pn_dev(dobj),
301 .spn_obj = pn_obj(dobj),
302 };
303
304 /*
305 * actually, below is a filler.
306 * Pipe_enabled/disabled_ind message format does not have any
307 * error code field. However, the Phonet stack will always send
308 * an error code as part of pnpipehdr. So, use that err_code to
309 * specify the filler value.
310 */
311 err_code = 0x0;
312
313 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC);
314 if (!skb)
315 return -ENOMEM;
316 skb_set_owner_w(skb, sk);
317
318 skb_reserve(skb, MAX_PNPIPE_HEADER);
319 __skb_push(skb, sizeof(*ph));
320 skb_reset_transport_header(skb);
321 ph = pnp_hdr(skb);
322 ph->utid = utid;
323 ph->message_id = msg_id;
324 ph->pipe_handle = p_handle;
325 ph->error_code = err_code;
326
327 return pn_skb_send(sk, skb, &spn);
328}
329
330static int pipe_handler_enable_pipe(struct sock *sk, int cmd)
331{
332 int ret;
333 struct pep_sock *pn = pep_sk(sk);
334
335 switch (cmd) {
336 case PNPIPE_ENABLE:
337 ret = pipe_handler_send_req(sk, pn->pn_sk.sobject,
338 PNS_PIPE_ENABLE_UTID, PNS_PEP_ENABLE_REQ,
339 pn->pipe_handle, GFP_ATOMIC);
340 break;
341
342 case PNPIPE_DISABLE:
343 ret = pipe_handler_send_req(sk, pn->pn_sk.sobject,
344 PNS_PIPE_DISABLE_UTID, PNS_PEP_DISABLE_REQ,
345 pn->pipe_handle, GFP_ATOMIC);
346 break;
347
348 default:
349 ret = -EINVAL;
350 }
351
352 return ret;
353}
354
355static int pipe_handler_create_pipe(struct sock *sk, int pipe_handle, int cmd)
356{
357 int ret;
358 struct pep_sock *pn = pep_sk(sk);
359
360 switch (cmd) {
361 case PNPIPE_CREATE:
362 ret = pipe_handler_send_req(sk, pn->pn_sk.sobject,
363 PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ,
364 pipe_handle, GFP_ATOMIC);
365 break;
366
367 case PNPIPE_DESTROY:
368 ret = pipe_handler_send_req(sk, pn->remote_pep,
369 PNS_PEP_DISCONNECT_UTID,
370 PNS_PEP_DISCONNECT_REQ,
371 pn->pipe_handle, GFP_ATOMIC);
372 break;
373
374 default:
375 ret = -EINVAL;
376 }
377
378 return ret;
379}
380#endif
381
112static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) 382static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
113{ 383{
114 static const u8 data[20] = { 384 static const u8 data[20] = {
@@ -173,6 +443,14 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
173 struct pep_sock *pn = pep_sk(sk); 443 struct pep_sock *pn = pep_sk(sk);
174 struct pnpipehdr *ph; 444 struct pnpipehdr *ph;
175 struct sk_buff *skb; 445 struct sk_buff *skb;
446#ifdef CONFIG_PHONET_PIPECTRLR
447 struct sockaddr_pn spn = {
448 .spn_family = AF_PHONET,
449 .spn_resource = 0xD9,
450 .spn_dev = pn_dev(pn->remote_pep),
451 .spn_obj = pn_obj(pn->remote_pep),
452 };
453#endif
176 454
177 skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); 455 skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority);
178 if (!skb) 456 if (!skb)
@@ -192,7 +470,11 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
192 ph->data[3] = PAD; 470 ph->data[3] = PAD;
193 ph->data[4] = status; 471 ph->data[4] = status;
194 472
473#ifdef CONFIG_PHONET_PIPECTRLR
474 return pn_skb_send(sk, skb, &spn);
475#else
195 return pn_skb_send(sk, skb, &pipe_srv); 476 return pn_skb_send(sk, skb, &pipe_srv);
477#endif
196} 478}
197 479
198/* Send our RX flow control information to the sender. 480/* Send our RX flow control information to the sender.
@@ -309,6 +591,12 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
309 struct pnpipehdr *hdr = pnp_hdr(skb); 591 struct pnpipehdr *hdr = pnp_hdr(skb);
310 struct sk_buff_head *queue; 592 struct sk_buff_head *queue;
311 int err = 0; 593 int err = 0;
594#ifdef CONFIG_PHONET_PIPECTRLR
595 struct phonethdr *ph = pn_hdr(skb);
596 static u8 host_pref_rx_fc[3], host_req_tx_fc[3];
597 u8 remote_pref_rx_fc[3], remote_req_tx_fc[3];
598 u8 negotiated_rx_fc, negotiated_tx_fc;
599#endif
312 600
313 BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); 601 BUG_ON(sk->sk_state == TCP_CLOSE_WAIT);
314 602
@@ -317,6 +605,40 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
317 pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); 605 pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE);
318 break; 606 break;
319 607
608#ifdef CONFIG_PHONET_PIPECTRLR
609 case PNS_PEP_CONNECT_RESP:
610 if ((ph->pn_sdev == pn_dev(pn->remote_pep)) &&
611 (ph->pn_sobj == pn_obj(pn->remote_pep))) {
612 pipe_get_flow_info(sk, skb, remote_pref_rx_fc,
613 remote_req_tx_fc);
614
615 negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc,
616 host_pref_rx_fc,
617 sizeof(host_pref_rx_fc));
618 negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc,
619 remote_pref_rx_fc,
620 sizeof(host_pref_rx_fc));
621
622 pn->pipe_state = PIPE_DISABLED;
623 pipe_handler_send_created_ind(sk, pn->remote_pep,
624 PNS_PIPE_CREATED_IND_UTID,
625 pn->pipe_handle, PNS_PIPE_CREATED_IND,
626 negotiated_tx_fc, negotiated_rx_fc);
627 pipe_handler_send_created_ind(sk, pn->pn_sk.sobject,
628 PNS_PIPE_CREATED_IND_UTID,
629 pn->pipe_handle, PNS_PIPE_CREATED_IND,
630 negotiated_tx_fc, negotiated_rx_fc);
631 } else {
632 pipe_handler_send_req(sk, pn->remote_pep,
633 PNS_PEP_CONNECT_UTID,
634 PNS_PEP_CONNECT_REQ, pn->pipe_handle,
635 GFP_ATOMIC);
636 pipe_get_flow_info(sk, skb, host_pref_rx_fc,
637 host_req_tx_fc);
638 }
639 break;
640#endif
641
320 case PNS_PEP_DISCONNECT_REQ: 642 case PNS_PEP_DISCONNECT_REQ:
321 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 643 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
322 sk->sk_state = TCP_CLOSE_WAIT; 644 sk->sk_state = TCP_CLOSE_WAIT;
@@ -324,11 +646,41 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
324 sk->sk_state_change(sk); 646 sk->sk_state_change(sk);
325 break; 647 break;
326 648
649#ifdef CONFIG_PHONET_PIPECTRLR
650 case PNS_PEP_DISCONNECT_RESP:
651 pn->pipe_state = PIPE_IDLE;
652 pipe_handler_send_req(sk, pn->pn_sk.sobject,
653 PNS_PEP_DISCONNECT_UTID,
654 PNS_PEP_DISCONNECT_REQ, pn->pipe_handle,
655 GFP_KERNEL);
656 break;
657#endif
658
327 case PNS_PEP_ENABLE_REQ: 659 case PNS_PEP_ENABLE_REQ:
328 /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ 660 /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */
329 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 661 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
330 break; 662 break;
331 663
664#ifdef CONFIG_PHONET_PIPECTRLR
665 case PNS_PEP_ENABLE_RESP:
666 if ((ph->pn_sdev == pn_dev(pn->remote_pep)) &&
667 (ph->pn_sobj == pn_obj(pn->remote_pep))) {
668 pn->pipe_state = PIPE_ENABLED;
669 pipe_handler_send_ind(sk, pn->remote_pep,
670 PNS_PIPE_ENABLED_IND_UTID,
671 pn->pipe_handle, PNS_PIPE_ENABLED_IND);
672 pipe_handler_send_ind(sk, pn->pn_sk.sobject,
673 PNS_PIPE_ENABLED_IND_UTID,
674 pn->pipe_handle, PNS_PIPE_ENABLED_IND);
675 } else
676 pipe_handler_send_req(sk, pn->remote_pep,
677 PNS_PIPE_ENABLE_UTID,
678 PNS_PEP_ENABLE_REQ, pn->pipe_handle,
679 GFP_KERNEL);
680
681 break;
682#endif
683
332 case PNS_PEP_RESET_REQ: 684 case PNS_PEP_RESET_REQ:
333 switch (hdr->state_after_reset) { 685 switch (hdr->state_after_reset) {
334 case PN_PIPE_DISABLE: 686 case PN_PIPE_DISABLE:
@@ -347,6 +699,27 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
347 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 699 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
348 break; 700 break;
349 701
702#ifdef CONFIG_PHONET_PIPECTRLR
703 case PNS_PEP_DISABLE_RESP:
704 if ((ph->pn_sdev == pn_dev(pn->remote_pep)) &&
705 (ph->pn_sobj == pn_obj(pn->remote_pep))) {
706 pn->pipe_state = PIPE_DISABLED;
707 pipe_handler_send_ind(sk, pn->remote_pep,
708 PNS_PIPE_DISABLED_IND_UTID,
709 pn->pipe_handle,
710 PNS_PIPE_DISABLED_IND);
711 pipe_handler_send_ind(sk, pn->pn_sk.sobject,
712 PNS_PIPE_DISABLED_IND_UTID,
713 pn->pipe_handle,
714 PNS_PIPE_DISABLED_IND);
715 } else
716 pipe_handler_send_req(sk, pn->remote_pep,
717 PNS_PIPE_DISABLE_UTID,
718 PNS_PEP_DISABLE_REQ, pn->pipe_handle,
719 GFP_KERNEL);
720 break;
721#endif
722
350 case PNS_PEP_CTRL_REQ: 723 case PNS_PEP_CTRL_REQ:
351 if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { 724 if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) {
352 atomic_inc(&sk->sk_drops); 725 atomic_inc(&sk->sk_drops);
@@ -520,6 +893,9 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
520 newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; 893 newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
521 newpn->init_enable = enabled; 894 newpn->init_enable = enabled;
522 newpn->aligned = aligned; 895 newpn->aligned = aligned;
896#ifdef CONFIG_PHONET_PIPECTRLR
897 newpn->remote_pep = pn->remote_pep;
898#endif
523 899
524 BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); 900 BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue));
525 skb_queue_head(&newsk->sk_receive_queue, skb); 901 skb_queue_head(&newsk->sk_receive_queue, skb);
@@ -621,6 +997,28 @@ drop:
621 return err; 997 return err;
622} 998}
623 999
1000static int pipe_do_remove(struct sock *sk)
1001{
1002 struct pep_sock *pn = pep_sk(sk);
1003 struct pnpipehdr *ph;
1004 struct sk_buff *skb;
1005
1006 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL);
1007 if (!skb)
1008 return -ENOMEM;
1009
1010 skb_reserve(skb, MAX_PNPIPE_HEADER);
1011 __skb_push(skb, sizeof(*ph));
1012 skb_reset_transport_header(skb);
1013 ph = pnp_hdr(skb);
1014 ph->utid = 0;
1015 ph->message_id = PNS_PIPE_REMOVE_REQ;
1016 ph->pipe_handle = pn->pipe_handle;
1017 ph->data[0] = PAD;
1018
1019 return pn_skb_send(sk, skb, &pipe_srv);
1020}
1021
624/* associated socket ceases to exist */ 1022/* associated socket ceases to exist */
625static void pep_sock_close(struct sock *sk, long timeout) 1023static void pep_sock_close(struct sock *sk, long timeout)
626{ 1024{
@@ -639,7 +1037,10 @@ static void pep_sock_close(struct sock *sk, long timeout)
639 sk_for_each_safe(sknode, p, n, &pn->ackq) 1037 sk_for_each_safe(sknode, p, n, &pn->ackq)
640 sk_del_node_init(sknode); 1038 sk_del_node_init(sknode);
641 sk->sk_state = TCP_CLOSE; 1039 sk->sk_state = TCP_CLOSE;
642 } 1040 } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
1041 /* Forcefully remove dangling Phonet pipe */
1042 pipe_do_remove(sk);
1043
643 ifindex = pn->ifindex; 1044 ifindex = pn->ifindex;
644 pn->ifindex = 0; 1045 pn->ifindex = 0;
645 release_sock(sk); 1046 release_sock(sk);
@@ -757,6 +1158,10 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
757{ 1158{
758 struct pep_sock *pn = pep_sk(sk); 1159 struct pep_sock *pn = pep_sk(sk);
759 int val = 0, err = 0; 1160 int val = 0, err = 0;
1161#ifdef CONFIG_PHONET_PIPECTRLR
1162 int remote_pep;
1163 int pipe_handle;
1164#endif
760 1165
761 if (level != SOL_PNPIPE) 1166 if (level != SOL_PNPIPE)
762 return -ENOPROTOOPT; 1167 return -ENOPROTOOPT;
@@ -767,6 +1172,48 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
767 1172
768 lock_sock(sk); 1173 lock_sock(sk);
769 switch (optname) { 1174 switch (optname) {
1175#ifdef CONFIG_PHONET_PIPECTRLR
1176 case PNPIPE_CREATE:
1177 if (val) {
1178 if (pn->pipe_state > PIPE_IDLE) {
1179 err = -EFAULT;
1180 break;
1181 }
1182 remote_pep = val & 0xFFFF;
1183 pipe_handle = (val >> 16) & 0xFF;
1184 pn->remote_pep = remote_pep;
1185 err = pipe_handler_create_pipe(sk, pipe_handle,
1186 PNPIPE_CREATE);
1187 break;
1188 }
1189
1190 case PNPIPE_ENABLE:
1191 if (pn->pipe_state != PIPE_DISABLED) {
1192 err = -EFAULT;
1193 break;
1194 }
1195 err = pipe_handler_enable_pipe(sk, PNPIPE_ENABLE);
1196 break;
1197
1198 case PNPIPE_DISABLE:
1199 if (pn->pipe_state != PIPE_ENABLED) {
1200 err = -EFAULT;
1201 break;
1202 }
1203
1204 err = pipe_handler_enable_pipe(sk, PNPIPE_DISABLE);
1205 break;
1206
1207 case PNPIPE_DESTROY:
1208 if (pn->pipe_state < PIPE_DISABLED) {
1209 err = -EFAULT;
1210 break;
1211 }
1212
1213 err = pipe_handler_create_pipe(sk, 0x0, PNPIPE_DESTROY);
1214 break;
1215#endif
1216
770 case PNPIPE_ENCAP: 1217 case PNPIPE_ENCAP:
771 if (val && val != PNPIPE_ENCAP_IP) { 1218 if (val && val != PNPIPE_ENCAP_IP) {
772 err = -EINVAL; 1219 err = -EINVAL;
@@ -816,6 +1263,13 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
816 case PNPIPE_ENCAP: 1263 case PNPIPE_ENCAP:
817 val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE; 1264 val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE;
818 break; 1265 break;
1266
1267#ifdef CONFIG_PHONET_PIPECTRLR
1268 case PNPIPE_INQ:
1269 val = pn->pipe_state;
1270 break;
1271#endif
1272
819 case PNPIPE_IFINDEX: 1273 case PNPIPE_IFINDEX:
820 val = pn->ifindex; 1274 val = pn->ifindex;
821 break; 1275 break;
@@ -835,6 +1289,15 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
835{ 1289{
836 struct pep_sock *pn = pep_sk(sk); 1290 struct pep_sock *pn = pep_sk(sk);
837 struct pnpipehdr *ph; 1291 struct pnpipehdr *ph;
1292 int err;
1293#ifdef CONFIG_PHONET_PIPECTRLR
1294 struct sockaddr_pn spn = {
1295 .spn_family = AF_PHONET,
1296 .spn_resource = 0xD9,
1297 .spn_dev = pn_dev(pn->remote_pep),
1298 .spn_obj = pn_obj(pn->remote_pep),
1299 };
1300#endif
838 1301
839 if (pn_flow_safe(pn->tx_fc) && 1302 if (pn_flow_safe(pn->tx_fc) &&
840 !atomic_add_unless(&pn->tx_credits, -1, 0)) { 1303 !atomic_add_unless(&pn->tx_credits, -1, 0)) {
@@ -852,8 +1315,16 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
852 } else 1315 } else
853 ph->message_id = PNS_PIPE_DATA; 1316 ph->message_id = PNS_PIPE_DATA;
854 ph->pipe_handle = pn->pipe_handle; 1317 ph->pipe_handle = pn->pipe_handle;
1318#ifdef CONFIG_PHONET_PIPECTRLR
1319 err = pn_skb_send(sk, skb, &spn);
1320#else
1321 err = pn_skb_send(sk, skb, &pipe_srv);
1322#endif
1323
1324 if (err && pn_flow_safe(pn->tx_fc))
1325 atomic_inc(&pn->tx_credits);
1326 return err;
855 1327
856 return pn_skb_send(sk, skb, &pipe_srv);
857} 1328}
858 1329
859static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, 1330static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -873,7 +1344,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
873 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, 1344 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len,
874 flags & MSG_DONTWAIT, &err); 1345 flags & MSG_DONTWAIT, &err);
875 if (!skb) 1346 if (!skb)
876 return -ENOBUFS; 1347 return err;
877 1348
878 skb_reserve(skb, MAX_PHONET_HEADER + 3); 1349 skb_reserve(skb, MAX_PHONET_HEADER + 3);
879 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 1350 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
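The pipe_skb_send() change at the end of the pep.c diff pairs the credit taken with atomic_add_unless() with a refund when pn_skb_send() fails, and pep_sendmsg() now propagates the real error from sock_alloc_send_skb() instead of a blanket -ENOBUFS. The credit rollback, condensed from the hunk (the out-of-credit branch is abbreviated here):

	if (pn_flow_safe(pn->tx_fc) &&
	    !atomic_add_unless(&pn->tx_credits, -1, 0)) {
		/* ... no credit available: queue or drop ... */
	}

	err = pn_skb_send(sk, skb, &spn);
	if (err && pn_flow_safe(pn->tx_fc))
		atomic_inc(&pn->tx_credits);	/* give the credit back */
	return err;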
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index b18e48fae975..947038ddd04c 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -292,8 +292,7 @@ static void phonet_route_autodel(struct net_device *dev)
292 if (bitmap_empty(deleted, 64)) 292 if (bitmap_empty(deleted, 64))
293 return; /* short-circuit RCU */ 293 return; /* short-circuit RCU */
294 synchronize_rcu(); 294 synchronize_rcu();
295 for (i = find_first_bit(deleted, 64); i < 64; 295 for_each_set_bit(i, deleted, 64) {
296 i = find_next_bit(deleted, 64, i + 1)) {
297 rtm_phonet_notify(RTM_DELROUTE, dev, i); 296 rtm_phonet_notify(RTM_DELROUTE, dev, i);
298 dev_put(dev); 297 dev_put(dev);
299 } 298 }
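for_each_set_bit() expands to the same find_first_bit()/find_next_bit() loop the old code spelled out, which is why the conversion above is purely cosmetic. The idiom in isolation (bitmap and callee names as in the hunk):

	unsigned int i;

	for_each_set_bit(i, deleted, 64) {
		rtm_phonet_notify(RTM_DELROUTE, dev, i);
		dev_put(dev);
	}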
@@ -374,6 +373,7 @@ int __init phonet_device_init(void)
374 if (err) 373 if (err)
375 return err; 374 return err;
376 375
376 proc_net_fops_create(&init_net, "pnresource", 0, &pn_res_seq_fops);
377 register_netdevice_notifier(&phonet_device_notifier); 377 register_netdevice_notifier(&phonet_device_notifier);
378 err = phonet_netlink_register(); 378 err = phonet_netlink_register();
379 if (err) 379 if (err)
@@ -386,6 +386,7 @@ void phonet_device_exit(void)
386 rtnl_unregister_all(PF_PHONET); 386 rtnl_unregister_all(PF_PHONET);
387 unregister_netdevice_notifier(&phonet_device_notifier); 387 unregister_netdevice_notifier(&phonet_device_notifier);
388 unregister_pernet_device(&phonet_net_ops); 388 unregister_pernet_device(&phonet_net_ops);
389 proc_net_remove(&init_net, "pnresource");
389} 390}
390 391
391int phonet_route_add(struct net_device *dev, u8 daddr) 392int phonet_route_add(struct net_device *dev, u8 daddr)
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 6e9848bf0370..aca8fba099e9 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -158,6 +158,7 @@ void pn_sock_unhash(struct sock *sk)
158 spin_lock_bh(&pnsocks.lock); 158 spin_lock_bh(&pnsocks.lock);
159 sk_del_node_init(sk); 159 sk_del_node_init(sk);
160 spin_unlock_bh(&pnsocks.lock); 160 spin_unlock_bh(&pnsocks.lock);
161 pn_sock_unbind_all_res(sk);
161} 162}
162EXPORT_SYMBOL(pn_sock_unhash); 163EXPORT_SYMBOL(pn_sock_unhash);
163 164
@@ -281,7 +282,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
281 if (!mask && sk->sk_state == TCP_CLOSE_WAIT) 282 if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
282 return POLLHUP; 283 return POLLHUP;
283 284
284 if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits)) 285 if (sk->sk_state == TCP_ESTABLISHED &&
286 atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
287 atomic_read(&pn->tx_credits))
285 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 288 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
286 289
287 return mask; 290 return mask;
@@ -563,3 +566,188 @@ const struct file_operations pn_sock_seq_fops = {
563 .release = seq_release_net, 566 .release = seq_release_net,
564}; 567};
565#endif 568#endif
569
570static struct {
571 struct sock *sk[256];
572} pnres;
573
574/*
575 * Find and hold socket based on resource.
576 */
577struct sock *pn_find_sock_by_res(struct net *net, u8 res)
578{
579 struct sock *sk;
580
581 if (!net_eq(net, &init_net))
582 return NULL;
583
584 rcu_read_lock();
585 sk = rcu_dereference(pnres.sk[res]);
586 if (sk)
587 sock_hold(sk);
588 rcu_read_unlock();
589 return sk;
590}
591
592static DEFINE_MUTEX(resource_mutex);
593
594int pn_sock_bind_res(struct sock *sk, u8 res)
595{
596 int ret = -EADDRINUSE;
597
598 if (!net_eq(sock_net(sk), &init_net))
599 return -ENOIOCTLCMD;
600 if (!capable(CAP_SYS_ADMIN))
601 return -EPERM;
602 if (pn_socket_autobind(sk->sk_socket))
603 return -EAGAIN;
604
605 mutex_lock(&resource_mutex);
606 if (pnres.sk[res] == NULL) {
607 sock_hold(sk);
608 rcu_assign_pointer(pnres.sk[res], sk);
609 ret = 0;
610 }
611 mutex_unlock(&resource_mutex);
612 return ret;
613}
614
615int pn_sock_unbind_res(struct sock *sk, u8 res)
616{
617 int ret = -ENOENT;
618
619 if (!capable(CAP_SYS_ADMIN))
620 return -EPERM;
621
622 mutex_lock(&resource_mutex);
623 if (pnres.sk[res] == sk) {
624 rcu_assign_pointer(pnres.sk[res], NULL);
625 ret = 0;
626 }
627 mutex_unlock(&resource_mutex);
628
629 if (ret == 0) {
630 synchronize_rcu();
631 sock_put(sk);
632 }
633 return ret;
634}
635
636void pn_sock_unbind_all_res(struct sock *sk)
637{
638 unsigned res, match = 0;
639
640 mutex_lock(&resource_mutex);
641 for (res = 0; res < 256; res++) {
642 if (pnres.sk[res] == sk) {
643 rcu_assign_pointer(pnres.sk[res], NULL);
644 match++;
645 }
646 }
647 mutex_unlock(&resource_mutex);
648
649 if (match == 0)
650 return;
651 synchronize_rcu();
652 while (match > 0) {
653 sock_put(sk);
654 match--;
655 }
656}
657
658#ifdef CONFIG_PROC_FS
659static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos)
660{
661 struct net *net = seq_file_net(seq);
662 unsigned i;
663
664 if (!net_eq(net, &init_net))
665 return NULL;
666
667 for (i = 0; i < 256; i++) {
668 if (pnres.sk[i] == NULL)
669 continue;
670 if (!pos)
671 return pnres.sk + i;
672 pos--;
673 }
674 return NULL;
675}
676
677static struct sock **pn_res_get_next(struct seq_file *seq, struct sock **sk)
678{
679 struct net *net = seq_file_net(seq);
680 unsigned i;
681
682 BUG_ON(!net_eq(net, &init_net));
683
684 for (i = (sk - pnres.sk) + 1; i < 256; i++)
685 if (pnres.sk[i])
686 return pnres.sk + i;
687 return NULL;
688}
689
690static void *pn_res_seq_start(struct seq_file *seq, loff_t *pos)
691 __acquires(resource_mutex)
692{
693 mutex_lock(&resource_mutex);
694 return *pos ? pn_res_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
695}
696
697static void *pn_res_seq_next(struct seq_file *seq, void *v, loff_t *pos)
698{
699 struct sock **sk;
700
701 if (v == SEQ_START_TOKEN)
702 sk = pn_res_get_idx(seq, 0);
703 else
704 sk = pn_res_get_next(seq, v);
705 (*pos)++;
706 return sk;
707}
708
709static void pn_res_seq_stop(struct seq_file *seq, void *v)
710 __releases(resource_mutex)
711{
712 mutex_unlock(&resource_mutex);
713}
714
715static int pn_res_seq_show(struct seq_file *seq, void *v)
716{
717 int len;
718
719 if (v == SEQ_START_TOKEN)
720 seq_printf(seq, "%s%n", "rs uid inode", &len);
721 else {
722 struct sock **psk = v;
723 struct sock *sk = *psk;
724
725 seq_printf(seq, "%02X %5d %lu%n",
726 (int) (psk - pnres.sk), sock_i_uid(sk),
727 sock_i_ino(sk), &len);
728 }
729 seq_printf(seq, "%*s\n", 63 - len, "");
730 return 0;
731}
732
733static const struct seq_operations pn_res_seq_ops = {
734 .start = pn_res_seq_start,
735 .next = pn_res_seq_next,
736 .stop = pn_res_seq_stop,
737 .show = pn_res_seq_show,
738};
739
740static int pn_res_open(struct inode *inode, struct file *file)
741{
742 return seq_open_net(inode, file, &pn_res_seq_ops,
743 sizeof(struct seq_net_private));
744}
745
746const struct file_operations pn_res_seq_fops = {
747 .owner = THIS_MODULE,
748 .open = pn_res_open,
749 .read = seq_read,
750 .llseek = seq_lseek,
751 .release = seq_release_net,
752};
753#endif
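The new pnres.sk[] table is a classic RCU-published array: writers serialise on resource_mutex and publish with rcu_assign_pointer(), readers pair rcu_dereference() with sock_hold() inside rcu_read_lock(), and unbinding waits for a grace period before dropping the table's reference. The two sides, condensed from pn_find_sock_by_res() and pn_sock_unbind_res() above:

	/* Reader: returns a held socket or NULL. */
	rcu_read_lock();
	sk = rcu_dereference(pnres.sk[res]);
	if (sk)
		sock_hold(sk);
	rcu_read_unlock();

	/* Writer (unbind): unpublish, wait out readers, then drop the ref. */
	mutex_lock(&resource_mutex);
	rcu_assign_pointer(pnres.sk[res], NULL);
	mutex_unlock(&resource_mutex);
	synchronize_rcu();
	sock_put(sk);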
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index aebfecbdb841..bb6ad81b671d 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -39,7 +39,15 @@
39#include <net/sock.h> 39#include <net/sock.h>
40 40
41#include "rds.h" 41#include "rds.h"
42#include "rdma.h" 42
43char *rds_str_array(char **array, size_t elements, size_t index)
44{
45 if ((index < elements) && array[index])
46 return array[index];
47 else
48 return "unknown";
49}
50EXPORT_SYMBOL(rds_str_array);
43 51
44/* this is just used for stats gathering :/ */ 52/* this is just used for stats gathering :/ */
45static DEFINE_SPINLOCK(rds_sock_lock); 53static DEFINE_SPINLOCK(rds_sock_lock);
@@ -62,7 +70,7 @@ static int rds_release(struct socket *sock)
62 struct rds_sock *rs; 70 struct rds_sock *rs;
63 unsigned long flags; 71 unsigned long flags;
64 72
65 if (sk == NULL) 73 if (!sk)
66 goto out; 74 goto out;
67 75
68 rs = rds_sk_to_rs(sk); 76 rs = rds_sk_to_rs(sk);
@@ -73,7 +81,15 @@ static int rds_release(struct socket *sock)
73 * with the socket. */ 81 * with the socket. */
74 rds_clear_recv_queue(rs); 82 rds_clear_recv_queue(rs);
75 rds_cong_remove_socket(rs); 83 rds_cong_remove_socket(rs);
84
85 /*
86 * the binding lookup hash uses rcu, we need to
 87 * make sure we synchronize_rcu before we free our
88 * entry
89 */
76 rds_remove_bound(rs); 90 rds_remove_bound(rs);
91 synchronize_rcu();
92
77 rds_send_drop_to(rs, NULL); 93 rds_send_drop_to(rs, NULL);
78 rds_rdma_drop_keys(rs); 94 rds_rdma_drop_keys(rs);
79 rds_notify_queue_get(rs, NULL); 95 rds_notify_queue_get(rs, NULL);
@@ -83,6 +99,8 @@ static int rds_release(struct socket *sock)
83 rds_sock_count--; 99 rds_sock_count--;
84 spin_unlock_irqrestore(&rds_sock_lock, flags); 100 spin_unlock_irqrestore(&rds_sock_lock, flags);
85 101
102 rds_trans_put(rs->rs_transport);
103
86 sock->sk = NULL; 104 sock->sk = NULL;
87 sock_put(sk); 105 sock_put(sk);
88out: 106out:
@@ -514,7 +532,7 @@ out:
514 spin_unlock_irqrestore(&rds_sock_lock, flags); 532 spin_unlock_irqrestore(&rds_sock_lock, flags);
515} 533}
516 534
517static void __exit rds_exit(void) 535static void rds_exit(void)
518{ 536{
519 sock_unregister(rds_family_ops.family); 537 sock_unregister(rds_family_ops.family);
520 proto_unregister(&rds_proto); 538 proto_unregister(&rds_proto);
@@ -529,7 +547,7 @@ static void __exit rds_exit(void)
529} 547}
530module_exit(rds_exit); 548module_exit(rds_exit);
531 549
532static int __init rds_init(void) 550static int rds_init(void)
533{ 551{
534 int ret; 552 int ret;
535 553
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 5d95fc007f1a..2f6b3fcc79f8 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -34,45 +34,52 @@
34#include <net/sock.h> 34#include <net/sock.h>
35#include <linux/in.h> 35#include <linux/in.h>
36#include <linux/if_arp.h> 36#include <linux/if_arp.h>
37#include <linux/jhash.h>
37#include "rds.h" 38#include "rds.h"
38 39
39/* 40#define BIND_HASH_SIZE 1024
40 * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't 41static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
41 * particularly zippy.
42 *
43 * This is now called for every incoming frame so we arguably care much more
44 * about it than we used to.
45 */
46static DEFINE_SPINLOCK(rds_bind_lock); 42static DEFINE_SPINLOCK(rds_bind_lock);
47static struct rb_root rds_bind_tree = RB_ROOT;
48 43
49static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port, 44static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
50 struct rds_sock *insert) 45{
46 return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
47 (BIND_HASH_SIZE - 1));
48}
49
50static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
51 struct rds_sock *insert)
51{ 52{
52 struct rb_node **p = &rds_bind_tree.rb_node;
53 struct rb_node *parent = NULL;
54 struct rds_sock *rs; 53 struct rds_sock *rs;
54 struct hlist_node *node;
55 struct hlist_head *head = hash_to_bucket(addr, port);
55 u64 cmp; 56 u64 cmp;
56 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); 57 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
57 58
58 while (*p) { 59 rcu_read_lock();
59 parent = *p; 60 hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
60 rs = rb_entry(parent, struct rds_sock, rs_bound_node);
61
62 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | 61 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
63 be16_to_cpu(rs->rs_bound_port); 62 be16_to_cpu(rs->rs_bound_port);
64 63
65 if (needle < cmp) 64 if (cmp == needle) {
66 p = &(*p)->rb_left; 65 rcu_read_unlock();
67 else if (needle > cmp)
68 p = &(*p)->rb_right;
69 else
70 return rs; 66 return rs;
67 }
71 } 68 }
69 rcu_read_unlock();
72 70
73 if (insert) { 71 if (insert) {
74 rb_link_node(&insert->rs_bound_node, parent, p); 72 /*
75 rb_insert_color(&insert->rs_bound_node, &rds_bind_tree); 73 * make sure our addr and port are set before
74 * we are added to the list, other people
75 * in rcu will find us as soon as the
76 * hlist_add_head_rcu is done
77 */
78 insert->rs_bound_addr = addr;
79 insert->rs_bound_port = port;
80 rds_sock_addref(insert);
81
82 hlist_add_head_rcu(&insert->rs_bound_node, head);
76 } 83 }
77 return NULL; 84 return NULL;
78} 85}
@@ -86,15 +93,13 @@ static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
86struct rds_sock *rds_find_bound(__be32 addr, __be16 port) 93struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
87{ 94{
88 struct rds_sock *rs; 95 struct rds_sock *rs;
89 unsigned long flags;
90 96
91 spin_lock_irqsave(&rds_bind_lock, flags); 97 rs = rds_bind_lookup(addr, port, NULL);
92 rs = rds_bind_tree_walk(addr, port, NULL); 98
93 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) 99 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
94 rds_sock_addref(rs); 100 rds_sock_addref(rs);
95 else 101 else
96 rs = NULL; 102 rs = NULL;
97 spin_unlock_irqrestore(&rds_bind_lock, flags);
98 103
99 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, 104 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
100 ntohs(port)); 105 ntohs(port));
@@ -121,22 +126,15 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
121 do { 126 do {
122 if (rover == 0) 127 if (rover == 0)
123 rover++; 128 rover++;
124 if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) { 129 if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
125 *port = cpu_to_be16(rover); 130 *port = rs->rs_bound_port;
126 ret = 0; 131 ret = 0;
132 rdsdebug("rs %p binding to %pI4:%d\n",
133 rs, &addr, (int)ntohs(*port));
127 break; 134 break;
128 } 135 }
129 } while (rover++ != last); 136 } while (rover++ != last);
130 137
131 if (ret == 0) {
132 rs->rs_bound_addr = addr;
133 rs->rs_bound_port = *port;
134 rds_sock_addref(rs);
135
136 rdsdebug("rs %p binding to %pI4:%d\n",
137 rs, &addr, (int)ntohs(*port));
138 }
139
140 spin_unlock_irqrestore(&rds_bind_lock, flags); 138 spin_unlock_irqrestore(&rds_bind_lock, flags);
141 139
142 return ret; 140 return ret;
@@ -153,7 +151,7 @@ void rds_remove_bound(struct rds_sock *rs)
153 rs, &rs->rs_bound_addr, 151 rs, &rs->rs_bound_addr,
154 ntohs(rs->rs_bound_port)); 152 ntohs(rs->rs_bound_port));
155 153
156 rb_erase(&rs->rs_bound_node, &rds_bind_tree); 154 hlist_del_init_rcu(&rs->rs_bound_node);
157 rds_sock_put(rs); 155 rds_sock_put(rs);
158 rs->rs_bound_addr = 0; 156 rs->rs_bound_addr = 0;
159 } 157 }
@@ -184,7 +182,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
184 goto out; 182 goto out;
185 183
186 trans = rds_trans_get_preferred(sin->sin_addr.s_addr); 184 trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
187 if (trans == NULL) { 185 if (!trans) {
188 ret = -EADDRNOTAVAIL; 186 ret = -EADDRNOTAVAIL;
189 rds_remove_bound(rs); 187 rds_remove_bound(rs);
190 if (printk_ratelimit()) 188 if (printk_ratelimit())
@@ -198,5 +196,9 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
198 196
199out: 197out:
200 release_sock(sk); 198 release_sock(sk);
199
200 /* we might have called rds_remove_bound on error */
201 if (ret)
202 synchronize_rcu();
201 return ret; 203 return ret;
202} 204}
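
The bind.c hunk above replaces the per-socket rbtree keyed on (addr, port) with a 1024-bucket hash table indexed by jhash_2words() and walked under rcu_read_lock(), so rds_find_bound() no longer has to take rds_bind_lock for every incoming frame. Below is a rough user-space sketch of just the bucketing idea, assuming nothing beyond what the hunk shows: a simple multiplicative mix stands in for jhash_2words(), and an ordinary singly linked list stands in for the RCU-protected hlist, so none of these names are the kernel API.

#include <stdint.h>
#include <stdio.h>

#define BIND_HASH_SIZE 1024                 /* power of two, so we can mask */

struct bound_sock {
	uint32_t addr;                      /* stand-in for rs_bound_addr */
	uint16_t port;                      /* stand-in for rs_bound_port */
	struct bound_sock *next;            /* bucket chain */
};

static struct bound_sock *bind_hash_table[BIND_HASH_SIZE];

/* illustrative mix; the patch uses jhash_2words(addr, port, 0) here */
static unsigned int hash_to_bucket(uint32_t addr, uint16_t port)
{
	uint32_t h = (addr * 2654435761u) ^ (port * 40503u);

	return h & (BIND_HASH_SIZE - 1);
}

static struct bound_sock *bind_lookup(uint32_t addr, uint16_t port)
{
	struct bound_sock *rs;

	for (rs = bind_hash_table[hash_to_bucket(addr, port)]; rs; rs = rs->next)
		if (rs->addr == addr && rs->port == port)
			return rs;
	return NULL;
}

static void bind_insert(struct bound_sock *rs)
{
	unsigned int b = hash_to_bucket(rs->addr, rs->port);

	rs->next = bind_hash_table[b];      /* head insertion, like hlist_add_head_rcu */
	bind_hash_table[b] = rs;
}

int main(void)
{
	struct bound_sock a = { .addr = 0x0a000001, .port = 4000 };

	bind_insert(&a);
	printf("hit:  %d\n", bind_lookup(0x0a000001, 4000) != NULL);
	printf("miss: %d\n", bind_lookup(0x0a000001, 4001) != NULL);
	return 0;
}

Note how the real patch orders the insert path for the same reason the comment in the hunk gives: rs_bound_addr and rs_bound_port are set before hlist_add_head_rcu(), so a concurrent RCU reader can never observe a half-initialized entry.
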
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 0871a29f0780..75ea686f27d5 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -141,7 +141,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
141 unsigned long flags; 141 unsigned long flags;
142 142
143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); 143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
144 if (map == NULL) 144 if (!map)
145 return NULL; 145 return NULL;
146 146
147 map->m_addr = addr; 147 map->m_addr = addr;
@@ -159,7 +159,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
159 ret = rds_cong_tree_walk(addr, map); 159 ret = rds_cong_tree_walk(addr, map);
160 spin_unlock_irqrestore(&rds_cong_lock, flags); 160 spin_unlock_irqrestore(&rds_cong_lock, flags);
161 161
162 if (ret == NULL) { 162 if (!ret) {
163 ret = map; 163 ret = map;
164 map = NULL; 164 map = NULL;
165 } 165 }
@@ -205,7 +205,7 @@ int rds_cong_get_maps(struct rds_connection *conn)
205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr); 205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr); 206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
207 207
208 if (conn->c_lcong == NULL || conn->c_fcong == NULL) 208 if (!(conn->c_lcong && conn->c_fcong))
209 return -ENOMEM; 209 return -ENOMEM;
210 210
211 return 0; 211 return 0;
@@ -221,7 +221,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) { 221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (!test_and_set_bit(0, &conn->c_map_queued)) { 222 if (!test_and_set_bit(0, &conn->c_map_queued)) {
223 rds_stats_inc(s_cong_update_queued); 223 rds_stats_inc(s_cong_update_queued);
224 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 224 rds_send_xmit(conn);
225 } 225 }
226 } 226 }
227 227
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7619b671ca28..870992e08cae 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -37,7 +37,6 @@
37 37
38#include "rds.h" 38#include "rds.h"
39#include "loop.h" 39#include "loop.h"
40#include "rdma.h"
41 40
42#define RDS_CONNECTION_HASH_BITS 12 41#define RDS_CONNECTION_HASH_BITS 12
43#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) 42#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
@@ -63,18 +62,7 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
63 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ 62 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \
64} while (0) 63} while (0)
65 64
66static inline int rds_conn_is_sending(struct rds_connection *conn) 65/* rcu read lock must be held or the connection spinlock */
67{
68 int ret = 0;
69
70 if (!mutex_trylock(&conn->c_send_lock))
71 ret = 1;
72 else
73 mutex_unlock(&conn->c_send_lock);
74
75 return ret;
76}
77
78static struct rds_connection *rds_conn_lookup(struct hlist_head *head, 66static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
79 __be32 laddr, __be32 faddr, 67 __be32 laddr, __be32 faddr,
80 struct rds_transport *trans) 68 struct rds_transport *trans)
@@ -82,7 +70,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
82 struct rds_connection *conn, *ret = NULL; 70 struct rds_connection *conn, *ret = NULL;
83 struct hlist_node *pos; 71 struct hlist_node *pos;
84 72
85 hlist_for_each_entry(conn, pos, head, c_hash_node) { 73 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
86 if (conn->c_faddr == faddr && conn->c_laddr == laddr && 74 if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
87 conn->c_trans == trans) { 75 conn->c_trans == trans) {
88 ret = conn; 76 ret = conn;
@@ -129,10 +117,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
129{ 117{
130 struct rds_connection *conn, *parent = NULL; 118 struct rds_connection *conn, *parent = NULL;
131 struct hlist_head *head = rds_conn_bucket(laddr, faddr); 119 struct hlist_head *head = rds_conn_bucket(laddr, faddr);
120 struct rds_transport *loop_trans;
132 unsigned long flags; 121 unsigned long flags;
133 int ret; 122 int ret;
134 123
135 spin_lock_irqsave(&rds_conn_lock, flags); 124 rcu_read_lock();
136 conn = rds_conn_lookup(head, laddr, faddr, trans); 125 conn = rds_conn_lookup(head, laddr, faddr, trans);
137 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && 126 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
138 !is_outgoing) { 127 !is_outgoing) {
@@ -143,12 +132,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
143 parent = conn; 132 parent = conn;
144 conn = parent->c_passive; 133 conn = parent->c_passive;
145 } 134 }
146 spin_unlock_irqrestore(&rds_conn_lock, flags); 135 rcu_read_unlock();
147 if (conn) 136 if (conn)
148 goto out; 137 goto out;
149 138
150 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 139 conn = kmem_cache_zalloc(rds_conn_slab, gfp);
151 if (conn == NULL) { 140 if (!conn) {
152 conn = ERR_PTR(-ENOMEM); 141 conn = ERR_PTR(-ENOMEM);
153 goto out; 142 goto out;
154 } 143 }
@@ -159,7 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
159 spin_lock_init(&conn->c_lock); 148 spin_lock_init(&conn->c_lock);
160 conn->c_next_tx_seq = 1; 149 conn->c_next_tx_seq = 1;
161 150
162 mutex_init(&conn->c_send_lock); 151 init_waitqueue_head(&conn->c_waitq);
163 INIT_LIST_HEAD(&conn->c_send_queue); 152 INIT_LIST_HEAD(&conn->c_send_queue);
164 INIT_LIST_HEAD(&conn->c_retrans); 153 INIT_LIST_HEAD(&conn->c_retrans);
165 154
@@ -175,7 +164,9 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
175 * can bind to the destination address then we'd rather the messages 164 * can bind to the destination address then we'd rather the messages
176 * flow through loopback rather than either transport. 165 * flow through loopback rather than either transport.
177 */ 166 */
178 if (rds_trans_get_preferred(faddr)) { 167 loop_trans = rds_trans_get_preferred(faddr);
168 if (loop_trans) {
169 rds_trans_put(loop_trans);
179 conn->c_loopback = 1; 170 conn->c_loopback = 1;
180 if (is_outgoing && trans->t_prefer_loopback) { 171 if (is_outgoing && trans->t_prefer_loopback) {
181 /* "outgoing" connection - and the transport 172 /* "outgoing" connection - and the transport
@@ -238,7 +229,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
238 kmem_cache_free(rds_conn_slab, conn); 229 kmem_cache_free(rds_conn_slab, conn);
239 conn = found; 230 conn = found;
240 } else { 231 } else {
241 hlist_add_head(&conn->c_hash_node, head); 232 hlist_add_head_rcu(&conn->c_hash_node, head);
242 rds_cong_add_conn(conn); 233 rds_cong_add_conn(conn);
243 rds_conn_count++; 234 rds_conn_count++;
244 } 235 }
@@ -263,21 +254,91 @@ struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
263} 254}
264EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); 255EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
265 256
257void rds_conn_shutdown(struct rds_connection *conn)
258{
259 /* shut it down unless it's down already */
260 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
261 /*
262 * Quiesce the connection mgmt handlers before we start tearing
263 * things down. We don't hold the mutex for the entire
264 * duration of the shutdown operation, else we may be
265 * deadlocking with the CM handler. Instead, the CM event
266 * handler is supposed to check for state DISCONNECTING
267 */
268 mutex_lock(&conn->c_cm_lock);
269 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
270 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
271 rds_conn_error(conn, "shutdown called in state %d\n",
272 atomic_read(&conn->c_state));
273 mutex_unlock(&conn->c_cm_lock);
274 return;
275 }
276 mutex_unlock(&conn->c_cm_lock);
277
278 wait_event(conn->c_waitq,
279 !test_bit(RDS_IN_XMIT, &conn->c_flags));
280
281 conn->c_trans->conn_shutdown(conn);
282 rds_conn_reset(conn);
283
284 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
 285 /* This can happen - e.g. when we're in the middle of tearing
286 * down the connection, and someone unloads the rds module.
 287 * Quite reproducible with loopback connections.
288 * Mostly harmless.
289 */
290 rds_conn_error(conn,
291 "%s: failed to transition to state DOWN, "
292 "current state is %d\n",
293 __func__,
294 atomic_read(&conn->c_state));
295 return;
296 }
297 }
298
299 /* Then reconnect if it's still live.
300 * The passive side of an IB loopback connection is never added
301 * to the conn hash, so we never trigger a reconnect on this
302 * conn - the reconnect is always triggered by the active peer. */
303 cancel_delayed_work_sync(&conn->c_conn_w);
304 rcu_read_lock();
305 if (!hlist_unhashed(&conn->c_hash_node)) {
306 rcu_read_unlock();
307 rds_queue_reconnect(conn);
308 } else {
309 rcu_read_unlock();
310 }
311}
312
313/*
314 * Stop and free a connection.
315 *
316 * This can only be used in very limited circumstances. It assumes that once
317 * the conn has been shutdown that no one else is referencing the connection.
318 * We can only ensure this in the rmmod path in the current code.
319 */
266void rds_conn_destroy(struct rds_connection *conn) 320void rds_conn_destroy(struct rds_connection *conn)
267{ 321{
268 struct rds_message *rm, *rtmp; 322 struct rds_message *rm, *rtmp;
323 unsigned long flags;
269 324
270 rdsdebug("freeing conn %p for %pI4 -> " 325 rdsdebug("freeing conn %p for %pI4 -> "
271 "%pI4\n", conn, &conn->c_laddr, 326 "%pI4\n", conn, &conn->c_laddr,
272 &conn->c_faddr); 327 &conn->c_faddr);
273 328
274 hlist_del_init(&conn->c_hash_node); 329 /* Ensure conn will not be scheduled for reconnect */
330 spin_lock_irq(&rds_conn_lock);
331 hlist_del_init_rcu(&conn->c_hash_node);
332 spin_unlock_irq(&rds_conn_lock);
333 synchronize_rcu();
275 334
276 /* wait for the rds thread to shut it down */ 335 /* shut the connection down */
277 atomic_set(&conn->c_state, RDS_CONN_ERROR); 336 rds_conn_drop(conn);
278 cancel_delayed_work(&conn->c_conn_w); 337 flush_work(&conn->c_down_w);
279 queue_work(rds_wq, &conn->c_down_w); 338
280 flush_workqueue(rds_wq); 339 /* make sure lingering queued work won't try to ref the conn */
340 cancel_delayed_work_sync(&conn->c_send_w);
341 cancel_delayed_work_sync(&conn->c_recv_w);
281 342
282 /* tear down queued messages */ 343 /* tear down queued messages */
283 list_for_each_entry_safe(rm, rtmp, 344 list_for_each_entry_safe(rm, rtmp,
@@ -302,7 +363,9 @@ void rds_conn_destroy(struct rds_connection *conn)
302 BUG_ON(!list_empty(&conn->c_retrans)); 363 BUG_ON(!list_empty(&conn->c_retrans));
303 kmem_cache_free(rds_conn_slab, conn); 364 kmem_cache_free(rds_conn_slab, conn);
304 365
366 spin_lock_irqsave(&rds_conn_lock, flags);
305 rds_conn_count--; 367 rds_conn_count--;
368 spin_unlock_irqrestore(&rds_conn_lock, flags);
306} 369}
307EXPORT_SYMBOL_GPL(rds_conn_destroy); 370EXPORT_SYMBOL_GPL(rds_conn_destroy);
308 371
@@ -316,23 +379,23 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
316 struct list_head *list; 379 struct list_head *list;
317 struct rds_connection *conn; 380 struct rds_connection *conn;
318 struct rds_message *rm; 381 struct rds_message *rm;
319 unsigned long flags;
320 unsigned int total = 0; 382 unsigned int total = 0;
383 unsigned long flags;
321 size_t i; 384 size_t i;
322 385
323 len /= sizeof(struct rds_info_message); 386 len /= sizeof(struct rds_info_message);
324 387
325 spin_lock_irqsave(&rds_conn_lock, flags); 388 rcu_read_lock();
326 389
327 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
328 i++, head++) { 391 i++, head++) {
329 hlist_for_each_entry(conn, pos, head, c_hash_node) { 392 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
330 if (want_send) 393 if (want_send)
331 list = &conn->c_send_queue; 394 list = &conn->c_send_queue;
332 else 395 else
333 list = &conn->c_retrans; 396 list = &conn->c_retrans;
334 397
335 spin_lock(&conn->c_lock); 398 spin_lock_irqsave(&conn->c_lock, flags);
336 399
337 /* XXX too lazy to maintain counts.. */ 400 /* XXX too lazy to maintain counts.. */
338 list_for_each_entry(rm, list, m_conn_item) { 401 list_for_each_entry(rm, list, m_conn_item) {
@@ -343,11 +406,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
343 conn->c_faddr, 0); 406 conn->c_faddr, 0);
344 } 407 }
345 408
346 spin_unlock(&conn->c_lock); 409 spin_unlock_irqrestore(&conn->c_lock, flags);
347 } 410 }
348 } 411 }
349 412 rcu_read_unlock();
350 spin_unlock_irqrestore(&rds_conn_lock, flags);
351 413
352 lens->nr = total; 414 lens->nr = total;
353 lens->each = sizeof(struct rds_info_message); 415 lens->each = sizeof(struct rds_info_message);
@@ -377,19 +439,17 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
377 uint64_t buffer[(item_len + 7) / 8]; 439 uint64_t buffer[(item_len + 7) / 8];
378 struct hlist_head *head; 440 struct hlist_head *head;
379 struct hlist_node *pos; 441 struct hlist_node *pos;
380 struct hlist_node *tmp;
381 struct rds_connection *conn; 442 struct rds_connection *conn;
382 unsigned long flags;
383 size_t i; 443 size_t i;
384 444
385 spin_lock_irqsave(&rds_conn_lock, flags); 445 rcu_read_lock();
386 446
387 lens->nr = 0; 447 lens->nr = 0;
388 lens->each = item_len; 448 lens->each = item_len;
389 449
390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 450 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
391 i++, head++) { 451 i++, head++) {
392 hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { 452 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
393 453
394 /* XXX no c_lock usage.. */ 454 /* XXX no c_lock usage.. */
395 if (!visitor(conn, buffer)) 455 if (!visitor(conn, buffer))
@@ -405,8 +465,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
405 lens->nr++; 465 lens->nr++;
406 } 466 }
407 } 467 }
408 468 rcu_read_unlock();
409 spin_unlock_irqrestore(&rds_conn_lock, flags);
410} 469}
411EXPORT_SYMBOL_GPL(rds_for_each_conn_info); 470EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
412 471
@@ -423,8 +482,8 @@ static int rds_conn_info_visitor(struct rds_connection *conn,
423 sizeof(cinfo->transport)); 482 sizeof(cinfo->transport));
424 cinfo->flags = 0; 483 cinfo->flags = 0;
425 484
426 rds_conn_info_set(cinfo->flags, 485 rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
427 rds_conn_is_sending(conn), SENDING); 486 SENDING);
428 /* XXX Future: return the state rather than these funky bits */ 487 /* XXX Future: return the state rather than these funky bits */
429 rds_conn_info_set(cinfo->flags, 488 rds_conn_info_set(cinfo->flags,
430 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, 489 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
@@ -444,12 +503,12 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
444 sizeof(struct rds_info_connection)); 503 sizeof(struct rds_info_connection));
445} 504}
446 505
447int __init rds_conn_init(void) 506int rds_conn_init(void)
448{ 507{
449 rds_conn_slab = kmem_cache_create("rds_connection", 508 rds_conn_slab = kmem_cache_create("rds_connection",
450 sizeof(struct rds_connection), 509 sizeof(struct rds_connection),
451 0, 0, NULL); 510 0, 0, NULL);
452 if (rds_conn_slab == NULL) 511 if (!rds_conn_slab)
453 return -ENOMEM; 512 return -ENOMEM;
454 513
455 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 514 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
@@ -487,6 +546,18 @@ void rds_conn_drop(struct rds_connection *conn)
487EXPORT_SYMBOL_GPL(rds_conn_drop); 546EXPORT_SYMBOL_GPL(rds_conn_drop);
488 547
489/* 548/*
549 * If the connection is down, trigger a connect. We may have scheduled a
550 * delayed reconnect however - in this case we should not interfere.
551 */
552void rds_conn_connect_if_down(struct rds_connection *conn)
553{
554 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
555 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
556 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
557}
558EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
559
560/*
490 * An error occurred on the connection 561 * An error occurred on the connection
491 */ 562 */
492void 563void
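
The new rds_conn_shutdown() above walks the connection through UP -> DISCONNECTING -> DOWN with rds_conn_transition() before deciding whether a reconnect should be queued. The sketch below is a minimal user-space model of that compare-and-swap state machine, assuming rds_conn_transition() behaves like a cmpxchg on c_state (which is how the hunk uses it); the enum values and helper names are illustrative only.

#include <stdatomic.h>
#include <stdio.h>

enum { CONN_DOWN, CONN_CONNECTING, CONN_DISCONNECTING, CONN_UP, CONN_ERROR };

struct conn {
	_Atomic int state;
};

/* succeeds only if the connection is currently in old_state */
static int conn_transition(struct conn *c, int old_state, int new_state)
{
	return atomic_compare_exchange_strong(&c->state, &old_state, new_state);
}

static void conn_shutdown(struct conn *c)
{
	if (!conn_transition(c, CONN_UP, CONN_DISCONNECTING) &&
	    !conn_transition(c, CONN_ERROR, CONN_DISCONNECTING)) {
		printf("shutdown called in state %d\n", atomic_load(&c->state));
		return;
	}

	/* ... quiesce transmit and tear down transport state here ... */

	if (!conn_transition(c, CONN_DISCONNECTING, CONN_DOWN))
		printf("failed to reach DOWN, state %d\n", atomic_load(&c->state));
}

int main(void)
{
	struct conn c = { CONN_UP };

	conn_shutdown(&c);
	printf("final state %d (DOWN is %d)\n", atomic_load(&c.state), CONN_DOWN);
	return 0;
}
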
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 8f2d6dd7700a..b12a3951167d 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -53,12 +53,71 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
53module_param(rds_ib_retry_count, int, 0444); 53module_param(rds_ib_retry_count, int, 0444);
54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
55 55
56/*
57 * we have a clumsy combination of RCU and a rwsem protecting this list
58 * because it is used both in the get_mr fast path and while blocking in
59 * the FMR flushing path.
60 */
61DECLARE_RWSEM(rds_ib_devices_lock);
56struct list_head rds_ib_devices; 62struct list_head rds_ib_devices;
57 63
58/* NOTE: if also grabbing ibdev lock, grab this first */ 64/* NOTE: if also grabbing ibdev lock, grab this first */
59DEFINE_SPINLOCK(ib_nodev_conns_lock); 65DEFINE_SPINLOCK(ib_nodev_conns_lock);
60LIST_HEAD(ib_nodev_conns); 66LIST_HEAD(ib_nodev_conns);
61 67
68void rds_ib_nodev_connect(void)
69{
70 struct rds_ib_connection *ic;
71
72 spin_lock(&ib_nodev_conns_lock);
73 list_for_each_entry(ic, &ib_nodev_conns, ib_node)
74 rds_conn_connect_if_down(ic->conn);
75 spin_unlock(&ib_nodev_conns_lock);
76}
77
78void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev)
79{
80 struct rds_ib_connection *ic;
81 unsigned long flags;
82
83 spin_lock_irqsave(&rds_ibdev->spinlock, flags);
84 list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
85 rds_conn_drop(ic->conn);
86 spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
87}
88
89/*
90 * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references
 91 * from interrupt context so we push freeing off into a work struct in krdsd.
92 */
93static void rds_ib_dev_free(struct work_struct *work)
94{
95 struct rds_ib_ipaddr *i_ipaddr, *i_next;
96 struct rds_ib_device *rds_ibdev = container_of(work,
97 struct rds_ib_device, free_work);
98
99 if (rds_ibdev->mr_pool)
100 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
101 if (rds_ibdev->mr)
102 ib_dereg_mr(rds_ibdev->mr);
103 if (rds_ibdev->pd)
104 ib_dealloc_pd(rds_ibdev->pd);
105
106 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
107 list_del(&i_ipaddr->list);
108 kfree(i_ipaddr);
109 }
110
111 kfree(rds_ibdev);
112}
113
114void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
115{
116 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0);
117 if (atomic_dec_and_test(&rds_ibdev->refcount))
118 queue_work(rds_wq, &rds_ibdev->free_work);
119}
120
62void rds_ib_add_one(struct ib_device *device) 121void rds_ib_add_one(struct ib_device *device)
63{ 122{
64 struct rds_ib_device *rds_ibdev; 123 struct rds_ib_device *rds_ibdev;
@@ -77,11 +136,14 @@ void rds_ib_add_one(struct ib_device *device)
77 goto free_attr; 136 goto free_attr;
78 } 137 }
79 138
80 rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL); 139 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
140 ibdev_to_node(device));
81 if (!rds_ibdev) 141 if (!rds_ibdev)
82 goto free_attr; 142 goto free_attr;
83 143
84 spin_lock_init(&rds_ibdev->spinlock); 144 spin_lock_init(&rds_ibdev->spinlock);
145 atomic_set(&rds_ibdev->refcount, 1);
146 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
85 147
86 rds_ibdev->max_wrs = dev_attr->max_qp_wr; 148 rds_ibdev->max_wrs = dev_attr->max_qp_wr;
87 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); 149 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
@@ -91,68 +153,107 @@ void rds_ib_add_one(struct ib_device *device)
91 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : 153 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) :
92 fmr_pool_size; 154 fmr_pool_size;
93 155
156 rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
157 rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
158
94 rds_ibdev->dev = device; 159 rds_ibdev->dev = device;
95 rds_ibdev->pd = ib_alloc_pd(device); 160 rds_ibdev->pd = ib_alloc_pd(device);
96 if (IS_ERR(rds_ibdev->pd)) 161 if (IS_ERR(rds_ibdev->pd)) {
97 goto free_dev; 162 rds_ibdev->pd = NULL;
163 goto put_dev;
164 }
98 165
99 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, 166 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE);
100 IB_ACCESS_LOCAL_WRITE); 167 if (IS_ERR(rds_ibdev->mr)) {
101 if (IS_ERR(rds_ibdev->mr)) 168 rds_ibdev->mr = NULL;
102 goto err_pd; 169 goto put_dev;
170 }
103 171
104 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); 172 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev);
105 if (IS_ERR(rds_ibdev->mr_pool)) { 173 if (IS_ERR(rds_ibdev->mr_pool)) {
106 rds_ibdev->mr_pool = NULL; 174 rds_ibdev->mr_pool = NULL;
107 goto err_mr; 175 goto put_dev;
108 } 176 }
109 177
110 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 178 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
111 INIT_LIST_HEAD(&rds_ibdev->conn_list); 179 INIT_LIST_HEAD(&rds_ibdev->conn_list);
112 list_add_tail(&rds_ibdev->list, &rds_ib_devices); 180
181 down_write(&rds_ib_devices_lock);
182 list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
183 up_write(&rds_ib_devices_lock);
184 atomic_inc(&rds_ibdev->refcount);
113 185
114 ib_set_client_data(device, &rds_ib_client, rds_ibdev); 186 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
187 atomic_inc(&rds_ibdev->refcount);
115 188
116 goto free_attr; 189 rds_ib_nodev_connect();
117 190
118err_mr: 191put_dev:
119 ib_dereg_mr(rds_ibdev->mr); 192 rds_ib_dev_put(rds_ibdev);
120err_pd:
121 ib_dealloc_pd(rds_ibdev->pd);
122free_dev:
123 kfree(rds_ibdev);
124free_attr: 193free_attr:
125 kfree(dev_attr); 194 kfree(dev_attr);
126} 195}
127 196
197/*
198 * New connections use this to find the device to associate with the
199 * connection. It's not in the fast path so we're not concerned about the
200 * performance of the IB call. (As of this writing, it uses an interrupt
201 * blocking spinlock to serialize walking a per-device list of all registered
202 * clients.)
203 *
204 * RCU is used to handle incoming connections racing with device teardown.
205 * Rather than use a lock to serialize removal from the client_data and
206 * getting a new reference, we use an RCU grace period. The destruction
207 * path removes the device from client_data and then waits for all RCU
208 * readers to finish.
209 *
 210 * A new connection can get NULL from this if it's arriving on a
211 * device that is in the process of being removed.
212 */
213struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
214{
215 struct rds_ib_device *rds_ibdev;
216
217 rcu_read_lock();
218 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
219 if (rds_ibdev)
220 atomic_inc(&rds_ibdev->refcount);
221 rcu_read_unlock();
222 return rds_ibdev;
223}
224
225/*
226 * The IB stack is letting us know that a device is going away. This can
227 * happen if the underlying HCA driver is removed or if PCI hotplug is removing
228 * the pci function, for example.
229 *
230 * This can be called at any time and can be racing with any other RDS path.
231 */
128void rds_ib_remove_one(struct ib_device *device) 232void rds_ib_remove_one(struct ib_device *device)
129{ 233{
130 struct rds_ib_device *rds_ibdev; 234 struct rds_ib_device *rds_ibdev;
131 struct rds_ib_ipaddr *i_ipaddr, *i_next;
132 235
133 rds_ibdev = ib_get_client_data(device, &rds_ib_client); 236 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
134 if (!rds_ibdev) 237 if (!rds_ibdev)
135 return; 238 return;
136 239
137 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { 240 rds_ib_dev_shutdown(rds_ibdev);
138 list_del(&i_ipaddr->list);
139 kfree(i_ipaddr);
140 }
141 241
142 rds_ib_destroy_conns(rds_ibdev); 242 /* stop connection attempts from getting a reference to this device. */
243 ib_set_client_data(device, &rds_ib_client, NULL);
143 244
144 if (rds_ibdev->mr_pool) 245 down_write(&rds_ib_devices_lock);
145 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); 246 list_del_rcu(&rds_ibdev->list);
146 247 up_write(&rds_ib_devices_lock);
147 ib_dereg_mr(rds_ibdev->mr);
148
149 while (ib_dealloc_pd(rds_ibdev->pd)) {
150 rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd);
151 msleep(1);
152 }
153 248
154 list_del(&rds_ibdev->list); 249 /*
155 kfree(rds_ibdev); 250 * This synchronize rcu is waiting for readers of both the ib
251 * client data and the devices list to finish before we drop
252 * both of those references.
253 */
254 synchronize_rcu();
255 rds_ib_dev_put(rds_ibdev);
256 rds_ib_dev_put(rds_ibdev);
156} 257}
157 258
158struct ib_client rds_ib_client = { 259struct ib_client rds_ib_client = {
@@ -186,7 +287,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
186 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 287 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
187 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 288 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
188 289
189 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 290 rds_ibdev = ic->rds_ibdev;
190 iinfo->max_send_wr = ic->i_send_ring.w_nr; 291 iinfo->max_send_wr = ic->i_send_ring.w_nr;
191 iinfo->max_recv_wr = ic->i_recv_ring.w_nr; 292 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
192 iinfo->max_send_sge = rds_ibdev->max_sge; 293 iinfo->max_send_sge = rds_ibdev->max_sge;
@@ -248,29 +349,36 @@ static int rds_ib_laddr_check(__be32 addr)
248 return ret; 349 return ret;
249} 350}
250 351
352static void rds_ib_unregister_client(void)
353{
354 ib_unregister_client(&rds_ib_client);
355 /* wait for rds_ib_dev_free() to complete */
356 flush_workqueue(rds_wq);
357}
358
251void rds_ib_exit(void) 359void rds_ib_exit(void)
252{ 360{
253 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 361 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
362 rds_ib_unregister_client();
254 rds_ib_destroy_nodev_conns(); 363 rds_ib_destroy_nodev_conns();
255 ib_unregister_client(&rds_ib_client);
256 rds_ib_sysctl_exit(); 364 rds_ib_sysctl_exit();
257 rds_ib_recv_exit(); 365 rds_ib_recv_exit();
258 rds_trans_unregister(&rds_ib_transport); 366 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit();
259} 368}
260 369
261struct rds_transport rds_ib_transport = { 370struct rds_transport rds_ib_transport = {
262 .laddr_check = rds_ib_laddr_check, 371 .laddr_check = rds_ib_laddr_check,
263 .xmit_complete = rds_ib_xmit_complete, 372 .xmit_complete = rds_ib_xmit_complete,
264 .xmit = rds_ib_xmit, 373 .xmit = rds_ib_xmit,
265 .xmit_cong_map = NULL,
266 .xmit_rdma = rds_ib_xmit_rdma, 374 .xmit_rdma = rds_ib_xmit_rdma,
375 .xmit_atomic = rds_ib_xmit_atomic,
267 .recv = rds_ib_recv, 376 .recv = rds_ib_recv,
268 .conn_alloc = rds_ib_conn_alloc, 377 .conn_alloc = rds_ib_conn_alloc,
269 .conn_free = rds_ib_conn_free, 378 .conn_free = rds_ib_conn_free,
270 .conn_connect = rds_ib_conn_connect, 379 .conn_connect = rds_ib_conn_connect,
271 .conn_shutdown = rds_ib_conn_shutdown, 380 .conn_shutdown = rds_ib_conn_shutdown,
272 .inc_copy_to_user = rds_ib_inc_copy_to_user, 381 .inc_copy_to_user = rds_ib_inc_copy_to_user,
273 .inc_purge = rds_ib_inc_purge,
274 .inc_free = rds_ib_inc_free, 382 .inc_free = rds_ib_inc_free,
275 .cm_initiate_connect = rds_ib_cm_initiate_connect, 383 .cm_initiate_connect = rds_ib_cm_initiate_connect,
276 .cm_handle_connect = rds_ib_cm_handle_connect, 384 .cm_handle_connect = rds_ib_cm_handle_connect,
@@ -286,16 +394,20 @@ struct rds_transport rds_ib_transport = {
286 .t_type = RDS_TRANS_IB 394 .t_type = RDS_TRANS_IB
287}; 395};
288 396
289int __init rds_ib_init(void) 397int rds_ib_init(void)
290{ 398{
291 int ret; 399 int ret;
292 400
293 INIT_LIST_HEAD(&rds_ib_devices); 401 INIT_LIST_HEAD(&rds_ib_devices);
294 402
295 ret = ib_register_client(&rds_ib_client); 403 ret = rds_ib_fmr_init();
296 if (ret) 404 if (ret)
297 goto out; 405 goto out;
298 406
407 ret = ib_register_client(&rds_ib_client);
408 if (ret)
409 goto out_fmr_exit;
410
299 ret = rds_ib_sysctl_init(); 411 ret = rds_ib_sysctl_init();
300 if (ret) 412 if (ret)
301 goto out_ibreg; 413 goto out_ibreg;
@@ -317,7 +429,9 @@ out_recv:
317out_sysctl: 429out_sysctl:
318 rds_ib_sysctl_exit(); 430 rds_ib_sysctl_exit();
319out_ibreg: 431out_ibreg:
320 ib_unregister_client(&rds_ib_client); 432 rds_ib_unregister_client();
433out_fmr_exit:
434 rds_ib_fmr_exit();
321out: 435out:
322 return ret; 436 return ret;
323} 437}
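
The ib.c changes above give each rds_ib_device an atomic refcount: rds_ib_get_client_data() takes a reference under rcu_read_lock(), and rds_ib_dev_put() queues rds_ib_dev_free() on a work struct when the last reference drops, so the blocking teardown (MR pool, MR, PD) never runs in the caller's context. Here is a small user-space sketch of that put-and-defer pattern, with a plain function call standing in for queue_work(rds_wq, &rds_ibdev->free_work); everything in it is illustrative rather than the kernel API.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ib_dev {
	_Atomic int refcount;
	/* pd, mr, mr_pool, ipaddr_list, ... elided */
};

/* stand-in for queueing free_work on a workqueue */
static void queue_free_work(struct ib_dev *dev)
{
	printf("deferred free of %p\n", (void *)dev);
	free(dev);
}

static struct ib_dev *dev_get(struct ib_dev *dev)
{
	atomic_fetch_add(&dev->refcount, 1);
	return dev;
}

static void dev_put(struct ib_dev *dev)
{
	/* the last put hands the device to the (pretend) workqueue */
	if (atomic_fetch_sub(&dev->refcount, 1) == 1)
		queue_free_work(dev);
}

int main(void)
{
	struct ib_dev *dev = calloc(1, sizeof(*dev));

	if (!dev)
		return 1;
	atomic_init(&dev->refcount, 1);	/* reference held by the devices list */
	dev_get(dev);			/* e.g. a new connection finds the device */
	dev_put(dev);			/* the connection goes away */
	dev_put(dev);			/* the device is removed from the list */
	return 0;
}
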
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 64df4e79b29f..7ad3d57e06a5 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -3,11 +3,13 @@
3 3
4#include <rdma/ib_verbs.h> 4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h> 5#include <rdma/rdma_cm.h>
6#include <linux/pci.h>
7#include <linux/slab.h>
6#include "rds.h" 8#include "rds.h"
7#include "rdma_transport.h" 9#include "rdma_transport.h"
8 10
9#define RDS_FMR_SIZE 256 11#define RDS_FMR_SIZE 256
10#define RDS_FMR_POOL_SIZE 4096 12#define RDS_FMR_POOL_SIZE 8192
11 13
12#define RDS_IB_MAX_SGE 8 14#define RDS_IB_MAX_SGE 8
13#define RDS_IB_RECV_SGE 2 15#define RDS_IB_RECV_SGE 2
@@ -19,6 +21,9 @@
19 21
20#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ 22#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
21 23
24#define RDS_IB_RECYCLE_BATCH_COUNT 32
25
26extern struct rw_semaphore rds_ib_devices_lock;
22extern struct list_head rds_ib_devices; 27extern struct list_head rds_ib_devices;
23 28
24/* 29/*
@@ -26,20 +31,29 @@ extern struct list_head rds_ib_devices;
26 * try and minimize the amount of memory tied up both the device and 31 * try and minimize the amount of memory tied up both the device and
27 * socket receive queues. 32 * socket receive queues.
28 */ 33 */
29/* page offset of the final full frag that fits in the page */
30#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
31struct rds_page_frag { 34struct rds_page_frag {
32 struct list_head f_item; 35 struct list_head f_item;
33 struct page *f_page; 36 struct list_head f_cache_entry;
34 unsigned long f_offset; 37 struct scatterlist f_sg;
35 dma_addr_t f_mapped;
36}; 38};
37 39
38struct rds_ib_incoming { 40struct rds_ib_incoming {
39 struct list_head ii_frags; 41 struct list_head ii_frags;
42 struct list_head ii_cache_entry;
40 struct rds_incoming ii_inc; 43 struct rds_incoming ii_inc;
41}; 44};
42 45
46struct rds_ib_cache_head {
47 struct list_head *first;
48 unsigned long count;
49};
50
51struct rds_ib_refill_cache {
52 struct rds_ib_cache_head *percpu;
53 struct list_head *xfer;
54 struct list_head *ready;
55};
56
43struct rds_ib_connect_private { 57struct rds_ib_connect_private {
44 /* Add new fields at the end, and don't permute existing fields. */ 58 /* Add new fields at the end, and don't permute existing fields. */
45 __be32 dp_saddr; 59 __be32 dp_saddr;
@@ -53,8 +67,7 @@ struct rds_ib_connect_private {
53}; 67};
54 68
55struct rds_ib_send_work { 69struct rds_ib_send_work {
56 struct rds_message *s_rm; 70 void *s_op;
57 struct rds_rdma_op *s_op;
58 struct ib_send_wr s_wr; 71 struct ib_send_wr s_wr;
59 struct ib_sge s_sge[RDS_IB_MAX_SGE]; 72 struct ib_sge s_sge[RDS_IB_MAX_SGE];
60 unsigned long s_queued; 73 unsigned long s_queued;
@@ -92,10 +105,11 @@ struct rds_ib_connection {
92 105
93 /* tx */ 106 /* tx */
94 struct rds_ib_work_ring i_send_ring; 107 struct rds_ib_work_ring i_send_ring;
95 struct rds_message *i_rm; 108 struct rm_data_op *i_data_op;
96 struct rds_header *i_send_hdrs; 109 struct rds_header *i_send_hdrs;
97 u64 i_send_hdrs_dma; 110 u64 i_send_hdrs_dma;
98 struct rds_ib_send_work *i_sends; 111 struct rds_ib_send_work *i_sends;
112 atomic_t i_signaled_sends;
99 113
100 /* rx */ 114 /* rx */
101 struct tasklet_struct i_recv_tasklet; 115 struct tasklet_struct i_recv_tasklet;
@@ -106,8 +120,9 @@ struct rds_ib_connection {
106 struct rds_header *i_recv_hdrs; 120 struct rds_header *i_recv_hdrs;
107 u64 i_recv_hdrs_dma; 121 u64 i_recv_hdrs_dma;
108 struct rds_ib_recv_work *i_recvs; 122 struct rds_ib_recv_work *i_recvs;
109 struct rds_page_frag i_frag;
110 u64 i_ack_recv; /* last ACK received */ 123 u64 i_ack_recv; /* last ACK received */
124 struct rds_ib_refill_cache i_cache_incs;
125 struct rds_ib_refill_cache i_cache_frags;
111 126
112 /* sending acks */ 127 /* sending acks */
113 unsigned long i_ack_flags; 128 unsigned long i_ack_flags;
@@ -138,7 +153,6 @@ struct rds_ib_connection {
138 153
139 /* Batched completions */ 154 /* Batched completions */
140 unsigned int i_unsignaled_wrs; 155 unsigned int i_unsignaled_wrs;
141 long i_unsignaled_bytes;
142}; 156};
143 157
144/* This assumes that atomic_t is at least 32 bits */ 158/* This assumes that atomic_t is at least 32 bits */
@@ -164,9 +178,17 @@ struct rds_ib_device {
164 unsigned int max_fmrs; 178 unsigned int max_fmrs;
165 int max_sge; 179 int max_sge;
166 unsigned int max_wrs; 180 unsigned int max_wrs;
181 unsigned int max_initiator_depth;
182 unsigned int max_responder_resources;
167 spinlock_t spinlock; /* protect the above */ 183 spinlock_t spinlock; /* protect the above */
184 atomic_t refcount;
185 struct work_struct free_work;
168}; 186};
169 187
188#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
189#define ibdev_to_node(ibdev) pcidev_to_node(to_pci_dev(ibdev->dma_device))
190#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
191
170/* bits for i_ack_flags */ 192/* bits for i_ack_flags */
171#define IB_ACK_IN_FLIGHT 0 193#define IB_ACK_IN_FLIGHT 0
172#define IB_ACK_REQUESTED 1 194#define IB_ACK_REQUESTED 1
@@ -202,6 +224,8 @@ struct rds_ib_statistics {
202 uint64_t s_ib_rdma_mr_pool_flush; 224 uint64_t s_ib_rdma_mr_pool_flush;
203 uint64_t s_ib_rdma_mr_pool_wait; 225 uint64_t s_ib_rdma_mr_pool_wait;
204 uint64_t s_ib_rdma_mr_pool_depleted; 226 uint64_t s_ib_rdma_mr_pool_depleted;
227 uint64_t s_ib_atomic_cswp;
228 uint64_t s_ib_atomic_fadd;
205}; 229};
206 230
207extern struct workqueue_struct *rds_ib_wq; 231extern struct workqueue_struct *rds_ib_wq;
@@ -243,6 +267,8 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
243extern struct rds_transport rds_ib_transport; 267extern struct rds_transport rds_ib_transport;
244extern void rds_ib_add_one(struct ib_device *device); 268extern void rds_ib_add_one(struct ib_device *device);
245extern void rds_ib_remove_one(struct ib_device *device); 269extern void rds_ib_remove_one(struct ib_device *device);
270struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
271void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
246extern struct ib_client rds_ib_client; 272extern struct ib_client rds_ib_client;
247 273
248extern unsigned int fmr_pool_size; 274extern unsigned int fmr_pool_size;
@@ -258,7 +284,7 @@ void rds_ib_conn_free(void *arg);
258int rds_ib_conn_connect(struct rds_connection *conn); 284int rds_ib_conn_connect(struct rds_connection *conn);
259void rds_ib_conn_shutdown(struct rds_connection *conn); 285void rds_ib_conn_shutdown(struct rds_connection *conn);
260void rds_ib_state_change(struct sock *sk); 286void rds_ib_state_change(struct sock *sk);
261int __init rds_ib_listen_init(void); 287int rds_ib_listen_init(void);
262void rds_ib_listen_stop(void); 288void rds_ib_listen_stop(void);
263void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); 289void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
264int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, 290int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -275,15 +301,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
275int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 301int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
276void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 302void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
277void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 303void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
278void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock); 304void rds_ib_destroy_nodev_conns(void);
279static inline void rds_ib_destroy_nodev_conns(void)
280{
281 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
282}
283static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
284{
285 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
286}
287struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 305struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
288void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 306void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
289void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 307void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -292,14 +310,16 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
292void rds_ib_sync_mr(void *trans_private, int dir); 310void rds_ib_sync_mr(void *trans_private, int dir);
293void rds_ib_free_mr(void *trans_private, int invalidate); 311void rds_ib_free_mr(void *trans_private, int invalidate);
294void rds_ib_flush_mrs(void); 312void rds_ib_flush_mrs(void);
313int rds_ib_fmr_init(void);
314void rds_ib_fmr_exit(void);
295 315
296/* ib_recv.c */ 316/* ib_recv.c */
297int __init rds_ib_recv_init(void); 317int rds_ib_recv_init(void);
298void rds_ib_recv_exit(void); 318void rds_ib_recv_exit(void);
299int rds_ib_recv(struct rds_connection *conn); 319int rds_ib_recv(struct rds_connection *conn);
300int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 320int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
301 gfp_t page_gfp, int prefill); 321void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
302void rds_ib_inc_purge(struct rds_incoming *inc); 322void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
303void rds_ib_inc_free(struct rds_incoming *inc); 323void rds_ib_inc_free(struct rds_incoming *inc);
304int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 324int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
305 size_t size); 325 size_t size);
@@ -325,17 +345,19 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
325extern wait_queue_head_t rds_ib_ring_empty_wait; 345extern wait_queue_head_t rds_ib_ring_empty_wait;
326 346
327/* ib_send.c */ 347/* ib_send.c */
348char *rds_ib_wc_status_str(enum ib_wc_status status);
328void rds_ib_xmit_complete(struct rds_connection *conn); 349void rds_ib_xmit_complete(struct rds_connection *conn);
329int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, 350int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
330 unsigned int hdr_off, unsigned int sg, unsigned int off); 351 unsigned int hdr_off, unsigned int sg, unsigned int off);
331void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); 352void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
332void rds_ib_send_init_ring(struct rds_ib_connection *ic); 353void rds_ib_send_init_ring(struct rds_ib_connection *ic);
333void rds_ib_send_clear_ring(struct rds_ib_connection *ic); 354void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
334int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 355int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
335void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); 356void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
336void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); 357void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
337int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, 358int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
338 u32 *adv_credits, int need_posted, int max_posted); 359 u32 *adv_credits, int need_posted, int max_posted);
360int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);
339 361
340/* ib_stats.c */ 362/* ib_stats.c */
341DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); 363DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
@@ -344,7 +366,7 @@ unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
344 unsigned int avail); 366 unsigned int avail);
345 367
346/* ib_sysctl.c */ 368/* ib_sysctl.c */
347int __init rds_ib_sysctl_init(void); 369int rds_ib_sysctl_init(void);
348void rds_ib_sysctl_exit(void); 370void rds_ib_sysctl_exit(void);
349extern unsigned long rds_ib_sysctl_max_send_wr; 371extern unsigned long rds_ib_sysctl_max_send_wr;
350extern unsigned long rds_ib_sysctl_max_recv_wr; 372extern unsigned long rds_ib_sysctl_max_recv_wr;
@@ -354,28 +376,4 @@ extern unsigned long rds_ib_sysctl_max_recv_allocation;
354extern unsigned int rds_ib_sysctl_flow_control; 376extern unsigned int rds_ib_sysctl_flow_control;
355extern ctl_table rds_ib_sysctl_table[]; 377extern ctl_table rds_ib_sysctl_table[];
356 378
357/*
358 * Helper functions for getting/setting the header and data SGEs in
359 * RDS packets (not RDMA)
360 *
361 * From version 3.1 onwards, header is in front of data in the sge.
362 */
363static inline struct ib_sge *
364rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
365{
366 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
367 return &sge[0];
368 else
369 return &sge[1];
370}
371
372static inline struct ib_sge *
373rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
374{
375 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
376 return &sge[1];
377 else
378 return &sge[0];
379}
380
381#endif 379#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index f68832798db2..ee369d201a65 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -38,6 +38,36 @@
38#include "rds.h" 38#include "rds.h"
39#include "ib.h" 39#include "ib.h"
40 40
41static char *rds_ib_event_type_strings[] = {
42#define RDS_IB_EVENT_STRING(foo) \
43 [IB_EVENT_##foo] = __stringify(IB_EVENT_##foo)
44 RDS_IB_EVENT_STRING(CQ_ERR),
45 RDS_IB_EVENT_STRING(QP_FATAL),
46 RDS_IB_EVENT_STRING(QP_REQ_ERR),
47 RDS_IB_EVENT_STRING(QP_ACCESS_ERR),
48 RDS_IB_EVENT_STRING(COMM_EST),
49 RDS_IB_EVENT_STRING(SQ_DRAINED),
50 RDS_IB_EVENT_STRING(PATH_MIG),
51 RDS_IB_EVENT_STRING(PATH_MIG_ERR),
52 RDS_IB_EVENT_STRING(DEVICE_FATAL),
53 RDS_IB_EVENT_STRING(PORT_ACTIVE),
54 RDS_IB_EVENT_STRING(PORT_ERR),
55 RDS_IB_EVENT_STRING(LID_CHANGE),
56 RDS_IB_EVENT_STRING(PKEY_CHANGE),
57 RDS_IB_EVENT_STRING(SM_CHANGE),
58 RDS_IB_EVENT_STRING(SRQ_ERR),
59 RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED),
60 RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED),
61 RDS_IB_EVENT_STRING(CLIENT_REREGISTER),
62#undef RDS_IB_EVENT_STRING
63};
64
65static char *rds_ib_event_str(enum ib_event_type type)
66{
67 return rds_str_array(rds_ib_event_type_strings,
68 ARRAY_SIZE(rds_ib_event_type_strings), type);
69};
70
41/* 71/*
42 * Set the selected protocol version 72 * Set the selected protocol version
43 */ 73 */
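
The hunk above adds a designated-initializer table mapping IB event codes to their names and looks it up through rds_str_array(), whose definition is not part of this diff. Below is a standalone sketch of the same table-plus-bounds-check idiom, under the assumption that rds_str_array() simply range-checks the index and falls back to a default string; the enum and names are invented for the example.

#include <stdio.h>

#define EV_STRING(foo) [EV_##foo] = #foo

enum ev { EV_CQ_ERR, EV_QP_FATAL, EV_PORT_ACTIVE };

static const char *ev_strings[] = {
	EV_STRING(CQ_ERR),
	EV_STRING(QP_FATAL),
	EV_STRING(PORT_ACTIVE),
};

/* the bounds check plays the role assumed for rds_str_array() */
static const char *ev_str(unsigned int type)
{
	if (type < sizeof(ev_strings) / sizeof(ev_strings[0]) && ev_strings[type])
		return ev_strings[type];
	return "unknown";
}

int main(void)
{
	printf("%u -> %s\n", 1u, ev_str(1));
	printf("%u -> %s\n", 42u, ev_str(42));
	return 0;
}
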
@@ -95,7 +125,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
95{ 125{
96 const struct rds_ib_connect_private *dp = NULL; 126 const struct rds_ib_connect_private *dp = NULL;
97 struct rds_ib_connection *ic = conn->c_transport_data; 127 struct rds_ib_connection *ic = conn->c_transport_data;
98 struct rds_ib_device *rds_ibdev;
99 struct ib_qp_attr qp_attr; 128 struct ib_qp_attr qp_attr;
100 int err; 129 int err;
101 130
@@ -111,11 +140,21 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
111 } 140 }
112 } 141 }
113 142
114 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", 143 if (conn->c_version < RDS_PROTOCOL(3,1)) {
115 &conn->c_faddr, 144 printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
116 RDS_PROTOCOL_MAJOR(conn->c_version), 145 " no longer supported\n",
117 RDS_PROTOCOL_MINOR(conn->c_version), 146 &conn->c_faddr,
118 ic->i_flowctl ? ", flow control" : ""); 147 RDS_PROTOCOL_MAJOR(conn->c_version),
148 RDS_PROTOCOL_MINOR(conn->c_version));
149 rds_conn_destroy(conn);
150 return;
151 } else {
152 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
153 &conn->c_faddr,
154 RDS_PROTOCOL_MAJOR(conn->c_version),
155 RDS_PROTOCOL_MINOR(conn->c_version),
156 ic->i_flowctl ? ", flow control" : "");
157 }
119 158
120 /* 159 /*
121 * Init rings and fill recv. this needs to wait until protocol negotiation 160 * Init rings and fill recv. this needs to wait until protocol negotiation
@@ -125,7 +164,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
125 rds_ib_recv_init_ring(ic); 164 rds_ib_recv_init_ring(ic);
126 /* Post receive buffers - as a side effect, this will update 165 /* Post receive buffers - as a side effect, this will update
127 * the posted credit count. */ 166 * the posted credit count. */
128 rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); 167 rds_ib_recv_refill(conn, 1);
129 168
130 /* Tune RNR behavior */ 169 /* Tune RNR behavior */
131 rds_ib_tune_rnr(ic, &qp_attr); 170 rds_ib_tune_rnr(ic, &qp_attr);
@@ -135,12 +174,11 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
135 if (err) 174 if (err)
136 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); 175 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
137 176
138 /* update ib_device with this local ipaddr & conn */ 177 /* update ib_device with this local ipaddr */
139 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 178 err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
140 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
141 if (err) 179 if (err)
142 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 180 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
143 rds_ib_add_conn(rds_ibdev, conn); 181 err);
144 182
145 /* If the peer gave us the last packet it saw, process this as if 183 /* If the peer gave us the last packet it saw, process this as if
146 * we had received a regular ACK. */ 184 * we had received a regular ACK. */
@@ -153,18 +191,23 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
153static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, 191static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
154 struct rdma_conn_param *conn_param, 192 struct rdma_conn_param *conn_param,
155 struct rds_ib_connect_private *dp, 193 struct rds_ib_connect_private *dp,
156 u32 protocol_version) 194 u32 protocol_version,
195 u32 max_responder_resources,
196 u32 max_initiator_depth)
157{ 197{
198 struct rds_ib_connection *ic = conn->c_transport_data;
199 struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
200
158 memset(conn_param, 0, sizeof(struct rdma_conn_param)); 201 memset(conn_param, 0, sizeof(struct rdma_conn_param));
159 /* XXX tune these? */ 202
160 conn_param->responder_resources = 1; 203 conn_param->responder_resources =
161 conn_param->initiator_depth = 1; 204 min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
205 conn_param->initiator_depth =
206 min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
162 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7); 207 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
163 conn_param->rnr_retry_count = 7; 208 conn_param->rnr_retry_count = 7;
164 209
165 if (dp) { 210 if (dp) {
166 struct rds_ib_connection *ic = conn->c_transport_data;
167
168 memset(dp, 0, sizeof(*dp)); 211 memset(dp, 0, sizeof(*dp));
169 dp->dp_saddr = conn->c_laddr; 212 dp->dp_saddr = conn->c_laddr;
170 dp->dp_daddr = conn->c_faddr; 213 dp->dp_daddr = conn->c_faddr;
@@ -189,7 +232,8 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
189 232
190static void rds_ib_cq_event_handler(struct ib_event *event, void *data) 233static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
191{ 234{
192 rdsdebug("event %u data %p\n", event->event, data); 235 rdsdebug("event %u (%s) data %p\n",
236 event->event, rds_ib_event_str(event->event), data);
193} 237}
194 238
195static void rds_ib_qp_event_handler(struct ib_event *event, void *data) 239static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
@@ -197,16 +241,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
197 struct rds_connection *conn = data; 241 struct rds_connection *conn = data;
198 struct rds_ib_connection *ic = conn->c_transport_data; 242 struct rds_ib_connection *ic = conn->c_transport_data;
199 243
200 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); 244 rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
245 rds_ib_event_str(event->event));
201 246
202 switch (event->event) { 247 switch (event->event) {
203 case IB_EVENT_COMM_EST: 248 case IB_EVENT_COMM_EST:
204 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); 249 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
205 break; 250 break;
206 default: 251 default:
207 rdsdebug("Fatal QP Event %u " 252 rdsdebug("Fatal QP Event %u (%s) "
208 "- connection %pI4->%pI4, reconnecting\n", 253 "- connection %pI4->%pI4, reconnecting\n",
209 event->event, &conn->c_laddr, &conn->c_faddr); 254 event->event, rds_ib_event_str(event->event),
255 &conn->c_laddr, &conn->c_faddr);
210 rds_conn_drop(conn); 256 rds_conn_drop(conn);
211 break; 257 break;
212 } 258 }
@@ -224,18 +270,16 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
224 struct rds_ib_device *rds_ibdev; 270 struct rds_ib_device *rds_ibdev;
225 int ret; 271 int ret;
226 272
227 /* rds_ib_add_one creates a rds_ib_device object per IB device, 273 /*
228 * and allocates a protection domain, memory range and FMR pool 274 * It's normal to see a null device if an incoming connection races
229 * for each. If that fails for any reason, it will not register 275 * with device removal, so we don't print a warning.
230 * the rds_ibdev at all.
231 */ 276 */
232 rds_ibdev = ib_get_client_data(dev, &rds_ib_client); 277 rds_ibdev = rds_ib_get_client_data(dev);
233 if (rds_ibdev == NULL) { 278 if (!rds_ibdev)
234 if (printk_ratelimit())
235 printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
236 dev->name);
237 return -EOPNOTSUPP; 279 return -EOPNOTSUPP;
238 } 280
281 /* add the conn now so that connection establishment has the dev */
282 rds_ib_add_conn(rds_ibdev, conn);
239 283
240 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) 284 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
241 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); 285 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
@@ -306,7 +350,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
306 ic->i_send_ring.w_nr * 350 ic->i_send_ring.w_nr *
307 sizeof(struct rds_header), 351 sizeof(struct rds_header),
308 &ic->i_send_hdrs_dma, GFP_KERNEL); 352 &ic->i_send_hdrs_dma, GFP_KERNEL);
309 if (ic->i_send_hdrs == NULL) { 353 if (!ic->i_send_hdrs) {
310 ret = -ENOMEM; 354 ret = -ENOMEM;
311 rdsdebug("ib_dma_alloc_coherent send failed\n"); 355 rdsdebug("ib_dma_alloc_coherent send failed\n");
312 goto out; 356 goto out;
@@ -316,7 +360,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
316 ic->i_recv_ring.w_nr * 360 ic->i_recv_ring.w_nr *
317 sizeof(struct rds_header), 361 sizeof(struct rds_header),
318 &ic->i_recv_hdrs_dma, GFP_KERNEL); 362 &ic->i_recv_hdrs_dma, GFP_KERNEL);
319 if (ic->i_recv_hdrs == NULL) { 363 if (!ic->i_recv_hdrs) {
320 ret = -ENOMEM; 364 ret = -ENOMEM;
321 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 365 rdsdebug("ib_dma_alloc_coherent recv failed\n");
322 goto out; 366 goto out;
@@ -324,22 +368,24 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
324 368
325 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 369 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
326 &ic->i_ack_dma, GFP_KERNEL); 370 &ic->i_ack_dma, GFP_KERNEL);
327 if (ic->i_ack == NULL) { 371 if (!ic->i_ack) {
328 ret = -ENOMEM; 372 ret = -ENOMEM;
329 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 373 rdsdebug("ib_dma_alloc_coherent ack failed\n");
330 goto out; 374 goto out;
331 } 375 }
332 376
333 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 377 ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
334 if (ic->i_sends == NULL) { 378 ibdev_to_node(dev));
379 if (!ic->i_sends) {
335 ret = -ENOMEM; 380 ret = -ENOMEM;
336 rdsdebug("send allocation failed\n"); 381 rdsdebug("send allocation failed\n");
337 goto out; 382 goto out;
338 } 383 }
339 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 384 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
340 385
341 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); 386 ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
342 if (ic->i_recvs == NULL) { 387 ibdev_to_node(dev));
388 if (!ic->i_recvs) {
343 ret = -ENOMEM; 389 ret = -ENOMEM;
344 rdsdebug("recv allocation failed\n"); 390 rdsdebug("recv allocation failed\n");
345 goto out; 391 goto out;
@@ -352,6 +398,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
352 ic->i_send_cq, ic->i_recv_cq); 398 ic->i_send_cq, ic->i_recv_cq);
353 399
354out: 400out:
401 rds_ib_dev_put(rds_ibdev);
355 return ret; 402 return ret;
356} 403}
357 404
@@ -409,7 +456,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
409 struct rds_ib_connection *ic = NULL; 456 struct rds_ib_connection *ic = NULL;
410 struct rdma_conn_param conn_param; 457 struct rdma_conn_param conn_param;
411 u32 version; 458 u32 version;
412 int err, destroy = 1; 459 int err = 1, destroy = 1;
413 460
414 /* Check whether the remote protocol version matches ours. */ 461 /* Check whether the remote protocol version matches ours. */
415 version = rds_ib_protocol_compatible(event); 462 version = rds_ib_protocol_compatible(event);
@@ -448,7 +495,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
448 /* Wait and see - our connect may still be succeeding */ 495 /* Wait and see - our connect may still be succeeding */
449 rds_ib_stats_inc(s_ib_connect_raced); 496 rds_ib_stats_inc(s_ib_connect_raced);
450 } 497 }
451 mutex_unlock(&conn->c_cm_lock);
452 goto out; 498 goto out;
453 } 499 }
454 500
@@ -475,24 +521,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
475 err = rds_ib_setup_qp(conn); 521 err = rds_ib_setup_qp(conn);
476 if (err) { 522 if (err) {
477 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); 523 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
478 mutex_unlock(&conn->c_cm_lock);
479 goto out; 524 goto out;
480 } 525 }
481 526
482 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); 527 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
528 event->param.conn.responder_resources,
529 event->param.conn.initiator_depth);
483 530
484 /* rdma_accept() calls rdma_reject() internally if it fails */ 531 /* rdma_accept() calls rdma_reject() internally if it fails */
485 err = rdma_accept(cm_id, &conn_param); 532 err = rdma_accept(cm_id, &conn_param);
486 mutex_unlock(&conn->c_cm_lock); 533 if (err)
487 if (err) {
488 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); 534 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
489 goto out;
490 }
491
492 return 0;
493 535
494out: 536out:
495 rdma_reject(cm_id, NULL, 0); 537 if (conn)
538 mutex_unlock(&conn->c_cm_lock);
539 if (err)
540 rdma_reject(cm_id, NULL, 0);
496 return destroy; 541 return destroy;
497} 542}
498 543
@@ -516,8 +561,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
516 goto out; 561 goto out;
517 } 562 }
518 563
519 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); 564 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
520 565 UINT_MAX, UINT_MAX);
521 ret = rdma_connect(cm_id, &conn_param); 566 ret = rdma_connect(cm_id, &conn_param);
522 if (ret) 567 if (ret)
523 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); 568 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
@@ -601,9 +646,19 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
601 ic->i_cm_id, err); 646 ic->i_cm_id, err);
602 } 647 }
603 648
649 /*
650 * We want to wait for tx and rx completion to finish
651 * before we tear down the connection, but we have to be
652 * careful not to get stuck waiting on a send ring that
        653 * only has unsignaled sends in it. We've shut down new
        654 * sends before getting here, so by waiting for signaled
        655 * sends to complete we're assured that there will be no
656 * more tx processing.
657 */
604 wait_event(rds_ib_ring_empty_wait, 658 wait_event(rds_ib_ring_empty_wait,
605 rds_ib_ring_empty(&ic->i_send_ring) && 659 rds_ib_ring_empty(&ic->i_recv_ring) &&
606 rds_ib_ring_empty(&ic->i_recv_ring)); 660 (atomic_read(&ic->i_signaled_sends) == 0));
661 tasklet_kill(&ic->i_recv_tasklet);
607 662
608 if (ic->i_send_hdrs) 663 if (ic->i_send_hdrs)
609 ib_dma_free_coherent(dev, 664 ib_dma_free_coherent(dev,
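
The comment added above spells out the shutdown invariant: new sends are already blocked when this runs, so waiting for the i_signaled_sends counter to reach zero (together with an empty receive ring) is enough to guarantee no further tx processing, even if the send ring still holds unsignaled entries. A minimal sketch of that counting pattern, with invented names (my_conn, drain_sends) rather than the RDS structures:

/* Sketch only: count signaled sends so teardown can wait for them. */
#include <linux/wait.h>
#include <linux/atomic.h>	/* <asm/atomic.h> on 2.6.3x-era trees */

struct my_conn {
        atomic_t                signaled_sends; /* posted with IB_SEND_SIGNALED */
        wait_queue_head_t       drain_wait;
};

static void my_conn_init(struct my_conn *c)
{
        atomic_set(&c->signaled_sends, 0);
        init_waitqueue_head(&c->drain_wait);
}

/* Called before posting a signaled work request. */
static void note_signaled_send(struct my_conn *c)
{
        atomic_inc(&c->signaled_sends);
}

/* Called from the send completion handler. */
static void note_send_complete(struct my_conn *c)
{
        if (atomic_dec_and_test(&c->signaled_sends))
                wake_up(&c->drain_wait);
}

/* Called during shutdown, after new sends have been stopped. */
static void drain_sends(struct my_conn *c)
{
        wait_event(c->drain_wait, atomic_read(&c->signaled_sends) == 0);
}
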
@@ -654,9 +709,12 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
654 BUG_ON(ic->rds_ibdev); 709 BUG_ON(ic->rds_ibdev);
655 710
656 /* Clear pending transmit */ 711 /* Clear pending transmit */
657 if (ic->i_rm) { 712 if (ic->i_data_op) {
658 rds_message_put(ic->i_rm); 713 struct rds_message *rm;
659 ic->i_rm = NULL; 714
715 rm = container_of(ic->i_data_op, struct rds_message, data);
716 rds_message_put(rm);
717 ic->i_data_op = NULL;
660 } 718 }
661 719
662 /* Clear the ACK state */ 720 /* Clear the ACK state */
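
The pending-transmit cleanup above now stores a pointer to the data operation embedded inside struct rds_message and uses container_of() to get back to the message before dropping its reference. A stripped-down illustration of that recovery; my_message and my_data_op are invented types, not the RDS ones:

#include <linux/kernel.h>	/* container_of() */
#include <linux/atomic.h>

struct my_data_op {
        unsigned int    op_nents;
};

struct my_message {
        atomic_t        m_refcount;
        struct my_data_op data;         /* embedded, like rm->data */
};

/* Given only the embedded member, recover the enclosing message. */
static struct my_message *op_to_message(struct my_data_op *op)
{
        return container_of(op, struct my_message, data);
}
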
@@ -690,12 +748,19 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
690{ 748{
691 struct rds_ib_connection *ic; 749 struct rds_ib_connection *ic;
692 unsigned long flags; 750 unsigned long flags;
751 int ret;
693 752
694 /* XXX too lazy? */ 753 /* XXX too lazy? */
695 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); 754 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
696 if (ic == NULL) 755 if (!ic)
697 return -ENOMEM; 756 return -ENOMEM;
698 757
758 ret = rds_ib_recv_alloc_caches(ic);
759 if (ret) {
760 kfree(ic);
761 return ret;
762 }
763
699 INIT_LIST_HEAD(&ic->ib_node); 764 INIT_LIST_HEAD(&ic->ib_node);
700 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, 765 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
701 (unsigned long) ic); 766 (unsigned long) ic);
@@ -703,6 +768,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
703#ifndef KERNEL_HAS_ATOMIC64 768#ifndef KERNEL_HAS_ATOMIC64
704 spin_lock_init(&ic->i_ack_lock); 769 spin_lock_init(&ic->i_ack_lock);
705#endif 770#endif
771 atomic_set(&ic->i_signaled_sends, 0);
706 772
707 /* 773 /*
708 * rds_ib_conn_shutdown() waits for these to be emptied so they 774 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -744,6 +810,8 @@ void rds_ib_conn_free(void *arg)
744 list_del(&ic->ib_node); 810 list_del(&ic->ib_node);
745 spin_unlock_irq(lock_ptr); 811 spin_unlock_irq(lock_ptr);
746 812
813 rds_ib_recv_free_caches(ic);
814
747 kfree(ic); 815 kfree(ic);
748} 816}
749 817
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index a54cd63f9e35..b5a88415a18e 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -32,11 +32,16 @@
32 */ 32 */
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/rculist.h>
35 36
36#include "rds.h" 37#include "rds.h"
37#include "rdma.h"
38#include "ib.h" 38#include "ib.h"
39#include "xlist.h"
39 40
41struct workqueue_struct *rds_ib_fmr_wq;
42
43static DEFINE_PER_CPU(unsigned long, clean_list_grace);
44#define CLEAN_LIST_BUSY_BIT 0
40 45
41/* 46/*
42 * This is stored as mr->r_trans_private. 47 * This is stored as mr->r_trans_private.
@@ -45,7 +50,11 @@ struct rds_ib_mr {
45 struct rds_ib_device *device; 50 struct rds_ib_device *device;
46 struct rds_ib_mr_pool *pool; 51 struct rds_ib_mr_pool *pool;
47 struct ib_fmr *fmr; 52 struct ib_fmr *fmr;
48 struct list_head list; 53
54 struct xlist_head xlist;
55
56 /* unmap_list is for freeing */
57 struct list_head unmap_list;
49 unsigned int remap_count; 58 unsigned int remap_count;
50 59
51 struct scatterlist *sg; 60 struct scatterlist *sg;
@@ -59,14 +68,16 @@ struct rds_ib_mr {
59 */ 68 */
60struct rds_ib_mr_pool { 69struct rds_ib_mr_pool {
61 struct mutex flush_lock; /* serialize fmr invalidate */ 70 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */ 71 struct delayed_work flush_worker; /* flush worker */
63 72
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */ 73 atomic_t item_count; /* total # of MRs */
66 atomic_t dirty_count; /* # dirty of MRs */ 74 atomic_t dirty_count; /* # dirty of MRs */
67 struct list_head drop_list; /* MRs that have reached their max_maps limit */ 75
68 struct list_head free_list; /* unused MRs */ 76 struct xlist_head drop_list; /* MRs that have reached their max_maps limit */
 69	struct list_head	clean_list;	/* unused & unmapped MRs */		 77	struct xlist_head	free_list;	/* unused MRs */
        78	struct xlist_head	clean_list;	/* global unused & unmapped MRs */
79 wait_queue_head_t flush_wait;
80
70 atomic_t free_pinned; /* memory pinned by free MRs */ 81 atomic_t free_pinned; /* memory pinned by free MRs */
71 unsigned long max_items; 82 unsigned long max_items;
72 unsigned long max_items_soft; 83 unsigned long max_items_soft;
@@ -74,7 +85,7 @@ struct rds_ib_mr_pool {
74 struct ib_fmr_attr fmr_attr; 85 struct ib_fmr_attr fmr_attr;
75}; 86};
76 87
77static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all); 88static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
78static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr); 89static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
79static void rds_ib_mr_pool_flush_worker(struct work_struct *work); 90static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
80 91
@@ -83,16 +94,17 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
83 struct rds_ib_device *rds_ibdev; 94 struct rds_ib_device *rds_ibdev;
84 struct rds_ib_ipaddr *i_ipaddr; 95 struct rds_ib_ipaddr *i_ipaddr;
85 96
86 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 97 rcu_read_lock();
87 spin_lock_irq(&rds_ibdev->spinlock); 98 list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
88 list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 99 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
89 if (i_ipaddr->ipaddr == ipaddr) { 100 if (i_ipaddr->ipaddr == ipaddr) {
90 spin_unlock_irq(&rds_ibdev->spinlock); 101 atomic_inc(&rds_ibdev->refcount);
102 rcu_read_unlock();
91 return rds_ibdev; 103 return rds_ibdev;
92 } 104 }
93 } 105 }
94 spin_unlock_irq(&rds_ibdev->spinlock);
95 } 106 }
107 rcu_read_unlock();
96 108
97 return NULL; 109 return NULL;
98} 110}
@@ -108,7 +120,7 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
108 i_ipaddr->ipaddr = ipaddr; 120 i_ipaddr->ipaddr = ipaddr;
109 121
110 spin_lock_irq(&rds_ibdev->spinlock); 122 spin_lock_irq(&rds_ibdev->spinlock);
111 list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list); 123 list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
112 spin_unlock_irq(&rds_ibdev->spinlock); 124 spin_unlock_irq(&rds_ibdev->spinlock);
113 125
114 return 0; 126 return 0;
@@ -116,17 +128,24 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
116 128
117static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 129static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
118{ 130{
119 struct rds_ib_ipaddr *i_ipaddr, *next; 131 struct rds_ib_ipaddr *i_ipaddr;
132 struct rds_ib_ipaddr *to_free = NULL;
133
120 134
121 spin_lock_irq(&rds_ibdev->spinlock); 135 spin_lock_irq(&rds_ibdev->spinlock);
122 list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) { 136 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
123 if (i_ipaddr->ipaddr == ipaddr) { 137 if (i_ipaddr->ipaddr == ipaddr) {
124 list_del(&i_ipaddr->list); 138 list_del_rcu(&i_ipaddr->list);
125 kfree(i_ipaddr); 139 to_free = i_ipaddr;
126 break; 140 break;
127 } 141 }
128 } 142 }
129 spin_unlock_irq(&rds_ibdev->spinlock); 143 spin_unlock_irq(&rds_ibdev->spinlock);
144
145 if (to_free) {
146 synchronize_rcu();
147 kfree(to_free);
148 }
130} 149}
131 150
132int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 151int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
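
These hunks convert the per-device ipaddr list from spinlock-protected reads to RCU: lookups walk the list under rcu_read_lock() and take a device reference, while removal unlinks with list_del_rcu() under the writer spinlock and frees only after synchronize_rcu(). A reduced sketch of that read/remove pairing, with invented names and without the device refcounting:

#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_addr {
        __be32                  ipaddr;
        struct list_head        list;
};

static LIST_HEAD(my_addrs);
static DEFINE_SPINLOCK(my_addrs_lock);          /* serializes writers only */

static bool my_addr_present(__be32 ipaddr)
{
        struct my_addr *a;
        bool found = false;

        rcu_read_lock();
        list_for_each_entry_rcu(a, &my_addrs, list) {
                if (a->ipaddr == ipaddr) {
                        found = true;   /* real code would also take a reference */
                        break;
                }
        }
        rcu_read_unlock();
        return found;
}

static void my_addr_remove(__be32 ipaddr)
{
        struct my_addr *a, *victim = NULL;

        spin_lock_irq(&my_addrs_lock);
        list_for_each_entry(a, &my_addrs, list) {
                if (a->ipaddr == ipaddr) {
                        list_del_rcu(&a->list);
                        victim = a;
                        break;
                }
        }
        spin_unlock_irq(&my_addrs_lock);

        if (victim) {
                synchronize_rcu();      /* let in-flight readers finish first */
                kfree(victim);
        }
}
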
@@ -134,8 +153,10 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
134 struct rds_ib_device *rds_ibdev_old; 153 struct rds_ib_device *rds_ibdev_old;
135 154
136 rds_ibdev_old = rds_ib_get_device(ipaddr); 155 rds_ibdev_old = rds_ib_get_device(ipaddr);
137 if (rds_ibdev_old) 156 if (rds_ibdev_old) {
138 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); 157 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
158 rds_ib_dev_put(rds_ibdev_old);
159 }
139 160
140 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 161 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
141} 162}
@@ -150,12 +171,13 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
150 BUG_ON(list_empty(&ic->ib_node)); 171 BUG_ON(list_empty(&ic->ib_node));
151 list_del(&ic->ib_node); 172 list_del(&ic->ib_node);
152 173
153 spin_lock_irq(&rds_ibdev->spinlock); 174 spin_lock(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 175 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock); 176 spin_unlock(&rds_ibdev->spinlock);
156 spin_unlock_irq(&ib_nodev_conns_lock); 177 spin_unlock_irq(&ib_nodev_conns_lock);
157 178
158 ic->rds_ibdev = rds_ibdev; 179 ic->rds_ibdev = rds_ibdev;
180 atomic_inc(&rds_ibdev->refcount);
159} 181}
160 182
161void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 183void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
@@ -175,18 +197,18 @@ void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *
175 spin_unlock(&ib_nodev_conns_lock); 197 spin_unlock(&ib_nodev_conns_lock);
176 198
177 ic->rds_ibdev = NULL; 199 ic->rds_ibdev = NULL;
200 rds_ib_dev_put(rds_ibdev);
178} 201}
179 202
180void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) 203void rds_ib_destroy_nodev_conns(void)
181{ 204{
182 struct rds_ib_connection *ic, *_ic; 205 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 206 LIST_HEAD(tmp_list);
184 207
185 /* avoid calling conn_destroy with irqs off */ 208 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(list_lock); 209 spin_lock_irq(&ib_nodev_conns_lock);
187 list_splice(list, &tmp_list); 210 list_splice(&ib_nodev_conns, &tmp_list);
188 INIT_LIST_HEAD(list); 211 spin_unlock_irq(&ib_nodev_conns_lock);
189 spin_unlock_irq(list_lock);
190 212
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) 213 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
192 rds_conn_destroy(ic->conn); 214 rds_conn_destroy(ic->conn);
@@ -200,12 +222,12 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
200 if (!pool) 222 if (!pool)
201 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
202 224
203 INIT_LIST_HEAD(&pool->free_list); 225 INIT_XLIST_HEAD(&pool->free_list);
204 INIT_LIST_HEAD(&pool->drop_list); 226 INIT_XLIST_HEAD(&pool->drop_list);
205 INIT_LIST_HEAD(&pool->clean_list); 227 INIT_XLIST_HEAD(&pool->clean_list);
206 mutex_init(&pool->flush_lock); 228 mutex_init(&pool->flush_lock);
207 spin_lock_init(&pool->list_lock); 229 init_waitqueue_head(&pool->flush_wait);
208 INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 230 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
209 231
210 pool->fmr_attr.max_pages = fmr_message_size; 232 pool->fmr_attr.max_pages = fmr_message_size;
211 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; 233 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
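
The pool's flush work becomes a delayed_work queued on a dedicated workqueue (created later in this patch as "rds_fmr_flushd"), so flushes can be batched a few jiffies out and torn down with cancel_delayed_work_sync(). A small, self-contained illustration of that workqueue API, independent of the RDS types:

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;
static struct delayed_work my_flush_work;

static void my_flush_fn(struct work_struct *work)
{
        struct delayed_work *dw = container_of(work, struct delayed_work, work);

        /* a real pool would be container_of(dw, struct my_pool, flush_worker) */
        (void)dw;
}

static int my_init(void)
{
        my_wq = create_workqueue("my_flushd");
        if (!my_wq)
                return -ENOMEM;

        INIT_DELAYED_WORK(&my_flush_work, my_flush_fn);
        queue_delayed_work(my_wq, &my_flush_work, 10);  /* run ~10 jiffies later */
        return 0;
}

static void my_exit(void)
{
        cancel_delayed_work_sync(&my_flush_work);       /* waits for a running flush */
        destroy_workqueue(my_wq);
}
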
@@ -233,34 +255,60 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
233 255
234void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 256void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
235{ 257{
236 flush_workqueue(rds_wq); 258 cancel_delayed_work_sync(&pool->flush_worker);
237 rds_ib_flush_mr_pool(pool, 1); 259 rds_ib_flush_mr_pool(pool, 1, NULL);
238 WARN_ON(atomic_read(&pool->item_count)); 260 WARN_ON(atomic_read(&pool->item_count));
239 WARN_ON(atomic_read(&pool->free_pinned)); 261 WARN_ON(atomic_read(&pool->free_pinned));
240 kfree(pool); 262 kfree(pool);
241} 263}
242 264
265static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
266 struct rds_ib_mr **ibmr_ret)
267{
268 struct xlist_head *ibmr_xl;
269 ibmr_xl = xlist_del_head_fast(xl);
270 *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
271}
272
243static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) 273static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
244{ 274{
245 struct rds_ib_mr *ibmr = NULL; 275 struct rds_ib_mr *ibmr = NULL;
246 unsigned long flags; 276 struct xlist_head *ret;
277 unsigned long *flag;
247 278
248 spin_lock_irqsave(&pool->list_lock, flags); 279 preempt_disable();
249 if (!list_empty(&pool->clean_list)) { 280 flag = &__get_cpu_var(clean_list_grace);
250 ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list); 281 set_bit(CLEAN_LIST_BUSY_BIT, flag);
251 list_del_init(&ibmr->list); 282 ret = xlist_del_head(&pool->clean_list);
252 } 283 if (ret)
253 spin_unlock_irqrestore(&pool->list_lock, flags); 284 ibmr = list_entry(ret, struct rds_ib_mr, xlist);
254 285
286 clear_bit(CLEAN_LIST_BUSY_BIT, flag);
287 preempt_enable();
255 return ibmr; 288 return ibmr;
256} 289}
257 290
291static inline void wait_clean_list_grace(void)
292{
293 int cpu;
294 unsigned long *flag;
295
296 for_each_online_cpu(cpu) {
297 flag = &per_cpu(clean_list_grace, cpu);
298 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
299 cpu_relax();
300 }
301}
302
258static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) 303static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
259{ 304{
260 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 305 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
261 struct rds_ib_mr *ibmr = NULL; 306 struct rds_ib_mr *ibmr = NULL;
262 int err = 0, iter = 0; 307 int err = 0, iter = 0;
263 308
309 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
310 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
311
264 while (1) { 312 while (1) {
265 ibmr = rds_ib_reuse_fmr(pool); 313 ibmr = rds_ib_reuse_fmr(pool);
266 if (ibmr) 314 if (ibmr)
@@ -287,19 +335,24 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
287 335
288 /* We do have some empty MRs. Flush them out. */ 336 /* We do have some empty MRs. Flush them out. */
289 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); 337 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
290 rds_ib_flush_mr_pool(pool, 0); 338 rds_ib_flush_mr_pool(pool, 0, &ibmr);
339 if (ibmr)
340 return ibmr;
291 } 341 }
292 342
293 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); 343 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
294 if (!ibmr) { 344 if (!ibmr) {
295 err = -ENOMEM; 345 err = -ENOMEM;
296 goto out_no_cigar; 346 goto out_no_cigar;
297 } 347 }
298 348
349 memset(ibmr, 0, sizeof(*ibmr));
350
299 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, 351 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
300 (IB_ACCESS_LOCAL_WRITE | 352 (IB_ACCESS_LOCAL_WRITE |
301 IB_ACCESS_REMOTE_READ | 353 IB_ACCESS_REMOTE_READ |
302 IB_ACCESS_REMOTE_WRITE), 354 IB_ACCESS_REMOTE_WRITE|
355 IB_ACCESS_REMOTE_ATOMIC),
303 &pool->fmr_attr); 356 &pool->fmr_attr);
304 if (IS_ERR(ibmr->fmr)) { 357 if (IS_ERR(ibmr->fmr)) {
305 err = PTR_ERR(ibmr->fmr); 358 err = PTR_ERR(ibmr->fmr);
@@ -367,7 +420,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
367 if (page_cnt > fmr_message_size) 420 if (page_cnt > fmr_message_size)
368 return -EINVAL; 421 return -EINVAL;
369 422
370 dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC); 423 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
424 rdsibdev_to_node(rds_ibdev));
371 if (!dma_pages) 425 if (!dma_pages)
372 return -ENOMEM; 426 return -ENOMEM;
373 427
@@ -441,7 +495,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
441 495
442 /* FIXME we need a way to tell a r/w MR 496 /* FIXME we need a way to tell a r/w MR
443 * from a r/o MR */ 497 * from a r/o MR */
444 BUG_ON(in_interrupt()); 498 BUG_ON(irqs_disabled());
445 set_page_dirty(page); 499 set_page_dirty(page);
446 put_page(page); 500 put_page(page);
447 } 501 }
@@ -477,33 +531,109 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
477} 531}
478 532
479/* 533/*
534 * given an xlist of mrs, put them all into the list_head for more processing
535 */
536static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list)
537{
538 struct rds_ib_mr *ibmr;
539 struct xlist_head splice;
540 struct xlist_head *cur;
541 struct xlist_head *next;
542
543 splice.next = NULL;
544 xlist_splice(xlist, &splice);
545 cur = splice.next;
546 while (cur) {
547 next = cur->next;
548 ibmr = list_entry(cur, struct rds_ib_mr, xlist);
549 list_add_tail(&ibmr->unmap_list, list);
550 cur = next;
551 }
552}
553
554/*
555 * this takes a list head of mrs and turns it into an xlist of clusters.
556 * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for
557 * reuse.
558 */
559static void list_append_to_xlist(struct rds_ib_mr_pool *pool,
560 struct list_head *list, struct xlist_head *xlist,
561 struct xlist_head **tail_ret)
562{
563 struct rds_ib_mr *ibmr;
564 struct xlist_head *cur_mr = xlist;
565 struct xlist_head *tail_mr = NULL;
566
567 list_for_each_entry(ibmr, list, unmap_list) {
568 tail_mr = &ibmr->xlist;
569 tail_mr->next = NULL;
570 cur_mr->next = tail_mr;
571 cur_mr = tail_mr;
572 }
573 *tail_ret = tail_mr;
574}
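
xlist.h is added elsewhere in this patch set and is not shown in this diff, so its API is only implied by the callers above: it behaves like a lock-free, singly linked push/pop list anchored by a bare next pointer. The following is purely an illustrative sketch of such a structure, not the RDS xlist implementation, and assumes the usual kernel xchg()/cmpxchg() primitives from the arch headers:

#include <linux/kernel.h>	/* xchg()/cmpxchg() via the arch headers */

struct sketch_xlist_head {
        struct sketch_xlist_head *next;
};

/* Push one node onto the list head; retry if another CPU raced us. */
static void sketch_xlist_add(struct sketch_xlist_head *node,
                             struct sketch_xlist_head *head)
{
        struct sketch_xlist_head *old;

        do {
                old = head->next;
                node->next = old;
        } while (cmpxchg(&head->next, old, node) != old);
}

/* Detach the entire chain in one atomic exchange (as a splice would). */
static struct sketch_xlist_head *sketch_xlist_del_all(struct sketch_xlist_head *head)
{
        return xchg(&head->next, NULL);
}
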
575
576/*
480 * Flush our pool of MRs. 577 * Flush our pool of MRs.
481 * At a minimum, all currently unused MRs are unmapped. 578 * At a minimum, all currently unused MRs are unmapped.
482 * If the number of MRs allocated exceeds the limit, we also try 579 * If the number of MRs allocated exceeds the limit, we also try
483 * to free as many MRs as needed to get back to this limit. 580 * to free as many MRs as needed to get back to this limit.
484 */ 581 */
485static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all) 582static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
583 int free_all, struct rds_ib_mr **ibmr_ret)
486{ 584{
487 struct rds_ib_mr *ibmr, *next; 585 struct rds_ib_mr *ibmr, *next;
586 struct xlist_head clean_xlist;
587 struct xlist_head *clean_tail;
488 LIST_HEAD(unmap_list); 588 LIST_HEAD(unmap_list);
489 LIST_HEAD(fmr_list); 589 LIST_HEAD(fmr_list);
490 unsigned long unpinned = 0; 590 unsigned long unpinned = 0;
491 unsigned long flags;
492 unsigned int nfreed = 0, ncleaned = 0, free_goal; 591 unsigned int nfreed = 0, ncleaned = 0, free_goal;
493 int ret = 0; 592 int ret = 0;
494 593
495 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); 594 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
496 595
497 mutex_lock(&pool->flush_lock); 596 if (ibmr_ret) {
597 DEFINE_WAIT(wait);
        598		while (!mutex_trylock(&pool->flush_lock)) {
599 ibmr = rds_ib_reuse_fmr(pool);
600 if (ibmr) {
601 *ibmr_ret = ibmr;
602 finish_wait(&pool->flush_wait, &wait);
603 goto out_nolock;
604 }
605
606 prepare_to_wait(&pool->flush_wait, &wait,
607 TASK_UNINTERRUPTIBLE);
608 if (xlist_empty(&pool->clean_list))
609 schedule();
610
611 ibmr = rds_ib_reuse_fmr(pool);
612 if (ibmr) {
613 *ibmr_ret = ibmr;
614 finish_wait(&pool->flush_wait, &wait);
615 goto out_nolock;
616 }
617 }
618 finish_wait(&pool->flush_wait, &wait);
619 } else
620 mutex_lock(&pool->flush_lock);
621
622 if (ibmr_ret) {
623 ibmr = rds_ib_reuse_fmr(pool);
624 if (ibmr) {
625 *ibmr_ret = ibmr;
626 goto out;
627 }
628 }
498 629
499 spin_lock_irqsave(&pool->list_lock, flags);
500 /* Get the list of all MRs to be dropped. Ordering matters - 630 /* Get the list of all MRs to be dropped. Ordering matters -
501 * we want to put drop_list ahead of free_list. */ 631 * we want to put drop_list ahead of free_list.
502 list_splice_init(&pool->free_list, &unmap_list); 632 */
503 list_splice_init(&pool->drop_list, &unmap_list); 633 xlist_append_to_list(&pool->drop_list, &unmap_list);
634 xlist_append_to_list(&pool->free_list, &unmap_list);
504 if (free_all) 635 if (free_all)
505 list_splice_init(&pool->clean_list, &unmap_list); 636 xlist_append_to_list(&pool->clean_list, &unmap_list);
506 spin_unlock_irqrestore(&pool->list_lock, flags);
507 637
508 free_goal = rds_ib_flush_goal(pool, free_all); 638 free_goal = rds_ib_flush_goal(pool, free_all);
509 639
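
The reworked entry path above means a caller that needs an MR either becomes the flusher (it wins mutex_trylock()) or sleeps on flush_wait until whoever is flushing recycles something onto the clean list. A skeletal version of that trylock-or-wait pattern, with stubbed helpers standing in for the clean-list and unmap logic:

#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DEFINE_MUTEX(my_flush_lock);
static DECLARE_WAIT_QUEUE_HEAD(my_flush_wait);

static void *try_reuse(void) { return NULL; }   /* stub: pop the clean list */
static void do_flush(void) { }                  /* stub: unmap MRs, refill clean list */

static void *get_item_or_flush(void)
{
        DEFINE_WAIT(wait);
        void *item = NULL;

        while (!mutex_trylock(&my_flush_lock)) {
                /* someone else is flushing; see if they freed something already */
                item = try_reuse();
                if (item)
                        goto out;

                prepare_to_wait(&my_flush_wait, &wait, TASK_UNINTERRUPTIBLE);
                item = try_reuse();             /* re-check after queueing ourselves */
                if (item)
                        goto out;
                schedule();                     /* flusher wakes us via wake_up() */
        }

        do_flush();                             /* we own the lock: flush ourselves */
        item = try_reuse();
        mutex_unlock(&my_flush_lock);
        if (waitqueue_active(&my_flush_wait))
                wake_up(&my_flush_wait);
out:
        finish_wait(&my_flush_wait, &wait);
        return item;
}
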
@@ -511,19 +641,20 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
511 goto out; 641 goto out;
512 642
513 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 643 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
514 list_for_each_entry(ibmr, &unmap_list, list) 644 list_for_each_entry(ibmr, &unmap_list, unmap_list)
515 list_add(&ibmr->fmr->list, &fmr_list); 645 list_add(&ibmr->fmr->list, &fmr_list);
646
516 ret = ib_unmap_fmr(&fmr_list); 647 ret = ib_unmap_fmr(&fmr_list);
517 if (ret) 648 if (ret)
518 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret); 649 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
519 650
520 /* Now we can destroy the DMA mapping and unpin any pages */ 651 /* Now we can destroy the DMA mapping and unpin any pages */
521 list_for_each_entry_safe(ibmr, next, &unmap_list, list) { 652 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
522 unpinned += ibmr->sg_len; 653 unpinned += ibmr->sg_len;
523 __rds_ib_teardown_mr(ibmr); 654 __rds_ib_teardown_mr(ibmr);
524 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { 655 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
525 rds_ib_stats_inc(s_ib_rdma_mr_free); 656 rds_ib_stats_inc(s_ib_rdma_mr_free);
526 list_del(&ibmr->list); 657 list_del(&ibmr->unmap_list);
527 ib_dealloc_fmr(ibmr->fmr); 658 ib_dealloc_fmr(ibmr->fmr);
528 kfree(ibmr); 659 kfree(ibmr);
529 nfreed++; 660 nfreed++;
@@ -531,9 +662,27 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
531 ncleaned++; 662 ncleaned++;
532 } 663 }
533 664
534 spin_lock_irqsave(&pool->list_lock, flags); 665 if (!list_empty(&unmap_list)) {
535 list_splice(&unmap_list, &pool->clean_list); 666 /* we have to make sure that none of the things we're about
536 spin_unlock_irqrestore(&pool->list_lock, flags); 667 * to put on the clean list would race with other cpus trying
668 * to pull items off. The xlist would explode if we managed to
669 * remove something from the clean list and then add it back again
670 * while another CPU was spinning on that same item in xlist_del_head.
671 *
672 * This is pretty unlikely, but just in case wait for an xlist grace period
673 * here before adding anything back into the clean list.
674 */
675 wait_clean_list_grace();
676
677 list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail);
678 if (ibmr_ret)
679 refill_local(pool, &clean_xlist, ibmr_ret);
680
681 /* refill_local may have emptied our list */
682 if (!xlist_empty(&clean_xlist))
683 xlist_add(clean_xlist.next, clean_tail, &pool->clean_list);
684
685 }
537 686
538 atomic_sub(unpinned, &pool->free_pinned); 687 atomic_sub(unpinned, &pool->free_pinned);
539 atomic_sub(ncleaned, &pool->dirty_count); 688 atomic_sub(ncleaned, &pool->dirty_count);
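
The "grace period" mentioned in the comment above is implemented with the per-cpu clean_list_grace bits set in rds_ib_reuse_fmr(): a CPU flags itself busy while it pops from the clean list, and the flusher spins until every CPU has cleared its bit before re-adding nodes. The handshake reduced to its essentials (invented names, mirroring the code in this patch):

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/bitops.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(unsigned long, my_grace);
#define MY_BUSY_BIT     0

/* Reader: flag this CPU as inside the clean-list pop. */
static void reader_pop(void)
{
        unsigned long *flag;

        preempt_disable();
        flag = &__get_cpu_var(my_grace);
        set_bit(MY_BUSY_BIT, flag);
        /* ... pop from the lock-free clean list here ... */
        clear_bit(MY_BUSY_BIT, flag);
        preempt_enable();
}

/* Writer: wait until no CPU is mid-pop before re-inserting nodes. */
static void wait_for_grace(void)
{
        int cpu;

        for_each_online_cpu(cpu) {
                while (test_bit(MY_BUSY_BIT, &per_cpu(my_grace, cpu)))
                        cpu_relax();
        }
}
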
@@ -541,14 +690,35 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
541 690
542out: 691out:
543 mutex_unlock(&pool->flush_lock); 692 mutex_unlock(&pool->flush_lock);
693 if (waitqueue_active(&pool->flush_wait))
694 wake_up(&pool->flush_wait);
695out_nolock:
544 return ret; 696 return ret;
545} 697}
546 698
699int rds_ib_fmr_init(void)
700{
701 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
702 if (!rds_ib_fmr_wq)
703 return -ENOMEM;
704 return 0;
705}
706
707/*
708 * By the time this is called all the IB devices should have been torn down and
709 * had their pools freed. As each pool is freed its work struct is waited on,
710 * so the pool flushing work queue should be idle by the time we get here.
711 */
712void rds_ib_fmr_exit(void)
713{
714 destroy_workqueue(rds_ib_fmr_wq);
715}
716
547static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 717static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
548{ 718{
549 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker); 719 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
550 720
551 rds_ib_flush_mr_pool(pool, 0); 721 rds_ib_flush_mr_pool(pool, 0, NULL);
552} 722}
553 723
554void rds_ib_free_mr(void *trans_private, int invalidate) 724void rds_ib_free_mr(void *trans_private, int invalidate)
@@ -556,47 +726,49 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
556 struct rds_ib_mr *ibmr = trans_private; 726 struct rds_ib_mr *ibmr = trans_private;
557 struct rds_ib_device *rds_ibdev = ibmr->device; 727 struct rds_ib_device *rds_ibdev = ibmr->device;
558 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 728 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
559 unsigned long flags;
560 729
561 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 730 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
562 731
563 /* Return it to the pool's free list */ 732 /* Return it to the pool's free list */
564 spin_lock_irqsave(&pool->list_lock, flags);
565 if (ibmr->remap_count >= pool->fmr_attr.max_maps) 733 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
566 list_add(&ibmr->list, &pool->drop_list); 734 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list);
567 else 735 else
568 list_add(&ibmr->list, &pool->free_list); 736 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list);
569 737
570 atomic_add(ibmr->sg_len, &pool->free_pinned); 738 atomic_add(ibmr->sg_len, &pool->free_pinned);
571 atomic_inc(&pool->dirty_count); 739 atomic_inc(&pool->dirty_count);
572 spin_unlock_irqrestore(&pool->list_lock, flags);
573 740
574 /* If we've pinned too many pages, request a flush */ 741 /* If we've pinned too many pages, request a flush */
575 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 742 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
576 atomic_read(&pool->dirty_count) >= pool->max_items / 10) 743 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
577 queue_work(rds_wq, &pool->flush_worker); 744 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
578 745
579 if (invalidate) { 746 if (invalidate) {
580 if (likely(!in_interrupt())) { 747 if (likely(!in_interrupt())) {
581 rds_ib_flush_mr_pool(pool, 0); 748 rds_ib_flush_mr_pool(pool, 0, NULL);
582 } else { 749 } else {
583 /* We get here if the user created a MR marked 750 /* We get here if the user created a MR marked
584 * as use_once and invalidate at the same time. */ 751 * as use_once and invalidate at the same time. */
585 queue_work(rds_wq, &pool->flush_worker); 752 queue_delayed_work(rds_ib_fmr_wq,
753 &pool->flush_worker, 10);
586 } 754 }
587 } 755 }
756
757 rds_ib_dev_put(rds_ibdev);
588} 758}
589 759
590void rds_ib_flush_mrs(void) 760void rds_ib_flush_mrs(void)
591{ 761{
592 struct rds_ib_device *rds_ibdev; 762 struct rds_ib_device *rds_ibdev;
593 763
764 down_read(&rds_ib_devices_lock);
594 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 765 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
595 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 766 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
596 767
597 if (pool) 768 if (pool)
598 rds_ib_flush_mr_pool(pool, 0); 769 rds_ib_flush_mr_pool(pool, 0, NULL);
599 } 770 }
771 up_read(&rds_ib_devices_lock);
600} 772}
601 773
602void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 774void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -628,6 +800,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
628 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); 800 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
629 801
630 ibmr->device = rds_ibdev; 802 ibmr->device = rds_ibdev;
803 rds_ibdev = NULL;
631 804
632 out: 805 out:
633 if (ret) { 806 if (ret) {
@@ -635,5 +808,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
635 rds_ib_free_mr(ibmr, 0); 808 rds_ib_free_mr(ibmr, 0);
636 ibmr = ERR_PTR(ret); 809 ibmr = ERR_PTR(ret);
637 } 810 }
811 if (rds_ibdev)
812 rds_ib_dev_put(rds_ibdev);
638 return ibmr; 813 return ibmr;
639} 814}
815
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index c74e9904a6b2..e29e0ca32f74 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -43,42 +43,6 @@ static struct kmem_cache *rds_ib_incoming_slab;
43static struct kmem_cache *rds_ib_frag_slab; 43static struct kmem_cache *rds_ib_frag_slab;
44static atomic_t rds_ib_allocation = ATOMIC_INIT(0); 44static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
45 45
46static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
47{
48 rdsdebug("frag %p page %p\n", frag, frag->f_page);
49 __free_page(frag->f_page);
50 frag->f_page = NULL;
51}
52
53static void rds_ib_frag_free(struct rds_page_frag *frag)
54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL);
57 kmem_cache_free(rds_ib_frag_slab, frag);
58}
59
60/*
61 * We map a page at a time. Its fragments are posted in order. This
62 * is called in fragment order as the fragments get send completion events.
63 * Only the last frag in the page performs the unmapping.
64 *
65 * It's OK for ring cleanup to call this in whatever order it likes because
66 * DMA is not in flight and so we can unmap while other ring entries still
67 * hold page references in their frags.
68 */
69static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
70 struct rds_ib_recv_work *recv)
71{
72 struct rds_page_frag *frag = recv->r_frag;
73
74 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
75 if (frag->f_mapped)
76 ib_dma_unmap_page(ic->i_cm_id->device,
77 frag->f_mapped,
78 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
79 frag->f_mapped = 0;
80}
81
82void rds_ib_recv_init_ring(struct rds_ib_connection *ic) 46void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
83{ 47{
84 struct rds_ib_recv_work *recv; 48 struct rds_ib_recv_work *recv;
@@ -95,16 +59,161 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
95 recv->r_wr.sg_list = recv->r_sge; 59 recv->r_wr.sg_list = recv->r_sge;
96 recv->r_wr.num_sge = RDS_IB_RECV_SGE; 60 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
97 61
98 sge = rds_ib_data_sge(ic, recv->r_sge); 62 sge = &recv->r_sge[0];
63 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
64 sge->length = sizeof(struct rds_header);
65 sge->lkey = ic->i_mr->lkey;
66
67 sge = &recv->r_sge[1];
99 sge->addr = 0; 68 sge->addr = 0;
100 sge->length = RDS_FRAG_SIZE; 69 sge->length = RDS_FRAG_SIZE;
101 sge->lkey = ic->i_mr->lkey; 70 sge->lkey = ic->i_mr->lkey;
71 }
72}
102 73
103 sge = rds_ib_header_sge(ic, recv->r_sge); 74/*
104 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); 75 * The entire 'from' list, including the from element itself, is put on
105 sge->length = sizeof(struct rds_header); 76 * to the tail of the 'to' list.
106 sge->lkey = ic->i_mr->lkey; 77 */
78static void list_splice_entire_tail(struct list_head *from,
79 struct list_head *to)
80{
81 struct list_head *from_last = from->prev;
82
83 list_splice_tail(from_last, to);
84 list_add_tail(from_last, to);
85}
86
87static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
88{
89 struct list_head *tmp;
90
91 tmp = xchg(&cache->xfer, NULL);
92 if (tmp) {
93 if (cache->ready)
94 list_splice_entire_tail(tmp, cache->ready);
95 else
96 cache->ready = tmp;
97 }
98}
99
100static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
101{
102 struct rds_ib_cache_head *head;
103 int cpu;
104
105 cache->percpu = alloc_percpu(struct rds_ib_cache_head);
106 if (!cache->percpu)
107 return -ENOMEM;
108
109 for_each_possible_cpu(cpu) {
110 head = per_cpu_ptr(cache->percpu, cpu);
111 head->first = NULL;
112 head->count = 0;
113 }
114 cache->xfer = NULL;
115 cache->ready = NULL;
116
117 return 0;
118}
119
120int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
121{
122 int ret;
123
124 ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
125 if (!ret) {
126 ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
127 if (ret)
128 free_percpu(ic->i_cache_incs.percpu);
107 } 129 }
130
131 return ret;
132}
133
134static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
135 struct list_head *caller_list)
136{
137 struct rds_ib_cache_head *head;
138 int cpu;
139
140 for_each_possible_cpu(cpu) {
141 head = per_cpu_ptr(cache->percpu, cpu);
142 if (head->first) {
143 list_splice_entire_tail(head->first, caller_list);
144 head->first = NULL;
145 }
146 }
147
148 if (cache->ready) {
149 list_splice_entire_tail(cache->ready, caller_list);
150 cache->ready = NULL;
151 }
152}
153
154void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
155{
156 struct rds_ib_incoming *inc;
157 struct rds_ib_incoming *inc_tmp;
158 struct rds_page_frag *frag;
159 struct rds_page_frag *frag_tmp;
160 LIST_HEAD(list);
161
162 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
163 rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
164 free_percpu(ic->i_cache_incs.percpu);
165
166 list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
167 list_del(&inc->ii_cache_entry);
168 WARN_ON(!list_empty(&inc->ii_frags));
169 kmem_cache_free(rds_ib_incoming_slab, inc);
170 }
171
172 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
173 rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
174 free_percpu(ic->i_cache_frags.percpu);
175
176 list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
177 list_del(&frag->f_cache_entry);
178 WARN_ON(!list_empty(&frag->f_item));
179 kmem_cache_free(rds_ib_frag_slab, frag);
180 }
181}
182
183/* fwd decl */
184static void rds_ib_recv_cache_put(struct list_head *new_item,
185 struct rds_ib_refill_cache *cache);
186static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
187
188
189/* Recycle frag and attached recv buffer f_sg */
190static void rds_ib_frag_free(struct rds_ib_connection *ic,
191 struct rds_page_frag *frag)
192{
193 rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
194
195 rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
196}
197
198/* Recycle inc after freeing attached frags */
199void rds_ib_inc_free(struct rds_incoming *inc)
200{
201 struct rds_ib_incoming *ibinc;
202 struct rds_page_frag *frag;
203 struct rds_page_frag *pos;
204 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
205
206 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
207
208 /* Free attached frags */
209 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
210 list_del_init(&frag->f_item);
211 rds_ib_frag_free(ic, frag);
212 }
213 BUG_ON(!list_empty(&ibinc->ii_frags));
214
215 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
216 rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
108} 217}
109 218
110static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, 219static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
@@ -115,10 +224,8 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
115 recv->r_ibinc = NULL; 224 recv->r_ibinc = NULL;
116 } 225 }
117 if (recv->r_frag) { 226 if (recv->r_frag) {
118 rds_ib_recv_unmap_page(ic, recv); 227 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
119 if (recv->r_frag->f_page) 228 rds_ib_frag_free(ic, recv->r_frag);
120 rds_ib_frag_drop_page(recv->r_frag);
121 rds_ib_frag_free(recv->r_frag);
122 recv->r_frag = NULL; 229 recv->r_frag = NULL;
123 } 230 }
124} 231}
@@ -129,84 +236,111 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
129 236
130 for (i = 0; i < ic->i_recv_ring.w_nr; i++) 237 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
131 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); 238 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
132
133 if (ic->i_frag.f_page)
134 rds_ib_frag_drop_page(&ic->i_frag);
135} 239}
136 240
137static int rds_ib_recv_refill_one(struct rds_connection *conn, 241static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
138 struct rds_ib_recv_work *recv, 242 gfp_t slab_mask)
139 gfp_t kptr_gfp, gfp_t page_gfp)
140{ 243{
141 struct rds_ib_connection *ic = conn->c_transport_data; 244 struct rds_ib_incoming *ibinc;
142 dma_addr_t dma_addr; 245 struct list_head *cache_item;
143 struct ib_sge *sge; 246 int avail_allocs;
144 int ret = -ENOMEM;
145 247
146 if (recv->r_ibinc == NULL) { 248 cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
147 if (!atomic_add_unless(&rds_ib_allocation, 1, rds_ib_sysctl_max_recv_allocation)) { 249 if (cache_item) {
250 ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
251 } else {
252 avail_allocs = atomic_add_unless(&rds_ib_allocation,
253 1, rds_ib_sysctl_max_recv_allocation);
254 if (!avail_allocs) {
148 rds_ib_stats_inc(s_ib_rx_alloc_limit); 255 rds_ib_stats_inc(s_ib_rx_alloc_limit);
149 goto out; 256 return NULL;
150 } 257 }
151 recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, 258 ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
152 kptr_gfp); 259 if (!ibinc) {
153 if (recv->r_ibinc == NULL) {
154 atomic_dec(&rds_ib_allocation); 260 atomic_dec(&rds_ib_allocation);
155 goto out; 261 return NULL;
156 } 262 }
157 INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
158 rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
159 } 263 }
264 INIT_LIST_HEAD(&ibinc->ii_frags);
265 rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
160 266
161 if (recv->r_frag == NULL) { 267 return ibinc;
162 recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp); 268}
163 if (recv->r_frag == NULL) 269
164 goto out; 270static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 271 gfp_t slab_mask, gfp_t page_mask)
166 recv->r_frag->f_page = NULL; 272{
273 struct rds_page_frag *frag;
274 struct list_head *cache_item;
275 int ret;
276
277 cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
278 if (cache_item) {
279 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
280 } else {
281 frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
282 if (!frag)
283 return NULL;
284
285 sg_init_table(&frag->f_sg, 1);
286 ret = rds_page_remainder_alloc(&frag->f_sg,
287 RDS_FRAG_SIZE, page_mask);
288 if (ret) {
289 kmem_cache_free(rds_ib_frag_slab, frag);
290 return NULL;
291 }
167 } 292 }
168 293
169 if (ic->i_frag.f_page == NULL) { 294 INIT_LIST_HEAD(&frag->f_item);
170 ic->i_frag.f_page = alloc_page(page_gfp); 295
171 if (ic->i_frag.f_page == NULL) 296 return frag;
172 goto out; 297}
173 ic->i_frag.f_offset = 0; 298
299static int rds_ib_recv_refill_one(struct rds_connection *conn,
300 struct rds_ib_recv_work *recv, int prefill)
301{
302 struct rds_ib_connection *ic = conn->c_transport_data;
303 struct ib_sge *sge;
304 int ret = -ENOMEM;
305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT;
307
308 if (prefill) {
309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER;
174 } 311 }
175 312
176 dma_addr = ib_dma_map_page(ic->i_cm_id->device, 313 if (!ic->i_cache_incs.ready)
177 ic->i_frag.f_page, 314 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
178 ic->i_frag.f_offset, 315 if (!ic->i_cache_frags.ready)
179 RDS_FRAG_SIZE, 316 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
180 DMA_FROM_DEVICE);
181 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
182 goto out;
183 317
184 /* 318 /*
185 * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap() 319 * ibinc was taken from recv if recv contained the start of a message.
186 * must be called on this recv. This happens as completions hit 320 * recvs that were continuations will still have this allocated.
187 * in order or on connection shutdown.
188 */ 321 */
189 recv->r_frag->f_page = ic->i_frag.f_page; 322 if (!recv->r_ibinc) {
190 recv->r_frag->f_offset = ic->i_frag.f_offset; 323 recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
191 recv->r_frag->f_mapped = dma_addr; 324 if (!recv->r_ibinc)
325 goto out;
326 }
192 327
193 sge = rds_ib_data_sge(ic, recv->r_sge); 328 WARN_ON(recv->r_frag); /* leak! */
194 sge->addr = dma_addr; 329 recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
195 sge->length = RDS_FRAG_SIZE; 330 if (!recv->r_frag)
331 goto out;
332
333 ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
334 1, DMA_FROM_DEVICE);
335 WARN_ON(ret != 1);
196 336
197 sge = rds_ib_header_sge(ic, recv->r_sge); 337 sge = &recv->r_sge[0];
198 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); 338 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
199 sge->length = sizeof(struct rds_header); 339 sge->length = sizeof(struct rds_header);
200 340
201 get_page(recv->r_frag->f_page); 341 sge = &recv->r_sge[1];
202 342 sge->addr = sg_dma_address(&recv->r_frag->f_sg);
203 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { 343 sge->length = sg_dma_len(&recv->r_frag->f_sg);
204 ic->i_frag.f_offset += RDS_FRAG_SIZE;
205 } else {
206 put_page(ic->i_frag.f_page);
207 ic->i_frag.f_page = NULL;
208 ic->i_frag.f_offset = 0;
209 }
210 344
211 ret = 0; 345 ret = 0;
212out: 346out:
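
Refill can now run straight from the receive tasklet (see the change to rds_ib_recv_tasklet_fn() further down), so the default allocation masks here are GFP_NOWAIT and only the one-time prefill in process context is allowed to sleep. As a small hypothetical helper, the selection amounts to:

#include <linux/gfp.h>
#include <linux/types.h>

/* Illustrative only: choose allocation masks by caller context. */
static void pick_refill_masks(bool prefill, gfp_t *slab_mask, gfp_t *page_mask)
{
        /* tasklet/atomic context must not sleep: fail fast instead */
        *slab_mask = GFP_NOWAIT;
        *page_mask = GFP_NOWAIT;

        if (prefill) {                  /* process context, before traffic flows */
                *slab_mask = GFP_KERNEL;
                *page_mask = GFP_HIGHUSER;
        }
}
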
@@ -216,13 +350,11 @@ out:
216/* 350/*
217 * This tries to allocate and post unused work requests after making sure that 351 * This tries to allocate and post unused work requests after making sure that
218 * they have all the allocations they need to queue received fragments into 352 * they have all the allocations they need to queue received fragments into
219 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc 353 * sockets.
220 * pairs don't go unmatched.
221 * 354 *
222 * -1 is returned if posting fails due to temporary resource exhaustion. 355 * -1 is returned if posting fails due to temporary resource exhaustion.
223 */ 356 */
224int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 357void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
225 gfp_t page_gfp, int prefill)
226{ 358{
227 struct rds_ib_connection *ic = conn->c_transport_data; 359 struct rds_ib_connection *ic = conn->c_transport_data;
228 struct rds_ib_recv_work *recv; 360 struct rds_ib_recv_work *recv;
@@ -236,28 +368,25 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
236 if (pos >= ic->i_recv_ring.w_nr) { 368 if (pos >= ic->i_recv_ring.w_nr) {
237 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", 369 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
238 pos); 370 pos);
239 ret = -EINVAL;
240 break; 371 break;
241 } 372 }
242 373
243 recv = &ic->i_recvs[pos]; 374 recv = &ic->i_recvs[pos];
244 ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp); 375 ret = rds_ib_recv_refill_one(conn, recv, prefill);
245 if (ret) { 376 if (ret) {
246 ret = -1;
247 break; 377 break;
248 } 378 }
249 379
250 /* XXX when can this fail? */ 380 /* XXX when can this fail? */
251 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); 381 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
252 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, 382 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
253 recv->r_ibinc, recv->r_frag->f_page, 383 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
254 (long) recv->r_frag->f_mapped, ret); 384 (long) sg_dma_address(&recv->r_frag->f_sg), ret);
255 if (ret) { 385 if (ret) {
256 rds_ib_conn_error(conn, "recv post on " 386 rds_ib_conn_error(conn, "recv post on "
257 "%pI4 returned %d, disconnecting and " 387 "%pI4 returned %d, disconnecting and "
258 "reconnecting\n", &conn->c_faddr, 388 "reconnecting\n", &conn->c_faddr,
259 ret); 389 ret);
260 ret = -1;
261 break; 390 break;
262 } 391 }
263 392
@@ -270,37 +399,73 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
270 399
271 if (ret) 400 if (ret)
272 rds_ib_ring_unalloc(&ic->i_recv_ring, 1); 401 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
273 return ret;
274} 402}
275 403
276void rds_ib_inc_purge(struct rds_incoming *inc) 404/*
405 * We want to recycle several types of recv allocations, like incs and frags.
406 * To use this, the *_free() function passes in the ptr to a list_head within
407 * the recyclee, as well as the cache to put it on.
408 *
409 * First, we put the memory on a percpu list. When this reaches a certain size,
        410 * we move it to an intermediate non-percpu list in a lockless manner, with some
        411 * xchg/cmpxchg wizardry.
412 *
413 * N.B. Instead of a list_head as the anchor, we use a single pointer, which can
414 * be NULL and xchg'd. The list is actually empty when the pointer is NULL, and
        415 * list_empty() will return true even when one element is actually present.
416 */
417static void rds_ib_recv_cache_put(struct list_head *new_item,
418 struct rds_ib_refill_cache *cache)
277{ 419{
278 struct rds_ib_incoming *ibinc; 420 unsigned long flags;
279 struct rds_page_frag *frag; 421 struct rds_ib_cache_head *chp;
280 struct rds_page_frag *pos; 422 struct list_head *old;
281 423
282 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 424 local_irq_save(flags);
283 rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);
284 425
285 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { 426 chp = per_cpu_ptr(cache->percpu, smp_processor_id());
286 list_del_init(&frag->f_item); 427 if (!chp->first)
287 rds_ib_frag_drop_page(frag); 428 INIT_LIST_HEAD(new_item);
288 rds_ib_frag_free(frag); 429 else /* put on front */
289 } 430 list_add_tail(new_item, chp->first);
431 chp->first = new_item;
432 chp->count++;
433
434 if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
435 goto end;
436
437 /*
438 * Return our per-cpu first list to the cache's xfer by atomically
439 * grabbing the current xfer list, appending it to our per-cpu list,
440 * and then atomically returning that entire list back to the
441 * cache's xfer list as long as it's still empty.
442 */
443 do {
444 old = xchg(&cache->xfer, NULL);
445 if (old)
446 list_splice_entire_tail(old, chp->first);
447 old = cmpxchg(&cache->xfer, NULL, chp->first);
448 } while (old);
449
450 chp->first = NULL;
451 chp->count = 0;
452end:
453 local_irq_restore(flags);
290} 454}
291 455
292void rds_ib_inc_free(struct rds_incoming *inc) 456static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
293{ 457{
294 struct rds_ib_incoming *ibinc; 458 struct list_head *head = cache->ready;
295 459
296 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 460 if (head) {
461 if (!list_empty(head)) {
462 cache->ready = head->next;
463 list_del_init(head);
464 } else
465 cache->ready = NULL;
466 }
297 467
298 rds_ib_inc_purge(inc); 468 return head;
299 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
300 BUG_ON(!list_empty(&ibinc->ii_frags));
301 kmem_cache_free(rds_ib_incoming_slab, ibinc);
302 atomic_dec(&rds_ib_allocation);
303 BUG_ON(atomic_read(&rds_ib_allocation) < 0);
304} 469}
305 470
306int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, 471int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
@@ -336,13 +501,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
336 to_copy = min_t(unsigned long, to_copy, len - copied); 501 to_copy = min_t(unsigned long, to_copy, len - copied);
337 502
338 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " 503 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
339 "[%p, %lu] + %lu\n", 504 "[%p, %u] + %lu\n",
340 to_copy, iov->iov_base, iov->iov_len, iov_off, 505 to_copy, iov->iov_base, iov->iov_len, iov_off,
341 frag->f_page, frag->f_offset, frag_off); 506 sg_page(&frag->f_sg), frag->f_sg.offset, frag_off);
342 507
343 /* XXX needs + offset for multiple recvs per page */ 508 /* XXX needs + offset for multiple recvs per page */
344 ret = rds_page_copy_to_user(frag->f_page, 509 ret = rds_page_copy_to_user(sg_page(&frag->f_sg),
345 frag->f_offset + frag_off, 510 frag->f_sg.offset + frag_off,
346 iov->iov_base + iov_off, 511 iov->iov_base + iov_off,
347 to_copy); 512 to_copy);
348 if (ret) { 513 if (ret) {
@@ -557,47 +722,6 @@ u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
557 return rds_ib_get_ack(ic); 722 return rds_ib_get_ack(ic);
558} 723}
559 724
560static struct rds_header *rds_ib_get_header(struct rds_connection *conn,
561 struct rds_ib_recv_work *recv,
562 u32 data_len)
563{
564 struct rds_ib_connection *ic = conn->c_transport_data;
565 void *hdr_buff = &ic->i_recv_hdrs[recv - ic->i_recvs];
566 void *addr;
567 u32 misplaced_hdr_bytes;
568
569 /*
570 * Support header at the front (RDS 3.1+) as well as header-at-end.
571 *
572 * Cases:
573 * 1) header all in header buff (great!)
574 * 2) header all in data page (copy all to header buff)
575 * 3) header split across hdr buf + data page
576 * (move bit in hdr buff to end before copying other bit from data page)
577 */
578 if (conn->c_version > RDS_PROTOCOL_3_0 || data_len == RDS_FRAG_SIZE)
579 return hdr_buff;
580
581 if (data_len <= (RDS_FRAG_SIZE - sizeof(struct rds_header))) {
582 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
583 memcpy(hdr_buff,
584 addr + recv->r_frag->f_offset + data_len,
585 sizeof(struct rds_header));
586 kunmap_atomic(addr, KM_SOFTIRQ0);
587 return hdr_buff;
588 }
589
590 misplaced_hdr_bytes = (sizeof(struct rds_header) - (RDS_FRAG_SIZE - data_len));
591
592 memmove(hdr_buff + misplaced_hdr_bytes, hdr_buff, misplaced_hdr_bytes);
593
594 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
595 memcpy(hdr_buff, addr + recv->r_frag->f_offset + data_len,
596 sizeof(struct rds_header) - misplaced_hdr_bytes);
597 kunmap_atomic(addr, KM_SOFTIRQ0);
598 return hdr_buff;
599}
600
601/* 725/*
602 * It's kind of lame that we're copying from the posted receive pages into 726 * It's kind of lame that we're copying from the posted receive pages into
603 * long-lived bitmaps. We could have posted the bitmaps and rdma written into 727 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
@@ -639,7 +763,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
639 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); 763 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
640 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ 764 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
641 765
642 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); 766 addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0);
643 767
644 src = addr + frag_off; 768 src = addr + frag_off;
645 dst = (void *)map->m_page_addrs[map_page] + map_off; 769 dst = (void *)map->m_page_addrs[map_page] + map_off;
@@ -710,7 +834,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
710 } 834 }
711 data_len -= sizeof(struct rds_header); 835 data_len -= sizeof(struct rds_header);
712 836
713 ihdr = rds_ib_get_header(conn, recv, data_len); 837 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
714 838
715 /* Validate the checksum. */ 839 /* Validate the checksum. */
716 if (!rds_message_verify_checksum(ihdr)) { 840 if (!rds_message_verify_checksum(ihdr)) {
@@ -742,12 +866,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
742 * the inc is freed. We don't go that route, so we have to drop the 866 * the inc is freed. We don't go that route, so we have to drop the
743 * page ref ourselves. We can't just leave the page on the recv 867 * page ref ourselves. We can't just leave the page on the recv
744 * because that confuses the dma mapping of pages and each recv's use 868 * because that confuses the dma mapping of pages and each recv's use
745 * of a partial page. We can leave the frag, though, it will be 869 * of a partial page.
746 * reused.
747 * 870 *
748 * FIXME: Fold this into the code path below. 871 * FIXME: Fold this into the code path below.
749 */ 872 */
750 rds_ib_frag_drop_page(recv->r_frag); 873 rds_ib_frag_free(ic, recv->r_frag);
874 recv->r_frag = NULL;
751 return; 875 return;
752 } 876 }
753 877
@@ -757,7 +881,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
757 * into the inc and save the inc so we can hang upcoming fragments 881 * into the inc and save the inc so we can hang upcoming fragments
758 * off its list. 882 * off its list.
759 */ 883 */
760 if (ibinc == NULL) { 884 if (!ibinc) {
761 ibinc = recv->r_ibinc; 885 ibinc = recv->r_ibinc;
762 recv->r_ibinc = NULL; 886 recv->r_ibinc = NULL;
763 ic->i_ibinc = ibinc; 887 ic->i_ibinc = ibinc;
@@ -842,32 +966,38 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
842 struct rds_ib_recv_work *recv; 966 struct rds_ib_recv_work *recv;
843 967
844 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { 968 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
845 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 969 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
846 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 970 (unsigned long long)wc.wr_id, wc.status,
971 rds_ib_wc_status_str(wc.status), wc.byte_len,
847 be32_to_cpu(wc.ex.imm_data)); 972 be32_to_cpu(wc.ex.imm_data));
848 rds_ib_stats_inc(s_ib_rx_cq_event); 973 rds_ib_stats_inc(s_ib_rx_cq_event);
849 974
850 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; 975 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
851 976
852 rds_ib_recv_unmap_page(ic, recv); 977 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
853 978
854 /* 979 /*
855 * Also process recvs in connecting state because it is possible 980 * Also process recvs in connecting state because it is possible
856 * to get a recv completion _before_ the rdmacm ESTABLISHED 981 * to get a recv completion _before_ the rdmacm ESTABLISHED
857 * event is processed. 982 * event is processed.
858 */ 983 */
859 if (rds_conn_up(conn) || rds_conn_connecting(conn)) { 984 if (wc.status == IB_WC_SUCCESS) {
985 rds_ib_process_recv(conn, recv, wc.byte_len, state);
986 } else {
860 /* We expect errors as the qp is drained during shutdown */ 987 /* We expect errors as the qp is drained during shutdown */
861 if (wc.status == IB_WC_SUCCESS) { 988 if (rds_conn_up(conn) || rds_conn_connecting(conn))
862 rds_ib_process_recv(conn, recv, wc.byte_len, state); 989 rds_ib_conn_error(conn, "recv completion on %pI4 had "
863 } else { 990 "status %u (%s), disconnecting and "
864 rds_ib_conn_error(conn, "recv completion on " 991 "reconnecting\n", &conn->c_faddr,
865 "%pI4 had status %u, disconnecting and " 992 wc.status,
866 "reconnecting\n", &conn->c_faddr, 993 rds_ib_wc_status_str(wc.status));
867 wc.status);
868 }
869 } 994 }
870 995
996 /*
997 * It's very important that we only free this ring entry if we've truly
998 * freed the resources allocated to the entry. The refilling path can
999 * leak if we don't.
1000 */
871 rds_ib_ring_free(&ic->i_recv_ring, 1); 1001 rds_ib_ring_free(&ic->i_recv_ring, 1);
872 } 1002 }
873} 1003}
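The comment about only freeing the ring entry once its resources are truly released refers to the recv work ring: entries are allocated at the head by the refill path and retired in order, starting from the oldest outstanding entry, by the completion handler. A loose standalone model of that bookkeeping follows (illustrative only, not the rds_ib_ring implementation):

#include <stdio.h>

#define RING_NR 8u

/* Entries are handed out at the head and completed in order from the
 * oldest outstanding entry. */
struct work_ring {
	unsigned int nr;        /* ring size */
	unsigned int head;      /* next slot to allocate */
	unsigned int used;      /* outstanding entries */
};

static unsigned int ring_alloc(struct work_ring *r, unsigned int want,
			       unsigned int *pos)
{
	unsigned int got = r->nr - r->used;

	if (got > want)
		got = want;
	*pos = r->head;
	r->head = (r->head + got) % r->nr;
	r->used += got;
	return got;
}

static unsigned int ring_oldest(struct work_ring *r)
{
	return (r->head + r->nr - r->used) % r->nr;
}

static void ring_free(struct work_ring *r, unsigned int n)
{
	r->used -= n;   /* only after the entry's resources are released */
}

int main(void)
{
	struct work_ring ring = { .nr = RING_NR };
	unsigned int pos;

	ring_alloc(&ring, 5, &pos);
	printf("oldest %u\n", ring_oldest(&ring));  /* 0 */
	ring_free(&ring, 2);
	printf("oldest %u\n", ring_oldest(&ring));  /* 2 */
	return 0;
}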
@@ -897,11 +1027,8 @@ void rds_ib_recv_tasklet_fn(unsigned long data)
897 if (rds_ib_ring_empty(&ic->i_recv_ring)) 1027 if (rds_ib_ring_empty(&ic->i_recv_ring))
898 rds_ib_stats_inc(s_ib_rx_ring_empty); 1028 rds_ib_stats_inc(s_ib_rx_ring_empty);
899 1029
900 /*
901 * If the ring is running low, then schedule the thread to refill.
902 */
903 if (rds_ib_ring_low(&ic->i_recv_ring)) 1030 if (rds_ib_ring_low(&ic->i_recv_ring))
904 queue_delayed_work(rds_wq, &conn->c_recv_w, 0); 1031 rds_ib_recv_refill(conn, 0);
905} 1032}
906 1033
907int rds_ib_recv(struct rds_connection *conn) 1034int rds_ib_recv(struct rds_connection *conn)
@@ -910,25 +1037,13 @@ int rds_ib_recv(struct rds_connection *conn)
910 int ret = 0; 1037 int ret = 0;
911 1038
912 rdsdebug("conn %p\n", conn); 1039 rdsdebug("conn %p\n", conn);
913
914 /*
915 * If we get a temporary posting failure in this context then
916 * we're really low and we want the caller to back off for a bit.
917 */
918 mutex_lock(&ic->i_recv_mutex);
919 if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
920 ret = -ENOMEM;
921 else
922 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
923 mutex_unlock(&ic->i_recv_mutex);
924
925 if (rds_conn_up(conn)) 1040 if (rds_conn_up(conn))
926 rds_ib_attempt_ack(ic); 1041 rds_ib_attempt_ack(ic);
927 1042
928 return ret; 1043 return ret;
929} 1044}
930 1045
931int __init rds_ib_recv_init(void) 1046int rds_ib_recv_init(void)
932{ 1047{
933 struct sysinfo si; 1048 struct sysinfo si;
934 int ret = -ENOMEM; 1049 int ret = -ENOMEM;
@@ -939,14 +1054,14 @@ int __init rds_ib_recv_init(void)
939 1054
940 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", 1055 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
941 sizeof(struct rds_ib_incoming), 1056 sizeof(struct rds_ib_incoming),
942 0, 0, NULL); 1057 0, SLAB_HWCACHE_ALIGN, NULL);
943 if (rds_ib_incoming_slab == NULL) 1058 if (!rds_ib_incoming_slab)
944 goto out; 1059 goto out;
945 1060
946 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", 1061 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
947 sizeof(struct rds_page_frag), 1062 sizeof(struct rds_page_frag),
948 0, 0, NULL); 1063 0, SLAB_HWCACHE_ALIGN, NULL);
949 if (rds_ib_frag_slab == NULL) 1064 if (!rds_ib_frag_slab)
950 kmem_cache_destroy(rds_ib_incoming_slab); 1065 kmem_cache_destroy(rds_ib_incoming_slab);
951 else 1066 else
952 ret = 0; 1067 ret = 0;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 17fa80803ab0..71f373c421bc 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -36,11 +36,49 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "ib.h" 39#include "ib.h"
41 40
42static void rds_ib_send_rdma_complete(struct rds_message *rm, 41static char *rds_ib_wc_status_strings[] = {
43 int wc_status) 42#define RDS_IB_WC_STATUS_STR(foo) \
43 [IB_WC_##foo] = __stringify(IB_WC_##foo)
44 RDS_IB_WC_STATUS_STR(SUCCESS),
45 RDS_IB_WC_STATUS_STR(LOC_LEN_ERR),
46 RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR),
47 RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR),
48 RDS_IB_WC_STATUS_STR(LOC_PROT_ERR),
49 RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR),
50 RDS_IB_WC_STATUS_STR(MW_BIND_ERR),
51 RDS_IB_WC_STATUS_STR(BAD_RESP_ERR),
52 RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR),
53 RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR),
54 RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR),
55 RDS_IB_WC_STATUS_STR(REM_OP_ERR),
56 RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR),
57 RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR),
58 RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR),
59 RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR),
60 RDS_IB_WC_STATUS_STR(REM_ABORT_ERR),
61 RDS_IB_WC_STATUS_STR(INV_EECN_ERR),
62 RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR),
63 RDS_IB_WC_STATUS_STR(FATAL_ERR),
64 RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR),
65 RDS_IB_WC_STATUS_STR(GENERAL_ERR),
66#undef RDS_IB_WC_STATUS_STR
67};
68
69char *rds_ib_wc_status_str(enum ib_wc_status status)
70{
71 return rds_str_array(rds_ib_wc_status_strings,
72 ARRAY_SIZE(rds_ib_wc_status_strings), status);
73}
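The string table above uses designated array initializers keyed by the IB_WC_* enum values plus __stringify(), and rds_str_array() presumably does a bounds-checked lookup with a fallback for holes and out-of-range values. A minimal, self-contained userspace sketch of that pattern, with invented names, only three states, and that fallback behaviour assumed:

#include <stdio.h>
#include <stddef.h>

#define STRINGIFY(x) #x   /* the kernel's __stringify() behaves the same for plain tokens */

enum wc_status { WC_SUCCESS, WC_LOC_LEN_ERR, WC_WR_FLUSH_ERR };

static char *wc_status_strings[] = {
#define WC_STATUS_STR(foo) [WC_##foo] = STRINGIFY(WC_##foo)
	WC_STATUS_STR(SUCCESS),
	WC_STATUS_STR(LOC_LEN_ERR),
	WC_STATUS_STR(WR_FLUSH_ERR),
#undef WC_STATUS_STR
};

/* Bounds-checked lookup; holes and out-of-range values fall back to "unknown". */
static char *wc_status_str(unsigned int status)
{
	size_t n = sizeof(wc_status_strings) / sizeof(wc_status_strings[0]);

	if (status >= n || !wc_status_strings[status])
		return "unknown";
	return wc_status_strings[status];
}

int main(void)
{
	printf("%s %s\n", wc_status_str(WC_WR_FLUSH_ERR), wc_status_str(42));
	return 0;
}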
74
75/*
76 * Convert IB-specific error message to RDS error message and call core
77 * completion handler.
78 */
79static void rds_ib_send_complete(struct rds_message *rm,
80 int wc_status,
81 void (*complete)(struct rds_message *rm, int status))
44{ 82{
45 int notify_status; 83 int notify_status;
46 84
@@ -60,69 +98,125 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm,
60 notify_status = RDS_RDMA_OTHER_ERROR; 98 notify_status = RDS_RDMA_OTHER_ERROR;
61 break; 99 break;
62 } 100 }
63 rds_rdma_send_complete(rm, notify_status); 101 complete(rm, notify_status);
102}
103
104static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
105 struct rm_data_op *op,
106 int wc_status)
107{
108 if (op->op_nents)
109 ib_dma_unmap_sg(ic->i_cm_id->device,
110 op->op_sg, op->op_nents,
111 DMA_TO_DEVICE);
64} 112}
65 113
66static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, 114static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
67 struct rds_rdma_op *op) 115 struct rm_rdma_op *op,
116 int wc_status)
68{ 117{
69 if (op->r_mapped) { 118 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 119 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 120 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 121 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 122 op->op_mapped = 0;
74 } 123 }
124
125 /* If the user asked for a completion notification on this
126 * message, we can implement three different semantics:
127 * 1. Notify when we received the ACK on the RDS message
128 * that was queued with the RDMA. This provides reliable
129 * notification of RDMA status at the expense of a one-way
130 * packet delay.
131 * 2. Notify when the IB stack gives us the completion event for
132 * the RDMA operation.
133 * 3. Notify when the IB stack gives us the completion event for
134 * the accompanying RDS messages.
135 * Here, we implement approach #3. To implement approach #2,
136 * we would need to take an event for the rdma WR. To implement #1,
137 * don't call rds_rdma_send_complete at all, and fall back to the notify
138 * handling in the ACK processing code.
139 *
140 * Note: There's no need to explicitly sync any RDMA buffers using
141 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
142 * operation itself unmapped the RDMA buffers, which takes care
143 * of synching.
144 */
145 rds_ib_send_complete(container_of(op, struct rds_message, rdma),
146 wc_status, rds_rdma_send_complete);
147
148 if (op->op_write)
149 rds_stats_add(s_send_rdma_bytes, op->op_bytes);
150 else
151 rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
75} 152}
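A note on the container_of() calls the new completion paths rely on: the data, rdma and atomic ops are embedded directly in struct rds_message, so a pointer to the op is enough to recover the enclosing message. A tiny standalone illustration of that offset arithmetic (the structure names here are invented, not the RDS ones):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct rdma_op { int op_rkey; };
struct message { int m_seq; struct rdma_op rdma; };

static void complete_rdma(struct rdma_op *op, int status)
{
	/* Recover the enclosing message from a pointer to its embedded op. */
	struct message *msg = container_of(op, struct message, rdma);

	printf("message %d rdma completed, status %d\n", msg->m_seq, status);
}

int main(void)
{
	struct message msg = { .m_seq = 7, .rdma = { .op_rkey = 42 } };

	complete_rdma(&msg.rdma, 0);
	return 0;
}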
76 153
77static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, 154static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
78 struct rds_ib_send_work *send, 155 struct rm_atomic_op *op,
79 int wc_status) 156 int wc_status)
80{ 157{
81 struct rds_message *rm = send->s_rm; 158 /* unmap atomic recvbuf */
82 159 if (op->op_mapped) {
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 160 ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
84 161 DMA_FROM_DEVICE);
85 ib_dma_unmap_sg(ic->i_cm_id->device, 162 op->op_mapped = 0;
86 rm->m_sg, rm->m_nents, 163 }
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_ib_send_rdma_complete(rm, wc_status);
113 164
114 if (rm->m_rdma_op->r_write) 165 rds_ib_send_complete(container_of(op, struct rds_message, atomic),
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 166 wc_status, rds_atomic_send_complete);
116 else 167
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 168 if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
169 rds_ib_stats_inc(s_ib_atomic_cswp);
170 else
171 rds_ib_stats_inc(s_ib_atomic_fadd);
172}
173
174/*
175 * Unmap the resources associated with a struct send_work.
176 *
177 * Returns the rm for no good reason other than it is unobtainable
178 * other than by switching on wr.opcode, currently, and the caller,
179 * the event handler, needs it.
180 */
181static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
182 struct rds_ib_send_work *send,
183 int wc_status)
184{
185 struct rds_message *rm = NULL;
186
187 /* In the error case, wc.opcode sometimes contains garbage */
188 switch (send->s_wr.opcode) {
189 case IB_WR_SEND:
190 if (send->s_op) {
191 rm = container_of(send->s_op, struct rds_message, data);
192 rds_ib_send_unmap_data(ic, send->s_op, wc_status);
193 }
194 break;
195 case IB_WR_RDMA_WRITE:
196 case IB_WR_RDMA_READ:
197 if (send->s_op) {
198 rm = container_of(send->s_op, struct rds_message, rdma);
199 rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
200 }
201 break;
202 case IB_WR_ATOMIC_FETCH_AND_ADD:
203 case IB_WR_ATOMIC_CMP_AND_SWP:
204 if (send->s_op) {
205 rm = container_of(send->s_op, struct rds_message, atomic);
206 rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
207 }
208 break;
209 default:
210 if (printk_ratelimit())
211 printk(KERN_NOTICE
212 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
213 __func__, send->s_wr.opcode);
214 break;
118 } 215 }
119 216
120 /* If anyone waited for this message to get flushed out, wake 217 send->s_wr.opcode = 0xdead;
121 * them up now */
122 rds_message_unmapped(rm);
123 218
124 rds_message_put(rm); 219 return rm;
125 send->s_rm = NULL;
126} 220}
127 221
128void rds_ib_send_init_ring(struct rds_ib_connection *ic) 222void rds_ib_send_init_ring(struct rds_ib_connection *ic)
@@ -133,23 +227,18 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 227 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge; 228 struct ib_sge *sge;
135 229
136 send->s_rm = NULL;
137 send->s_op = NULL; 230 send->s_op = NULL;
138 231
139 send->s_wr.wr_id = i; 232 send->s_wr.wr_id = i;
140 send->s_wr.sg_list = send->s_sge; 233 send->s_wr.sg_list = send->s_sge;
141 send->s_wr.num_sge = 1;
142 send->s_wr.opcode = IB_WR_SEND;
143 send->s_wr.send_flags = 0;
144 send->s_wr.ex.imm_data = 0; 234 send->s_wr.ex.imm_data = 0;
145 235
146 sge = rds_ib_data_sge(ic, send->s_sge); 236 sge = &send->s_sge[0];
147 sge->lkey = ic->i_mr->lkey;
148
149 sge = rds_ib_header_sge(ic, send->s_sge);
150 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); 237 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
151 sge->length = sizeof(struct rds_header); 238 sge->length = sizeof(struct rds_header);
152 sge->lkey = ic->i_mr->lkey; 239 sge->lkey = ic->i_mr->lkey;
240
241 send->s_sge[1].lkey = ic->i_mr->lkey;
153 } 242 }
154} 243}
155 244
@@ -159,16 +248,24 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
159 u32 i; 248 u32 i;
160 249
161 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 250 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
162 if (send->s_wr.opcode == 0xdead) 251 if (send->s_op && send->s_wr.opcode != 0xdead)
163 continue; 252 rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
164 if (send->s_rm)
165 rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
166 if (send->s_op)
167 rds_ib_send_unmap_rdma(ic, send->s_op);
168 } 253 }
169} 254}
170 255
171/* 256/*
257 * The only fast path caller always has a non-zero nr, so we don't
258 * bother testing nr before performing the atomic sub.
259 */
260static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
261{
262 if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
263 waitqueue_active(&rds_ib_ring_empty_wait))
264 wake_up(&rds_ib_ring_empty_wait);
265 BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
266}
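The i_signaled_sends counter lets shutdown wait until every signaled work request has actually completed before tearing the connection down. A rough userspace model of that accounting, with a pthread condition variable standing in for the kernel waitqueue (illustrative only, not the RDS code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  drained = PTHREAD_COND_INITIALIZER;
static int signaled_sends;              /* plays the role of i_signaled_sends */

static void add_signaled(int nr)        /* post path */
{
	pthread_mutex_lock(&lock);
	signaled_sends += nr;
	pthread_mutex_unlock(&lock);
}

static void sub_signaled(int nr)        /* completion path */
{
	pthread_mutex_lock(&lock);
	signaled_sends -= nr;
	if (signaled_sends == 0)
		pthread_cond_broadcast(&drained);   /* wake the waiter */
	pthread_mutex_unlock(&lock);
}

static void wait_for_drain(void)        /* shutdown path */
{
	pthread_mutex_lock(&lock);
	while (signaled_sends != 0)
		pthread_cond_wait(&drained, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	add_signaled(2);
	sub_signaled(2);
	wait_for_drain();   /* returns immediately: everything completed */
	printf("drained\n");
	return 0;
}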
267
268/*
172 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc 269 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
173 * operations performed in the send path. As the sender allocs and potentially 270 * operations performed in the send path. As the sender allocs and potentially
174 * unallocs the next free entry in the ring it doesn't alter which is 271 * unallocs the next free entry in the ring it doesn't alter which is
@@ -178,12 +275,14 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
178{ 275{
179 struct rds_connection *conn = context; 276 struct rds_connection *conn = context;
180 struct rds_ib_connection *ic = conn->c_transport_data; 277 struct rds_ib_connection *ic = conn->c_transport_data;
278 struct rds_message *rm = NULL;
181 struct ib_wc wc; 279 struct ib_wc wc;
182 struct rds_ib_send_work *send; 280 struct rds_ib_send_work *send;
183 u32 completed; 281 u32 completed;
184 u32 oldest; 282 u32 oldest;
185 u32 i = 0; 283 u32 i = 0;
186 int ret; 284 int ret;
285 int nr_sig = 0;
187 286
188 rdsdebug("cq %p conn %p\n", cq, conn); 287 rdsdebug("cq %p conn %p\n", cq, conn);
189 rds_ib_stats_inc(s_ib_tx_cq_call); 288 rds_ib_stats_inc(s_ib_tx_cq_call);
@@ -192,8 +291,9 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
192 rdsdebug("ib_req_notify_cq send failed: %d\n", ret); 291 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
193 292
194 while (ib_poll_cq(cq, 1, &wc) > 0) { 293 while (ib_poll_cq(cq, 1, &wc) > 0) {
195 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 294 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
196 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 295 (unsigned long long)wc.wr_id, wc.status,
296 rds_ib_wc_status_str(wc.status), wc.byte_len,
197 be32_to_cpu(wc.ex.imm_data)); 297 be32_to_cpu(wc.ex.imm_data));
198 rds_ib_stats_inc(s_ib_tx_cq_event); 298 rds_ib_stats_inc(s_ib_tx_cq_event);
199 299
@@ -210,51 +310,30 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
210 310
211 for (i = 0; i < completed; i++) { 311 for (i = 0; i < completed; i++) {
212 send = &ic->i_sends[oldest]; 312 send = &ic->i_sends[oldest];
313 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
314 nr_sig++;
213 315
214 /* In the error case, wc.opcode sometimes contains garbage */ 316 rm = rds_ib_send_unmap_op(ic, send, wc.status);
215 switch (send->s_wr.opcode) {
216 case IB_WR_SEND:
217 if (send->s_rm)
218 rds_ib_send_unmap_rm(ic, send, wc.status);
219 break;
220 case IB_WR_RDMA_WRITE:
221 case IB_WR_RDMA_READ:
222 /* Nothing to be done - the SG list will be unmapped
223 * when the SEND completes. */
224 break;
225 default:
226 if (printk_ratelimit())
227 printk(KERN_NOTICE
228 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
229 __func__, send->s_wr.opcode);
230 break;
231 }
232 317
233 send->s_wr.opcode = 0xdead;
234 send->s_wr.num_sge = 1;
235 if (send->s_queued + HZ/2 < jiffies) 318 if (send->s_queued + HZ/2 < jiffies)
236 rds_ib_stats_inc(s_ib_tx_stalled); 319 rds_ib_stats_inc(s_ib_tx_stalled);
237 320
238 /* If a RDMA operation produced an error, signal this right 321 if (send->s_op) {
239 * away. If we don't, the subsequent SEND that goes with this 322 if (send->s_op == rm->m_final_op) {
240 * RDMA will be canceled with ERR_WFLUSH, and the application 323 /* If anyone waited for this message to get flushed out, wake
241 * never learn that the RDMA failed. */ 324 * them up now */
242 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { 325 rds_message_unmapped(rm);
243 struct rds_message *rm;
244
245 rm = rds_send_get_message(conn, send->s_op);
246 if (rm) {
247 if (rm->m_rdma_op)
248 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
249 rds_ib_send_rdma_complete(rm, wc.status);
250 rds_message_put(rm);
251 } 326 }
327 rds_message_put(rm);
328 send->s_op = NULL;
252 } 329 }
253 330
254 oldest = (oldest + 1) % ic->i_send_ring.w_nr; 331 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
255 } 332 }
256 333
257 rds_ib_ring_free(&ic->i_send_ring, completed); 334 rds_ib_ring_free(&ic->i_send_ring, completed);
335 rds_ib_sub_signaled(ic, nr_sig);
336 nr_sig = 0;
258 337
259 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 338 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
260 test_bit(0, &conn->c_map_queued)) 339 test_bit(0, &conn->c_map_queued))
@@ -262,10 +341,10 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
262 341
263 /* We expect errors as the qp is drained during shutdown */ 342 /* We expect errors as the qp is drained during shutdown */
264 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { 343 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
265 rds_ib_conn_error(conn, 344 rds_ib_conn_error(conn, "send completion on %pI4 had status "
266 "send completion on %pI4 " 345 "%u (%s), disconnecting and reconnecting\n",
267 "had status %u, disconnecting and reconnecting\n", 346 &conn->c_faddr, wc.status,
268 &conn->c_faddr, wc.status); 347 rds_ib_wc_status_str(wc.status));
269 } 348 }
270 } 349 }
271} 350}
@@ -294,7 +373,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
294 * credits (see rds_ib_send_add_credits below). 373 * credits (see rds_ib_send_add_credits below).
295 * 374 *
296 * The RDS send code is essentially single-threaded; rds_send_xmit 375 * The RDS send code is essentially single-threaded; rds_send_xmit
297 * grabs c_send_lock to ensure exclusive access to the send ring. 376 * sets RDS_IN_XMIT to ensure exclusive access to the send ring.
298 * However, the ACK sending code is independent and can race with 377 * However, the ACK sending code is independent and can race with
299 * message SENDs. 378 * message SENDs.
300 * 379 *
@@ -413,40 +492,21 @@ void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
413 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 492 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
414} 493}
415 494
416static inline void 495static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
417rds_ib_xmit_populate_wr(struct rds_ib_connection *ic, 496 struct rds_ib_send_work *send,
418 struct rds_ib_send_work *send, unsigned int pos, 497 bool notify)
419 unsigned long buffer, unsigned int length,
420 int send_flags)
421{ 498{
422 struct ib_sge *sge; 499 /*
423 500 * We want to delay signaling completions just enough to get
424 WARN_ON(pos != send - ic->i_sends); 501 * the batching benefits but not so much that we create dead time
425 502 * on the wire.
426 send->s_wr.send_flags = send_flags; 503 */
427 send->s_wr.opcode = IB_WR_SEND; 504 if (ic->i_unsignaled_wrs-- == 0 || notify) {
428 send->s_wr.num_sge = 2; 505 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
429 send->s_wr.next = NULL; 506 send->s_wr.send_flags |= IB_SEND_SIGNALED;
430 send->s_queued = jiffies; 507 return 1;
431 send->s_op = NULL;
432
433 if (length != 0) {
434 sge = rds_ib_data_sge(ic, send->s_sge);
435 sge->addr = buffer;
436 sge->length = length;
437 sge->lkey = ic->i_mr->lkey;
438
439 sge = rds_ib_header_sge(ic, send->s_sge);
440 } else {
441 /* We're sending a packet with no payload. There is only
442 * one SGE */
443 send->s_wr.num_sge = 1;
444 sge = &send->s_sge[0];
445 } 508 }
446 509 return 0;
447 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
448 sge->length = sizeof(struct rds_header);
449 sge->lkey = ic->i_mr->lkey;
450} 510}
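rds_ib_set_wr_signal_state() only marks roughly one work request in every rds_ib_sysctl_max_unsig_wrs as IB_SEND_SIGNALED, so completion interrupts get batched; callers that need a completion (notify) force one. A standalone sketch of that decrement-and-rearm counter, with an arbitrary threshold:

#include <stdio.h>
#include <stdbool.h>

#define MAX_UNSIGNALED_WRS 16   /* stand-in for rds_ib_sysctl_max_unsig_wrs */

static unsigned int unsignaled_wrs = MAX_UNSIGNALED_WRS;

/* Returns 1 when this work request should be posted IB_SEND_SIGNALED. */
static int set_wr_signal_state(bool notify)
{
	if (unsignaled_wrs-- == 0 || notify) {
		unsignaled_wrs = MAX_UNSIGNALED_WRS;
		return 1;
	}
	return 0;
}

int main(void)
{
	int i, nr_sig = 0;

	for (i = 0; i < 100; i++)
		nr_sig += set_wr_signal_state(false);
	printf("%d of 100 posts signaled\n", nr_sig);   /* roughly 1 in 17 */
	return 0;
}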
451 511
452/* 512/*
@@ -475,13 +535,14 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
475 u32 pos; 535 u32 pos;
476 u32 i; 536 u32 i;
477 u32 work_alloc; 537 u32 work_alloc;
478 u32 credit_alloc; 538 u32 credit_alloc = 0;
479 u32 posted; 539 u32 posted;
480 u32 adv_credits = 0; 540 u32 adv_credits = 0;
481 int send_flags = 0; 541 int send_flags = 0;
482 int sent; 542 int bytes_sent = 0;
483 int ret; 543 int ret;
484 int flow_controlled = 0; 544 int flow_controlled = 0;
545 int nr_sig = 0;
485 546
486 BUG_ON(off % RDS_FRAG_SIZE); 547 BUG_ON(off % RDS_FRAG_SIZE);
487 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); 548 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
@@ -507,14 +568,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
507 goto out; 568 goto out;
508 } 569 }
509 570
510 credit_alloc = work_alloc;
511 if (ic->i_flowctl) { 571 if (ic->i_flowctl) {
512 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); 572 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
513 adv_credits += posted; 573 adv_credits += posted;
514 if (credit_alloc < work_alloc) { 574 if (credit_alloc < work_alloc) {
515 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); 575 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
516 work_alloc = credit_alloc; 576 work_alloc = credit_alloc;
517 flow_controlled++; 577 flow_controlled = 1;
518 } 578 }
519 if (work_alloc == 0) { 579 if (work_alloc == 0) {
520 set_bit(RDS_LL_SEND_FULL, &conn->c_flags); 580 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
@@ -525,31 +585,25 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
525 } 585 }
526 586
527 /* map the message the first time we see it */ 587 /* map the message the first time we see it */
528 if (ic->i_rm == NULL) { 588 if (!ic->i_data_op) {
529 /* 589 if (rm->data.op_nents) {
530 printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n", 590 rm->data.op_count = ib_dma_map_sg(dev,
531 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 591 rm->data.op_sg,
532 rm->m_inc.i_hdr.h_flags, 592 rm->data.op_nents,
533 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 593 DMA_TO_DEVICE);
534 */ 594 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
535 if (rm->m_nents) { 595 if (rm->data.op_count == 0) {
536 rm->m_count = ib_dma_map_sg(dev,
537 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
538 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
539 if (rm->m_count == 0) {
540 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 596 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
541 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 597 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
542 ret = -ENOMEM; /* XXX ? */ 598 ret = -ENOMEM; /* XXX ? */
543 goto out; 599 goto out;
544 } 600 }
545 } else { 601 } else {
546 rm->m_count = 0; 602 rm->data.op_count = 0;
547 } 603 }
548 604
549 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
550 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
551 rds_message_addref(rm); 605 rds_message_addref(rm);
552 ic->i_rm = rm; 606 ic->i_data_op = &rm->data;
553 607
554 /* Finalize the header */ 608 /* Finalize the header */
555 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) 609 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
@@ -559,10 +613,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
559 613
560 /* If it has a RDMA op, tell the peer we did it. This is 614 /* If it has a RDMA op, tell the peer we did it. This is
561 * used by the peer to release use-once RDMA MRs. */ 615 * used by the peer to release use-once RDMA MRs. */
562 if (rm->m_rdma_op) { 616 if (rm->rdma.op_active) {
563 struct rds_ext_header_rdma ext_hdr; 617 struct rds_ext_header_rdma ext_hdr;
564 618
565 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 619 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
566 rds_message_add_extension(&rm->m_inc.i_hdr, 620 rds_message_add_extension(&rm->m_inc.i_hdr,
567 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 621 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
568 } 622 }
@@ -582,99 +636,77 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
582 /* 636 /*
583 * Update adv_credits since we reset the ACK_REQUIRED bit. 637 * Update adv_credits since we reset the ACK_REQUIRED bit.
584 */ 638 */
585 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); 639 if (ic->i_flowctl) {
586 adv_credits += posted; 640 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
587 BUG_ON(adv_credits > 255); 641 adv_credits += posted;
642 BUG_ON(adv_credits > 255);
643 }
588 } 644 }
589 645
590 send = &ic->i_sends[pos];
591 first = send;
592 prev = NULL;
593 scat = &rm->m_sg[sg];
594 sent = 0;
595 i = 0;
596
597 /* Sometimes you want to put a fence between an RDMA 646 /* Sometimes you want to put a fence between an RDMA
598 * READ and the following SEND. 647 * READ and the following SEND.
599 * We could either do this all the time 648 * We could either do this all the time
600 * or when requested by the user. Right now, we let 649 * or when requested by the user. Right now, we let
601 * the application choose. 650 * the application choose.
602 */ 651 */
603 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 652 if (rm->rdma.op_active && rm->rdma.op_fence)
604 send_flags = IB_SEND_FENCE; 653 send_flags = IB_SEND_FENCE;
605 654
606 /* 655 /* Each frag gets a header. Msgs may be 0 bytes */
607 * We could be copying the header into the unused tail of the page. 656 send = &ic->i_sends[pos];
608 * That would need to be changed in the future when those pages might 657 first = send;
609 * be mapped userspace pages or page cache pages. So instead we always 658 prev = NULL;
610 * use a second sge and our long-lived ring of mapped headers. We send 659 scat = &ic->i_data_op->op_sg[sg];
611 * the header after the data so that the data payload can be aligned on 660 i = 0;
612 * the receiver. 661 do {
613 */ 662 unsigned int len = 0;
614 663
615 /* handle a 0-len message */ 664 /* Set up the header */
616 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { 665 send->s_wr.send_flags = send_flags;
617 rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); 666 send->s_wr.opcode = IB_WR_SEND;
618 goto add_header; 667 send->s_wr.num_sge = 1;
619 } 668 send->s_wr.next = NULL;
669 send->s_queued = jiffies;
670 send->s_op = NULL;
620 671
621 /* if there's data reference it with a chain of work reqs */ 672 send->s_sge[0].addr = ic->i_send_hdrs_dma
622 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 673 + (pos * sizeof(struct rds_header));
623 unsigned int len; 674 send->s_sge[0].length = sizeof(struct rds_header);
624 675
625 send = &ic->i_sends[pos]; 676 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
626 677
627 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); 678 /* Set up the data, if present */
628 rds_ib_xmit_populate_wr(ic, send, pos, 679 if (i < work_alloc
629 ib_sg_dma_address(dev, scat) + off, len, 680 && scat != &rm->data.op_sg[rm->data.op_count]) {
630 send_flags); 681 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
682 send->s_wr.num_sge = 2;
631 683
632 /* 684 send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off;
633 * We want to delay signaling completions just enough to get 685 send->s_sge[1].length = len;
634 * the batching benefits but not so much that we create dead time
635 * on the wire.
636 */
637 if (ic->i_unsignaled_wrs-- == 0) {
638 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
639 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
640 }
641 686
642 ic->i_unsignaled_bytes -= len; 687 bytes_sent += len;
643 if (ic->i_unsignaled_bytes <= 0) { 688 off += len;
644 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; 689 if (off == ib_sg_dma_len(dev, scat)) {
645 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 690 scat++;
691 off = 0;
692 }
646 } 693 }
647 694
695 rds_ib_set_wr_signal_state(ic, send, 0);
696
648 /* 697 /*
649 * Always signal the last one if we're stopping due to flow control. 698 * Always signal the last one if we're stopping due to flow control.
650 */ 699 */
651 if (flow_controlled && i == (work_alloc-1)) 700 if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
652 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 701 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
653 702
703 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
704 nr_sig++;
705
654 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 706 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
655 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 707 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
656 708
657 sent += len; 709 if (ic->i_flowctl && adv_credits) {
658 off += len;
659 if (off == ib_sg_dma_len(dev, scat)) {
660 scat++;
661 off = 0;
662 }
663
664add_header:
665 /* Tack on the header after the data. The header SGE should already
666 * have been set up to point to the right header buffer. */
667 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
668
669 if (0) {
670 struct rds_header *hdr = &ic->i_send_hdrs[pos];
671
672 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
673 be16_to_cpu(hdr->h_dport),
674 hdr->h_flags,
675 be32_to_cpu(hdr->h_len));
676 }
677 if (adv_credits) {
678 struct rds_header *hdr = &ic->i_send_hdrs[pos]; 710 struct rds_header *hdr = &ic->i_send_hdrs[pos];
679 711
680 /* add credit and redo the header checksum */ 712 /* add credit and redo the header checksum */
@@ -689,20 +721,25 @@ add_header:
689 prev = send; 721 prev = send;
690 722
691 pos = (pos + 1) % ic->i_send_ring.w_nr; 723 pos = (pos + 1) % ic->i_send_ring.w_nr;
692 } 724 send = &ic->i_sends[pos];
725 i++;
726
727 } while (i < work_alloc
728 && scat != &rm->data.op_sg[rm->data.op_count]);
693 729
694 /* Account the RDS header in the number of bytes we sent, but just once. 730 /* Account the RDS header in the number of bytes we sent, but just once.
695 * The caller has no concept of fragmentation. */ 731 * The caller has no concept of fragmentation. */
696 if (hdr_off == 0) 732 if (hdr_off == 0)
697 sent += sizeof(struct rds_header); 733 bytes_sent += sizeof(struct rds_header);
698 734
699 /* if we finished the message then send completion owns it */ 735 /* if we finished the message then send completion owns it */
700 if (scat == &rm->m_sg[rm->m_count]) { 736 if (scat == &rm->data.op_sg[rm->data.op_count]) {
701 prev->s_rm = ic->i_rm; 737 prev->s_op = ic->i_data_op;
702 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 738 prev->s_wr.send_flags |= IB_SEND_SOLICITED;
703 ic->i_rm = NULL; 739 ic->i_data_op = NULL;
704 } 740 }
705 741
742 /* Put back wrs & credits we didn't use */
706 if (i < work_alloc) { 743 if (i < work_alloc) {
707 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 744 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
708 work_alloc = i; 745 work_alloc = i;
@@ -710,6 +747,9 @@ add_header:
710 if (ic->i_flowctl && i < credit_alloc) 747 if (ic->i_flowctl && i < credit_alloc)
711 rds_ib_send_add_credits(conn, credit_alloc - i); 748 rds_ib_send_add_credits(conn, credit_alloc - i);
712 749
750 if (nr_sig)
751 atomic_add(nr_sig, &ic->i_signaled_sends);
752
713 /* XXX need to worry about failed_wr and partial sends. */ 753 /* XXX need to worry about failed_wr and partial sends. */
714 failed_wr = &first->s_wr; 754 failed_wr = &first->s_wr;
715 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 755 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
@@ -720,32 +760,127 @@ add_header:
720 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 " 760 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
721 "returned %d\n", &conn->c_faddr, ret); 761 "returned %d\n", &conn->c_faddr, ret);
722 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 762 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
723 if (prev->s_rm) { 763 rds_ib_sub_signaled(ic, nr_sig);
724 ic->i_rm = prev->s_rm; 764 if (prev->s_op) {
725 prev->s_rm = NULL; 765 ic->i_data_op = prev->s_op;
766 prev->s_op = NULL;
726 } 767 }
727 768
728 rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); 769 rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
729 goto out; 770 goto out;
730 } 771 }
731 772
732 ret = sent; 773 ret = bytes_sent;
733out: 774out:
734 BUG_ON(adv_credits); 775 BUG_ON(adv_credits);
735 return ret; 776 return ret;
736} 777}
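One detail of the accounting above that is easy to miss: every fragment posted by rds_ib_xmit() carries its own rds_header in s_sge[0], but the header is added to bytes_sent only when hdr_off == 0, because the caller has no concept of fragmentation. A small worked example, using assumed sizes for RDS_FRAG_SIZE and struct rds_header:

#include <stdio.h>

#define RDS_FRAG_SIZE  4096u   /* assumed fragment payload size */
#define RDS_HDR_SIZE     48u   /* stand-in for sizeof(struct rds_header) */

int main(void)
{
	unsigned int payload = 10000;
	unsigned int frags = payload ? (payload + RDS_FRAG_SIZE - 1) / RDS_FRAG_SIZE : 1;

	/* On the wire: each fragment is header (sge[0]) plus data (sge[1]). */
	unsigned int wire_bytes = payload + frags * RDS_HDR_SIZE;

	/* Reported to the caller: the header counts only once (hdr_off == 0). */
	unsigned int reported = payload + RDS_HDR_SIZE;

	printf("%u frags, %u bytes on the wire, %u reported\n",
	       frags, wire_bytes, reported);
	return 0;
}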
737 778
738int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 779/*
780 * Issue atomic operation.
781 * A simplified version of the rdma case, we always map 1 SG, and
782 * only 8 bytes, for the return value from the atomic operation.
783 */
784int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
785{
786 struct rds_ib_connection *ic = conn->c_transport_data;
787 struct rds_ib_send_work *send = NULL;
788 struct ib_send_wr *failed_wr;
789 struct rds_ib_device *rds_ibdev;
790 u32 pos;
791 u32 work_alloc;
792 int ret;
793 int nr_sig = 0;
794
795 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
796
797 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
798 if (work_alloc != 1) {
799 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
800 rds_ib_stats_inc(s_ib_tx_ring_full);
801 ret = -ENOMEM;
802 goto out;
803 }
804
805 /* address of send request in ring */
806 send = &ic->i_sends[pos];
807 send->s_queued = jiffies;
808
809 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
810 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
811 send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
812 send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
813 send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
814 send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
815 } else { /* FADD */
816 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
817 send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
818 send->s_wr.wr.atomic.swap = 0;
819 send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
820 send->s_wr.wr.atomic.swap_mask = 0;
821 }
822 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
823 send->s_wr.num_sge = 1;
824 send->s_wr.next = NULL;
825 send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
826 send->s_wr.wr.atomic.rkey = op->op_rkey;
827 send->s_op = op;
828 rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
829
830 /* map 8 byte retval buffer to the device */
831 ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
832 rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
833 if (ret != 1) {
834 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
835 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
836 ret = -ENOMEM; /* XXX ? */
837 goto out;
838 }
839
840 /* Convert our struct scatterlist to struct ib_sge */
841 send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
842 send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
843 send->s_sge[0].lkey = ic->i_mr->lkey;
844
845 rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
846 send->s_sge[0].addr, send->s_sge[0].length);
847
848 if (nr_sig)
849 atomic_add(nr_sig, &ic->i_signaled_sends);
850
851 failed_wr = &send->s_wr;
852 ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
853 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
854 send, &send->s_wr, ret, failed_wr);
855 BUG_ON(failed_wr != &send->s_wr);
856 if (ret) {
857 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
858 "returned %d\n", &conn->c_faddr, ret);
859 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
860 rds_ib_sub_signaled(ic, nr_sig);
861 goto out;
862 }
863
864 if (unlikely(failed_wr != &send->s_wr)) {
865 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
866 BUG_ON(failed_wr != &send->s_wr);
867 }
868
869out:
870 return ret;
871}
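rds_ib_xmit_atomic() posts the masked IB atomics. As I read the masked compare-and-swap semantics (an assumption about the verbs spec, not something stated in this patch), only bits selected by compare_add_mask take part in the comparison and only bits selected by swap_mask get replaced; a standalone model:

#include <stdio.h>
#include <stdint.h>

/* Illustrative model of a masked compare-and-swap on a 64-bit word:
 * compare only under compare_mask, replace only under swap_mask,
 * always return the prior value. */
static uint64_t masked_cswp(uint64_t *addr, uint64_t compare, uint64_t compare_mask,
			    uint64_t swap, uint64_t swap_mask)
{
	uint64_t old = *addr;

	if ((old & compare_mask) == (compare & compare_mask))
		*addr = (old & ~swap_mask) | (swap & swap_mask);
	return old;
}

int main(void)
{
	uint64_t word = 0xffff0000ffff0000ULL;

	/* Compare only the low 16 bits, swap only the high 16 bits. */
	masked_cswp(&word, 0x0, 0xffffULL,
		    0x1234000000000000ULL, 0xffff000000000000ULL);
	printf("0x%016llx\n", (unsigned long long)word);  /* 0x12340000ffff0000 */
	return 0;
}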
872
873int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
739{ 874{
740 struct rds_ib_connection *ic = conn->c_transport_data; 875 struct rds_ib_connection *ic = conn->c_transport_data;
741 struct rds_ib_send_work *send = NULL; 876 struct rds_ib_send_work *send = NULL;
742 struct rds_ib_send_work *first; 877 struct rds_ib_send_work *first;
743 struct rds_ib_send_work *prev; 878 struct rds_ib_send_work *prev;
744 struct ib_send_wr *failed_wr; 879 struct ib_send_wr *failed_wr;
745 struct rds_ib_device *rds_ibdev;
746 struct scatterlist *scat; 880 struct scatterlist *scat;
747 unsigned long len; 881 unsigned long len;
748 u64 remote_addr = op->r_remote_addr; 882 u64 remote_addr = op->op_remote_addr;
883 u32 max_sge = ic->rds_ibdev->max_sge;
749 u32 pos; 884 u32 pos;
750 u32 work_alloc; 885 u32 work_alloc;
751 u32 i; 886 u32 i;
@@ -753,29 +888,28 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
753 int sent; 888 int sent;
754 int ret; 889 int ret;
755 int num_sge; 890 int num_sge;
756 891 int nr_sig = 0;
757 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 892
758 893 /* map the op the first time we see it */
759 /* map the message the first time we see it */ 894 if (!op->op_mapped) {
760 if (!op->r_mapped) { 895 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
761 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 896 op->op_sg, op->op_nents, (op->op_write) ?
762 op->r_sg, op->r_nents, (op->r_write) ? 897 DMA_TO_DEVICE : DMA_FROM_DEVICE);
763 DMA_TO_DEVICE : DMA_FROM_DEVICE); 898 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
764 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 899 if (op->op_count == 0) {
765 if (op->r_count == 0) {
766 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 900 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
767 ret = -ENOMEM; /* XXX ? */ 901 ret = -ENOMEM; /* XXX ? */
768 goto out; 902 goto out;
769 } 903 }
770 904
771 op->r_mapped = 1; 905 op->op_mapped = 1;
772 } 906 }
773 907
774 /* 908 /*
775 * Instead of knowing how to return a partial rdma read/write we insist that there 909 * Instead of knowing how to return a partial rdma read/write we insist that there
776 * be enough work requests to send the entire message. 910 * be enough work requests to send the entire message.
777 */ 911 */
778 i = ceil(op->r_count, rds_ibdev->max_sge); 912 i = ceil(op->op_count, max_sge);
779 913
780 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); 914 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
781 if (work_alloc != i) { 915 if (work_alloc != i) {
@@ -788,30 +922,24 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
788 send = &ic->i_sends[pos]; 922 send = &ic->i_sends[pos];
789 first = send; 923 first = send;
790 prev = NULL; 924 prev = NULL;
791 scat = &op->r_sg[0]; 925 scat = &op->op_sg[0];
792 sent = 0; 926 sent = 0;
793 num_sge = op->r_count; 927 num_sge = op->op_count;
794 928
795 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 929 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
796 send->s_wr.send_flags = 0; 930 send->s_wr.send_flags = 0;
797 send->s_queued = jiffies; 931 send->s_queued = jiffies;
798 /* 932 send->s_op = NULL;
799 * We want to delay signaling completions just enough to get 933
800 * the batching benefits but not so much that we create dead time on the wire. 934 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
801 */
802 if (ic->i_unsignaled_wrs-- == 0) {
803 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
804 send->s_wr.send_flags = IB_SEND_SIGNALED;
805 }
806 935
807 send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; 936 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
808 send->s_wr.wr.rdma.remote_addr = remote_addr; 937 send->s_wr.wr.rdma.remote_addr = remote_addr;
809 send->s_wr.wr.rdma.rkey = op->r_key; 938 send->s_wr.wr.rdma.rkey = op->op_rkey;
810 send->s_op = op;
811 939
812 if (num_sge > rds_ibdev->max_sge) { 940 if (num_sge > max_sge) {
813 send->s_wr.num_sge = rds_ibdev->max_sge; 941 send->s_wr.num_sge = max_sge;
814 num_sge -= rds_ibdev->max_sge; 942 num_sge -= max_sge;
815 } else { 943 } else {
816 send->s_wr.num_sge = num_sge; 944 send->s_wr.num_sge = num_sge;
817 } 945 }
@@ -821,7 +949,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
821 if (prev) 949 if (prev)
822 prev->s_wr.next = &send->s_wr; 950 prev->s_wr.next = &send->s_wr;
823 951
824 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 952 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
825 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 953 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
826 send->s_sge[j].addr = 954 send->s_sge[j].addr =
827 ib_sg_dma_address(ic->i_cm_id->device, scat); 955 ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -843,15 +971,20 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
843 send = ic->i_sends; 971 send = ic->i_sends;
844 } 972 }
845 973
846 /* if we finished the message then send completion owns it */ 974 /* give a reference to the last op */
847 if (scat == &op->r_sg[op->r_count]) 975 if (scat == &op->op_sg[op->op_count]) {
848 prev->s_wr.send_flags = IB_SEND_SIGNALED; 976 prev->s_op = op;
977 rds_message_addref(container_of(op, struct rds_message, rdma));
978 }
849 979
850 if (i < work_alloc) { 980 if (i < work_alloc) {
851 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 981 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
852 work_alloc = i; 982 work_alloc = i;
853 } 983 }
854 984
985 if (nr_sig)
986 atomic_add(nr_sig, &ic->i_signaled_sends);
987
855 failed_wr = &first->s_wr; 988 failed_wr = &first->s_wr;
856 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 989 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
857 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 990 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
@@ -861,6 +994,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
861 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " 994 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
862 "returned %d\n", &conn->c_faddr, ret); 995 "returned %d\n", &conn->c_faddr, ret);
863 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 996 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
997 rds_ib_sub_signaled(ic, nr_sig);
864 goto out; 998 goto out;
865 } 999 }
866 1000
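For the rdma path above, the number of work requests is fixed up front as ceil(op->op_count / max_sge), and each WR then consumes at most max_sge scatterlist entries; a partial post is never attempted. A standalone sketch of that split with arbitrary numbers (ceil() in the RDS headers is assumed to be the usual round-up division):

#include <stdio.h>

#define ceil_div(a, b) (((a) + (b) - 1) / (b))

int main(void)
{
	unsigned int op_count = 19;   /* mapped scatterlist entries */
	unsigned int max_sge = 8;     /* device limit, ic->rds_ibdev->max_sge */
	unsigned int wrs = ceil_div(op_count, max_sge);
	unsigned int num_sge = op_count, i;

	printf("%u entries over %u work requests:\n", op_count, wrs);
	for (i = 0; i < wrs; i++) {
		unsigned int this = num_sge > max_sge ? max_sge : num_sge;

		printf("  wr %u: %u sges\n", i, this);
		num_sge -= this;
	}
	return 0;
}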
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index d2c904dd6fbc..2d5965d6e97c 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -67,6 +67,8 @@ static const char *const rds_ib_stat_names[] = {
67 "ib_rdma_mr_pool_flush", 67 "ib_rdma_mr_pool_flush",
68 "ib_rdma_mr_pool_wait", 68 "ib_rdma_mr_pool_wait",
69 "ib_rdma_mr_pool_depleted", 69 "ib_rdma_mr_pool_depleted",
70 "ib_atomic_cswp",
71 "ib_atomic_fadd",
70}; 72};
71 73
72unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, 74unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index 03f01cb4e0fe..fc3da37220fd 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -49,10 +49,6 @@ unsigned long rds_ib_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1; 49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; 50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
51 51
52unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
55
56/* 52/*
57 * This sysctl does nothing. 53 * This sysctl does nothing.
58 * 54 *
@@ -94,15 +90,6 @@ ctl_table rds_ib_sysctl_table[] = {
94 .extra2 = &rds_ib_sysctl_max_unsig_wr_max, 90 .extra2 = &rds_ib_sysctl_max_unsig_wr_max,
95 }, 91 },
96 { 92 {
97 .procname = "max_unsignaled_bytes",
98 .data = &rds_ib_sysctl_max_unsig_bytes,
99 .maxlen = sizeof(unsigned long),
100 .mode = 0644,
101 .proc_handler = proc_doulongvec_minmax,
102 .extra1 = &rds_ib_sysctl_max_unsig_bytes_min,
103 .extra2 = &rds_ib_sysctl_max_unsig_bytes_max,
104 },
105 {
106 .procname = "max_recv_allocation", 93 .procname = "max_recv_allocation",
107 .data = &rds_ib_sysctl_max_recv_allocation, 94 .data = &rds_ib_sysctl_max_recv_allocation,
108 .maxlen = sizeof(unsigned long), 95 .maxlen = sizeof(unsigned long),
@@ -132,10 +119,10 @@ void rds_ib_sysctl_exit(void)
132 unregister_sysctl_table(rds_ib_sysctl_hdr); 119 unregister_sysctl_table(rds_ib_sysctl_hdr);
133} 120}
134 121
135int __init rds_ib_sysctl_init(void) 122int rds_ib_sysctl_init(void)
136{ 123{
137 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table); 124 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table);
138 if (rds_ib_sysctl_hdr == NULL) 125 if (!rds_ib_sysctl_hdr)
139 return -ENOMEM; 126 return -ENOMEM;
140 return 0; 127 return 0;
141} 128}
diff --git a/net/rds/info.c b/net/rds/info.c
index c45c4173a44d..4fdf1b6e84ff 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -76,7 +76,7 @@ void rds_info_register_func(int optname, rds_info_func func)
76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
77 77
78 spin_lock(&rds_info_lock); 78 spin_lock(&rds_info_lock);
79 BUG_ON(rds_info_funcs[offset] != NULL); 79 BUG_ON(rds_info_funcs[offset]);
80 rds_info_funcs[offset] = func; 80 rds_info_funcs[offset] = func;
81 spin_unlock(&rds_info_lock); 81 spin_unlock(&rds_info_lock);
82} 82}
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(rds_info_deregister_func);
102 */ 102 */
103void rds_info_iter_unmap(struct rds_info_iterator *iter) 103void rds_info_iter_unmap(struct rds_info_iterator *iter)
104{ 104{
105 if (iter->addr != NULL) { 105 if (iter->addr) {
106 kunmap_atomic(iter->addr, KM_USER0); 106 kunmap_atomic(iter->addr, KM_USER0);
107 iter->addr = NULL; 107 iter->addr = NULL;
108 } 108 }
@@ -117,7 +117,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data,
117 unsigned long this; 117 unsigned long this;
118 118
119 while (bytes) { 119 while (bytes) {
120 if (iter->addr == NULL) 120 if (!iter->addr)
121 iter->addr = kmap_atomic(*iter->pages, KM_USER0); 121 iter->addr = kmap_atomic(*iter->pages, KM_USER0);
122 122
123 this = min(bytes, PAGE_SIZE - iter->offset); 123 this = min(bytes, PAGE_SIZE - iter->offset);
@@ -188,7 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
188 >> PAGE_SHIFT; 188 >> PAGE_SHIFT;
189 189
190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); 190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
191 if (pages == NULL) { 191 if (!pages) {
192 ret = -ENOMEM; 192 ret = -ENOMEM;
193 goto out; 193 goto out;
194 } 194 }
@@ -206,7 +206,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
206 206
207call_func: 207call_func:
208 func = rds_info_funcs[optname - RDS_INFO_FIRST]; 208 func = rds_info_funcs[optname - RDS_INFO_FIRST];
209 if (func == NULL) { 209 if (!func) {
210 ret = -ENOPROTOOPT; 210 ret = -ENOPROTOOPT;
211 goto out; 211 goto out;
212 } 212 }
@@ -234,7 +234,7 @@ call_func:
234 ret = -EFAULT; 234 ret = -EFAULT;
235 235
236out: 236out:
237 for (i = 0; pages != NULL && i < nr_pages; i++) 237 for (i = 0; pages && i < nr_pages; i++)
238 put_page(pages[i]); 238 put_page(pages[i]);
239 kfree(pages); 239 kfree(pages);
240 240
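rds_info_copy() above fills the caller's pinned pages a chunk at a time, copying at most PAGE_SIZE - offset bytes before advancing to the next page. A minimal userspace model of that chunking, with plain buffers standing in for kmapped pages:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096u

/* Copy 'bytes' from 'data' into an array of fixed-size pages, resuming at
 * 'offset' within the current page, in the way rds_info_copy() does. */
static void copy_to_pages(char pages[][PAGE_SIZE], unsigned int *page,
			  unsigned int *offset, const char *data, size_t bytes)
{
	while (bytes) {
		size_t this = bytes;

		if (this > PAGE_SIZE - *offset)
			this = PAGE_SIZE - *offset;
		memcpy(&pages[*page][*offset], data, this);
		data += this;
		bytes -= this;
		*offset += this;
		if (*offset == PAGE_SIZE) {   /* page full: move on to the next */
			(*page)++;
			*offset = 0;
		}
	}
}

int main(void)
{
	static char pages[3][PAGE_SIZE];
	unsigned int page = 0, offset = 4000;
	char item[200];

	memset(item, 'x', sizeof(item));
	copy_to_pages(pages, &page, &offset, item, sizeof(item));
	printf("now at page %u offset %u\n", page, offset);  /* page 1, offset 104 */
	return 0;
}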
diff --git a/net/rds/iw.c b/net/rds/iw.c
index c8f3d3525cb9..56808cac0fc7 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -264,7 +264,6 @@ struct rds_transport rds_iw_transport = {
264 .laddr_check = rds_iw_laddr_check, 264 .laddr_check = rds_iw_laddr_check,
265 .xmit_complete = rds_iw_xmit_complete, 265 .xmit_complete = rds_iw_xmit_complete,
266 .xmit = rds_iw_xmit, 266 .xmit = rds_iw_xmit,
267 .xmit_cong_map = NULL,
268 .xmit_rdma = rds_iw_xmit_rdma, 267 .xmit_rdma = rds_iw_xmit_rdma,
269 .recv = rds_iw_recv, 268 .recv = rds_iw_recv,
270 .conn_alloc = rds_iw_conn_alloc, 269 .conn_alloc = rds_iw_conn_alloc,
@@ -272,7 +271,6 @@ struct rds_transport rds_iw_transport = {
272 .conn_connect = rds_iw_conn_connect, 271 .conn_connect = rds_iw_conn_connect,
273 .conn_shutdown = rds_iw_conn_shutdown, 272 .conn_shutdown = rds_iw_conn_shutdown,
274 .inc_copy_to_user = rds_iw_inc_copy_to_user, 273 .inc_copy_to_user = rds_iw_inc_copy_to_user,
275 .inc_purge = rds_iw_inc_purge,
276 .inc_free = rds_iw_inc_free, 274 .inc_free = rds_iw_inc_free,
277 .cm_initiate_connect = rds_iw_cm_initiate_connect, 275 .cm_initiate_connect = rds_iw_cm_initiate_connect,
278 .cm_handle_connect = rds_iw_cm_handle_connect, 276 .cm_handle_connect = rds_iw_cm_handle_connect,
@@ -289,7 +287,7 @@ struct rds_transport rds_iw_transport = {
289 .t_prefer_loopback = 1, 287 .t_prefer_loopback = 1,
290}; 288};
291 289
292int __init rds_iw_init(void) 290int rds_iw_init(void)
293{ 291{
294 int ret; 292 int ret;
295 293
diff --git a/net/rds/iw.h b/net/rds/iw.h
index eef2f0c28476..543e665fafe3 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -70,7 +70,7 @@ struct rds_iw_send_work {
70 struct rds_message *s_rm; 70 struct rds_message *s_rm;
71 71
72 /* We should really put these into a union: */ 72 /* We should really put these into a union: */
73 struct rds_rdma_op *s_op; 73 struct rm_rdma_op *s_op;
74 struct rds_iw_mapping *s_mapping; 74 struct rds_iw_mapping *s_mapping;
75 struct ib_mr *s_mr; 75 struct ib_mr *s_mr;
76 struct ib_fast_reg_page_list *s_page_list; 76 struct ib_fast_reg_page_list *s_page_list;
@@ -284,7 +284,7 @@ void rds_iw_conn_free(void *arg);
284int rds_iw_conn_connect(struct rds_connection *conn); 284int rds_iw_conn_connect(struct rds_connection *conn);
285void rds_iw_conn_shutdown(struct rds_connection *conn); 285void rds_iw_conn_shutdown(struct rds_connection *conn);
286void rds_iw_state_change(struct sock *sk); 286void rds_iw_state_change(struct sock *sk);
287int __init rds_iw_listen_init(void); 287int rds_iw_listen_init(void);
288void rds_iw_listen_stop(void); 288void rds_iw_listen_stop(void);
289void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...); 289void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
290int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, 290int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -321,12 +321,11 @@ void rds_iw_flush_mrs(void);
321void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); 321void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
322 322
323/* ib_recv.c */ 323/* ib_recv.c */
324int __init rds_iw_recv_init(void); 324int rds_iw_recv_init(void);
325void rds_iw_recv_exit(void); 325void rds_iw_recv_exit(void);
326int rds_iw_recv(struct rds_connection *conn); 326int rds_iw_recv(struct rds_connection *conn);
327int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 327int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
328 gfp_t page_gfp, int prefill); 328 gfp_t page_gfp, int prefill);
329void rds_iw_inc_purge(struct rds_incoming *inc);
330void rds_iw_inc_free(struct rds_incoming *inc); 329void rds_iw_inc_free(struct rds_incoming *inc);
331int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 330int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
332 size_t size); 331 size_t size);
@@ -358,7 +357,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
358void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context); 357void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
359void rds_iw_send_init_ring(struct rds_iw_connection *ic); 358void rds_iw_send_init_ring(struct rds_iw_connection *ic);
360void rds_iw_send_clear_ring(struct rds_iw_connection *ic); 359void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
361int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 360int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
362void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); 361void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
363void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); 362void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
364int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, 363int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
@@ -371,7 +370,7 @@ unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
371 unsigned int avail); 370 unsigned int avail);
372 371
373/* ib_sysctl.c */ 372/* ib_sysctl.c */
374int __init rds_iw_sysctl_init(void); 373int rds_iw_sysctl_init(void);
375void rds_iw_sysctl_exit(void); 374void rds_iw_sysctl_exit(void);
376extern unsigned long rds_iw_sysctl_max_send_wr; 375extern unsigned long rds_iw_sysctl_max_send_wr;
377extern unsigned long rds_iw_sysctl_max_recv_wr; 376extern unsigned long rds_iw_sysctl_max_recv_wr;
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index b5dd6ac39be8..712cf2d1f28e 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -257,7 +257,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
257 * the rds_iwdev at all. 257 * the rds_iwdev at all.
258 */ 258 */
259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client); 259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
260 if (rds_iwdev == NULL) { 260 if (!rds_iwdev) {
261 if (printk_ratelimit()) 261 if (printk_ratelimit())
262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", 262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
263 dev->name); 263 dev->name);
@@ -292,7 +292,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
292 ic->i_send_ring.w_nr * 292 ic->i_send_ring.w_nr *
293 sizeof(struct rds_header), 293 sizeof(struct rds_header),
294 &ic->i_send_hdrs_dma, GFP_KERNEL); 294 &ic->i_send_hdrs_dma, GFP_KERNEL);
295 if (ic->i_send_hdrs == NULL) { 295 if (!ic->i_send_hdrs) {
296 ret = -ENOMEM; 296 ret = -ENOMEM;
297 rdsdebug("ib_dma_alloc_coherent send failed\n"); 297 rdsdebug("ib_dma_alloc_coherent send failed\n");
298 goto out; 298 goto out;
@@ -302,7 +302,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
302 ic->i_recv_ring.w_nr * 302 ic->i_recv_ring.w_nr *
303 sizeof(struct rds_header), 303 sizeof(struct rds_header),
304 &ic->i_recv_hdrs_dma, GFP_KERNEL); 304 &ic->i_recv_hdrs_dma, GFP_KERNEL);
305 if (ic->i_recv_hdrs == NULL) { 305 if (!ic->i_recv_hdrs) {
306 ret = -ENOMEM; 306 ret = -ENOMEM;
307 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 307 rdsdebug("ib_dma_alloc_coherent recv failed\n");
308 goto out; 308 goto out;
@@ -310,14 +310,14 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
310 310
311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
312 &ic->i_ack_dma, GFP_KERNEL); 312 &ic->i_ack_dma, GFP_KERNEL);
313 if (ic->i_ack == NULL) { 313 if (!ic->i_ack) {
314 ret = -ENOMEM; 314 ret = -ENOMEM;
315 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 315 rdsdebug("ib_dma_alloc_coherent ack failed\n");
316 goto out; 316 goto out;
317 } 317 }
318 318
319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); 319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
320 if (ic->i_sends == NULL) { 320 if (!ic->i_sends) {
321 ret = -ENOMEM; 321 ret = -ENOMEM;
322 rdsdebug("send allocation failed\n"); 322 rdsdebug("send allocation failed\n");
323 goto out; 323 goto out;
@@ -325,7 +325,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
325 rds_iw_send_init_ring(ic); 325 rds_iw_send_init_ring(ic);
326 326
327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); 327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
328 if (ic->i_recvs == NULL) { 328 if (!ic->i_recvs) {
329 ret = -ENOMEM; 329 ret = -ENOMEM;
330 rdsdebug("recv allocation failed\n"); 330 rdsdebug("recv allocation failed\n");
331 goto out; 331 goto out;
@@ -696,7 +696,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
696 696
697 /* XXX too lazy? */ 697 /* XXX too lazy? */
698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); 698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
699 if (ic == NULL) 699 if (!ic)
700 return -ENOMEM; 700 return -ENOMEM;
701 701
702 INIT_LIST_HEAD(&ic->iw_node); 702 INIT_LIST_HEAD(&ic->iw_node);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 13dc1862d862..0e7accc23ee2 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -34,7 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38#include "iw.h" 37#include "iw.h"
39 38
40 39
@@ -207,9 +206,9 @@ void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *con
207 BUG_ON(list_empty(&ic->iw_node)); 206 BUG_ON(list_empty(&ic->iw_node));
208 list_del(&ic->iw_node); 207 list_del(&ic->iw_node);
209 208
210 spin_lock_irq(&rds_iwdev->spinlock); 209 spin_lock(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); 210 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock); 211 spin_unlock(&rds_iwdev->spinlock);
213 spin_unlock_irq(&iw_nodev_conns_lock); 212 spin_unlock_irq(&iw_nodev_conns_lock);
214 213
215 ic->rds_iwdev = rds_iwdev; 214 ic->rds_iwdev = rds_iwdev;
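Note: the only substantive change in iw_rdma.c besides dropping the private rdma.h include is the inner lock. rds_iwdev->spinlock is now taken with plain spin_lock() because interrupts are already disabled by the enclosing spin_lock_irq(&iw_nodev_conns_lock), whose matching spin_unlock_irq() is visible just below; using spin_unlock_irq() on the inner lock would have re-enabled interrupts while the outer IRQ-disabling lock was still held. A sketch of the nesting, with lock and list names taken from the hunk (the helper itself is illustrative, not kernel code):

/* Illustrative only -- shows the lock nesting, not the kernel function. */
static void move_conn_to_dev_list(struct rds_iw_connection *ic,
				  struct rds_iw_device *rds_iwdev)
{
	spin_lock_irq(&iw_nodev_conns_lock);	/* outer lock disables IRQs     */
	list_del(&ic->iw_node);

	spin_lock(&rds_iwdev->spinlock);	/* IRQs already off: plain lock */
	list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
	spin_unlock(&rds_iwdev->spinlock);

	spin_unlock_irq(&iw_nodev_conns_lock);	/* IRQs re-enabled exactly once */
}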
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 3d479067d54d..5e57347f49ff 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -53,7 +53,7 @@ static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
53static void rds_iw_frag_free(struct rds_page_frag *frag) 53static void rds_iw_frag_free(struct rds_page_frag *frag)
54{ 54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page); 55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL); 56 BUG_ON(frag->f_page);
57 kmem_cache_free(rds_iw_frag_slab, frag); 57 kmem_cache_free(rds_iw_frag_slab, frag);
58} 58}
59 59
@@ -143,14 +143,14 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
143 struct ib_sge *sge; 143 struct ib_sge *sge;
144 int ret = -ENOMEM; 144 int ret = -ENOMEM;
145 145
146 if (recv->r_iwinc == NULL) { 146 if (!recv->r_iwinc) {
147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) { 147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) {
148 rds_iw_stats_inc(s_iw_rx_alloc_limit); 148 rds_iw_stats_inc(s_iw_rx_alloc_limit);
149 goto out; 149 goto out;
150 } 150 }
151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, 151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
152 kptr_gfp); 152 kptr_gfp);
153 if (recv->r_iwinc == NULL) { 153 if (!recv->r_iwinc) {
154 atomic_dec(&rds_iw_allocation); 154 atomic_dec(&rds_iw_allocation);
155 goto out; 155 goto out;
156 } 156 }
@@ -158,17 +158,17 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); 158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
159 } 159 }
160 160
161 if (recv->r_frag == NULL) { 161 if (!recv->r_frag) {
162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp); 162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
163 if (recv->r_frag == NULL) 163 if (!recv->r_frag)
164 goto out; 164 goto out;
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 165 INIT_LIST_HEAD(&recv->r_frag->f_item);
166 recv->r_frag->f_page = NULL; 166 recv->r_frag->f_page = NULL;
167 } 167 }
168 168
169 if (ic->i_frag.f_page == NULL) { 169 if (!ic->i_frag.f_page) {
170 ic->i_frag.f_page = alloc_page(page_gfp); 170 ic->i_frag.f_page = alloc_page(page_gfp);
171 if (ic->i_frag.f_page == NULL) 171 if (!ic->i_frag.f_page)
172 goto out; 172 goto out;
173 ic->i_frag.f_offset = 0; 173 ic->i_frag.f_offset = 0;
174 } 174 }
@@ -273,7 +273,7 @@ int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
273 return ret; 273 return ret;
274} 274}
275 275
276void rds_iw_inc_purge(struct rds_incoming *inc) 276static void rds_iw_inc_purge(struct rds_incoming *inc)
277{ 277{
278 struct rds_iw_incoming *iwinc; 278 struct rds_iw_incoming *iwinc;
279 struct rds_page_frag *frag; 279 struct rds_page_frag *frag;
@@ -716,7 +716,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
716 * into the inc and save the inc so we can hang upcoming fragments 716 * into the inc and save the inc so we can hang upcoming fragments
717 * off its list. 717 * off its list.
718 */ 718 */
719 if (iwinc == NULL) { 719 if (!iwinc) {
720 iwinc = recv->r_iwinc; 720 iwinc = recv->r_iwinc;
721 recv->r_iwinc = NULL; 721 recv->r_iwinc = NULL;
722 ic->i_iwinc = iwinc; 722 ic->i_iwinc = iwinc;
@@ -887,7 +887,7 @@ int rds_iw_recv(struct rds_connection *conn)
887 return ret; 887 return ret;
888} 888}
889 889
890int __init rds_iw_recv_init(void) 890int rds_iw_recv_init(void)
891{ 891{
892 struct sysinfo si; 892 struct sysinfo si;
893 int ret = -ENOMEM; 893 int ret = -ENOMEM;
@@ -899,13 +899,13 @@ int __init rds_iw_recv_init(void)
899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming", 899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
900 sizeof(struct rds_iw_incoming), 900 sizeof(struct rds_iw_incoming),
901 0, 0, NULL); 901 0, 0, NULL);
902 if (rds_iw_incoming_slab == NULL) 902 if (!rds_iw_incoming_slab)
903 goto out; 903 goto out;
904 904
905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag", 905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
906 sizeof(struct rds_page_frag), 906 sizeof(struct rds_page_frag),
907 0, 0, NULL); 907 0, 0, NULL);
908 if (rds_iw_frag_slab == NULL) 908 if (!rds_iw_frag_slab)
909 kmem_cache_destroy(rds_iw_incoming_slab); 909 kmem_cache_destroy(rds_iw_incoming_slab);
910 else 910 else
911 ret = 0; 911 ret = 0;
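Note: in iw_recv.c, rds_iw_inc_purge() becomes static now that it is only called from within this file, rds_iw_recv_init() loses its __init annotation, and the NULL comparisons switch to the shorter !ptr form. The two-slab setup keeps its existing error handling: if creating the second cache fails, the first one is destroyed and -ENOMEM falls through. A self-contained sketch of that pattern, with hypothetical cache names:

/* Sketch of the init/teardown pattern above; names are hypothetical. */
static struct kmem_cache *example_incoming_slab;
static struct kmem_cache *example_frag_slab;

static int example_recv_init(void)
{
	int ret = -ENOMEM;

	example_incoming_slab = kmem_cache_create("example_incoming",
					sizeof(struct rds_iw_incoming), 0, 0, NULL);
	if (!example_incoming_slab)
		goto out;

	example_frag_slab = kmem_cache_create("example_frag",
					sizeof(struct rds_page_frag), 0, 0, NULL);
	if (!example_frag_slab)
		kmem_cache_destroy(example_incoming_slab);	/* undo the first cache */
	else
		ret = 0;
out:
	return ret;
}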
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 52182ff7519e..6280ea020d4e 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -36,7 +36,6 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "iw.h" 39#include "iw.h"
41 40
42static void rds_iw_send_rdma_complete(struct rds_message *rm, 41static void rds_iw_send_rdma_complete(struct rds_message *rm,
@@ -64,13 +63,13 @@ static void rds_iw_send_rdma_complete(struct rds_message *rm,
64} 63}
65 64
66static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic, 65static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
67 struct rds_rdma_op *op) 66 struct rm_rdma_op *op)
68{ 67{
69 if (op->r_mapped) { 68 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 69 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 70 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 71 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 72 op->op_mapped = 0;
74 } 73 }
75} 74}
76 75
@@ -83,11 +82,11 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 82 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84 83
85 ib_dma_unmap_sg(ic->i_cm_id->device, 84 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->m_sg, rm->m_nents, 85 rm->data.op_sg, rm->data.op_nents,
87 DMA_TO_DEVICE); 86 DMA_TO_DEVICE);
88 87
89 if (rm->m_rdma_op != NULL) { 88 if (rm->rdma.op_active) {
90 rds_iw_send_unmap_rdma(ic, rm->m_rdma_op); 89 rds_iw_send_unmap_rdma(ic, &rm->rdma);
91 90
92 /* If the user asked for a completion notification on this 91 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics: 92 * message, we can implement three different semantics:
@@ -111,10 +110,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
111 */ 110 */
112 rds_iw_send_rdma_complete(rm, wc_status); 111 rds_iw_send_rdma_complete(rm, wc_status);
113 112
114 if (rm->m_rdma_op->r_write) 113 if (rm->rdma.op_write)
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 114 rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
116 else 115 else
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 116 rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
118 } 117 }
119 118
120 /* If anyone waited for this message to get flushed out, wake 119 /* If anyone waited for this message to get flushed out, wake
@@ -556,25 +555,27 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
556 } 555 }
557 556
558 /* map the message the first time we see it */ 557 /* map the message the first time we see it */
559 if (ic->i_rm == NULL) { 558 if (!ic->i_rm) {
560 /* 559 /*
561 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n", 560 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
562 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 561 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
563 rm->m_inc.i_hdr.h_flags, 562 rm->m_inc.i_hdr.h_flags,
564 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 563 be32_to_cpu(rm->m_inc.i_hdr.h_len));
565 */ 564 */
566 if (rm->m_nents) { 565 if (rm->data.op_nents) {
567 rm->m_count = ib_dma_map_sg(dev, 566 rm->data.op_count = ib_dma_map_sg(dev,
568 rm->m_sg, rm->m_nents, DMA_TO_DEVICE); 567 rm->data.op_sg,
569 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); 568 rm->data.op_nents,
570 if (rm->m_count == 0) { 569 DMA_TO_DEVICE);
570 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
571 if (rm->data.op_count == 0) {
571 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); 572 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
572 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); 573 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
573 ret = -ENOMEM; /* XXX ? */ 574 ret = -ENOMEM; /* XXX ? */
574 goto out; 575 goto out;
575 } 576 }
576 } else { 577 } else {
577 rm->m_count = 0; 578 rm->data.op_count = 0;
578 } 579 }
579 580
580 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; 581 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
@@ -590,10 +591,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
590 591
591 /* If it has a RDMA op, tell the peer we did it. This is 592 /* If it has a RDMA op, tell the peer we did it. This is
592 * used by the peer to release use-once RDMA MRs. */ 593 * used by the peer to release use-once RDMA MRs. */
593 if (rm->m_rdma_op) { 594 if (rm->rdma.op_active) {
594 struct rds_ext_header_rdma ext_hdr; 595 struct rds_ext_header_rdma ext_hdr;
595 596
596 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 597 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
597 rds_message_add_extension(&rm->m_inc.i_hdr, 598 rds_message_add_extension(&rm->m_inc.i_hdr,
598 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 599 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
599 } 600 }
@@ -621,7 +622,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
621 send = &ic->i_sends[pos]; 622 send = &ic->i_sends[pos];
622 first = send; 623 first = send;
623 prev = NULL; 624 prev = NULL;
624 scat = &rm->m_sg[sg]; 625 scat = &rm->data.op_sg[sg];
625 sent = 0; 626 sent = 0;
626 i = 0; 627 i = 0;
627 628
@@ -631,7 +632,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
631 * or when requested by the user. Right now, we let 632 * or when requested by the user. Right now, we let
632 * the application choose. 633 * the application choose.
633 */ 634 */
634 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 635 if (rm->rdma.op_active && rm->rdma.op_fence)
635 send_flags = IB_SEND_FENCE; 636 send_flags = IB_SEND_FENCE;
636 637
637 /* 638 /*
@@ -650,7 +651,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
650 } 651 }
651 652
652 /* if there's data reference it with a chain of work reqs */ 653 /* if there's data reference it with a chain of work reqs */
653 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 654 for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
654 unsigned int len; 655 unsigned int len;
655 656
656 send = &ic->i_sends[pos]; 657 send = &ic->i_sends[pos];
@@ -728,7 +729,7 @@ add_header:
728 sent += sizeof(struct rds_header); 729 sent += sizeof(struct rds_header);
729 730
730 /* if we finished the message then send completion owns it */ 731 /* if we finished the message then send completion owns it */
731 if (scat == &rm->m_sg[rm->m_count]) { 732 if (scat == &rm->data.op_sg[rm->data.op_count]) {
732 prev->s_rm = ic->i_rm; 733 prev->s_rm = ic->i_rm;
733 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 734 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
734 ic->i_rm = NULL; 735 ic->i_rm = NULL;
@@ -784,7 +785,7 @@ static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rd
784 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); 785 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
785} 786}
786 787
787int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 788int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
788{ 789{
789 struct rds_iw_connection *ic = conn->c_transport_data; 790 struct rds_iw_connection *ic = conn->c_transport_data;
790 struct rds_iw_send_work *send = NULL; 791 struct rds_iw_send_work *send = NULL;
@@ -794,7 +795,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
794 struct rds_iw_device *rds_iwdev; 795 struct rds_iw_device *rds_iwdev;
795 struct scatterlist *scat; 796 struct scatterlist *scat;
796 unsigned long len; 797 unsigned long len;
797 u64 remote_addr = op->r_remote_addr; 798 u64 remote_addr = op->op_remote_addr;
798 u32 pos, fr_pos; 799 u32 pos, fr_pos;
799 u32 work_alloc; 800 u32 work_alloc;
800 u32 i; 801 u32 i;
@@ -806,21 +807,21 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
806 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); 807 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
807 808
808 /* map the message the first time we see it */ 809 /* map the message the first time we see it */
809 if (!op->r_mapped) { 810 if (!op->op_mapped) {
810 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 811 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
811 op->r_sg, op->r_nents, (op->r_write) ? 812 op->op_sg, op->op_nents, (op->op_write) ?
812 DMA_TO_DEVICE : DMA_FROM_DEVICE); 813 DMA_TO_DEVICE : DMA_FROM_DEVICE);
813 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 814 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
814 if (op->r_count == 0) { 815 if (op->op_count == 0) {
815 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); 816 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
816 ret = -ENOMEM; /* XXX ? */ 817 ret = -ENOMEM; /* XXX ? */
817 goto out; 818 goto out;
818 } 819 }
819 820
820 op->r_mapped = 1; 821 op->op_mapped = 1;
821 } 822 }
822 823
823 if (!op->r_write) { 824 if (!op->op_write) {
824 /* Alloc space on the send queue for the fastreg */ 825 /* Alloc space on the send queue for the fastreg */
825 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos); 826 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
826 if (work_alloc != 1) { 827 if (work_alloc != 1) {
@@ -835,7 +836,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
835 * Instead of knowing how to return a partial rdma read/write we insist that there 836 * Instead of knowing how to return a partial rdma read/write we insist that there
836 * be enough work requests to send the entire message. 837 * be enough work requests to send the entire message.
837 */ 838 */
838 i = ceil(op->r_count, rds_iwdev->max_sge); 839 i = ceil(op->op_count, rds_iwdev->max_sge);
839 840
840 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos); 841 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
841 if (work_alloc != i) { 842 if (work_alloc != i) {
@@ -846,17 +847,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
846 } 847 }
847 848
848 send = &ic->i_sends[pos]; 849 send = &ic->i_sends[pos];
849 if (!op->r_write) { 850 if (!op->op_write) {
850 first = prev = &ic->i_sends[fr_pos]; 851 first = prev = &ic->i_sends[fr_pos];
851 } else { 852 } else {
852 first = send; 853 first = send;
853 prev = NULL; 854 prev = NULL;
854 } 855 }
855 scat = &op->r_sg[0]; 856 scat = &op->op_sg[0];
856 sent = 0; 857 sent = 0;
857 num_sge = op->r_count; 858 num_sge = op->op_count;
858 859
859 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 860 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
860 send->s_wr.send_flags = 0; 861 send->s_wr.send_flags = 0;
861 send->s_queued = jiffies; 862 send->s_queued = jiffies;
862 863
@@ -873,13 +874,13 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
873 * for local access after RDS is finished with it, using 874 * for local access after RDS is finished with it, using
874 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. 875 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
875 */ 876 */
876 if (op->r_write) 877 if (op->op_write)
877 send->s_wr.opcode = IB_WR_RDMA_WRITE; 878 send->s_wr.opcode = IB_WR_RDMA_WRITE;
878 else 879 else
879 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; 880 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
880 881
881 send->s_wr.wr.rdma.remote_addr = remote_addr; 882 send->s_wr.wr.rdma.remote_addr = remote_addr;
882 send->s_wr.wr.rdma.rkey = op->r_key; 883 send->s_wr.wr.rdma.rkey = op->op_rkey;
883 send->s_op = op; 884 send->s_op = op;
884 885
885 if (num_sge > rds_iwdev->max_sge) { 886 if (num_sge > rds_iwdev->max_sge) {
@@ -893,7 +894,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
893 if (prev) 894 if (prev)
894 prev->s_wr.next = &send->s_wr; 895 prev->s_wr.next = &send->s_wr;
895 896
896 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 897 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
897 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 898 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
898 899
899 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) 900 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
@@ -927,7 +928,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
927 } 928 }
928 929
929 /* if we finished the message then send completion owns it */ 930 /* if we finished the message then send completion owns it */
930 if (scat == &op->r_sg[op->r_count]) 931 if (scat == &op->op_sg[op->op_count])
931 first->s_wr.send_flags = IB_SEND_SIGNALED; 932 first->s_wr.send_flags = IB_SEND_SIGNALED;
932 933
933 if (i < work_alloc) { 934 if (i < work_alloc) {
@@ -941,9 +942,9 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
941 * adapters do not allow using the lkey for this at all. To bypass this use a 942 * adapters do not allow using the lkey for this at all. To bypass this use a
942 * fastreg_mr (or possibly a dma_mr) 943 * fastreg_mr (or possibly a dma_mr)
943 */ 944 */
944 if (!op->r_write) { 945 if (!op->op_write) {
945 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], 946 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
946 op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); 947 op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
947 work_alloc++; 948 work_alloc++;
948 } 949 }
949 950
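Note: the iw_send.c changes are mostly mechanical renames. The message's payload and RDMA descriptions move from flat rds_message fields (m_sg, m_nents, m_count, m_rdma_op) into the embedded rm->data and rm->rdma sub-ops, the old r_* member names become op_*, and the "is there an RDMA op?" test changes from a pointer check to the op_active flag. A rough reconstruction of the embedded op as it can be read off these hunks; field order, exact widths, and any members not visible here are guesses, and the authoritative definition lives in net/rds/rds.h:

/* Approximate reconstruction from the hunks above -- not the real header. */
struct rm_rdma_op {
	u32			op_rkey;
	u64			op_remote_addr;
	unsigned int		op_write:1;
	unsigned int		op_fence:1;
	unsigned int		op_notify:1;
	unsigned int		op_recverr:1;
	unsigned int		op_mapped:1;
	unsigned int		op_silent:1;
	unsigned int		op_active:1;
	unsigned int		op_bytes;
	unsigned int		op_nents;
	unsigned int		op_count;
	struct scatterlist	*op_sg;
	struct rds_notifier	*op_notifier;
	struct rds_mr		*op_rdma_mr;
};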
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 1c4428a61a02..23e3a9a26aaf 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c
@@ -122,10 +122,10 @@ void rds_iw_sysctl_exit(void)
122 unregister_sysctl_table(rds_iw_sysctl_hdr); 122 unregister_sysctl_table(rds_iw_sysctl_hdr);
123} 123}
124 124
125int __init rds_iw_sysctl_init(void) 125int rds_iw_sysctl_init(void)
126{ 126{
127 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table); 127 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table);
128 if (rds_iw_sysctl_hdr == NULL) 128 if (!rds_iw_sysctl_hdr)
129 return -ENOMEM; 129 return -ENOMEM;
130 return 0; 130 return 0;
131} 131}
diff --git a/net/rds/loop.c b/net/rds/loop.c
index dd9879379457..c390156b426f 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,17 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
68 }
69
64 BUG_ON(hdr_off || sg || off); 70 BUG_ON(hdr_off || sg || off);
65 71
66 rds_inc_init(&rm->m_inc, conn, conn->c_laddr); 72 rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
67 rds_message_addref(rm); /* for the inc */ 73 /* For the embedded inc. Matching put is in loop_inc_free() */
74 rds_message_addref(rm);
68 75
69 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc, 76 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
70 GFP_KERNEL, KM_USER0); 77 GFP_KERNEL, KM_USER0);
@@ -77,16 +84,14 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
77 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
78} 85}
79 86
80static int rds_loop_xmit_cong_map(struct rds_connection *conn, 87/*
81 struct rds_cong_map *map, 88 * See rds_loop_xmit(). Since our inc is embedded in the rm, we
82 unsigned long offset) 89 * make sure the rm lives at least until the inc is done.
90 */
91static void rds_loop_inc_free(struct rds_incoming *inc)
83{ 92{
84 BUG_ON(offset); 93 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
85 BUG_ON(map != conn->c_lcong); 94 rds_message_put(rm);
86
87 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
88
89 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
90} 95}
91 96
92/* we need to at least give the thread something to succeed */ 97/* we need to at least give the thread something to succeed */
@@ -112,7 +117,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
112 unsigned long flags; 117 unsigned long flags;
113 118
114 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); 119 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
115 if (lc == NULL) 120 if (!lc)
116 return -ENOMEM; 121 return -ENOMEM;
117 122
118 INIT_LIST_HEAD(&lc->loop_node); 123 INIT_LIST_HEAD(&lc->loop_node);
@@ -169,14 +174,12 @@ void rds_loop_exit(void)
169 */ 174 */
170struct rds_transport rds_loop_transport = { 175struct rds_transport rds_loop_transport = {
171 .xmit = rds_loop_xmit, 176 .xmit = rds_loop_xmit,
172 .xmit_cong_map = rds_loop_xmit_cong_map,
173 .recv = rds_loop_recv, 177 .recv = rds_loop_recv,
174 .conn_alloc = rds_loop_conn_alloc, 178 .conn_alloc = rds_loop_conn_alloc,
175 .conn_free = rds_loop_conn_free, 179 .conn_free = rds_loop_conn_free,
176 .conn_connect = rds_loop_conn_connect, 180 .conn_connect = rds_loop_conn_connect,
177 .conn_shutdown = rds_loop_conn_shutdown, 181 .conn_shutdown = rds_loop_conn_shutdown,
178 .inc_copy_to_user = rds_message_inc_copy_to_user, 182 .inc_copy_to_user = rds_message_inc_copy_to_user,
179 .inc_purge = rds_message_inc_purge, 183 .inc_free = rds_loop_inc_free,
180 .inc_free = rds_message_inc_free,
181 .t_name = "loopback", 184 .t_name = "loopback",
182}; 185};
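Note: two things change in loop.c. Congestion-map updates no longer go through a dedicated xmit_cong_map hook; rds_loop_xmit() spots RDS_FLAG_CONG_BITMAP in the header and short-circuits before the BUG_ON, so the transport drops that method. And because loopback hands the message's embedded rds_incoming straight back to the receive path, the message must outlive the inc: the addref taken in rds_loop_xmit() is now paired with a put in the new rds_loop_inc_free(). The two additions, reassembled from the side-by-side rendering above for readability:

	/* Do not send cong updates to loopback */
	if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
		return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
	}

/*
 * See rds_loop_xmit().  Since our inc is embedded in the rm, we
 * make sure the rm lives at least until the inc is done.
 */
static void rds_loop_inc_free(struct rds_incoming *inc)
{
	struct rds_message *rm = container_of(inc, struct rds_message, m_inc);

	rds_message_put(rm);	/* matches rds_message_addref() in rds_loop_xmit() */
}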
diff --git a/net/rds/message.c b/net/rds/message.c
index 9a1d67e001ba..84f937f11d47 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -34,9 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38
39static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq);
40 37
41static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { 38static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
42[RDS_EXTHDR_NONE] = 0, 39[RDS_EXTHDR_NONE] = 0,
@@ -63,29 +60,31 @@ static void rds_message_purge(struct rds_message *rm)
63 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) 60 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
64 return; 61 return;
65 62
66 for (i = 0; i < rm->m_nents; i++) { 63 for (i = 0; i < rm->data.op_nents; i++) {
67 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i])); 64 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
68 /* XXX will have to put_page for page refs */ 65 /* XXX will have to put_page for page refs */
69 __free_page(sg_page(&rm->m_sg[i])); 66 __free_page(sg_page(&rm->data.op_sg[i]));
70 } 67 }
71 rm->m_nents = 0; 68 rm->data.op_nents = 0;
72 69
73 if (rm->m_rdma_op) 70 if (rm->rdma.op_active)
74 rds_rdma_free_op(rm->m_rdma_op); 71 rds_rdma_free_op(&rm->rdma);
75 if (rm->m_rdma_mr) 72 if (rm->rdma.op_rdma_mr)
76 rds_mr_put(rm->m_rdma_mr); 73 rds_mr_put(rm->rdma.op_rdma_mr);
77}
78 74
79void rds_message_inc_purge(struct rds_incoming *inc) 75 if (rm->atomic.op_active)
80{ 76 rds_atomic_free_op(&rm->atomic);
81 struct rds_message *rm = container_of(inc, struct rds_message, m_inc); 77 if (rm->atomic.op_rdma_mr)
82 rds_message_purge(rm); 78 rds_mr_put(rm->atomic.op_rdma_mr);
83} 79}
84 80
85void rds_message_put(struct rds_message *rm) 81void rds_message_put(struct rds_message *rm)
86{ 82{
87 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); 83 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
88 84 if (atomic_read(&rm->m_refcount) == 0) {
85printk(KERN_CRIT "danger refcount zero on %p\n", rm);
86WARN_ON(1);
87 }
89 if (atomic_dec_and_test(&rm->m_refcount)) { 88 if (atomic_dec_and_test(&rm->m_refcount)) {
90 BUG_ON(!list_empty(&rm->m_sock_item)); 89 BUG_ON(!list_empty(&rm->m_sock_item));
91 BUG_ON(!list_empty(&rm->m_conn_item)); 90 BUG_ON(!list_empty(&rm->m_conn_item));
@@ -96,12 +95,6 @@ void rds_message_put(struct rds_message *rm)
96} 95}
97EXPORT_SYMBOL_GPL(rds_message_put); 96EXPORT_SYMBOL_GPL(rds_message_put);
98 97
99void rds_message_inc_free(struct rds_incoming *inc)
100{
101 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
102 rds_message_put(rm);
103}
104
105void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 98void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
106 __be16 dport, u64 seq) 99 __be16 dport, u64 seq)
107{ 100{
@@ -214,41 +207,68 @@ int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 o
214} 207}
215EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension); 208EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);
216 209
217struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp) 210/*
211 * Each rds_message is allocated with extra space for the scatterlist entries
212 * rds ops will need. This is to minimize memory allocation count. Then, each rds op
213 * can grab SGs when initializing its part of the rds_message.
214 */
215struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
218{ 216{
219 struct rds_message *rm; 217 struct rds_message *rm;
220 218
221 rm = kzalloc(sizeof(struct rds_message) + 219 rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp);
222 (nents * sizeof(struct scatterlist)), gfp);
223 if (!rm) 220 if (!rm)
224 goto out; 221 goto out;
225 222
226 if (nents) 223 rm->m_used_sgs = 0;
227 sg_init_table(rm->m_sg, nents); 224 rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
225
228 atomic_set(&rm->m_refcount, 1); 226 atomic_set(&rm->m_refcount, 1);
229 INIT_LIST_HEAD(&rm->m_sock_item); 227 INIT_LIST_HEAD(&rm->m_sock_item);
230 INIT_LIST_HEAD(&rm->m_conn_item); 228 INIT_LIST_HEAD(&rm->m_conn_item);
231 spin_lock_init(&rm->m_rs_lock); 229 spin_lock_init(&rm->m_rs_lock);
230 init_waitqueue_head(&rm->m_flush_wait);
232 231
233out: 232out:
234 return rm; 233 return rm;
235} 234}
236 235
236/*
237 * RDS ops use this to grab SG entries from the rm's sg pool.
238 */
239struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
240{
241 struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
242 struct scatterlist *sg_ret;
243
244 WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
245 WARN_ON(!nents);
246
247 sg_ret = &sg_first[rm->m_used_sgs];
248 sg_init_table(sg_ret, nents);
249 rm->m_used_sgs += nents;
250
251 return sg_ret;
252}
253
237struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) 254struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
238{ 255{
239 struct rds_message *rm; 256 struct rds_message *rm;
240 unsigned int i; 257 unsigned int i;
258 int num_sgs = ceil(total_len, PAGE_SIZE);
259 int extra_bytes = num_sgs * sizeof(struct scatterlist);
241 260
242 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL); 261 rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
243 if (rm == NULL) 262 if (!rm)
244 return ERR_PTR(-ENOMEM); 263 return ERR_PTR(-ENOMEM);
245 264
246 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); 265 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
247 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 266 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
248 rm->m_nents = ceil(total_len, PAGE_SIZE); 267 rm->data.op_nents = ceil(total_len, PAGE_SIZE);
268 rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
249 269
250 for (i = 0; i < rm->m_nents; ++i) { 270 for (i = 0; i < rm->data.op_nents; ++i) {
251 sg_set_page(&rm->m_sg[i], 271 sg_set_page(&rm->data.op_sg[i],
252 virt_to_page(page_addrs[i]), 272 virt_to_page(page_addrs[i]),
253 PAGE_SIZE, 0); 273 PAGE_SIZE, 0);
254 } 274 }
@@ -256,40 +276,33 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
256 return rm; 276 return rm;
257} 277}
258 278
259struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 279int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
260 size_t total_len) 280 size_t total_len)
261{ 281{
262 unsigned long to_copy; 282 unsigned long to_copy;
263 unsigned long iov_off; 283 unsigned long iov_off;
264 unsigned long sg_off; 284 unsigned long sg_off;
265 struct rds_message *rm;
266 struct iovec *iov; 285 struct iovec *iov;
267 struct scatterlist *sg; 286 struct scatterlist *sg;
268 int ret; 287 int ret = 0;
269
270 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
271 if (rm == NULL) {
272 ret = -ENOMEM;
273 goto out;
274 }
275 288
276 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 289 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
277 290
278 /* 291 /*
279 * now allocate and copy in the data payload. 292 * now allocate and copy in the data payload.
280 */ 293 */
281 sg = rm->m_sg; 294 sg = rm->data.op_sg;
282 iov = first_iov; 295 iov = first_iov;
283 iov_off = 0; 296 iov_off = 0;
284 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ 297 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
285 298
286 while (total_len) { 299 while (total_len) {
287 if (sg_page(sg) == NULL) { 300 if (!sg_page(sg)) {
288 ret = rds_page_remainder_alloc(sg, total_len, 301 ret = rds_page_remainder_alloc(sg, total_len,
289 GFP_HIGHUSER); 302 GFP_HIGHUSER);
290 if (ret) 303 if (ret)
291 goto out; 304 goto out;
292 rm->m_nents++; 305 rm->data.op_nents++;
293 sg_off = 0; 306 sg_off = 0;
294 } 307 }
295 308
@@ -320,14 +333,8 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
320 sg++; 333 sg++;
321 } 334 }
322 335
323 ret = 0;
324out: 336out:
325 if (ret) { 337 return ret;
326 if (rm)
327 rds_message_put(rm);
328 rm = ERR_PTR(ret);
329 }
330 return rm;
331} 338}
332 339
333int rds_message_inc_copy_to_user(struct rds_incoming *inc, 340int rds_message_inc_copy_to_user(struct rds_incoming *inc,
@@ -348,7 +355,7 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
348 355
349 iov = first_iov; 356 iov = first_iov;
350 iov_off = 0; 357 iov_off = 0;
351 sg = rm->m_sg; 358 sg = rm->data.op_sg;
352 vec_off = 0; 359 vec_off = 0;
353 copied = 0; 360 copied = 0;
354 361
@@ -394,15 +401,14 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
394 */ 401 */
395void rds_message_wait(struct rds_message *rm) 402void rds_message_wait(struct rds_message *rm)
396{ 403{
397 wait_event(rds_message_flush_waitq, 404 wait_event_interruptible(rm->m_flush_wait,
398 !test_bit(RDS_MSG_MAPPED, &rm->m_flags)); 405 !test_bit(RDS_MSG_MAPPED, &rm->m_flags));
399} 406}
400 407
401void rds_message_unmapped(struct rds_message *rm) 408void rds_message_unmapped(struct rds_message *rm)
402{ 409{
403 clear_bit(RDS_MSG_MAPPED, &rm->m_flags); 410 clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
404 if (waitqueue_active(&rds_message_flush_waitq)) 411 wake_up_interruptible(&rm->m_flush_wait);
405 wake_up(&rds_message_flush_waitq);
406} 412}
407EXPORT_SYMBOL_GPL(rds_message_unmapped); 413EXPORT_SYMBOL_GPL(rds_message_unmapped);
408 414
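Note: the message.c rework is the heart of this series. rds_message_alloc() now takes a byte count of extra space appended to the rds_message allocation, and rds_message_alloc_sgs() hands out scatterlist entries from that trailing pool so the data, rdma and atomic ops share a single allocation. rds_message_copy_from_user() no longer allocates the message itself, and the global flush waitqueue becomes a per-message m_flush_wait. A short usage sketch of the pool, assuming a plain data message and GFP_KERNEL; the helper name is hypothetical:

/* Usage sketch: size the allocation up front, then carve SG entries out of
 * the trailing pool.  alloc_data_message() is a hypothetical helper. */
static struct rds_message *alloc_data_message(size_t payload_len)
{
	int num_sgs = ceil(payload_len, PAGE_SIZE);
	struct rds_message *rm;

	rm = rds_message_alloc(num_sgs * sizeof(struct scatterlist), GFP_KERNEL);
	if (!rm)
		return NULL;

	rm->data.op_nents = 0;	/* filled in as pages are attached */
	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
	return rm;
}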
diff --git a/net/rds/page.c b/net/rds/page.c
index 595a952d4b17..5e44f5ae7898 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -116,7 +116,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
116 /* jump straight to allocation if we're trying for a huge page */ 116 /* jump straight to allocation if we're trying for a huge page */
117 if (bytes >= PAGE_SIZE) { 117 if (bytes >= PAGE_SIZE) {
118 page = alloc_page(gfp); 118 page = alloc_page(gfp);
119 if (page == NULL) { 119 if (!page) {
120 ret = -ENOMEM; 120 ret = -ENOMEM;
121 } else { 121 } else {
122 sg_set_page(scat, page, PAGE_SIZE, 0); 122 sg_set_page(scat, page, PAGE_SIZE, 0);
@@ -162,7 +162,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
162 rem = &per_cpu(rds_page_remainders, get_cpu()); 162 rem = &per_cpu(rds_page_remainders, get_cpu());
163 local_irq_save(flags); 163 local_irq_save(flags);
164 164
165 if (page == NULL) { 165 if (!page) {
166 ret = -ENOMEM; 166 ret = -ENOMEM;
167 break; 167 break;
168 } 168 }
@@ -186,6 +186,7 @@ out:
186 ret ? 0 : scat->length); 186 ret ? 0 : scat->length);
187 return ret; 187 return ret;
188} 188}
189EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
189 190
190static int rds_page_remainder_cpu_notify(struct notifier_block *self, 191static int rds_page_remainder_cpu_notify(struct notifier_block *self,
191 unsigned long action, void *hcpu) 192 unsigned long action, void *hcpu)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 75fd13bb631b..1a41debca1ce 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -35,7 +35,7 @@
35#include <linux/rbtree.h> 35#include <linux/rbtree.h>
36#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */ 36#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
37 37
38#include "rdma.h" 38#include "rds.h"
39 39
40/* 40/*
41 * XXX 41 * XXX
@@ -130,14 +130,22 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
130{ 130{
131 struct rds_mr *mr; 131 struct rds_mr *mr;
132 struct rb_node *node; 132 struct rb_node *node;
133 unsigned long flags;
133 134
134 /* Release any MRs associated with this socket */ 135 /* Release any MRs associated with this socket */
136 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
135 while ((node = rb_first(&rs->rs_rdma_keys))) { 137 while ((node = rb_first(&rs->rs_rdma_keys))) {
136 mr = container_of(node, struct rds_mr, r_rb_node); 138 mr = container_of(node, struct rds_mr, r_rb_node);
137 if (mr->r_trans == rs->rs_transport) 139 if (mr->r_trans == rs->rs_transport)
138 mr->r_invalidate = 0; 140 mr->r_invalidate = 0;
141 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
142 RB_CLEAR_NODE(&mr->r_rb_node);
143 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
144 rds_destroy_mr(mr);
139 rds_mr_put(mr); 145 rds_mr_put(mr);
146 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
140 } 147 }
148 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
141 149
142 if (rs->rs_transport && rs->rs_transport->flush_mrs) 150 if (rs->rs_transport && rs->rs_transport->flush_mrs)
143 rs->rs_transport->flush_mrs(); 151 rs->rs_transport->flush_mrs();
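Note: rds_rdma_drop_keys() previously walked rs_rdma_keys without holding rs_rdma_lock. It now unlinks each MR from the tree under the lock, drops the lock before calling rds_destroy_mr()/rds_mr_put() (which end up in the transport's MR teardown), and re-acquires it before touching the tree again. The new-side loop, reassembled from the hunk into a standalone sketch:

static void drop_keys_sketch(struct rds_sock *rs)
{
	struct rds_mr *mr;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
	while ((node = rb_first(&rs->rs_rdma_keys))) {
		mr = container_of(node, struct rds_mr, r_rb_node);
		if (mr->r_trans == rs->rs_transport)
			mr->r_invalidate = 0;
		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
		RB_CLEAR_NODE(&mr->r_rb_node);
		spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);

		rds_destroy_mr(mr);	/* lock dropped around the destructor */
		rds_mr_put(mr);

		spin_lock_irqsave(&rs->rs_rdma_lock, flags);
	}
	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
}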
@@ -181,7 +189,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
181 goto out; 189 goto out;
182 } 190 }
183 191
184 if (rs->rs_transport->get_mr == NULL) { 192 if (!rs->rs_transport->get_mr) {
185 ret = -EOPNOTSUPP; 193 ret = -EOPNOTSUPP;
186 goto out; 194 goto out;
187 } 195 }
@@ -197,13 +205,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
197 205
198 /* XXX clamp nr_pages to limit the size of this alloc? */ 206 /* XXX clamp nr_pages to limit the size of this alloc? */
199 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 207 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
200 if (pages == NULL) { 208 if (!pages) {
201 ret = -ENOMEM; 209 ret = -ENOMEM;
202 goto out; 210 goto out;
203 } 211 }
204 212
205 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL); 213 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
206 if (mr == NULL) { 214 if (!mr) {
207 ret = -ENOMEM; 215 ret = -ENOMEM;
208 goto out; 216 goto out;
209 } 217 }
@@ -230,13 +238,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
230 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to 238 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
231 * the zero page. 239 * the zero page.
232 */ 240 */
233 ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1); 241 ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
234 if (ret < 0) 242 if (ret < 0)
235 goto out; 243 goto out;
236 244
237 nents = ret; 245 nents = ret;
238 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL); 246 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
239 if (sg == NULL) { 247 if (!sg) {
240 ret = -ENOMEM; 248 ret = -ENOMEM;
241 goto out; 249 goto out;
242 } 250 }
@@ -406,68 +414,127 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
406 414
407 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 415 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
408 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 416 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
409 if (mr && (mr->r_use_once || force)) { 417 if (!mr) {
418 printk(KERN_ERR "rds: trying to unuse MR with unknown r_key %u!\n", r_key);
419 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
420 return;
421 }
422
423 if (mr->r_use_once || force) {
410 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); 424 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
411 RB_CLEAR_NODE(&mr->r_rb_node); 425 RB_CLEAR_NODE(&mr->r_rb_node);
412 zot_me = 1; 426 zot_me = 1;
413 } else if (mr) 427 }
414 atomic_inc(&mr->r_refcount);
415 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); 428 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
416 429
417 /* May have to issue a dma_sync on this memory region. 430 /* May have to issue a dma_sync on this memory region.
418 * Note we could avoid this if the operation was a RDMA READ, 431 * Note we could avoid this if the operation was a RDMA READ,
419 * but at this point we can't tell. */ 432 * but at this point we can't tell. */
420 if (mr != NULL) { 433 if (mr->r_trans->sync_mr)
421 if (mr->r_trans->sync_mr) 434 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
422 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); 435
423 436 /* If the MR was marked as invalidate, this will
424 /* If the MR was marked as invalidate, this will 437 * trigger an async flush. */
425 * trigger an async flush. */ 438 if (zot_me)
426 if (zot_me) 439 rds_destroy_mr(mr);
427 rds_destroy_mr(mr); 440 rds_mr_put(mr);
428 rds_mr_put(mr);
429 }
430} 441}
431 442
432void rds_rdma_free_op(struct rds_rdma_op *ro) 443void rds_rdma_free_op(struct rm_rdma_op *ro)
433{ 444{
434 unsigned int i; 445 unsigned int i;
435 446
436 for (i = 0; i < ro->r_nents; i++) { 447 for (i = 0; i < ro->op_nents; i++) {
437 struct page *page = sg_page(&ro->r_sg[i]); 448 struct page *page = sg_page(&ro->op_sg[i]);
438 449
439 /* Mark page dirty if it was possibly modified, which 450 /* Mark page dirty if it was possibly modified, which
440 * is the case for a RDMA_READ which copies from remote 451 * is the case for a RDMA_READ which copies from remote
441 * to local memory */ 452 * to local memory */
442 if (!ro->r_write) { 453 if (!ro->op_write) {
443 BUG_ON(in_interrupt()); 454 BUG_ON(irqs_disabled());
444 set_page_dirty(page); 455 set_page_dirty(page);
445 } 456 }
446 put_page(page); 457 put_page(page);
447 } 458 }
448 459
449 kfree(ro->r_notifier); 460 kfree(ro->op_notifier);
450 kfree(ro); 461 ro->op_notifier = NULL;
462 ro->op_active = 0;
463}
464
465void rds_atomic_free_op(struct rm_atomic_op *ao)
466{
467 struct page *page = sg_page(ao->op_sg);
468
469 /* Mark page dirty if it was possibly modified, which
470 * is the case for a RDMA_READ which copies from remote
471 * to local memory */
472 set_page_dirty(page);
473 put_page(page);
474
475 kfree(ao->op_notifier);
476 ao->op_notifier = NULL;
477 ao->op_active = 0;
451} 478}
452 479
480
453/* 481/*
454 * args is a pointer to an in-kernel copy in the sendmsg cmsg. 482 * Count the number of pages needed to describe an incoming iovec.
455 */ 483 */
456static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, 484static int rds_rdma_pages(struct rds_rdma_args *args)
457 struct rds_rdma_args *args)
458{ 485{
459 struct rds_iovec vec; 486 struct rds_iovec vec;
460 struct rds_rdma_op *op = NULL; 487 struct rds_iovec __user *local_vec;
488 unsigned int tot_pages = 0;
461 unsigned int nr_pages; 489 unsigned int nr_pages;
462 unsigned int max_pages; 490 unsigned int i;
491
492 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
493
494 /* figure out the number of pages in the vector */
495 for (i = 0; i < args->nr_local; i++) {
496 if (copy_from_user(&vec, &local_vec[i],
497 sizeof(struct rds_iovec)))
498 return -EFAULT;
499
500 nr_pages = rds_pages_in_vec(&vec);
501 if (nr_pages == 0)
502 return -EINVAL;
503
504 tot_pages += nr_pages;
505 }
506
507 return tot_pages;
508}
509
510int rds_rdma_extra_size(struct rds_rdma_args *args)
511{
512 return rds_rdma_pages(args) * sizeof(struct scatterlist);
513}
514
515/*
516 * The application asks for a RDMA transfer.
517 * Extract all arguments and set up the rdma_op
518 */
519int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
520 struct cmsghdr *cmsg)
521{
522 struct rds_rdma_args *args;
523 struct rds_iovec vec;
524 struct rm_rdma_op *op = &rm->rdma;
525 int nr_pages;
463 unsigned int nr_bytes; 526 unsigned int nr_bytes;
464 struct page **pages = NULL; 527 struct page **pages = NULL;
465 struct rds_iovec __user *local_vec; 528 struct rds_iovec __user *local_vec;
466 struct scatterlist *sg;
467 unsigned int nr; 529 unsigned int nr;
468 unsigned int i, j; 530 unsigned int i, j;
469 int ret; 531 int ret = 0;
470 532
533 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
534 || rm->rdma.op_active)
535 return -EINVAL;
536
537 args = CMSG_DATA(cmsg);
471 538
472 if (rs->rs_bound_addr == 0) { 539 if (rs->rs_bound_addr == 0) {
473 ret = -ENOTCONN; /* XXX not a great errno */ 540 ret = -ENOTCONN; /* XXX not a great errno */
@@ -479,61 +546,38 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
479 goto out; 546 goto out;
480 } 547 }
481 548
482 nr_pages = 0; 549 nr_pages = rds_rdma_pages(args);
483 max_pages = 0; 550 if (nr_pages < 0)
484
485 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
486
487 /* figure out the number of pages in the vector */
488 for (i = 0; i < args->nr_local; i++) {
489 if (copy_from_user(&vec, &local_vec[i],
490 sizeof(struct rds_iovec))) {
491 ret = -EFAULT;
492 goto out;
493 }
494
495 nr = rds_pages_in_vec(&vec);
496 if (nr == 0) {
497 ret = -EINVAL;
498 goto out;
499 }
500
501 max_pages = max(nr, max_pages);
502 nr_pages += nr;
503 }
504
505 pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
506 if (pages == NULL) {
507 ret = -ENOMEM;
508 goto out; 551 goto out;
509 }
510 552
511 op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL); 553 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
512 if (op == NULL) { 554 if (!pages) {
513 ret = -ENOMEM; 555 ret = -ENOMEM;
514 goto out; 556 goto out;
515 } 557 }
516 558
517 op->r_write = !!(args->flags & RDS_RDMA_READWRITE); 559 op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
518 op->r_fence = !!(args->flags & RDS_RDMA_FENCE); 560 op->op_fence = !!(args->flags & RDS_RDMA_FENCE);
519 op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); 561 op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
520 op->r_recverr = rs->rs_recverr; 562 op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
563 op->op_active = 1;
564 op->op_recverr = rs->rs_recverr;
521 WARN_ON(!nr_pages); 565 WARN_ON(!nr_pages);
522 sg_init_table(op->r_sg, nr_pages); 566 op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
523 567
524 if (op->r_notify || op->r_recverr) { 568 if (op->op_notify || op->op_recverr) {
525 /* We allocate an uninitialized notifier here, because 569 /* We allocate an uninitialized notifier here, because
526 * we don't want to do that in the completion handler. We 570 * we don't want to do that in the completion handler. We
527 * would have to use GFP_ATOMIC there, and don't want to deal 571 * would have to use GFP_ATOMIC there, and don't want to deal
528 * with failed allocations. 572 * with failed allocations.
529 */ 573 */
530 op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL); 574 op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
531 if (!op->r_notifier) { 575 if (!op->op_notifier) {
532 ret = -ENOMEM; 576 ret = -ENOMEM;
533 goto out; 577 goto out;
534 } 578 }
535 op->r_notifier->n_user_token = args->user_token; 579 op->op_notifier->n_user_token = args->user_token;
536 op->r_notifier->n_status = RDS_RDMA_SUCCESS; 580 op->op_notifier->n_status = RDS_RDMA_SUCCESS;
537 } 581 }
538 582
539 /* The cookie contains the R_Key of the remote memory region, and 583 /* The cookie contains the R_Key of the remote memory region, and
@@ -543,15 +587,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
543 * destination address (which is really an offset into the MR) 587 * destination address (which is really an offset into the MR)
544 * FIXME: We may want to move this into ib_rdma.c 588 * FIXME: We may want to move this into ib_rdma.c
545 */ 589 */
546 op->r_key = rds_rdma_cookie_key(args->cookie); 590 op->op_rkey = rds_rdma_cookie_key(args->cookie);
547 op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie); 591 op->op_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
548 592
549 nr_bytes = 0; 593 nr_bytes = 0;
550 594
551 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n", 595 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
552 (unsigned long long)args->nr_local, 596 (unsigned long long)args->nr_local,
553 (unsigned long long)args->remote_vec.addr, 597 (unsigned long long)args->remote_vec.addr,
554 op->r_key); 598 op->op_rkey);
599
600 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
555 601
556 for (i = 0; i < args->nr_local; i++) { 602 for (i = 0; i < args->nr_local; i++) {
557 if (copy_from_user(&vec, &local_vec[i], 603 if (copy_from_user(&vec, &local_vec[i],
@@ -569,15 +615,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
569 rs->rs_user_addr = vec.addr; 615 rs->rs_user_addr = vec.addr;
570 rs->rs_user_bytes = vec.bytes; 616 rs->rs_user_bytes = vec.bytes;
571 617
572 /* did the user change the vec under us? */
573 if (nr > max_pages || op->r_nents + nr > nr_pages) {
574 ret = -EINVAL;
575 goto out;
576 }
577 /* If it's a WRITE operation, we want to pin the pages for reading. 618 /* If it's a WRITE operation, we want to pin the pages for reading.
578 * If it's a READ operation, we need to pin the pages for writing. 619 * If it's a READ operation, we need to pin the pages for writing.
579 */ 620 */
580 ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write); 621 ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
581 if (ret < 0) 622 if (ret < 0)
582 goto out; 623 goto out;
583 624
@@ -588,8 +629,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
588 629
589 for (j = 0; j < nr; j++) { 630 for (j = 0; j < nr; j++) {
590 unsigned int offset = vec.addr & ~PAGE_MASK; 631 unsigned int offset = vec.addr & ~PAGE_MASK;
632 struct scatterlist *sg;
591 633
592 sg = &op->r_sg[op->r_nents + j]; 634 sg = &op->op_sg[op->op_nents + j];
593 sg_set_page(sg, pages[j], 635 sg_set_page(sg, pages[j],
594 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), 636 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
595 offset); 637 offset);
@@ -601,10 +643,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
601 vec.bytes -= sg->length; 643 vec.bytes -= sg->length;
602 } 644 }
603 645
604 op->r_nents += nr; 646 op->op_nents += nr;
605 } 647 }
606 648
607
608 if (nr_bytes > args->remote_vec.bytes) { 649 if (nr_bytes > args->remote_vec.bytes) {
609 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n", 650 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
610 nr_bytes, 651 nr_bytes,
@@ -612,38 +653,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
612 ret = -EINVAL; 653 ret = -EINVAL;
613 goto out; 654 goto out;
614 } 655 }
615 op->r_bytes = nr_bytes; 656 op->op_bytes = nr_bytes;
616 657
617 ret = 0; 658 ret = 0;
618out: 659out:
619 kfree(pages); 660 kfree(pages);
620 if (ret) { 661 if (ret)
621 if (op) 662 rds_rdma_free_op(op);
622 rds_rdma_free_op(op);
623 op = ERR_PTR(ret);
624 }
625 return op;
626}
627
628/*
629 * The application asks for a RDMA transfer.
630 * Extract all arguments and set up the rdma_op
631 */
632int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
633 struct cmsghdr *cmsg)
634{
635 struct rds_rdma_op *op;
636
637 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
638 rm->m_rdma_op != NULL)
639 return -EINVAL;
640 663
641 op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
642 if (IS_ERR(op))
643 return PTR_ERR(op);
644 rds_stats_inc(s_send_rdma); 664 rds_stats_inc(s_send_rdma);
645 rm->m_rdma_op = op; 665
646 return 0; 666 return ret;
647} 667}
648 668
649/* 669/*
@@ -673,7 +693,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
673 693
674 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 694 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
675 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 695 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
676 if (mr == NULL) 696 if (!mr)
677 err = -EINVAL; /* invalid r_key */ 697 err = -EINVAL; /* invalid r_key */
678 else 698 else
679 atomic_inc(&mr->r_refcount); 699 atomic_inc(&mr->r_refcount);
@@ -681,7 +701,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
681 701
682 if (mr) { 702 if (mr) {
683 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); 703 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
684 rm->m_rdma_mr = mr; 704 rm->rdma.op_rdma_mr = mr;
685 } 705 }
686 return err; 706 return err;
687} 707}
@@ -699,5 +719,98 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
699 rm->m_rdma_cookie != 0) 719 rm->m_rdma_cookie != 0)
700 return -EINVAL; 720 return -EINVAL;
701 721
702 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr); 722 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
723}
724
725/*
726 * Fill in rds_message for an atomic request.
727 */
728int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
729 struct cmsghdr *cmsg)
730{
731 struct page *page = NULL;
732 struct rds_atomic_args *args;
733 int ret = 0;
734
735 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
736 || rm->atomic.op_active)
737 return -EINVAL;
738
739 args = CMSG_DATA(cmsg);
740
741 /* Nonmasked & masked cmsg ops converted to masked hw ops */
742 switch (cmsg->cmsg_type) {
743 case RDS_CMSG_ATOMIC_FADD:
744 rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
745 rm->atomic.op_m_fadd.add = args->fadd.add;
746 rm->atomic.op_m_fadd.nocarry_mask = 0;
747 break;
748 case RDS_CMSG_MASKED_ATOMIC_FADD:
749 rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
750 rm->atomic.op_m_fadd.add = args->m_fadd.add;
751 rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask;
752 break;
753 case RDS_CMSG_ATOMIC_CSWP:
754 rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
755 rm->atomic.op_m_cswp.compare = args->cswp.compare;
756 rm->atomic.op_m_cswp.swap = args->cswp.swap;
757 rm->atomic.op_m_cswp.compare_mask = ~0;
758 rm->atomic.op_m_cswp.swap_mask = ~0;
759 break;
760 case RDS_CMSG_MASKED_ATOMIC_CSWP:
761 rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
762 rm->atomic.op_m_cswp.compare = args->m_cswp.compare;
763 rm->atomic.op_m_cswp.swap = args->m_cswp.swap;
764 rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask;
765 rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask;
766 break;
767 default:
768 BUG(); /* should never happen */
769 }
770
771 rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
772 rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
773 rm->atomic.op_active = 1;
774 rm->atomic.op_recverr = rs->rs_recverr;
775 rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
776
777 /* verify 8 byte-aligned */
778 if (args->local_addr & 0x7) {
779 ret = -EFAULT;
780 goto err;
781 }
782
783 ret = rds_pin_pages(args->local_addr, 1, &page, 1);
784 if (ret != 1)
785 goto err;
786 ret = 0;
787
788 sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
789
790 if (rm->atomic.op_notify || rm->atomic.op_recverr) {
791 /* We allocate an uninitialized notifier here, because
792 * we don't want to do that in the completion handler. We
793 * would have to use GFP_ATOMIC there, and don't want to deal
794 * with failed allocations.
795 */
796 rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
797 if (!rm->atomic.op_notifier) {
798 ret = -ENOMEM;
799 goto err;
800 }
801
802 rm->atomic.op_notifier->n_user_token = args->user_token;
803 rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
804 }
805
806 rm->atomic.op_rkey = rds_rdma_cookie_key(args->cookie);
807 rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
808
809 return ret;
810err:
811 if (page)
812 put_page(page);
813 kfree(rm->atomic.op_notifier);
814
815 return ret;
703} 816}
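Note: besides folding rds_rdma_prepare() into rds_cmsg_rdma_args() and switching to the embedded rm->rdma op, rdma.c gains rds_rdma_pages()/rds_rdma_extra_size() so sendmsg can size the message's scatterlist pool up front, and rds_cmsg_atomic() to parse the new atomic cmsgs. For the atomic target the code pins exactly one user page: the address must be 8-byte aligned, so the 8-byte operand can never straddle a page boundary. A sketch of that step with an assumed helper name; the error mapping is simplified relative to the real function:

/* Sketch of the single-page pin for an atomic target (hypothetical helper). */
static int pin_atomic_target(u64 local_addr, struct scatterlist *sg)
{
	struct page *page = NULL;
	int ret;

	if (local_addr & 0x7)			/* must be 8-byte aligned */
		return -EFAULT;

	ret = rds_pin_pages(local_addr, 1, &page, 1);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	sg_set_page(sg, page, 8, offset_in_page(local_addr));
	return 0;
}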
diff --git a/net/rds/rdma.h b/net/rds/rdma.h
deleted file mode 100644
index 909c39835a5d..000000000000
--- a/net/rds/rdma.h
+++ /dev/null
@@ -1,85 +0,0 @@
1#ifndef _RDS_RDMA_H
2#define _RDS_RDMA_H
3
4#include <linux/rbtree.h>
5#include <linux/spinlock.h>
6#include <linux/scatterlist.h>
7
8#include "rds.h"
9
10struct rds_mr {
11 struct rb_node r_rb_node;
12 atomic_t r_refcount;
13 u32 r_key;
14
15 /* A copy of the creation flags */
16 unsigned int r_use_once:1;
17 unsigned int r_invalidate:1;
18 unsigned int r_write:1;
19
20 /* This is for RDS_MR_DEAD.
21 * It would be nice & consistent to make this part of the above
22 * bit field here, but we need to use test_and_set_bit.
23 */
24 unsigned long r_state;
25 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
26 struct rds_transport *r_trans;
27 void *r_trans_private;
28};
29
30/* Flags for mr->r_state */
31#define RDS_MR_DEAD 0
32
33struct rds_rdma_op {
34 u32 r_key;
35 u64 r_remote_addr;
36 unsigned int r_write:1;
37 unsigned int r_fence:1;
38 unsigned int r_notify:1;
39 unsigned int r_recverr:1;
40 unsigned int r_mapped:1;
41 struct rds_notifier *r_notifier;
42 unsigned int r_bytes;
43 unsigned int r_nents;
44 unsigned int r_count;
45 struct scatterlist r_sg[0];
46};
47
48static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
49{
50 return r_key | (((u64) offset) << 32);
51}
52
53static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
54{
55 return cookie;
56}
57
58static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
59{
60 return cookie >> 32;
61}
62
63int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
64int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
65int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
66void rds_rdma_drop_keys(struct rds_sock *rs);
67int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
68 struct cmsghdr *cmsg);
69int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
70 struct cmsghdr *cmsg);
71int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
72 struct cmsghdr *cmsg);
73int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
74 struct cmsghdr *cmsg);
75void rds_rdma_free_op(struct rds_rdma_op *ro);
76void rds_rdma_send_complete(struct rds_message *rm, int);
77
78extern void __rds_put_mr_final(struct rds_mr *mr);
79static inline void rds_mr_put(struct rds_mr *mr)
80{
81 if (atomic_dec_and_test(&mr->r_refcount))
82 __rds_put_mr_final(mr);
83}
84
85#endif
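The cookie helpers being deleted here move verbatim into rds.h later in this patch. They pack an R_Key and a byte offset into one 64-bit cookie: key in the low 32 bits, offset in the high 32. A tiny standalone round-trip check makes the encoding obvious:

#include <assert.h>
#include <stdint.h>

typedef uint64_t rds_rdma_cookie_t;

static rds_rdma_cookie_t rds_rdma_make_cookie(uint32_t r_key, uint32_t offset)
{
	return r_key | (((uint64_t) offset) << 32);	/* key low, offset high */
}

static uint32_t rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
{
	return cookie;			/* truncation keeps the low 32 bits */
}

static uint32_t rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
{
	return cookie >> 32;
}

int main(void)
{
	rds_rdma_cookie_t c = rds_rdma_make_cookie(0xdeadbeef, 4096);

	assert(rds_rdma_cookie_key(c) == 0xdeadbeef);
	assert(rds_rdma_cookie_offset(c) == 4096);
	return 0;
}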
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index e599ba2f950d..e6ed10aee190 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -36,6 +36,34 @@
36 36
37static struct rdma_cm_id *rds_rdma_listen_id; 37static struct rdma_cm_id *rds_rdma_listen_id;
38 38
39static char *rds_cm_event_strings[] = {
40#define RDS_CM_EVENT_STRING(foo) \
41 [RDMA_CM_EVENT_##foo] = __stringify(RDMA_CM_EVENT_##foo)
42 RDS_CM_EVENT_STRING(ADDR_RESOLVED),
43 RDS_CM_EVENT_STRING(ADDR_ERROR),
44 RDS_CM_EVENT_STRING(ROUTE_RESOLVED),
45 RDS_CM_EVENT_STRING(ROUTE_ERROR),
46 RDS_CM_EVENT_STRING(CONNECT_REQUEST),
47 RDS_CM_EVENT_STRING(CONNECT_RESPONSE),
48 RDS_CM_EVENT_STRING(CONNECT_ERROR),
49 RDS_CM_EVENT_STRING(UNREACHABLE),
50 RDS_CM_EVENT_STRING(REJECTED),
51 RDS_CM_EVENT_STRING(ESTABLISHED),
52 RDS_CM_EVENT_STRING(DISCONNECTED),
53 RDS_CM_EVENT_STRING(DEVICE_REMOVAL),
54 RDS_CM_EVENT_STRING(MULTICAST_JOIN),
55 RDS_CM_EVENT_STRING(MULTICAST_ERROR),
56 RDS_CM_EVENT_STRING(ADDR_CHANGE),
57 RDS_CM_EVENT_STRING(TIMEWAIT_EXIT),
58#undef RDS_CM_EVENT_STRING
59};
60
61static char *rds_cm_event_str(enum rdma_cm_event_type type)
62{
63 return rds_str_array(rds_cm_event_strings,
64 ARRAY_SIZE(rds_cm_event_strings), type);
65};
66
39int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, 67int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
40 struct rdma_cm_event *event) 68 struct rdma_cm_event *event)
41{ 69{
@@ -44,8 +72,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
44 struct rds_transport *trans; 72 struct rds_transport *trans;
45 int ret = 0; 73 int ret = 0;
46 74
47 rdsdebug("conn %p id %p handling event %u\n", conn, cm_id, 75 rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
48 event->event); 76 event->event, rds_cm_event_str(event->event));
49 77
50 if (cm_id->device->node_type == RDMA_NODE_RNIC) 78 if (cm_id->device->node_type == RDMA_NODE_RNIC)
51 trans = &rds_iw_transport; 79 trans = &rds_iw_transport;
@@ -109,7 +137,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
109 137
110 default: 138 default:
111 /* things like device disconnect? */ 139 /* things like device disconnect? */
112 printk(KERN_ERR "RDS: unknown event %u!\n", event->event); 140 printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
141 event->event, rds_cm_event_str(event->event));
113 break; 142 break;
114 } 143 }
115 144
@@ -117,12 +146,13 @@ out:
117 if (conn) 146 if (conn)
118 mutex_unlock(&conn->c_cm_lock); 147 mutex_unlock(&conn->c_cm_lock);
119 148
120 rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret); 149 rdsdebug("id %p event %u (%s) handling ret %d\n", cm_id, event->event,
150 rds_cm_event_str(event->event), ret);
121 151
122 return ret; 152 return ret;
123} 153}
124 154
125static int __init rds_rdma_listen_init(void) 155static int rds_rdma_listen_init(void)
126{ 156{
127 struct sockaddr_in sin; 157 struct sockaddr_in sin;
128 struct rdma_cm_id *cm_id; 158 struct rdma_cm_id *cm_id;
@@ -177,7 +207,7 @@ static void rds_rdma_listen_stop(void)
177 } 207 }
178} 208}
179 209
180int __init rds_rdma_init(void) 210int rds_rdma_init(void)
181{ 211{
182 int ret; 212 int ret;
183 213
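rds_cm_event_str() leans on rds_str_array(), which this patch declares in rds.h but whose body lives elsewhere (af_rds.c). A bounds-checked lookup of roughly this shape is all that is needed; this is a sketch, not the actual implementation:

static char *rds_str_array(char **array, size_t elements, size_t index)
{
	if (index < elements && array[index])
		return array[index];
	return "unknown";	/* out of range, or a hole in the sparse table */
}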
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c224b5bb3ba9..8103dcf8b976 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -80,6 +80,7 @@ enum {
80/* Bits for c_flags */ 80/* Bits for c_flags */
81#define RDS_LL_SEND_FULL 0 81#define RDS_LL_SEND_FULL 0
82#define RDS_RECONNECT_PENDING 1 82#define RDS_RECONNECT_PENDING 1
83#define RDS_IN_XMIT 2
83 84
84struct rds_connection { 85struct rds_connection {
85 struct hlist_node c_hash_node; 86 struct hlist_node c_hash_node;
@@ -91,12 +92,13 @@ struct rds_connection {
91 struct rds_cong_map *c_lcong; 92 struct rds_cong_map *c_lcong;
92 struct rds_cong_map *c_fcong; 93 struct rds_cong_map *c_fcong;
93 94
94 struct mutex c_send_lock; /* protect send ring */
95 struct rds_message *c_xmit_rm; 95 struct rds_message *c_xmit_rm;
96 unsigned long c_xmit_sg; 96 unsigned long c_xmit_sg;
97 unsigned int c_xmit_hdr_off; 97 unsigned int c_xmit_hdr_off;
98 unsigned int c_xmit_data_off; 98 unsigned int c_xmit_data_off;
99 unsigned int c_xmit_atomic_sent;
99 unsigned int c_xmit_rdma_sent; 100 unsigned int c_xmit_rdma_sent;
101 unsigned int c_xmit_data_sent;
100 102
101 spinlock_t c_lock; /* protect msg queues */ 103 spinlock_t c_lock; /* protect msg queues */
102 u64 c_next_tx_seq; 104 u64 c_next_tx_seq;
@@ -116,11 +118,10 @@ struct rds_connection {
116 struct delayed_work c_conn_w; 118 struct delayed_work c_conn_w;
117 struct work_struct c_down_w; 119 struct work_struct c_down_w;
118 struct mutex c_cm_lock; /* protect conn state & cm */ 120 struct mutex c_cm_lock; /* protect conn state & cm */
121 wait_queue_head_t c_waitq;
119 122
120 struct list_head c_map_item; 123 struct list_head c_map_item;
121 unsigned long c_map_queued; 124 unsigned long c_map_queued;
122 unsigned long c_map_offset;
123 unsigned long c_map_bytes;
124 125
125 unsigned int c_unacked_packets; 126 unsigned int c_unacked_packets;
126 unsigned int c_unacked_bytes; 127 unsigned int c_unacked_bytes;
@@ -206,6 +207,48 @@ struct rds_incoming {
206 rds_rdma_cookie_t i_rdma_cookie; 207 rds_rdma_cookie_t i_rdma_cookie;
207}; 208};
208 209
210struct rds_mr {
211 struct rb_node r_rb_node;
212 atomic_t r_refcount;
213 u32 r_key;
214
215 /* A copy of the creation flags */
216 unsigned int r_use_once:1;
217 unsigned int r_invalidate:1;
218 unsigned int r_write:1;
219
220 /* This is for RDS_MR_DEAD.
221 * It would be nice & consistent to make this part of the above
222 * bit field here, but we need to use test_and_set_bit.
223 */
224 unsigned long r_state;
225 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
226 struct rds_transport *r_trans;
227 void *r_trans_private;
228};
229
230/* Flags for mr->r_state */
231#define RDS_MR_DEAD 0
232
233static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
234{
235 return r_key | (((u64) offset) << 32);
236}
237
238static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
239{
240 return cookie;
241}
242
243static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
244{
245 return cookie >> 32;
246}
247
248/* atomic operation types */
249#define RDS_ATOMIC_TYPE_CSWP 0
250#define RDS_ATOMIC_TYPE_FADD 1
251
209/* 252/*
210 * m_sock_item and m_conn_item are on lists that are serialized under 253 * m_sock_item and m_conn_item are on lists that are serialized under
211 * conn->c_lock. m_sock_item has additional meaning in that once it is empty 254 * conn->c_lock. m_sock_item has additional meaning in that once it is empty
@@ -258,13 +301,71 @@ struct rds_message {
258 * -> rs->rs_lock 301 * -> rs->rs_lock
259 */ 302 */
260 spinlock_t m_rs_lock; 303 spinlock_t m_rs_lock;
304 wait_queue_head_t m_flush_wait;
305
261 struct rds_sock *m_rs; 306 struct rds_sock *m_rs;
262 struct rds_rdma_op *m_rdma_op; 307
308 /* cookie to send to remote, in rds header */
263 rds_rdma_cookie_t m_rdma_cookie; 309 rds_rdma_cookie_t m_rdma_cookie;
264 struct rds_mr *m_rdma_mr; 310
265 unsigned int m_nents; 311 unsigned int m_used_sgs;
266 unsigned int m_count; 312 unsigned int m_total_sgs;
267 struct scatterlist m_sg[0]; 313
314 void *m_final_op;
315
316 struct {
317 struct rm_atomic_op {
318 int op_type;
319 union {
320 struct {
321 uint64_t compare;
322 uint64_t swap;
323 uint64_t compare_mask;
324 uint64_t swap_mask;
325 } op_m_cswp;
326 struct {
327 uint64_t add;
328 uint64_t nocarry_mask;
329 } op_m_fadd;
330 };
331
332 u32 op_rkey;
333 u64 op_remote_addr;
334 unsigned int op_notify:1;
335 unsigned int op_recverr:1;
336 unsigned int op_mapped:1;
337 unsigned int op_silent:1;
338 unsigned int op_active:1;
339 struct scatterlist *op_sg;
340 struct rds_notifier *op_notifier;
341
342 struct rds_mr *op_rdma_mr;
343 } atomic;
344 struct rm_rdma_op {
345 u32 op_rkey;
346 u64 op_remote_addr;
347 unsigned int op_write:1;
348 unsigned int op_fence:1;
349 unsigned int op_notify:1;
350 unsigned int op_recverr:1;
351 unsigned int op_mapped:1;
352 unsigned int op_silent:1;
353 unsigned int op_active:1;
354 unsigned int op_bytes;
355 unsigned int op_nents;
356 unsigned int op_count;
357 struct scatterlist *op_sg;
358 struct rds_notifier *op_notifier;
359
360 struct rds_mr *op_rdma_mr;
361 } rdma;
362 struct rm_data_op {
363 unsigned int op_active:1;
364 unsigned int op_nents;
365 unsigned int op_count;
366 struct scatterlist *op_sg;
367 } data;
368 };
268}; 369};
269 370
270/* 371/*
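With this reorganisation a single rds_message can carry an RDMA op, an atomic op and a data op simultaneously; each is flagged by its own op_active bit and owns a slice of the one up-front scatterlist allocation tracked by m_used_sgs/m_total_sgs. Purely as an illustration of how a transport or debug path can reason about a message from those bits:

/* Illustrative only: count the active ops on a message under the new layout. */
static int rds_rm_active_ops(struct rds_message *rm)
{
	int n = 0;

	if (rm->rdma.op_active)		/* RDMA described by rm->rdma.op_sg */
		n++;
	if (rm->atomic.op_active)	/* one 8-byte slot in rm->atomic.op_sg */
		n++;
	if (rm->data.op_active)		/* immediate data in rm->data.op_sg */
		n++;
	return n;
}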
@@ -305,10 +406,6 @@ struct rds_notifier {
305 * transport is responsible for other serialization, including 406 * transport is responsible for other serialization, including
306 * rds_recv_incoming(). This is called in process context but 407 * rds_recv_incoming(). This is called in process context but
307 * should try hard not to block. 408 * should try hard not to block.
308 *
309 * @xmit_cong_map: This asks the transport to send the local bitmap down the
310 * given connection. XXX get a better story about the bitmap
311 * flag and header.
312 */ 409 */
313 410
314#define RDS_TRANS_IB 0 411#define RDS_TRANS_IB 0
@@ -332,13 +429,11 @@ struct rds_transport {
332 void (*xmit_complete)(struct rds_connection *conn); 429 void (*xmit_complete)(struct rds_connection *conn);
333 int (*xmit)(struct rds_connection *conn, struct rds_message *rm, 430 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
334 unsigned int hdr_off, unsigned int sg, unsigned int off); 431 unsigned int hdr_off, unsigned int sg, unsigned int off);
335 int (*xmit_cong_map)(struct rds_connection *conn, 432 int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
336 struct rds_cong_map *map, unsigned long offset); 433 int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
337 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
338 int (*recv)(struct rds_connection *conn); 434 int (*recv)(struct rds_connection *conn);
339 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, 435 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
340 size_t size); 436 size_t size);
341 void (*inc_purge)(struct rds_incoming *inc);
342 void (*inc_free)(struct rds_incoming *inc); 437 void (*inc_free)(struct rds_incoming *inc);
343 438
344 int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 439 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
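A transport advertises atomic support by filling in the new xmit_atomic hook alongside xmit_rdma; rds_sendmsg() (see the send.c hunks below) rejects atomic ops when the hook is NULL. Sketched with placeholder function names rather than the real IB/iWARP symbols, an ops table would now look like:

static struct rds_transport my_transport = {
	/* ... */
	.xmit		= my_xmit,		/* header + data payload */
	.xmit_rdma	= my_xmit_rdma,		/* sends the whole rm_rdma_op or none of it */
	.xmit_atomic	= my_xmit_atomic,	/* likewise for rm_atomic_op */
	/* .xmit_cong_map is gone: cong updates now flow as ordinary data messages */
};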
@@ -367,17 +462,11 @@ struct rds_sock {
367 * bound_addr used for both incoming and outgoing, no INADDR_ANY 462 * bound_addr used for both incoming and outgoing, no INADDR_ANY
368 * support. 463 * support.
369 */ 464 */
370 struct rb_node rs_bound_node; 465 struct hlist_node rs_bound_node;
371 __be32 rs_bound_addr; 466 __be32 rs_bound_addr;
372 __be32 rs_conn_addr; 467 __be32 rs_conn_addr;
373 __be16 rs_bound_port; 468 __be16 rs_bound_port;
374 __be16 rs_conn_port; 469 __be16 rs_conn_port;
375
376 /*
377 * This is only used to communicate the transport between bind and
378 * initiating connections. All other trans use is referenced through
379 * the connection.
380 */
381 struct rds_transport *rs_transport; 470 struct rds_transport *rs_transport;
382 471
383 /* 472 /*
@@ -466,8 +555,8 @@ struct rds_statistics {
466 uint64_t s_recv_ping; 555 uint64_t s_recv_ping;
467 uint64_t s_send_queue_empty; 556 uint64_t s_send_queue_empty;
468 uint64_t s_send_queue_full; 557 uint64_t s_send_queue_full;
469 uint64_t s_send_sem_contention; 558 uint64_t s_send_lock_contention;
470 uint64_t s_send_sem_queue_raced; 559 uint64_t s_send_lock_queue_raced;
471 uint64_t s_send_immediate_retry; 560 uint64_t s_send_immediate_retry;
472 uint64_t s_send_delayed_retry; 561 uint64_t s_send_delayed_retry;
473 uint64_t s_send_drop_acked; 562 uint64_t s_send_drop_acked;
@@ -487,6 +576,7 @@ struct rds_statistics {
487}; 576};
488 577
489/* af_rds.c */ 578/* af_rds.c */
579char *rds_str_array(char **array, size_t elements, size_t index);
490void rds_sock_addref(struct rds_sock *rs); 580void rds_sock_addref(struct rds_sock *rs);
491void rds_sock_put(struct rds_sock *rs); 581void rds_sock_put(struct rds_sock *rs);
492void rds_wake_sk_sleep(struct rds_sock *rs); 582void rds_wake_sk_sleep(struct rds_sock *rs);
@@ -521,15 +611,17 @@ void rds_cong_exit(void);
521struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); 611struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
522 612
523/* conn.c */ 613/* conn.c */
524int __init rds_conn_init(void); 614int rds_conn_init(void);
525void rds_conn_exit(void); 615void rds_conn_exit(void);
526struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, 616struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
527 struct rds_transport *trans, gfp_t gfp); 617 struct rds_transport *trans, gfp_t gfp);
528struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, 618struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
529 struct rds_transport *trans, gfp_t gfp); 619 struct rds_transport *trans, gfp_t gfp);
620void rds_conn_shutdown(struct rds_connection *conn);
530void rds_conn_destroy(struct rds_connection *conn); 621void rds_conn_destroy(struct rds_connection *conn);
531void rds_conn_reset(struct rds_connection *conn); 622void rds_conn_reset(struct rds_connection *conn);
532void rds_conn_drop(struct rds_connection *conn); 623void rds_conn_drop(struct rds_connection *conn);
624void rds_conn_connect_if_down(struct rds_connection *conn);
533void rds_for_each_conn_info(struct socket *sock, unsigned int len, 625void rds_for_each_conn_info(struct socket *sock, unsigned int len,
534 struct rds_info_iterator *iter, 626 struct rds_info_iterator *iter,
535 struct rds_info_lengths *lens, 627 struct rds_info_lengths *lens,
@@ -566,7 +658,8 @@ rds_conn_connecting(struct rds_connection *conn)
566 658
567/* message.c */ 659/* message.c */
568struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); 660struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
569struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 661struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
662int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
570 size_t total_len); 663 size_t total_len);
571struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); 664struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
572void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 665void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
@@ -580,7 +673,6 @@ int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *vers
580int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); 673int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
581int rds_message_inc_copy_to_user(struct rds_incoming *inc, 674int rds_message_inc_copy_to_user(struct rds_incoming *inc,
582 struct iovec *first_iov, size_t size); 675 struct iovec *first_iov, size_t size);
583void rds_message_inc_purge(struct rds_incoming *inc);
584void rds_message_inc_free(struct rds_incoming *inc); 676void rds_message_inc_free(struct rds_incoming *inc);
585void rds_message_addref(struct rds_message *rm); 677void rds_message_addref(struct rds_message *rm);
586void rds_message_put(struct rds_message *rm); 678void rds_message_put(struct rds_message *rm);
@@ -636,14 +728,39 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
636typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); 728typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
637void rds_send_drop_acked(struct rds_connection *conn, u64 ack, 729void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
638 is_acked_func is_acked); 730 is_acked_func is_acked);
639int rds_send_acked_before(struct rds_connection *conn, u64 seq);
640void rds_send_remove_from_sock(struct list_head *messages, int status); 731void rds_send_remove_from_sock(struct list_head *messages, int status);
641int rds_send_pong(struct rds_connection *conn, __be16 dport); 732int rds_send_pong(struct rds_connection *conn, __be16 dport);
642struct rds_message *rds_send_get_message(struct rds_connection *, 733struct rds_message *rds_send_get_message(struct rds_connection *,
643 struct rds_rdma_op *); 734 struct rm_rdma_op *);
644 735
645/* rdma.c */ 736/* rdma.c */
646void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); 737void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
738int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
739int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
740int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
741void rds_rdma_drop_keys(struct rds_sock *rs);
742int rds_rdma_extra_size(struct rds_rdma_args *args);
743int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
744 struct cmsghdr *cmsg);
745int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
746 struct cmsghdr *cmsg);
747int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
748 struct cmsghdr *cmsg);
749int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
750 struct cmsghdr *cmsg);
751void rds_rdma_free_op(struct rm_rdma_op *ro);
752void rds_atomic_free_op(struct rm_atomic_op *ao);
753void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
754void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
755int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
756 struct cmsghdr *cmsg);
757
758extern void __rds_put_mr_final(struct rds_mr *mr);
759static inline void rds_mr_put(struct rds_mr *mr)
760{
761 if (atomic_dec_and_test(&mr->r_refcount))
762 __rds_put_mr_final(mr);
763}
647 764
648/* stats.c */ 765/* stats.c */
649DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); 766DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
@@ -657,14 +774,14 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
657 put_cpu(); \ 774 put_cpu(); \
658} while (0) 775} while (0)
659#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) 776#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
660int __init rds_stats_init(void); 777int rds_stats_init(void);
661void rds_stats_exit(void); 778void rds_stats_exit(void);
662void rds_stats_info_copy(struct rds_info_iterator *iter, 779void rds_stats_info_copy(struct rds_info_iterator *iter,
663 uint64_t *values, const char *const *names, 780 uint64_t *values, const char *const *names,
664 size_t nr); 781 size_t nr);
665 782
666/* sysctl.c */ 783/* sysctl.c */
667int __init rds_sysctl_init(void); 784int rds_sysctl_init(void);
668void rds_sysctl_exit(void); 785void rds_sysctl_exit(void);
669extern unsigned long rds_sysctl_sndbuf_min; 786extern unsigned long rds_sysctl_sndbuf_min;
670extern unsigned long rds_sysctl_sndbuf_default; 787extern unsigned long rds_sysctl_sndbuf_default;
@@ -678,9 +795,10 @@ extern unsigned long rds_sysctl_trace_flags;
678extern unsigned int rds_sysctl_trace_level; 795extern unsigned int rds_sysctl_trace_level;
679 796
680/* threads.c */ 797/* threads.c */
681int __init rds_threads_init(void); 798int rds_threads_init(void);
682void rds_threads_exit(void); 799void rds_threads_exit(void);
683extern struct workqueue_struct *rds_wq; 800extern struct workqueue_struct *rds_wq;
801void rds_queue_reconnect(struct rds_connection *conn);
684void rds_connect_worker(struct work_struct *); 802void rds_connect_worker(struct work_struct *);
685void rds_shutdown_worker(struct work_struct *); 803void rds_shutdown_worker(struct work_struct *);
686void rds_send_worker(struct work_struct *); 804void rds_send_worker(struct work_struct *);
@@ -691,9 +809,10 @@ void rds_connect_complete(struct rds_connection *conn);
691int rds_trans_register(struct rds_transport *trans); 809int rds_trans_register(struct rds_transport *trans);
692void rds_trans_unregister(struct rds_transport *trans); 810void rds_trans_unregister(struct rds_transport *trans);
693struct rds_transport *rds_trans_get_preferred(__be32 addr); 811struct rds_transport *rds_trans_get_preferred(__be32 addr);
812void rds_trans_put(struct rds_transport *trans);
694unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, 813unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
695 unsigned int avail); 814 unsigned int avail);
696int __init rds_trans_init(void); 815int rds_trans_init(void);
697void rds_trans_exit(void); 816void rds_trans_exit(void);
698 817
699#endif 818#endif
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c93588c2d553..68800f02aa30 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -36,7 +36,6 @@
36#include <linux/in.h> 36#include <linux/in.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40 39
41void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 40void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
42 __be32 saddr) 41 __be32 saddr)
@@ -210,7 +209,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
210 } 209 }
211 210
212 rs = rds_find_bound(daddr, inc->i_hdr.h_dport); 211 rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
213 if (rs == NULL) { 212 if (!rs) {
214 rds_stats_inc(s_recv_drop_no_sock); 213 rds_stats_inc(s_recv_drop_no_sock);
215 goto out; 214 goto out;
216 } 215 }
@@ -251,7 +250,7 @@ static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc)
251{ 250{
252 unsigned long flags; 251 unsigned long flags;
253 252
254 if (*inc == NULL) { 253 if (!*inc) {
255 read_lock_irqsave(&rs->rs_recv_lock, flags); 254 read_lock_irqsave(&rs->rs_recv_lock, flags);
256 if (!list_empty(&rs->rs_recv_queue)) { 255 if (!list_empty(&rs->rs_recv_queue)) {
257 *inc = list_entry(rs->rs_recv_queue.next, 256 *inc = list_entry(rs->rs_recv_queue.next,
@@ -334,10 +333,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
334 333
335 if (msghdr) { 334 if (msghdr) {
336 cmsg.user_token = notifier->n_user_token; 335 cmsg.user_token = notifier->n_user_token;
337 cmsg.status = notifier->n_status; 336 cmsg.status = notifier->n_status;
338 337
339 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, 338 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS,
340 sizeof(cmsg), &cmsg); 339 sizeof(cmsg), &cmsg);
341 if (err) 340 if (err)
342 break; 341 break;
343 } 342 }
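On the userspace side of this notification path, each queued notifier surfaces as an RDS_CMSG_RDMA_STATUS control message on recvmsg(), carrying the user_token supplied at send time plus a status code. A sketch of draining them (struct rds_rdma_notify and the constants are assumed from linux/rds.h):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/rds.h>	/* struct rds_rdma_notify, RDS_CMSG_RDMA_STATUS (assumed) */

static void rds_drain_notifications(int fd)
{
	char data[256], cbuf[256];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg;
	struct cmsghdr *cmsg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	if (recvmsg(fd, &msg, MSG_DONTWAIT) < 0)
		return;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		struct rds_rdma_notify note;

		if (cmsg->cmsg_level != SOL_RDS ||
		    cmsg->cmsg_type != RDS_CMSG_RDMA_STATUS)
			continue;

		memcpy(&note, CMSG_DATA(cmsg), sizeof(note));
		printf("token %llu status %d\n",
		       (unsigned long long)note.user_token, (int)note.status);
	}
}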
diff --git a/net/rds/send.c b/net/rds/send.c
index 9c1c6bcaa6c9..9b951a0ab6b7 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -37,7 +37,6 @@
37#include <linux/list.h> 37#include <linux/list.h>
38 38
39#include "rds.h" 39#include "rds.h"
40#include "rdma.h"
41 40
42/* When transmitting messages in rds_send_xmit, we need to emerge from 41/* When transmitting messages in rds_send_xmit, we need to emerge from
43 * time to time and briefly release the CPU. Otherwise the softlock watchdog 42 * time to time and briefly release the CPU. Otherwise the softlock watchdog
@@ -54,7 +53,8 @@ module_param(send_batch_count, int, 0444);
54MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); 53MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
55 54
56/* 55/*
57 * Reset the send state. Caller must hold c_send_lock when calling here. 56 * Reset the send state. Callers must ensure that this doesn't race with
57 * rds_send_xmit().
58 */ 58 */
59void rds_send_reset(struct rds_connection *conn) 59void rds_send_reset(struct rds_connection *conn)
60{ 60{
@@ -62,18 +62,22 @@ void rds_send_reset(struct rds_connection *conn)
62 unsigned long flags; 62 unsigned long flags;
63 63
64 if (conn->c_xmit_rm) { 64 if (conn->c_xmit_rm) {
65 rm = conn->c_xmit_rm;
66 conn->c_xmit_rm = NULL;
65 /* Tell the user the RDMA op is no longer mapped by the 67 /* Tell the user the RDMA op is no longer mapped by the
66 * transport. This isn't entirely true (it's flushed out 68 * transport. This isn't entirely true (it's flushed out
67 * independently) but as the connection is down, there's 69 * independently) but as the connection is down, there's
68 * no ongoing RDMA to/from that memory */ 70 * no ongoing RDMA to/from that memory */
69 rds_message_unmapped(conn->c_xmit_rm); 71 rds_message_unmapped(rm);
70 rds_message_put(conn->c_xmit_rm); 72 rds_message_put(rm);
71 conn->c_xmit_rm = NULL;
72 } 73 }
74
73 conn->c_xmit_sg = 0; 75 conn->c_xmit_sg = 0;
74 conn->c_xmit_hdr_off = 0; 76 conn->c_xmit_hdr_off = 0;
75 conn->c_xmit_data_off = 0; 77 conn->c_xmit_data_off = 0;
78 conn->c_xmit_atomic_sent = 0;
76 conn->c_xmit_rdma_sent = 0; 79 conn->c_xmit_rdma_sent = 0;
80 conn->c_xmit_data_sent = 0;
77 81
78 conn->c_map_queued = 0; 82 conn->c_map_queued = 0;
79 83
@@ -90,6 +94,25 @@ void rds_send_reset(struct rds_connection *conn)
90 spin_unlock_irqrestore(&conn->c_lock, flags); 94 spin_unlock_irqrestore(&conn->c_lock, flags);
91} 95}
92 96
97static int acquire_in_xmit(struct rds_connection *conn)
98{
99 return test_and_set_bit(RDS_IN_XMIT, &conn->c_flags) == 0;
100}
101
102static void release_in_xmit(struct rds_connection *conn)
103{
104 clear_bit(RDS_IN_XMIT, &conn->c_flags);
105 smp_mb__after_clear_bit();
106 /*
107 * We don't use wait_on_bit()/wake_up_bit() because our waking is in a
108 * hot path and finding waiters is very rare. We don't want to walk
109 * the system-wide hashed waitqueue buckets in the fast path only to
110 * almost never find waiters.
111 */
112 if (waitqueue_active(&conn->c_waitq))
113 wake_up_all(&conn->c_waitq);
114}
115
93/* 116/*
94 * We're making the concious trade-off here to only send one message 117 * We're making the concious trade-off here to only send one message
95 * down the connection at a time. 118 * down the connection at a time.
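release_in_xmit() pairs clear_bit() with an explicit wake_up_all() rather than wake_up_bit(), for the reason the comment gives. The matching wait, presumably used by the new rds_conn_shutdown() path (outside this hunk) to make sure no transmitter is still inside rds_send_xmit(), would be the conventional counterpart on the new c_waitq:

/* Sketch of the waiting side: block until any in-flight transmitter is done. */
wait_event(conn->c_waitq,
	   !test_bit(RDS_IN_XMIT, &conn->c_flags));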
@@ -109,102 +132,69 @@ int rds_send_xmit(struct rds_connection *conn)
109 struct rds_message *rm; 132 struct rds_message *rm;
110 unsigned long flags; 133 unsigned long flags;
111 unsigned int tmp; 134 unsigned int tmp;
112 unsigned int send_quota = send_batch_count;
113 struct scatterlist *sg; 135 struct scatterlist *sg;
114 int ret = 0; 136 int ret = 0;
115 int was_empty = 0;
116 LIST_HEAD(to_be_dropped); 137 LIST_HEAD(to_be_dropped);
117 138
139restart:
140
118 /* 141 /*
119 * sendmsg calls here after having queued its message on the send 142 * sendmsg calls here after having queued its message on the send
120 * queue. We only have one task feeding the connection at a time. If 143 * queue. We only have one task feeding the connection at a time. If
121 * another thread is already feeding the queue then we back off. This 144 * another thread is already feeding the queue then we back off. This
122 * avoids blocking the caller and trading per-connection data between 145 * avoids blocking the caller and trading per-connection data between
123 * caches per message. 146 * caches per message.
124 *
125 * The sem holder will issue a retry if they notice that someone queued
126 * a message after they stopped walking the send queue but before they
127 * dropped the sem.
128 */ 147 */
129 if (!mutex_trylock(&conn->c_send_lock)) { 148 if (!acquire_in_xmit(conn)) {
130 rds_stats_inc(s_send_sem_contention); 149 rds_stats_inc(s_send_lock_contention);
131 ret = -ENOMEM; 150 ret = -ENOMEM;
132 goto out; 151 goto out;
133 } 152 }
134 153
154 /*
155 * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
156 * we do the opposite to avoid races.
157 */
158 if (!rds_conn_up(conn)) {
159 release_in_xmit(conn);
160 ret = 0;
161 goto out;
162 }
163
135 if (conn->c_trans->xmit_prepare) 164 if (conn->c_trans->xmit_prepare)
136 conn->c_trans->xmit_prepare(conn); 165 conn->c_trans->xmit_prepare(conn);
137 166
138 /* 167 /*
139 * spin trying to push headers and data down the connection until 168 * spin trying to push headers and data down the connection until
140 * the connection doens't make forward progress. 169 * the connection doesn't make forward progress.
141 */ 170 */
142 while (--send_quota) { 171 while (1) {
143 /*
144 * See if need to send a congestion map update if we're
145 * between sending messages. The send_sem protects our sole
146 * use of c_map_offset and _bytes.
147 * Note this is used only by transports that define a special
148 * xmit_cong_map function. For all others, we create allocate
149 * a cong_map message and treat it just like any other send.
150 */
151 if (conn->c_map_bytes) {
152 ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
153 conn->c_map_offset);
154 if (ret <= 0)
155 break;
156 172
157 conn->c_map_offset += ret;
158 conn->c_map_bytes -= ret;
159 if (conn->c_map_bytes)
160 continue;
161 }
162
163 /* If we're done sending the current message, clear the
164 * offset and S/G temporaries.
165 */
166 rm = conn->c_xmit_rm; 173 rm = conn->c_xmit_rm;
167 if (rm != NULL &&
168 conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
169 conn->c_xmit_sg == rm->m_nents) {
170 conn->c_xmit_rm = NULL;
171 conn->c_xmit_sg = 0;
172 conn->c_xmit_hdr_off = 0;
173 conn->c_xmit_data_off = 0;
174 conn->c_xmit_rdma_sent = 0;
175 174
176 /* Release the reference to the previous message. */ 175 /*
177 rds_message_put(rm); 176 * If between sending messages, we can send a pending congestion
178 rm = NULL; 177 * map update.
179 }
180
181 /* If we're asked to send a cong map update, do so.
182 */ 178 */
183 if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) { 179 if (!rm && test_and_clear_bit(0, &conn->c_map_queued)) {
184 if (conn->c_trans->xmit_cong_map != NULL) {
185 conn->c_map_offset = 0;
186 conn->c_map_bytes = sizeof(struct rds_header) +
187 RDS_CONG_MAP_BYTES;
188 continue;
189 }
190
191 rm = rds_cong_update_alloc(conn); 180 rm = rds_cong_update_alloc(conn);
192 if (IS_ERR(rm)) { 181 if (IS_ERR(rm)) {
193 ret = PTR_ERR(rm); 182 ret = PTR_ERR(rm);
194 break; 183 break;
195 } 184 }
185 rm->data.op_active = 1;
196 186
197 conn->c_xmit_rm = rm; 187 conn->c_xmit_rm = rm;
198 } 188 }
199 189
200 /* 190 /*
201 * Grab the next message from the send queue, if there is one. 191 * If not already working on one, grab the next message.
202 * 192 *
203 * c_xmit_rm holds a ref while we're sending this message down 193 * c_xmit_rm holds a ref while we're sending this message down
204 * the connction. We can use this ref while holding the 194 * the connction. We can use this ref while holding the
205 * send_sem.. rds_send_reset() is serialized with it. 195 * send_sem.. rds_send_reset() is serialized with it.
206 */ 196 */
207 if (rm == NULL) { 197 if (!rm) {
208 unsigned int len; 198 unsigned int len;
209 199
210 spin_lock_irqsave(&conn->c_lock, flags); 200 spin_lock_irqsave(&conn->c_lock, flags);
@@ -224,10 +214,8 @@ int rds_send_xmit(struct rds_connection *conn)
224 214
225 spin_unlock_irqrestore(&conn->c_lock, flags); 215 spin_unlock_irqrestore(&conn->c_lock, flags);
226 216
227 if (rm == NULL) { 217 if (!rm)
228 was_empty = 1;
229 break; 218 break;
230 }
231 219
232 /* Unfortunately, the way Infiniband deals with 220 /* Unfortunately, the way Infiniband deals with
233 * RDMA to a bad MR key is by moving the entire 221 * RDMA to a bad MR key is by moving the entire
@@ -236,13 +224,12 @@ int rds_send_xmit(struct rds_connection *conn)
236 * connection. 224 * connection.
237 * Therefore, we never retransmit messages with RDMA ops. 225 * Therefore, we never retransmit messages with RDMA ops.
238 */ 226 */
239 if (rm->m_rdma_op && 227 if (rm->rdma.op_active &&
240 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { 228 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
241 spin_lock_irqsave(&conn->c_lock, flags); 229 spin_lock_irqsave(&conn->c_lock, flags);
242 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 230 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
243 list_move(&rm->m_conn_item, &to_be_dropped); 231 list_move(&rm->m_conn_item, &to_be_dropped);
244 spin_unlock_irqrestore(&conn->c_lock, flags); 232 spin_unlock_irqrestore(&conn->c_lock, flags);
245 rds_message_put(rm);
246 continue; 233 continue;
247 } 234 }
248 235
@@ -263,23 +250,55 @@ int rds_send_xmit(struct rds_connection *conn)
263 conn->c_xmit_rm = rm; 250 conn->c_xmit_rm = rm;
264 } 251 }
265 252
266 /* 253 /* The transport either sends the whole rdma or none of it */
267 * Try and send an rdma message. Let's see if we can 254 if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) {
268 * keep this simple and require that the transport either 255 rm->m_final_op = &rm->rdma;
269 * send the whole rdma or none of it. 256 ret = conn->c_trans->xmit_rdma(conn, &rm->rdma);
270 */
271 if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
272 ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
273 if (ret) 257 if (ret)
274 break; 258 break;
275 conn->c_xmit_rdma_sent = 1; 259 conn->c_xmit_rdma_sent = 1;
260
276 /* The transport owns the mapped memory for now. 261 /* The transport owns the mapped memory for now.
277 * You can't unmap it while it's on the send queue */ 262 * You can't unmap it while it's on the send queue */
278 set_bit(RDS_MSG_MAPPED, &rm->m_flags); 263 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
279 } 264 }
280 265
281 if (conn->c_xmit_hdr_off < sizeof(struct rds_header) || 266 if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
282 conn->c_xmit_sg < rm->m_nents) { 267 rm->m_final_op = &rm->atomic;
268 ret = conn->c_trans->xmit_atomic(conn, &rm->atomic);
269 if (ret)
270 break;
271 conn->c_xmit_atomic_sent = 1;
272
273 /* The transport owns the mapped memory for now.
274 * You can't unmap it while it's on the send queue */
275 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
276 }
277
278 /*
279 * A number of cases require an RDS header to be sent
280 * even if there is no data.
281 * We permit 0-byte sends; rds-ping depends on this.
282 * However, if there are exclusively attached silent ops,
283 * we skip the hdr/data send, to enable silent operation.
284 */
285 if (rm->data.op_nents == 0) {
286 int ops_present;
287 int all_ops_are_silent = 1;
288
289 ops_present = (rm->atomic.op_active || rm->rdma.op_active);
290 if (rm->atomic.op_active && !rm->atomic.op_silent)
291 all_ops_are_silent = 0;
292 if (rm->rdma.op_active && !rm->rdma.op_silent)
293 all_ops_are_silent = 0;
294
295 if (ops_present && all_ops_are_silent
296 && !rm->m_rdma_cookie)
297 rm->data.op_active = 0;
298 }
299
300 if (rm->data.op_active && !conn->c_xmit_data_sent) {
301 rm->m_final_op = &rm->data;
283 ret = conn->c_trans->xmit(conn, rm, 302 ret = conn->c_trans->xmit(conn, rm,
284 conn->c_xmit_hdr_off, 303 conn->c_xmit_hdr_off,
285 conn->c_xmit_sg, 304 conn->c_xmit_sg,
@@ -295,7 +314,7 @@ int rds_send_xmit(struct rds_connection *conn)
295 ret -= tmp; 314 ret -= tmp;
296 } 315 }
297 316
298 sg = &rm->m_sg[conn->c_xmit_sg]; 317 sg = &rm->data.op_sg[conn->c_xmit_sg];
299 while (ret) { 318 while (ret) {
300 tmp = min_t(int, ret, sg->length - 319 tmp = min_t(int, ret, sg->length -
301 conn->c_xmit_data_off); 320 conn->c_xmit_data_off);
@@ -306,49 +325,63 @@ int rds_send_xmit(struct rds_connection *conn)
306 sg++; 325 sg++;
307 conn->c_xmit_sg++; 326 conn->c_xmit_sg++;
308 BUG_ON(ret != 0 && 327 BUG_ON(ret != 0 &&
309 conn->c_xmit_sg == rm->m_nents); 328 conn->c_xmit_sg == rm->data.op_nents);
310 } 329 }
311 } 330 }
331
332 if (conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
333 (conn->c_xmit_sg == rm->data.op_nents))
334 conn->c_xmit_data_sent = 1;
312 } 335 }
313 }
314 336
315 /* Nuke any messages we decided not to retransmit. */ 337 /*
316 if (!list_empty(&to_be_dropped)) 338 * A rm will only take multiple times through this loop
317 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); 339 * if there is a data op. Thus, if the data is sent (or there was
340 * none), then we're done with the rm.
341 */
342 if (!rm->data.op_active || conn->c_xmit_data_sent) {
343 conn->c_xmit_rm = NULL;
344 conn->c_xmit_sg = 0;
345 conn->c_xmit_hdr_off = 0;
346 conn->c_xmit_data_off = 0;
347 conn->c_xmit_rdma_sent = 0;
348 conn->c_xmit_atomic_sent = 0;
349 conn->c_xmit_data_sent = 0;
350
351 rds_message_put(rm);
352 }
353 }
318 354
319 if (conn->c_trans->xmit_complete) 355 if (conn->c_trans->xmit_complete)
320 conn->c_trans->xmit_complete(conn); 356 conn->c_trans->xmit_complete(conn);
321 357
322 /* 358 release_in_xmit(conn);
323 * We might be racing with another sender who queued a message but
324 * backed off on noticing that we held the c_send_lock. If we check
325 * for queued messages after dropping the sem then either we'll
326 * see the queued message or the queuer will get the sem. If we
327 * notice the queued message then we trigger an immediate retry.
328 *
329 * We need to be careful only to do this when we stopped processing
330 * the send queue because it was empty. It's the only way we
331 * stop processing the loop when the transport hasn't taken
332 * responsibility for forward progress.
333 */
334 mutex_unlock(&conn->c_send_lock);
335 359
336 if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) { 360 /* Nuke any messages we decided not to retransmit. */
337 /* We exhausted the send quota, but there's work left to 361 if (!list_empty(&to_be_dropped)) {
338 * do. Return and (re-)schedule the send worker. 362 /* irqs on here, so we can put(), unlike above */
339 */ 363 list_for_each_entry(rm, &to_be_dropped, m_conn_item)
340 ret = -EAGAIN; 364 rds_message_put(rm);
365 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
341 } 366 }
342 367
343 if (ret == 0 && was_empty) { 368 /*
344 /* A simple bit test would be way faster than taking the 369 * Other senders can queue a message after we last test the send queue
345 * spin lock */ 370 * but before we clear RDS_IN_XMIT. In that case they'd back off and
346 spin_lock_irqsave(&conn->c_lock, flags); 371 * not try and send their newly queued message. We need to check the
372 * send queue after having cleared RDS_IN_XMIT so that their message
373 * doesn't get stuck on the send queue.
374 *
375 * If the transport cannot continue (i.e ret != 0), then it must
376 * call us when more room is available, such as from the tx
377 * completion handler.
378 */
379 if (ret == 0) {
380 smp_mb();
347 if (!list_empty(&conn->c_send_queue)) { 381 if (!list_empty(&conn->c_send_queue)) {
348 rds_stats_inc(s_send_sem_queue_raced); 382 rds_stats_inc(s_send_lock_queue_raced);
349 ret = -EAGAIN; 383 goto restart;
350 } 384 }
351 spin_unlock_irqrestore(&conn->c_lock, flags);
352 } 385 }
353out: 386out:
354 return ret; 387 return ret;
@@ -376,52 +409,60 @@ static inline int rds_send_is_acked(struct rds_message *rm, u64 ack,
376} 409}
377 410
378/* 411/*
379 * Returns true if there are no messages on the send and retransmit queues 412 * This is pretty similar to what happens below in the ACK
380 * which have a sequence number greater than or equal to the given sequence 413 * handling code - except that we call here as soon as we get
381 * number. 414 * the IB send completion on the RDMA op and the accompanying
415 * message.
382 */ 416 */
383int rds_send_acked_before(struct rds_connection *conn, u64 seq) 417void rds_rdma_send_complete(struct rds_message *rm, int status)
384{ 418{
385 struct rds_message *rm, *tmp; 419 struct rds_sock *rs = NULL;
386 int ret = 1; 420 struct rm_rdma_op *ro;
421 struct rds_notifier *notifier;
422 unsigned long flags;
387 423
388 spin_lock(&conn->c_lock); 424 spin_lock_irqsave(&rm->m_rs_lock, flags);
389 425
390 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 426 ro = &rm->rdma;
391 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 427 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
392 ret = 0; 428 ro->op_active && ro->op_notify && ro->op_notifier) {
393 break; 429 notifier = ro->op_notifier;
394 } 430 rs = rm->m_rs;
431 sock_hold(rds_rs_to_sk(rs));
395 432
396 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 433 notifier->n_status = status;
397 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 434 spin_lock(&rs->rs_lock);
398 ret = 0; 435 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
399 break; 436 spin_unlock(&rs->rs_lock);
437
438 ro->op_notifier = NULL;
400 } 439 }
401 440
402 spin_unlock(&conn->c_lock); 441 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
403 442
404 return ret; 443 if (rs) {
444 rds_wake_sk_sleep(rs);
445 sock_put(rds_rs_to_sk(rs));
446 }
405} 447}
448EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
406 449
407/* 450/*
408 * This is pretty similar to what happens below in the ACK 451 * Just like above, except looks at atomic op
409 * handling code - except that we call here as soon as we get
410 * the IB send completion on the RDMA op and the accompanying
411 * message.
412 */ 452 */
413void rds_rdma_send_complete(struct rds_message *rm, int status) 453void rds_atomic_send_complete(struct rds_message *rm, int status)
414{ 454{
415 struct rds_sock *rs = NULL; 455 struct rds_sock *rs = NULL;
416 struct rds_rdma_op *ro; 456 struct rm_atomic_op *ao;
417 struct rds_notifier *notifier; 457 struct rds_notifier *notifier;
458 unsigned long flags;
418 459
419 spin_lock(&rm->m_rs_lock); 460 spin_lock_irqsave(&rm->m_rs_lock, flags);
420 461
421 ro = rm->m_rdma_op; 462 ao = &rm->atomic;
422 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && 463 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
423 ro && ro->r_notify && ro->r_notifier) { 464 && ao->op_active && ao->op_notify && ao->op_notifier) {
424 notifier = ro->r_notifier; 465 notifier = ao->op_notifier;
425 rs = rm->m_rs; 466 rs = rm->m_rs;
426 sock_hold(rds_rs_to_sk(rs)); 467 sock_hold(rds_rs_to_sk(rs));
427 468
@@ -430,17 +471,17 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
430 list_add_tail(&notifier->n_list, &rs->rs_notify_queue); 471 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
431 spin_unlock(&rs->rs_lock); 472 spin_unlock(&rs->rs_lock);
432 473
433 ro->r_notifier = NULL; 474 ao->op_notifier = NULL;
434 } 475 }
435 476
436 spin_unlock(&rm->m_rs_lock); 477 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
437 478
438 if (rs) { 479 if (rs) {
439 rds_wake_sk_sleep(rs); 480 rds_wake_sk_sleep(rs);
440 sock_put(rds_rs_to_sk(rs)); 481 sock_put(rds_rs_to_sk(rs));
441 } 482 }
442} 483}
443EXPORT_SYMBOL_GPL(rds_rdma_send_complete); 484EXPORT_SYMBOL_GPL(rds_atomic_send_complete);
444 485
445/* 486/*
446 * This is the same as rds_rdma_send_complete except we 487 * This is the same as rds_rdma_send_complete except we
@@ -448,15 +489,23 @@ EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
448 * socket, socket lock) and can just move the notifier. 489 * socket, socket lock) and can just move the notifier.
449 */ 490 */
450static inline void 491static inline void
451__rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status) 492__rds_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
452{ 493{
453 struct rds_rdma_op *ro; 494 struct rm_rdma_op *ro;
495 struct rm_atomic_op *ao;
496
497 ro = &rm->rdma;
498 if (ro->op_active && ro->op_notify && ro->op_notifier) {
499 ro->op_notifier->n_status = status;
500 list_add_tail(&ro->op_notifier->n_list, &rs->rs_notify_queue);
501 ro->op_notifier = NULL;
502 }
454 503
455 ro = rm->m_rdma_op; 504 ao = &rm->atomic;
456 if (ro && ro->r_notify && ro->r_notifier) { 505 if (ao->op_active && ao->op_notify && ao->op_notifier) {
457 ro->r_notifier->n_status = status; 506 ao->op_notifier->n_status = status;
458 list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue); 507 list_add_tail(&ao->op_notifier->n_list, &rs->rs_notify_queue);
459 ro->r_notifier = NULL; 508 ao->op_notifier = NULL;
460 } 509 }
461 510
462 /* No need to wake the app - caller does this */ 511 /* No need to wake the app - caller does this */
@@ -468,7 +517,7 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
468 * So speed is not an issue here. 517 * So speed is not an issue here.
469 */ 518 */
470struct rds_message *rds_send_get_message(struct rds_connection *conn, 519struct rds_message *rds_send_get_message(struct rds_connection *conn,
471 struct rds_rdma_op *op) 520 struct rm_rdma_op *op)
472{ 521{
473 struct rds_message *rm, *tmp, *found = NULL; 522 struct rds_message *rm, *tmp, *found = NULL;
474 unsigned long flags; 523 unsigned long flags;
@@ -476,7 +525,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
476 spin_lock_irqsave(&conn->c_lock, flags); 525 spin_lock_irqsave(&conn->c_lock, flags);
477 526
478 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 527 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
479 if (rm->m_rdma_op == op) { 528 if (&rm->rdma == op) {
480 atomic_inc(&rm->m_refcount); 529 atomic_inc(&rm->m_refcount);
481 found = rm; 530 found = rm;
482 goto out; 531 goto out;
@@ -484,7 +533,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
484 } 533 }
485 534
486 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 535 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
487 if (rm->m_rdma_op == op) { 536 if (&rm->rdma == op) {
488 atomic_inc(&rm->m_refcount); 537 atomic_inc(&rm->m_refcount);
489 found = rm; 538 found = rm;
490 break; 539 break;
@@ -544,19 +593,20 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
544 spin_lock(&rs->rs_lock); 593 spin_lock(&rs->rs_lock);
545 594
546 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { 595 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
547 struct rds_rdma_op *ro = rm->m_rdma_op; 596 struct rm_rdma_op *ro = &rm->rdma;
548 struct rds_notifier *notifier; 597 struct rds_notifier *notifier;
549 598
550 list_del_init(&rm->m_sock_item); 599 list_del_init(&rm->m_sock_item);
551 rds_send_sndbuf_remove(rs, rm); 600 rds_send_sndbuf_remove(rs, rm);
552 601
553 if (ro && ro->r_notifier && (status || ro->r_notify)) { 602 if (ro->op_active && ro->op_notifier &&
554 notifier = ro->r_notifier; 603 (ro->op_notify || (ro->op_recverr && status))) {
604 notifier = ro->op_notifier;
555 list_add_tail(&notifier->n_list, 605 list_add_tail(&notifier->n_list,
556 &rs->rs_notify_queue); 606 &rs->rs_notify_queue);
557 if (!notifier->n_status) 607 if (!notifier->n_status)
558 notifier->n_status = status; 608 notifier->n_status = status;
559 rm->m_rdma_op->r_notifier = NULL; 609 rm->rdma.op_notifier = NULL;
560 } 610 }
561 was_on_sock = 1; 611 was_on_sock = 1;
562 rm->m_rs = NULL; 612 rm->m_rs = NULL;
@@ -619,9 +669,8 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
619{ 669{
620 struct rds_message *rm, *tmp; 670 struct rds_message *rm, *tmp;
621 struct rds_connection *conn; 671 struct rds_connection *conn;
622 unsigned long flags, flags2; 672 unsigned long flags;
623 LIST_HEAD(list); 673 LIST_HEAD(list);
624 int wake = 0;
625 674
626 /* get all the messages we're dropping under the rs lock */ 675 /* get all the messages we're dropping under the rs lock */
627 spin_lock_irqsave(&rs->rs_lock, flags); 676 spin_lock_irqsave(&rs->rs_lock, flags);
@@ -631,59 +680,54 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
631 dest->sin_port != rm->m_inc.i_hdr.h_dport)) 680 dest->sin_port != rm->m_inc.i_hdr.h_dport))
632 continue; 681 continue;
633 682
634 wake = 1;
635 list_move(&rm->m_sock_item, &list); 683 list_move(&rm->m_sock_item, &list);
636 rds_send_sndbuf_remove(rs, rm); 684 rds_send_sndbuf_remove(rs, rm);
637 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); 685 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
638 } 686 }
639 687
640 /* order flag updates with the rs lock */ 688 /* order flag updates with the rs lock */
641 if (wake) 689 smp_mb__after_clear_bit();
642 smp_mb__after_clear_bit();
643 690
644 spin_unlock_irqrestore(&rs->rs_lock, flags); 691 spin_unlock_irqrestore(&rs->rs_lock, flags);
645 692
646 conn = NULL; 693 if (list_empty(&list))
694 return;
647 695
648 /* now remove the messages from the conn list as needed */ 696 /* Remove the messages from the conn */
649 list_for_each_entry(rm, &list, m_sock_item) { 697 list_for_each_entry(rm, &list, m_sock_item) {
650 /* We do this here rather than in the loop above, so that
651 * we don't have to nest m_rs_lock under rs->rs_lock */
652 spin_lock_irqsave(&rm->m_rs_lock, flags2);
653 /* If this is a RDMA operation, notify the app. */
654 spin_lock(&rs->rs_lock);
655 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
656 spin_unlock(&rs->rs_lock);
657 rm->m_rs = NULL;
658 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
659 698
699 conn = rm->m_inc.i_conn;
700
701 spin_lock_irqsave(&conn->c_lock, flags);
660 /* 702 /*
661 * If we see this flag cleared then we're *sure* that someone 703 * Maybe someone else beat us to removing rm from the conn.
662 * else beat us to removing it from the conn. If we race 704 * If we race with their flag update we'll get the lock and
663 * with their flag update we'll get the lock and then really 705 * then really see that the flag has been cleared.
664 * see that the flag has been cleared.
665 */ 706 */
666 if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 707 if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
708 spin_unlock_irqrestore(&conn->c_lock, flags);
667 continue; 709 continue;
668
669 if (conn != rm->m_inc.i_conn) {
670 if (conn)
671 spin_unlock_irqrestore(&conn->c_lock, flags);
672 conn = rm->m_inc.i_conn;
673 spin_lock_irqsave(&conn->c_lock, flags);
674 } 710 }
711 list_del_init(&rm->m_conn_item);
712 spin_unlock_irqrestore(&conn->c_lock, flags);
675 713
676 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { 714 /*
677 list_del_init(&rm->m_conn_item); 715 * Couldn't grab m_rs_lock in top loop (lock ordering),
678 rds_message_put(rm); 716 * but we can now.
679 } 717 */
680 } 718 spin_lock_irqsave(&rm->m_rs_lock, flags);
681 719
682 if (conn) 720 spin_lock(&rs->rs_lock);
683 spin_unlock_irqrestore(&conn->c_lock, flags); 721 __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
722 spin_unlock(&rs->rs_lock);
684 723
685 if (wake) 724 rm->m_rs = NULL;
686 rds_wake_sk_sleep(rs); 725 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
726
727 rds_message_put(rm);
728 }
729
730 rds_wake_sk_sleep(rs);
687 731
688 while (!list_empty(&list)) { 732 while (!list_empty(&list)) {
689 rm = list_entry(list.next, struct rds_message, m_sock_item); 733 rm = list_entry(list.next, struct rds_message, m_sock_item);
@@ -763,6 +807,63 @@ out:
763 return *queued; 807 return *queued;
764} 808}
765 809
810/*
811 * rds_message is getting to be quite complicated, and we'd like to allocate
812 * it all in one go. This figures out how big it needs to be up front.
813 */
814static int rds_rm_size(struct msghdr *msg, int data_len)
815{
816 struct cmsghdr *cmsg;
817 int size = 0;
818 int cmsg_groups = 0;
819 int retval;
820
821 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
822 if (!CMSG_OK(msg, cmsg))
823 return -EINVAL;
824
825 if (cmsg->cmsg_level != SOL_RDS)
826 continue;
827
828 switch (cmsg->cmsg_type) {
829 case RDS_CMSG_RDMA_ARGS:
830 cmsg_groups |= 1;
831 retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
832 if (retval < 0)
833 return retval;
834 size += retval;
835
836 break;
837
838 case RDS_CMSG_RDMA_DEST:
839 case RDS_CMSG_RDMA_MAP:
840 cmsg_groups |= 2;
 841			/* these are valid but do not add any size */
842 break;
843
844 case RDS_CMSG_ATOMIC_CSWP:
845 case RDS_CMSG_ATOMIC_FADD:
846 case RDS_CMSG_MASKED_ATOMIC_CSWP:
847 case RDS_CMSG_MASKED_ATOMIC_FADD:
848 cmsg_groups |= 1;
849 size += sizeof(struct scatterlist);
850 break;
851
852 default:
853 return -EINVAL;
854 }
855
856 }
857
858 size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
859
860 /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
861 if (cmsg_groups == 3)
862 return -EINVAL;
863
864 return size;
865}
866
766static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, 867static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
767 struct msghdr *msg, int *allocated_mr) 868 struct msghdr *msg, int *allocated_mr)
768{ 869{
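rds_rm_size() front-loads the whole allocation: one scatterlist entry per page of payload, one per atomic op, plus whatever rds_rdma_extra_size() reports for RDMA args, and it forbids mixing the (DEST, MAP) cookie cmsgs with (ARGS, ATOMIC) ops in one call. As a quick worked example of the data term (assuming 4 KiB pages), a 10 000-byte payload rounds up to three pages, i.e. three scatterlist entries:

#include <stdio.h>

/* Mirrors the kernel's ceil(x, y) helper used by rds_rm_size(). */
#define CEIL(x, y)	(((x) + (y) - 1) / (y))

int main(void)
{
	unsigned long page_size = 4096;		/* assumption for this example */
	unsigned long payload   = 10000;	/* hypothetical sendmsg() payload */

	printf("data sg entries: %lu\n", CEIL(payload, page_size));	/* prints 3 */
	return 0;
}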
@@ -777,7 +878,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
777 continue; 878 continue;
778 879
779 /* As a side effect, RDMA_DEST and RDMA_MAP will set 880 /* As a side effect, RDMA_DEST and RDMA_MAP will set
780 * rm->m_rdma_cookie and rm->m_rdma_mr. 881 * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr.
781 */ 882 */
782 switch (cmsg->cmsg_type) { 883 switch (cmsg->cmsg_type) {
783 case RDS_CMSG_RDMA_ARGS: 884 case RDS_CMSG_RDMA_ARGS:
@@ -793,6 +894,12 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
793 if (!ret) 894 if (!ret)
794 *allocated_mr = 1; 895 *allocated_mr = 1;
795 break; 896 break;
897 case RDS_CMSG_ATOMIC_CSWP:
898 case RDS_CMSG_ATOMIC_FADD:
899 case RDS_CMSG_MASKED_ATOMIC_CSWP:
900 case RDS_CMSG_MASKED_ATOMIC_FADD:
901 ret = rds_cmsg_atomic(rs, rm, cmsg);
902 break;
796 903
797 default: 904 default:
798 return -EINVAL; 905 return -EINVAL;
@@ -850,13 +957,26 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
850 goto out; 957 goto out;
851 } 958 }
852 959
853 rm = rds_message_copy_from_user(msg->msg_iov, payload_len); 960 /* size of rm including all sgs */
854 if (IS_ERR(rm)) { 961 ret = rds_rm_size(msg, payload_len);
855 ret = PTR_ERR(rm); 962 if (ret < 0)
856 rm = NULL; 963 goto out;
964
965 rm = rds_message_alloc(ret, GFP_KERNEL);
966 if (!rm) {
967 ret = -ENOMEM;
857 goto out; 968 goto out;
858 } 969 }
859 970
971 /* Attach data to the rm */
972 if (payload_len) {
973 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
974 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
975 if (ret)
976 goto out;
977 }
978 rm->data.op_active = 1;
979
860 rm->m_daddr = daddr; 980 rm->m_daddr = daddr;
861 981
862 /* rds_conn_create has a spinlock that runs with IRQ off. 982 /* rds_conn_create has a spinlock that runs with IRQ off.
@@ -879,22 +999,23 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
879 if (ret) 999 if (ret)
880 goto out; 1000 goto out;
881 1001
882 if ((rm->m_rdma_cookie || rm->m_rdma_op) && 1002 if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
883 conn->c_trans->xmit_rdma == NULL) {
884 if (printk_ratelimit()) 1003 if (printk_ratelimit())
885 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", 1004 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
886 rm->m_rdma_op, conn->c_trans->xmit_rdma); 1005 &rm->rdma, conn->c_trans->xmit_rdma);
887 ret = -EOPNOTSUPP; 1006 ret = -EOPNOTSUPP;
888 goto out; 1007 goto out;
889 } 1008 }
890 1009
891 /* If the connection is down, trigger a connect. We may 1010 if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) {
892 * have scheduled a delayed reconnect however - in this case 1011 if (printk_ratelimit())
893 * we should not interfere. 1012 printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n",
894 */ 1013 &rm->atomic, conn->c_trans->xmit_atomic);
895 if (rds_conn_state(conn) == RDS_CONN_DOWN && 1014 ret = -EOPNOTSUPP;
896 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) 1015 goto out;
897 queue_delayed_work(rds_wq, &conn->c_conn_w, 0); 1016 }
1017
1018 rds_conn_connect_if_down(conn);
898 1019
899 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); 1020 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
900 if (ret) { 1021 if (ret) {
@@ -938,7 +1059,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
938 rds_stats_inc(s_send_queued); 1059 rds_stats_inc(s_send_queued);
939 1060
940 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) 1061 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
941 rds_send_worker(&conn->c_send_w.work); 1062 rds_send_xmit(conn);
942 1063
943 rds_message_put(rm); 1064 rds_message_put(rm);
944 return payload_len; 1065 return payload_len;
@@ -966,20 +1087,15 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
966 int ret = 0; 1087 int ret = 0;
967 1088
968 rm = rds_message_alloc(0, GFP_ATOMIC); 1089 rm = rds_message_alloc(0, GFP_ATOMIC);
969 if (rm == NULL) { 1090 if (!rm) {
970 ret = -ENOMEM; 1091 ret = -ENOMEM;
971 goto out; 1092 goto out;
972 } 1093 }
973 1094
974 rm->m_daddr = conn->c_faddr; 1095 rm->m_daddr = conn->c_faddr;
1096 rm->data.op_active = 1;
975 1097
976 /* If the connection is down, trigger a connect. We may 1098 rds_conn_connect_if_down(conn);
977 * have scheduled a delayed reconnect however - in this case
978 * we should not interfere.
979 */
980 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
981 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
982 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
983 1099
984 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); 1100 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL);
985 if (ret) 1101 if (ret)
@@ -999,7 +1115,9 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
999 rds_stats_inc(s_send_queued); 1115 rds_stats_inc(s_send_queued);
1000 rds_stats_inc(s_send_pong); 1116 rds_stats_inc(s_send_pong);
1001 1117
1002 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 1118 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
1119 rds_send_xmit(conn);
1120
1003 rds_message_put(rm); 1121 rds_message_put(rm);
1004 return 0; 1122 return 0;
1005 1123
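Taken together, the send-path hunks above restructure rds_sendmsg() so that the message and all of its scatterlists are sized and allocated up front, with the payload carried as its own "data" operation. The following is a condensed sketch of that flow, not the literal function body; it uses only the helpers visible in the hunks (rds_rm_size(), rds_message_alloc(), rds_message_alloc_sgs(), rds_message_copy_from_user()) and trims locking and queueing.

	/* Condensed sketch of the reworked allocation flow shown above;
	 * error unwinding and queueing are abbreviated for illustration.
	 */
	static struct rds_message *rds_build_message_sketch(struct msghdr *msg,
							    size_t payload_len)
	{
		struct rds_message *rm;
		int ret;

		/* Size rm up front, including sg space for data/RDMA/atomic ops. */
		ret = rds_rm_size(msg, payload_len);
		if (ret < 0)
			return ERR_PTR(ret);

		rm = rds_message_alloc(ret, GFP_KERNEL);
		if (!rm)
			return ERR_PTR(-ENOMEM);

		/* The payload becomes a distinct "data" sub-operation. */
		if (payload_len) {
			rm->data.op_sg = rds_message_alloc_sgs(rm,
						ceil(payload_len, PAGE_SIZE));
			ret = rds_message_copy_from_user(rm, msg->msg_iov,
							 payload_len);
			if (ret) {
				rds_message_put(rm);
				return ERR_PTR(ret);
			}
		}
		rm->data.op_active = 1;

		return rm;
	}

With per-operation op_active flags in place, the transport capability checks above reduce to testing rm->rdma.op_active and rm->atomic.op_active, and queueing is handed straight to rds_send_xmit() instead of bouncing through the send worker.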
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 7598eb07cfb1..10c759ccac0c 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -57,8 +57,8 @@ static const char *const rds_stat_names[] = {
57 "recv_ping", 57 "recv_ping",
58 "send_queue_empty", 58 "send_queue_empty",
59 "send_queue_full", 59 "send_queue_full",
60 "send_sem_contention", 60 "send_lock_contention",
61 "send_sem_queue_raced", 61 "send_lock_queue_raced",
62 "send_immediate_retry", 62 "send_immediate_retry",
63 "send_delayed_retry", 63 "send_delayed_retry",
64 "send_drop_acked", 64 "send_drop_acked",
@@ -143,7 +143,7 @@ void rds_stats_exit(void)
143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info); 143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info);
144} 144}
145 145
146int __init rds_stats_init(void) 146int rds_stats_init(void)
147{ 147{
148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info); 148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info);
149 return 0; 149 return 0;
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index 7829a20325d3..25ad0c77a26c 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -105,13 +105,13 @@ void rds_sysctl_exit(void)
105 unregister_sysctl_table(rds_sysctl_reg_table); 105 unregister_sysctl_table(rds_sysctl_reg_table);
106} 106}
107 107
108int __init rds_sysctl_init(void) 108int rds_sysctl_init(void)
109{ 109{
110 rds_sysctl_reconnect_min = msecs_to_jiffies(1); 110 rds_sysctl_reconnect_min = msecs_to_jiffies(1);
111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; 111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
112 112
113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table); 113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table);
114 if (rds_sysctl_reg_table == NULL) 114 if (!rds_sysctl_reg_table)
115 return -ENOMEM; 115 return -ENOMEM;
116 return 0; 116 return 0;
117} 117}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index babf4577ff7d..eeb08e6ab96b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -200,7 +200,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
200 struct rds_tcp_connection *tc; 200 struct rds_tcp_connection *tc;
201 201
202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); 202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
203 if (tc == NULL) 203 if (!tc)
204 return -ENOMEM; 204 return -ENOMEM;
205 205
206 tc->t_sock = NULL; 206 tc->t_sock = NULL;
@@ -258,7 +258,6 @@ struct rds_transport rds_tcp_transport = {
258 .laddr_check = rds_tcp_laddr_check, 258 .laddr_check = rds_tcp_laddr_check,
259 .xmit_prepare = rds_tcp_xmit_prepare, 259 .xmit_prepare = rds_tcp_xmit_prepare,
260 .xmit_complete = rds_tcp_xmit_complete, 260 .xmit_complete = rds_tcp_xmit_complete,
261 .xmit_cong_map = rds_tcp_xmit_cong_map,
262 .xmit = rds_tcp_xmit, 261 .xmit = rds_tcp_xmit,
263 .recv = rds_tcp_recv, 262 .recv = rds_tcp_recv,
264 .conn_alloc = rds_tcp_conn_alloc, 263 .conn_alloc = rds_tcp_conn_alloc,
@@ -266,7 +265,6 @@ struct rds_transport rds_tcp_transport = {
266 .conn_connect = rds_tcp_conn_connect, 265 .conn_connect = rds_tcp_conn_connect,
267 .conn_shutdown = rds_tcp_conn_shutdown, 266 .conn_shutdown = rds_tcp_conn_shutdown,
268 .inc_copy_to_user = rds_tcp_inc_copy_to_user, 267 .inc_copy_to_user = rds_tcp_inc_copy_to_user,
269 .inc_purge = rds_tcp_inc_purge,
270 .inc_free = rds_tcp_inc_free, 268 .inc_free = rds_tcp_inc_free,
271 .stats_info_copy = rds_tcp_stats_info_copy, 269 .stats_info_copy = rds_tcp_stats_info_copy,
272 .exit = rds_tcp_exit, 270 .exit = rds_tcp_exit,
@@ -276,14 +274,14 @@ struct rds_transport rds_tcp_transport = {
276 .t_prefer_loopback = 1, 274 .t_prefer_loopback = 1,
277}; 275};
278 276
279int __init rds_tcp_init(void) 277int rds_tcp_init(void)
280{ 278{
281 int ret; 279 int ret;
282 280
283 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", 281 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection",
284 sizeof(struct rds_tcp_connection), 282 sizeof(struct rds_tcp_connection),
285 0, 0, NULL); 283 0, 0, NULL);
286 if (rds_tcp_conn_slab == NULL) { 284 if (!rds_tcp_conn_slab) {
287 ret = -ENOMEM; 285 ret = -ENOMEM;
288 goto out; 286 goto out;
289 } 287 }
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 844fa6b9cf5a..f5e6f7bebb50 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -43,7 +43,7 @@ struct rds_tcp_statistics {
43}; 43};
44 44
45/* tcp.c */ 45/* tcp.c */
46int __init rds_tcp_init(void); 46int rds_tcp_init(void);
47void rds_tcp_exit(void); 47void rds_tcp_exit(void);
48void rds_tcp_tune(struct socket *sock); 48void rds_tcp_tune(struct socket *sock);
49void rds_tcp_nonagle(struct socket *sock); 49void rds_tcp_nonagle(struct socket *sock);
@@ -61,16 +61,15 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn);
61void rds_tcp_state_change(struct sock *sk); 61void rds_tcp_state_change(struct sock *sk);
62 62
63/* tcp_listen.c */ 63/* tcp_listen.c */
64int __init rds_tcp_listen_init(void); 64int rds_tcp_listen_init(void);
65void rds_tcp_listen_stop(void); 65void rds_tcp_listen_stop(void);
66void rds_tcp_listen_data_ready(struct sock *sk, int bytes); 66void rds_tcp_listen_data_ready(struct sock *sk, int bytes);
67 67
68/* tcp_recv.c */ 68/* tcp_recv.c */
69int __init rds_tcp_recv_init(void); 69int rds_tcp_recv_init(void);
70void rds_tcp_recv_exit(void); 70void rds_tcp_recv_exit(void);
71void rds_tcp_data_ready(struct sock *sk, int bytes); 71void rds_tcp_data_ready(struct sock *sk, int bytes);
72int rds_tcp_recv(struct rds_connection *conn); 72int rds_tcp_recv(struct rds_connection *conn);
73void rds_tcp_inc_purge(struct rds_incoming *inc);
74void rds_tcp_inc_free(struct rds_incoming *inc); 73void rds_tcp_inc_free(struct rds_incoming *inc);
75int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 74int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
76 size_t size); 75 size_t size);
@@ -81,8 +80,6 @@ void rds_tcp_xmit_complete(struct rds_connection *conn);
81int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 80int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
82 unsigned int hdr_off, unsigned int sg, unsigned int off); 81 unsigned int hdr_off, unsigned int sg, unsigned int off);
83void rds_tcp_write_space(struct sock *sk); 82void rds_tcp_write_space(struct sock *sk);
84int rds_tcp_xmit_cong_map(struct rds_connection *conn,
85 struct rds_cong_map *map, unsigned long offset);
86 83
87/* tcp_stats.c */ 84/* tcp_stats.c */
88DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats); 85DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index c519939e8da9..af95c8e058fc 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -45,7 +45,7 @@ void rds_tcp_state_change(struct sock *sk)
45 45
46 read_lock_bh(&sk->sk_callback_lock); 46 read_lock_bh(&sk->sk_callback_lock);
47 conn = sk->sk_user_data; 47 conn = sk->sk_user_data;
48 if (conn == NULL) { 48 if (!conn) {
49 state_change = sk->sk_state_change; 49 state_change = sk->sk_state_change;
50 goto out; 50 goto out;
51 } 51 }
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 27844f231d10..8b5cc4aa8868 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -116,7 +116,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
116 116
117 read_lock_bh(&sk->sk_callback_lock); 117 read_lock_bh(&sk->sk_callback_lock);
118 ready = sk->sk_user_data; 118 ready = sk->sk_user_data;
119 if (ready == NULL) { /* check for teardown race */ 119 if (!ready) { /* check for teardown race */
120 ready = sk->sk_data_ready; 120 ready = sk->sk_data_ready;
121 goto out; 121 goto out;
122 } 122 }
@@ -135,7 +135,7 @@ out:
135 ready(sk, bytes); 135 ready(sk, bytes);
136} 136}
137 137
138int __init rds_tcp_listen_init(void) 138int rds_tcp_listen_init(void)
139{ 139{
140 struct sockaddr_in sin; 140 struct sockaddr_in sin;
141 struct socket *sock = NULL; 141 struct socket *sock = NULL;
@@ -178,7 +178,7 @@ void rds_tcp_listen_stop(void)
178 struct socket *sock = rds_tcp_listen_sock; 178 struct socket *sock = rds_tcp_listen_sock;
179 struct sock *sk; 179 struct sock *sk;
180 180
181 if (sock == NULL) 181 if (!sock)
182 return; 182 return;
183 183
184 sk = sock->sk; 184 sk = sock->sk;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e43797404102..67263fbee623 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -39,7 +39,7 @@
39 39
40static struct kmem_cache *rds_tcp_incoming_slab; 40static struct kmem_cache *rds_tcp_incoming_slab;
41 41
42void rds_tcp_inc_purge(struct rds_incoming *inc) 42static void rds_tcp_inc_purge(struct rds_incoming *inc)
43{ 43{
44 struct rds_tcp_incoming *tinc; 44 struct rds_tcp_incoming *tinc;
45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); 45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc);
@@ -190,10 +190,10 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
190 * processing. 190 * processing.
191 */ 191 */
192 while (left) { 192 while (left) {
193 if (tinc == NULL) { 193 if (!tinc) {
194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab, 194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab,
195 arg->gfp); 195 arg->gfp);
196 if (tinc == NULL) { 196 if (!tinc) {
197 desc->error = -ENOMEM; 197 desc->error = -ENOMEM;
198 goto out; 198 goto out;
199 } 199 }
@@ -229,7 +229,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
229 229
230 if (left && tc->t_tinc_data_rem) { 230 if (left && tc->t_tinc_data_rem) {
231 clone = skb_clone(skb, arg->gfp); 231 clone = skb_clone(skb, arg->gfp);
232 if (clone == NULL) { 232 if (!clone) {
233 desc->error = -ENOMEM; 233 desc->error = -ENOMEM;
234 goto out; 234 goto out;
235 } 235 }
@@ -326,7 +326,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
326 326
327 read_lock_bh(&sk->sk_callback_lock); 327 read_lock_bh(&sk->sk_callback_lock);
328 conn = sk->sk_user_data; 328 conn = sk->sk_user_data;
329 if (conn == NULL) { /* check for teardown race */ 329 if (!conn) { /* check for teardown race */
330 ready = sk->sk_data_ready; 330 ready = sk->sk_data_ready;
331 goto out; 331 goto out;
332 } 332 }
@@ -342,12 +342,12 @@ out:
342 ready(sk, bytes); 342 ready(sk, bytes);
343} 343}
344 344
345int __init rds_tcp_recv_init(void) 345int rds_tcp_recv_init(void)
346{ 346{
347 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", 347 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming",
348 sizeof(struct rds_tcp_incoming), 348 sizeof(struct rds_tcp_incoming),
349 0, 0, NULL); 349 0, 0, NULL);
350 if (rds_tcp_incoming_slab == NULL) 350 if (!rds_tcp_incoming_slab)
351 return -ENOMEM; 351 return -ENOMEM;
352 return 0; 352 return 0;
353} 353}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 2f012a07d94d..aa16841afbdf 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -77,56 +77,6 @@ int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
77} 77}
78 78
79/* the core send_sem serializes this with other xmit and shutdown */ 79/* the core send_sem serializes this with other xmit and shutdown */
80int rds_tcp_xmit_cong_map(struct rds_connection *conn,
81 struct rds_cong_map *map, unsigned long offset)
82{
83 static struct rds_header rds_tcp_map_header = {
84 .h_flags = RDS_FLAG_CONG_BITMAP,
85 };
86 struct rds_tcp_connection *tc = conn->c_transport_data;
87 unsigned long i;
88 int ret;
89 int copied = 0;
90
91 /* Some problem claims cpu_to_be32(constant) isn't a constant. */
92 rds_tcp_map_header.h_len = cpu_to_be32(RDS_CONG_MAP_BYTES);
93
94 if (offset < sizeof(struct rds_header)) {
95 ret = rds_tcp_sendmsg(tc->t_sock,
96 (void *)&rds_tcp_map_header + offset,
97 sizeof(struct rds_header) - offset);
98 if (ret <= 0)
99 return ret;
100 offset += ret;
101 copied = ret;
102 if (offset < sizeof(struct rds_header))
103 return ret;
104 }
105
106 offset -= sizeof(struct rds_header);
107 i = offset / PAGE_SIZE;
108 offset = offset % PAGE_SIZE;
109 BUG_ON(i >= RDS_CONG_MAP_PAGES);
110
111 do {
112 ret = tc->t_sock->ops->sendpage(tc->t_sock,
113 virt_to_page(map->m_page_addrs[i]),
114 offset, PAGE_SIZE - offset,
115 MSG_DONTWAIT);
116 if (ret <= 0)
117 break;
118 copied += ret;
119 offset += ret;
120 if (offset == PAGE_SIZE) {
121 offset = 0;
122 i++;
123 }
124 } while (i < RDS_CONG_MAP_PAGES);
125
126 return copied ? copied : ret;
127}
128
129/* the core send_sem serializes this with other xmit and shutdown */
130int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 80int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
131 unsigned int hdr_off, unsigned int sg, unsigned int off) 81 unsigned int hdr_off, unsigned int sg, unsigned int off)
132{ 82{
@@ -166,21 +116,21 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
166 goto out; 116 goto out;
167 } 117 }
168 118
169 while (sg < rm->m_nents) { 119 while (sg < rm->data.op_nents) {
170 ret = tc->t_sock->ops->sendpage(tc->t_sock, 120 ret = tc->t_sock->ops->sendpage(tc->t_sock,
171 sg_page(&rm->m_sg[sg]), 121 sg_page(&rm->data.op_sg[sg]),
172 rm->m_sg[sg].offset + off, 122 rm->data.op_sg[sg].offset + off,
173 rm->m_sg[sg].length - off, 123 rm->data.op_sg[sg].length - off,
174 MSG_DONTWAIT|MSG_NOSIGNAL); 124 MSG_DONTWAIT|MSG_NOSIGNAL);
175 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]), 125 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
176 rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off, 126 rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
177 ret); 127 ret);
178 if (ret <= 0) 128 if (ret <= 0)
179 break; 129 break;
180 130
181 off += ret; 131 off += ret;
182 done += ret; 132 done += ret;
183 if (off == rm->m_sg[sg].length) { 133 if (off == rm->data.op_sg[sg].length) {
184 off = 0; 134 off = 0;
185 sg++; 135 sg++;
186 } 136 }
@@ -226,7 +176,7 @@ void rds_tcp_write_space(struct sock *sk)
226 176
227 read_lock_bh(&sk->sk_callback_lock); 177 read_lock_bh(&sk->sk_callback_lock);
228 conn = sk->sk_user_data; 178 conn = sk->sk_user_data;
229 if (conn == NULL) { 179 if (!conn) {
230 write_space = sk->sk_write_space; 180 write_space = sk->sk_write_space;
231 goto out; 181 goto out;
232 } 182 }
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 786c20eaaf5e..0fd90f8c5f59 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -61,7 +61,7 @@
61 * 61 *
62 * Transition to state DISCONNECTING/DOWN: 62 * Transition to state DISCONNECTING/DOWN:
63 * - Inside the shutdown worker; synchronizes with xmit path 63 * - Inside the shutdown worker; synchronizes with xmit path
64 * through c_send_lock, and with connection management callbacks 64 * through RDS_IN_XMIT, and with connection management callbacks
65 * via c_cm_lock. 65 * via c_cm_lock.
66 * 66 *
67 * For receive callbacks, we rely on the underlying transport 67 * For receive callbacks, we rely on the underlying transport
@@ -110,7 +110,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
110 * We should *always* start with a random backoff; otherwise a broken connection 110 * We should *always* start with a random backoff; otherwise a broken connection
111 * will always take several iterations to be re-established. 111 * will always take several iterations to be re-established.
112 */ 112 */
113static void rds_queue_reconnect(struct rds_connection *conn) 113void rds_queue_reconnect(struct rds_connection *conn)
114{ 114{
115 unsigned long rand; 115 unsigned long rand;
116 116
@@ -156,58 +156,6 @@ void rds_connect_worker(struct work_struct *work)
156 } 156 }
157} 157}
158 158
159void rds_shutdown_worker(struct work_struct *work)
160{
161 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
162
163 /* shut it down unless it's down already */
164 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
165 /*
166 * Quiesce the connection mgmt handlers before we start tearing
167 * things down. We don't hold the mutex for the entire
168 * duration of the shutdown operation, else we may be
169 * deadlocking with the CM handler. Instead, the CM event
170 * handler is supposed to check for state DISCONNECTING
171 */
172 mutex_lock(&conn->c_cm_lock);
173 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) &&
174 !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
175 rds_conn_error(conn, "shutdown called in state %d\n",
176 atomic_read(&conn->c_state));
177 mutex_unlock(&conn->c_cm_lock);
178 return;
179 }
180 mutex_unlock(&conn->c_cm_lock);
181
182 mutex_lock(&conn->c_send_lock);
183 conn->c_trans->conn_shutdown(conn);
184 rds_conn_reset(conn);
185 mutex_unlock(&conn->c_send_lock);
186
187 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
188 /* This can happen - eg when we're in the middle of tearing
189 * down the connection, and someone unloads the rds module.
190 * Quite reproduceable with loopback connections.
191 * Mostly harmless.
192 */
193 rds_conn_error(conn,
194 "%s: failed to transition to state DOWN, "
195 "current state is %d\n",
196 __func__,
197 atomic_read(&conn->c_state));
198 return;
199 }
200 }
201
202 /* Then reconnect if it's still live.
203 * The passive side of an IB loopback connection is never added
204 * to the conn hash, so we never trigger a reconnect on this
205 * conn - the reconnect is always triggered by the active peer. */
206 cancel_delayed_work(&conn->c_conn_w);
207 if (!hlist_unhashed(&conn->c_hash_node))
208 rds_queue_reconnect(conn);
209}
210
211void rds_send_worker(struct work_struct *work) 159void rds_send_worker(struct work_struct *work)
212{ 160{
213 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work); 161 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
@@ -252,15 +200,22 @@ void rds_recv_worker(struct work_struct *work)
252 } 200 }
253} 201}
254 202
203void rds_shutdown_worker(struct work_struct *work)
204{
205 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
206
207 rds_conn_shutdown(conn);
208}
209
255void rds_threads_exit(void) 210void rds_threads_exit(void)
256{ 211{
257 destroy_workqueue(rds_wq); 212 destroy_workqueue(rds_wq);
258} 213}
259 214
260int __init rds_threads_init(void) 215int rds_threads_init(void)
261{ 216{
262 rds_wq = create_workqueue("krdsd"); 217 rds_wq = create_singlethread_workqueue("krdsd");
263 if (rds_wq == NULL) 218 if (!rds_wq)
264 return -ENOMEM; 219 return -ENOMEM;
265 220
266 return 0; 221 return 0;
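The shutdown logic removed above is not lost; the new two-line worker delegates to rds_conn_shutdown(), which lives outside this hunk (in net/rds/connection.c). Based purely on the deleted worker body, the consolidated helper plausibly looks like the sketch below; the exact synchronisation it uses is an assumption here (the comment change above suggests an RDS_IN_XMIT bit replaces c_send_lock), and error reporting on a failed transition is omitted.

	/* Plausible shape of the consolidated shutdown path, inferred from the
	 * worker body deleted above.  Not the literal kernel code.
	 */
	void rds_conn_shutdown_sketch(struct rds_connection *conn)
	{
		if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
			/* Quiesce CM callbacks before tearing things down. */
			mutex_lock(&conn->c_cm_lock);
			if (!rds_conn_transition(conn, RDS_CONN_UP,
						 RDS_CONN_DISCONNECTING) &&
			    !rds_conn_transition(conn, RDS_CONN_ERROR,
						 RDS_CONN_DISCONNECTING)) {
				mutex_unlock(&conn->c_cm_lock);
				return;
			}
			mutex_unlock(&conn->c_cm_lock);

			conn->c_trans->conn_shutdown(conn);
			rds_conn_reset(conn);

			rds_conn_transition(conn, RDS_CONN_DISCONNECTING,
					    RDS_CONN_DOWN);
		}

		/* Reconnect only if the conn is still hashed; the passive side
		 * of an IB loopback connection never is.
		 */
		cancel_delayed_work(&conn->c_conn_w);
		if (!hlist_unhashed(&conn->c_hash_node))
			rds_queue_reconnect(conn);
	}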
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7e1067901353..7f2ac4fec367 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -71,19 +71,28 @@ void rds_trans_unregister(struct rds_transport *trans)
71} 71}
72EXPORT_SYMBOL_GPL(rds_trans_unregister); 72EXPORT_SYMBOL_GPL(rds_trans_unregister);
73 73
74void rds_trans_put(struct rds_transport *trans)
75{
76 if (trans && trans->t_owner)
77 module_put(trans->t_owner);
78}
79
74struct rds_transport *rds_trans_get_preferred(__be32 addr) 80struct rds_transport *rds_trans_get_preferred(__be32 addr)
75{ 81{
76 struct rds_transport *ret = NULL; 82 struct rds_transport *ret = NULL;
77 int i; 83 struct rds_transport *trans;
84 unsigned int i;
78 85
79 if (IN_LOOPBACK(ntohl(addr))) 86 if (IN_LOOPBACK(ntohl(addr)))
80 return &rds_loop_transport; 87 return &rds_loop_transport;
81 88
82 down_read(&rds_trans_sem); 89 down_read(&rds_trans_sem);
83 for (i = 0; i < RDS_TRANS_COUNT; i++) 90 for (i = 0; i < RDS_TRANS_COUNT; i++) {
84 { 91 trans = transports[i];
85 if (transports[i] && (transports[i]->laddr_check(addr) == 0)) { 92
86 ret = transports[i]; 93 if (trans && (trans->laddr_check(addr) == 0) &&
94 (!trans->t_owner || try_module_get(trans->t_owner))) {
95 ret = trans;
87 break; 96 break;
88 } 97 }
89 } 98 }
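rds_trans_get_preferred() now pins the transport's owning module with try_module_get(), so every successful lookup has to be balanced by the new rds_trans_put(). A hypothetical caller, shown only to illustrate the pairing (the rs_transport consumer field is an assumption, not part of this hunk):

	/* Illustrative only: pair every successful rds_trans_get_preferred()
	 * with rds_trans_put() once the transport is no longer needed.
	 */
	static int rds_pick_transport_sketch(struct rds_sock *rs, __be32 addr)
	{
		struct rds_transport *trans = rds_trans_get_preferred(addr);

		if (!trans)
			return -EADDRNOTAVAIL;

		rs->rs_transport = trans;	/* hypothetical consumer field */
		return 0;
	}

	static void rds_drop_transport_sketch(struct rds_sock *rs)
	{
		rds_trans_put(rs->rs_transport);	/* drops the module ref */
		rs->rs_transport = NULL;
	}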
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 000000000000..e6b5190daddd
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,80 @@
1#ifndef _LINUX_XLIST_H
2#define _LINUX_XLIST_H
3
4#include <linux/stddef.h>
5#include <linux/poison.h>
6#include <linux/prefetch.h>
7#include <asm/system.h>
8
9struct xlist_head {
10 struct xlist_head *next;
11};
12
13static inline void INIT_XLIST_HEAD(struct xlist_head *list)
14{
15 list->next = NULL;
16}
17
18static inline int xlist_empty(struct xlist_head *head)
19{
20 return head->next == NULL;
21}
22
23static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
24 struct xlist_head *head)
25{
26 struct xlist_head *cur;
27 struct xlist_head *check;
28
29 while (1) {
30 cur = head->next;
31 tail->next = cur;
32 check = cmpxchg(&head->next, cur, new);
33 if (check == cur)
34 break;
35 }
36}
37
38static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
39{
40 struct xlist_head *cur;
41 struct xlist_head *check;
42 struct xlist_head *next;
43
44 while (1) {
45 cur = head->next;
46 if (!cur)
47 goto out;
48
49 next = cur->next;
50 check = cmpxchg(&head->next, cur, next);
51 if (check == cur)
52 goto out;
53 }
54out:
55 return cur;
56}
57
58static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
59{
60 struct xlist_head *cur;
61
62 cur = head->next;
63 if (!cur)
64 return NULL;
65
66 head->next = cur->next;
67 return cur;
68}
69
70static inline void xlist_splice(struct xlist_head *list,
71 struct xlist_head *head)
72{
73 struct xlist_head *cur;
74
75 WARN_ON(head->next);
76 cur = xchg(&list->next, NULL);
77 head->next = cur;
78}
79
80#endif
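The new xlist is a minimal lock-free singly linked stack: xlist_add() pushes a whole segment (new..tail) onto head with cmpxchg(), and xlist_del_head() pops one node. A small usage sketch, assuming a hypothetical embedding structure and kernel context:

	/* Hypothetical example of embedding an xlist_head in a pooled object. */
	struct pool_item {
		struct xlist_head pi_node;
		void *pi_buf;
	};

	static struct xlist_head item_pool;	/* INIT_XLIST_HEAD() once at init */

	static void pool_put(struct pool_item *item)
	{
		/* A single node is both the head and the tail of the segment. */
		xlist_add(&item->pi_node, &item->pi_node, &item_pool);
	}

	static struct pool_item *pool_get(void)
	{
		struct xlist_head *node = xlist_del_head(&item_pool);

		return node ? container_of(node, struct pool_item, pi_node) : NULL;
	}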
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index 3713d7ecab96..1bca6d49ec96 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -142,7 +142,7 @@ static unsigned long rfkill_last_scheduled;
142static unsigned long rfkill_ratelimit(const unsigned long last) 142static unsigned long rfkill_ratelimit(const unsigned long last)
143{ 143{
144 const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY); 144 const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY);
145 return (time_after(jiffies, last + delay)) ? 0 : delay; 145 return time_after(jiffies, last + delay) ? 0 : delay;
146} 146}
147 147
148static void rfkill_schedule_ratelimited(void) 148static void rfkill_schedule_ratelimited(void)
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index a750a28e0221..fa5f5641a2c2 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -114,7 +114,7 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
114 if (ax25s) 114 if (ax25s)
115 ax25_cb_put(ax25s); 115 ax25_cb_put(ax25s);
116 116
117 return (neigh->ax25 != NULL); 117 return neigh->ax25 != NULL;
118} 118}
119 119
120/* 120/*
@@ -137,7 +137,7 @@ static int rose_link_up(struct rose_neigh *neigh)
137 if (ax25s) 137 if (ax25s)
138 ax25_cb_put(ax25s); 138 ax25_cb_put(ax25s);
139 139
140 return (neigh->ax25 != NULL); 140 return neigh->ax25 != NULL;
141} 141}
142 142
143/* 143/*
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d1..a36270a994d7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT
518 To compile this code as a module, choose M here: the 518 To compile this code as a module, choose M here: the
519 module will be called act_skbedit. 519 module will be called act_skbedit.
520 520
521config NET_ACT_CSUM
522 tristate "Checksum Updating"
523 depends on NET_CLS_ACT && INET
524 ---help---
 525 Say Y here to update common checksums after direct
 526 packet alterations.
527
528 To compile this code as a module, choose M here: the
529 module will be called act_csum.
530
521config NET_CLS_IND 531config NET_CLS_IND
522 bool "Incoming device classification" 532 bool "Incoming device classification"
523 depends on NET_CLS_U32 || NET_CLS_FW 533 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58f..960f5dba6304 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o 15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o 16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o 17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
18obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 19obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
19obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 20obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
20obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 21obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 000000000000..67dc7ce9b63a
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,595 @@
1/*
2 * Checksum updating actions
3 *
4 * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/spinlock.h>
18
19#include <linux/netlink.h>
20#include <net/netlink.h>
21#include <linux/rtnetlink.h>
22
23#include <linux/skbuff.h>
24
25#include <net/ip.h>
26#include <net/ipv6.h>
27#include <net/icmp.h>
28#include <linux/icmpv6.h>
29#include <linux/igmp.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/ip6_checksum.h>
33
34#include <net/act_api.h>
35
36#include <linux/tc_act/tc_csum.h>
37#include <net/tc_act/tc_csum.h>
38
39#define CSUM_TAB_MASK 15
40static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
41static u32 csum_idx_gen;
42static DEFINE_RWLOCK(csum_lock);
43
44static struct tcf_hashinfo csum_hash_info = {
45 .htab = tcf_csum_ht,
46 .hmask = CSUM_TAB_MASK,
47 .lock = &csum_lock,
48};
49
50static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
51 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
52};
53
54static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
55 struct tc_action *a, int ovr, int bind)
56{
57 struct nlattr *tb[TCA_CSUM_MAX + 1];
58 struct tc_csum *parm;
59 struct tcf_common *pc;
60 struct tcf_csum *p;
61 int ret = 0, err;
62
63 if (nla == NULL)
64 return -EINVAL;
65
 66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
67 if (err < 0)
68 return err;
69
70 if (tb[TCA_CSUM_PARMS] == NULL)
71 return -EINVAL;
72 parm = nla_data(tb[TCA_CSUM_PARMS]);
73
74 pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info);
75 if (!pc) {
76 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
77 &csum_idx_gen, &csum_hash_info);
78 if (IS_ERR(pc))
79 return PTR_ERR(pc);
80 p = to_tcf_csum(pc);
81 ret = ACT_P_CREATED;
82 } else {
83 p = to_tcf_csum(pc);
84 if (!ovr) {
85 tcf_hash_release(pc, bind, &csum_hash_info);
86 return -EEXIST;
87 }
88 }
89
90 spin_lock_bh(&p->tcf_lock);
91 p->tcf_action = parm->action;
92 p->update_flags = parm->update_flags;
93 spin_unlock_bh(&p->tcf_lock);
94
95 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(pc, &csum_hash_info);
97
98 return ret;
99}
100
101static int tcf_csum_cleanup(struct tc_action *a, int bind)
102{
103 struct tcf_csum *p = a->priv;
104 return tcf_hash_release(&p->common, bind, &csum_hash_info);
105}
106
107/**
108 * tcf_csum_skb_nextlayer - Get next layer pointer
109 * @skb: sk_buff to use
110 * @ihl: previous summed headers length
111 * @ipl: complete packet length
112 * @jhl: next header length
113 *
 114 * Check that the expected next layer is available in the specified sk_buff.
 115 * Return a pointer to the next layer if it is, NULL otherwise.
116 */
117static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
118 unsigned int ihl, unsigned int ipl,
119 unsigned int jhl)
120{
121 int ntkoff = skb_network_offset(skb);
122 int hl = ihl + jhl;
123
124 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
125 (skb_cloned(skb) &&
126 !skb_clone_writable(skb, hl + ntkoff) &&
127 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
128 return NULL;
129 else
130 return (void *)(skb_network_header(skb) + ihl);
131}
132
133static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
134 unsigned int ihl, unsigned int ipl)
135{
136 struct icmphdr *icmph;
137
138 icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
139 if (icmph == NULL)
140 return 0;
141
142 icmph->checksum = 0;
143 skb->csum = csum_partial(icmph, ipl - ihl, 0);
144 icmph->checksum = csum_fold(skb->csum);
145
146 skb->ip_summed = CHECKSUM_NONE;
147
148 return 1;
149}
150
151static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
152 unsigned int ihl, unsigned int ipl)
153{
154 struct igmphdr *igmph;
155
156 igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
157 if (igmph == NULL)
158 return 0;
159
160 igmph->csum = 0;
161 skb->csum = csum_partial(igmph, ipl - ihl, 0);
162 igmph->csum = csum_fold(skb->csum);
163
164 skb->ip_summed = CHECKSUM_NONE;
165
166 return 1;
167}
168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
170 unsigned int ihl, unsigned int ipl)
171{
172 struct icmp6hdr *icmp6h;
173
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL)
176 return 0;
177
178 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
181 ipl - ihl, IPPROTO_ICMPV6,
182 skb->csum);
183
184 skb->ip_summed = CHECKSUM_NONE;
185
186 return 1;
187}
188
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
190 unsigned int ihl, unsigned int ipl)
191{
192 struct tcphdr *tcph;
193
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL)
196 return 0;
197
198 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl,
201 iph->saddr, iph->daddr, skb->csum);
202
203 skb->ip_summed = CHECKSUM_NONE;
204
205 return 1;
206}
207
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
209 unsigned int ihl, unsigned int ipl)
210{
211 struct tcphdr *tcph;
212
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL)
215 return 0;
216
217 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
220 ipl - ihl, IPPROTO_TCP,
221 skb->csum);
222
223 skb->ip_summed = CHECKSUM_NONE;
224
225 return 1;
226}
227
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
229 unsigned int ihl, unsigned int ipl, int udplite)
230{
231 struct udphdr *udph;
232 u16 ul;
233
234 /*
 235 * Support both the UDP and UDP-Lite checksum algorithms. Don't use
 236 * udph->len to obtain the real payload length without a protocol check:
 237 * UDP-Lite reuses udph->len for its checksum coverage.
 238 * Use iph->tot_len, or simply ipl, instead.
239 */
240
241 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
242 if (udph == NULL)
243 return 0;
244
245 ul = ntohs(udph->len);
246
247 if (udplite || udph->check) {
248
249 udph->check = 0;
250
251 if (udplite) {
252 if (ul == 0)
253 skb->csum = csum_partial(udph, ipl - ihl, 0);
254 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
255 skb->csum = csum_partial(udph, ul, 0);
256 else
257 goto ignore_obscure_skb;
258 } else {
259 if (ul != ipl - ihl)
260 goto ignore_obscure_skb;
261
262 skb->csum = csum_partial(udph, ul, 0);
263 }
264
265 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
266 ul, iph->protocol,
267 skb->csum);
268
269 if (!udph->check)
270 udph->check = CSUM_MANGLED_0;
271 }
272
273 skb->ip_summed = CHECKSUM_NONE;
274
275ignore_obscure_skb:
276 return 1;
277}
278
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
280 unsigned int ihl, unsigned int ipl, int udplite)
281{
282 struct udphdr *udph;
283 u16 ul;
284
285 /*
286 * Support both UDP and UDPLITE checksum algorithms, Don't use
287 * udph->len to get the real length without any protocol check,
288 * UDPLITE uses udph->len for another thing,
289 * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl.
290 */
291
292 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
293 if (udph == NULL)
294 return 0;
295
296 ul = ntohs(udph->len);
297
298 udph->check = 0;
299
300 if (udplite) {
301 if (ul == 0)
302 skb->csum = csum_partial(udph, ipl - ihl, 0);
303
304 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
305 skb->csum = csum_partial(udph, ul, 0);
306
307 else
308 goto ignore_obscure_skb;
309 } else {
310 if (ul != ipl - ihl)
311 goto ignore_obscure_skb;
312
313 skb->csum = csum_partial(udph, ul, 0);
314 }
315
316 udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
317 udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
318 skb->csum);
319
320 if (!udph->check)
321 udph->check = CSUM_MANGLED_0;
322
323 skb->ip_summed = CHECKSUM_NONE;
324
325ignore_obscure_skb:
326 return 1;
327}
328
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{
331 struct iphdr *iph;
332 int ntkoff;
333
334 ntkoff = skb_network_offset(skb);
335
336 if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
337 goto fail;
338
339 iph = ip_hdr(skb);
340
341 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
342 case IPPROTO_ICMP:
343 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
344 if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
345 ntohs(iph->tot_len)))
346 goto fail;
347 break;
348 case IPPROTO_IGMP:
349 if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
350 if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
351 ntohs(iph->tot_len)))
352 goto fail;
353 break;
354 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
357 ntohs(iph->tot_len)))
358 goto fail;
359 break;
360 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
363 ntohs(iph->tot_len), 0))
364 goto fail;
365 break;
366 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
369 ntohs(iph->tot_len), 1))
370 goto fail;
371 break;
372 }
373
374 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
375 if (skb_cloned(skb) &&
376 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail;
379
380 ip_send_check(iph);
381 }
382
383 return 1;
384
385fail:
386 return 0;
387}
388
389static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
390 unsigned int ixhl, unsigned int *pl)
391{
392 int off, len, optlen;
393 unsigned char *xh = (void *)ip6xh;
394
395 off = sizeof(*ip6xh);
396 len = ixhl - off;
397
398 while (len > 1) {
399 switch (xh[off]) {
400 case IPV6_TLV_PAD0:
401 optlen = 1;
402 break;
403 case IPV6_TLV_JUMBO:
404 optlen = xh[off + 1] + 2;
405 if (optlen != 6 || len < 6 || (off & 3) != 2)
406 /* wrong jumbo option length/alignment */
407 return 0;
408 *pl = ntohl(*(__be32 *)(xh + off + 2));
409 goto done;
410 default:
411 optlen = xh[off + 1] + 2;
412 if (optlen > len)
413 /* ignore obscure options */
414 goto done;
415 break;
416 }
417 off += optlen;
418 len -= optlen;
419 }
420
421done:
422 return 1;
423}
424
425static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
426{
427 struct ipv6hdr *ip6h;
428 struct ipv6_opt_hdr *ip6xh;
429 unsigned int hl, ixhl;
430 unsigned int pl;
431 int ntkoff;
432 u8 nexthdr;
433
434 ntkoff = skb_network_offset(skb);
435
436 hl = sizeof(*ip6h);
437
438 if (!pskb_may_pull(skb, hl + ntkoff))
439 goto fail;
440
441 ip6h = ipv6_hdr(skb);
442
443 pl = ntohs(ip6h->payload_len);
444 nexthdr = ip6h->nexthdr;
445
446 do {
447 switch (nexthdr) {
448 case NEXTHDR_FRAGMENT:
449 goto ignore_skb;
450 case NEXTHDR_ROUTING:
451 case NEXTHDR_HOP:
452 case NEXTHDR_DEST:
453 if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
454 goto fail;
455 ip6xh = (void *)(skb_network_header(skb) + hl);
456 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail;
459 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail;
462 nexthdr = ip6xh->nexthdr;
463 hl += ixhl;
464 break;
465 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h,
468 hl, pl + sizeof(*ip6h)))
469 goto fail;
470 goto done;
471 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h,
474 hl, pl + sizeof(*ip6h)))
475 goto fail;
476 goto done;
477 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
480 pl + sizeof(*ip6h), 0))
481 goto fail;
482 goto done;
483 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
486 pl + sizeof(*ip6h), 1))
487 goto fail;
488 goto done;
489 default:
490 goto ignore_skb;
491 }
492 } while (pskb_may_pull(skb, hl + 1 + ntkoff));
493
494done:
495ignore_skb:
496 return 1;
497
498fail:
499 return 0;
500}
501
502static int tcf_csum(struct sk_buff *skb,
503 struct tc_action *a, struct tcf_result *res)
504{
505 struct tcf_csum *p = a->priv;
506 int action;
507 u32 update_flags;
508
509 spin_lock(&p->tcf_lock);
510 p->tcf_tm.lastuse = jiffies;
511 p->tcf_bstats.bytes += qdisc_pkt_len(skb);
512 p->tcf_bstats.packets++;
513 action = p->tcf_action;
514 update_flags = p->update_flags;
515 spin_unlock(&p->tcf_lock);
516
517 if (unlikely(action == TC_ACT_SHOT))
518 goto drop;
519
520 switch (skb->protocol) {
521 case cpu_to_be16(ETH_P_IP):
522 if (!tcf_csum_ipv4(skb, update_flags))
523 goto drop;
524 break;
525 case cpu_to_be16(ETH_P_IPV6):
526 if (!tcf_csum_ipv6(skb, update_flags))
527 goto drop;
528 break;
529 }
530
531 return action;
532
533drop:
534 spin_lock(&p->tcf_lock);
535 p->tcf_qstats.drops++;
536 spin_unlock(&p->tcf_lock);
537 return TC_ACT_SHOT;
538}
539
540static int tcf_csum_dump(struct sk_buff *skb,
541 struct tc_action *a, int bind, int ref)
542{
543 unsigned char *b = skb_tail_pointer(skb);
544 struct tcf_csum *p = a->priv;
545 struct tc_csum opt = {
546 .update_flags = p->update_flags,
547 .index = p->tcf_index,
548 .action = p->tcf_action,
549 .refcnt = p->tcf_refcnt - ref,
550 .bindcnt = p->tcf_bindcnt - bind,
551 };
552 struct tcf_t t;
553
554 NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
555 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
556 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
557 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
558 NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
559
560 return skb->len;
561
562nla_put_failure:
563 nlmsg_trim(skb, b);
564 return -1;
565}
566
567static struct tc_action_ops act_csum_ops = {
568 .kind = "csum",
569 .hinfo = &csum_hash_info,
570 .type = TCA_ACT_CSUM,
571 .capab = TCA_CAP_NONE,
572 .owner = THIS_MODULE,
573 .act = tcf_csum,
574 .dump = tcf_csum_dump,
575 .cleanup = tcf_csum_cleanup,
576 .lookup = tcf_hash_search,
577 .init = tcf_csum_init,
578 .walk = tcf_generic_walker
579};
580
581MODULE_DESCRIPTION("Checksum updating actions");
582MODULE_LICENSE("GPL");
583
584static int __init csum_init_module(void)
585{
586 return tcf_register_action(&act_csum_ops);
587}
588
589static void __exit csum_cleanup_module(void)
590{
591 tcf_unregister_action(&act_csum_ops);
592}
593
594module_init(csum_init_module);
595module_exit(csum_cleanup_module);
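From user space the action is configured through a TCA_CSUM_PARMS attribute carrying a struct tc_csum whose update_flags select which checksums tcf_csum() recomputes. A minimal sketch of filling that structure; the header path and struct layout are assumed to match the uapi added alongside this action, and only the flag names taken from the code above are relied on:

	/* Sketch only: assumes linux/tc_act/tc_csum.h exposes struct tc_csum
	 * and the TCA_CSUM_UPDATE_FLAG_* bits used by tcf_csum() above.
	 */
	#include <linux/pkt_cls.h>
	#include <linux/tc_act/tc_csum.h>

	static void fill_csum_parm(struct tc_csum *parm)
	{
		parm->index = 0;		/* let the kernel allocate an index */
		parm->action = TC_ACT_OK;	/* keep processing after the fix-up */
		parm->update_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR |
				     TCA_CSUM_UPDATE_FLAG_TCP |
				     TCA_CSUM_UPDATE_FLAG_UDP;
	}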
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e17096e3913c..5b271a18bc3a 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb)
111 } 111 }
112} 112}
113 113
114static int has_ports(u8 protocol)
115{
116 switch (protocol) {
117 case IPPROTO_TCP:
118 case IPPROTO_UDP:
119 case IPPROTO_UDPLITE:
120 case IPPROTO_SCTP:
121 case IPPROTO_DCCP:
122 case IPPROTO_ESP:
123 return 1;
124 default:
125 return 0;
126 }
127}
128
129static u32 flow_get_proto_src(struct sk_buff *skb) 114static u32 flow_get_proto_src(struct sk_buff *skb)
130{ 115{
131 switch (skb->protocol) { 116 switch (skb->protocol) {
132 case htons(ETH_P_IP): { 117 case htons(ETH_P_IP): {
133 struct iphdr *iph; 118 struct iphdr *iph;
119 int poff;
134 120
135 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
136 break; 122 break;
137 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
138 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 124 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
139 has_ports(iph->protocol) && 125 break;
140 pskb_network_may_pull(skb, iph->ihl * 4 + 2)) 126 poff = proto_ports_offset(iph->protocol);
141 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); 127 if (poff >= 0 &&
128 pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
129 iph = ip_hdr(skb);
130 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
131 poff));
132 }
142 break; 133 break;
143 } 134 }
144 case htons(ETH_P_IPV6): { 135 case htons(ETH_P_IPV6): {
145 struct ipv6hdr *iph; 136 struct ipv6hdr *iph;
137 int poff;
146 138
147 if (!pskb_network_may_pull(skb, sizeof(*iph) + 2)) 139 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 break; 140 break;
149 iph = ipv6_hdr(skb); 141 iph = ipv6_hdr(skb);
150 if (has_ports(iph->nexthdr)) 142 poff = proto_ports_offset(iph->nexthdr);
151 return ntohs(*(__be16 *)&iph[1]); 143 if (poff >= 0 &&
144 pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
145 iph = ipv6_hdr(skb);
146 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
147 poff));
148 }
152 break; 149 break;
153 } 150 }
154 } 151 }
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
161 switch (skb->protocol) { 158 switch (skb->protocol) {
162 case htons(ETH_P_IP): { 159 case htons(ETH_P_IP): {
163 struct iphdr *iph; 160 struct iphdr *iph;
161 int poff;
164 162
165 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
166 break; 164 break;
167 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
168 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 166 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
169 has_ports(iph->protocol) && 167 break;
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 168 poff = proto_ports_offset(iph->protocol);
171 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); 169 if (poff >= 0 &&
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
171 iph = ip_hdr(skb);
172 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
173 2 + poff));
174 }
172 break; 175 break;
173 } 176 }
174 case htons(ETH_P_IPV6): { 177 case htons(ETH_P_IPV6): {
175 struct ipv6hdr *iph; 178 struct ipv6hdr *iph;
179 int poff;
176 180
177 if (!pskb_network_may_pull(skb, sizeof(*iph) + 4)) 181 if (!pskb_network_may_pull(skb, sizeof(*iph)))
178 break; 182 break;
179 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
180 if (has_ports(iph->nexthdr)) 184 poff = proto_ports_offset(iph->nexthdr);
181 return ntohs(*(__be16 *)((void *)&iph[1] + 2)); 185 if (poff >= 0 &&
186 pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
187 iph = ipv6_hdr(skb);
188 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
189 poff + 2));
190 }
182 break; 191 break;
183 } 192 }
184 } 193 }
@@ -297,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
297 return tag & VLAN_VID_MASK; 306 return tag & VLAN_VID_MASK;
298} 307}
299 308
309static u32 flow_get_rxhash(struct sk_buff *skb)
310{
311 return skb_get_rxhash(skb);
312}
313
300static u32 flow_key_get(struct sk_buff *skb, int key) 314static u32 flow_key_get(struct sk_buff *skb, int key)
301{ 315{
302 switch (key) { 316 switch (key) {
@@ -334,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
334 return flow_get_skgid(skb); 348 return flow_get_skgid(skb);
335 case FLOW_KEY_VLAN_TAG: 349 case FLOW_KEY_VLAN_TAG:
336 return flow_get_vlan_tag(skb); 350 return flow_get_vlan_tag(skb);
351 case FLOW_KEY_RXHASH:
352 return flow_get_rxhash(skb);
337 default: 353 default:
338 WARN_ON(1); 354 WARN_ON(1);
339 return 0; 355 return 0;
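The classifier no longer keeps its own has_ports() table; it asks a shared proto_ports_offset() helper where the 16-bit port (or SPI) words start for a given L4 protocol, with a negative return meaning "no fixed offset". The helper itself is not part of this hunk; judging from the protocols the old has_ports() accepted, it presumably looks roughly like the sketch below (the AH case and the exact return values are assumptions):

	/* Presumed shape of the shared helper; a negative value means the
	 * protocol has no port/SPI words at a fixed offset.
	 */
	static inline int proto_ports_offset(int proto)
	{
		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_UDPLITE:
		case IPPROTO_SCTP:
		case IPPROTO_DCCP:
		case IPPROTO_ESP:	/* SPI doubles as the flow key */
			return 0;
		case IPPROTO_AH:	/* SPI follows the 4-byte header prologue */
			return 4;
		default:
			return -EINVAL;
		}
	}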
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3bcac8aa333c..34da5e29ea1a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -223,6 +223,11 @@ META_COLLECTOR(int_maclen)
223 dst->value = skb->mac_len; 223 dst->value = skb->mac_len;
224} 224}
225 225
226META_COLLECTOR(int_rxhash)
227{
228 dst->value = skb_get_rxhash(skb);
229}
230
226/************************************************************************** 231/**************************************************************************
227 * Netfilter 232 * Netfilter
228 **************************************************************************/ 233 **************************************************************************/
@@ -541,6 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
541 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), 546 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
542 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), 547 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
543 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), 548 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag),
549 [META_ID(RXHASH)] = META_FUNC(int_rxhash),
544 } 550 }
545}; 551};
546 552
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 408eea7086aa..b8020784d0e9 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -240,7 +240,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
240 if (q) 240 if (q)
241 goto out; 241 goto out;
242 242
243 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); 243 q = qdisc_match_from_root(dev->ingress_queue.qdisc_sleeping, handle);
244out: 244out:
245 return q; 245 return q;
246} 246}
@@ -360,7 +360,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
361 } 361 }
362 362
363 if (!s || tsize != s->tsize || (!tab && tsize > 0)) 363 if (tsize != s->tsize || (!tab && tsize > 0))
364 return ERR_PTR(-EINVAL); 364 return ERR_PTR(-EINVAL);
365 365
366 spin_lock(&qdisc_stab_lock); 366 spin_lock(&qdisc_stab_lock);
@@ -701,7 +701,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
701 } 701 }
702 702
703 for (i = 0; i < num_q; i++) { 703 for (i = 0; i < num_q; i++) {
704 struct netdev_queue *dev_queue = &dev->rx_queue; 704 struct netdev_queue *dev_queue = &dev->ingress_queue;
705 705
706 if (!ingress) 706 if (!ingress)
707 dev_queue = netdev_get_tx_queue(dev, i); 707 dev_queue = netdev_get_tx_queue(dev, i);
@@ -979,7 +979,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
979 return -ENOENT; 979 return -ENOENT;
980 q = qdisc_leaf(p, clid); 980 q = qdisc_leaf(p, clid);
981 } else { /* ingress */ 981 } else { /* ingress */
982 q = dev->rx_queue.qdisc_sleeping; 982 q = dev->ingress_queue.qdisc_sleeping;
983 } 983 }
984 } else { 984 } else {
985 q = dev->qdisc; 985 q = dev->qdisc;
@@ -1044,7 +1044,7 @@ replay:
1044 return -ENOENT; 1044 return -ENOENT;
1045 q = qdisc_leaf(p, clid); 1045 q = qdisc_leaf(p, clid);
1046 } else { /*ingress */ 1046 } else { /*ingress */
1047 q = dev->rx_queue.qdisc_sleeping; 1047 q = dev->ingress_queue.qdisc_sleeping;
1048 } 1048 }
1049 } else { 1049 } else {
1050 q = dev->qdisc; 1050 q = dev->qdisc;
@@ -1124,7 +1124,7 @@ create_n_graft:
1124 if (!(n->nlmsg_flags&NLM_F_CREATE)) 1124 if (!(n->nlmsg_flags&NLM_F_CREATE))
1125 return -ENOENT; 1125 return -ENOENT;
1126 if (clid == TC_H_INGRESS) 1126 if (clid == TC_H_INGRESS)
1127 q = qdisc_create(dev, &dev->rx_queue, p, 1127 q = qdisc_create(dev, &dev->ingress_queue, p,
1128 tcm->tcm_parent, tcm->tcm_parent, 1128 tcm->tcm_parent, tcm->tcm_parent,
1129 tca, &err); 1129 tca, &err);
1130 else { 1130 else {
@@ -1304,7 +1304,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1304 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) 1304 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
1305 goto done; 1305 goto done;
1306 1306
1307 dev_queue = &dev->rx_queue; 1307 dev_queue = &dev->ingress_queue;
1308 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) 1308 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
1309 goto done; 1309 goto done;
1310 1310
@@ -1595,7 +1595,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1595 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) 1595 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1596 goto done; 1596 goto done;
1597 1597
1598 dev_queue = &dev->rx_queue; 1598 dev_queue = &dev->ingress_queue;
1599 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) 1599 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
1600 goto done; 1600 goto done;
1601 1601
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2aeb3a4386a1..545278a1c478 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -753,7 +753,7 @@ void dev_activate(struct net_device *dev)
753 753
754 need_watchdog = 0; 754 need_watchdog = 0;
755 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); 755 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
756 transition_one_qdisc(dev, &dev->rx_queue, NULL); 756 transition_one_qdisc(dev, &dev->ingress_queue, NULL);
757 757
758 if (need_watchdog) { 758 if (need_watchdog) {
759 dev->trans_start = jiffies; 759 dev->trans_start = jiffies;
@@ -812,7 +812,7 @@ static bool some_qdisc_is_busy(struct net_device *dev)
812void dev_deactivate(struct net_device *dev) 812void dev_deactivate(struct net_device *dev)
813{ 813{
814 netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); 814 netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
815 dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); 815 dev_deactivate_queue(dev, &dev->ingress_queue, &noop_qdisc);
816 816
817 dev_watchdog_down(dev); 817 dev_watchdog_down(dev);
818 818
@@ -838,7 +838,7 @@ void dev_init_scheduler(struct net_device *dev)
838{ 838{
839 dev->qdisc = &noop_qdisc; 839 dev->qdisc = &noop_qdisc;
840 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); 840 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
841 dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 841 dev_init_scheduler_queue(dev, &dev->ingress_queue, &noop_qdisc);
842 842
843 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); 843 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
844} 844}
@@ -861,7 +861,7 @@ static void shutdown_scheduler_queue(struct net_device *dev,
861void dev_shutdown(struct net_device *dev) 861void dev_shutdown(struct net_device *dev)
862{ 862{
863 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); 863 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
864 shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 864 shutdown_scheduler_queue(dev, &dev->ingress_queue, &noop_qdisc);
865 qdisc_destroy(dev->qdisc); 865 qdisc_destroy(dev->qdisc);
866 dev->qdisc = &noop_qdisc; 866 dev->qdisc = &noop_qdisc;
867 867
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32c..3cf478d012dd 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -123,40 +123,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
123 case htons(ETH_P_IP): 123 case htons(ETH_P_IP):
124 { 124 {
125 const struct iphdr *iph; 125 const struct iphdr *iph;
126 int poff;
126 127
127 if (!pskb_network_may_pull(skb, sizeof(*iph))) 128 if (!pskb_network_may_pull(skb, sizeof(*iph)))
128 goto err; 129 goto err;
129 iph = ip_hdr(skb); 130 iph = ip_hdr(skb);
130 h = (__force u32)iph->daddr; 131 h = (__force u32)iph->daddr;
131 h2 = (__force u32)iph->saddr ^ iph->protocol; 132 h2 = (__force u32)iph->saddr ^ iph->protocol;
132 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 133 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
133 (iph->protocol == IPPROTO_TCP || 134 break;
134 iph->protocol == IPPROTO_UDP || 135 poff = proto_ports_offset(iph->protocol);
135 iph->protocol == IPPROTO_UDPLITE || 136 if (poff >= 0 &&
136 iph->protocol == IPPROTO_SCTP || 137 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
137 iph->protocol == IPPROTO_DCCP || 138 iph = ip_hdr(skb);
138 iph->protocol == IPPROTO_ESP) && 139 h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
139 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 140 }
140 h2 ^= *(((u32*)iph) + iph->ihl);
141 break; 141 break;
142 } 142 }
143 case htons(ETH_P_IPV6): 143 case htons(ETH_P_IPV6):
144 { 144 {
145 struct ipv6hdr *iph; 145 struct ipv6hdr *iph;
146 int poff;
146 147
147 if (!pskb_network_may_pull(skb, sizeof(*iph))) 148 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 goto err; 149 goto err;
149 iph = ipv6_hdr(skb); 150 iph = ipv6_hdr(skb);
150 h = (__force u32)iph->daddr.s6_addr32[3]; 151 h = (__force u32)iph->daddr.s6_addr32[3];
151 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; 152 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
152 if ((iph->nexthdr == IPPROTO_TCP || 153 poff = proto_ports_offset(iph->nexthdr);
153 iph->nexthdr == IPPROTO_UDP || 154 if (poff >= 0 &&
154 iph->nexthdr == IPPROTO_UDPLITE || 155 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
155 iph->nexthdr == IPPROTO_SCTP || 156 iph = ipv6_hdr(skb);
156 iph->nexthdr == IPPROTO_DCCP || 157 h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
157 iph->nexthdr == IPPROTO_ESP) && 158 }
158 pskb_network_may_pull(skb, sizeof(*iph) + 4))
159 h2 ^= *(u32*)&iph[1];
160 break; 159 break;
161 } 160 }
162 default: 161 default:
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0b85e5256434..5f1fb8bd862d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/fcntl.h> 54#include <linux/fcntl.h>
53#include <linux/poll.h> 55#include <linux/poll.h>
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 476caaf100ed..6c8556459a75 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -37,6 +37,8 @@
37 * be incorporated into the next SCTP release. 37 * be incorporated into the next SCTP release.
38 */ 38 */
39 39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
40#include <linux/types.h> 42#include <linux/types.h>
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <linux/net.h> 44#include <linux/net.h>
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index ccb6dc48d15b..397296fb156f 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -43,6 +43,8 @@
43 * be incorporated into the next SCTP release. 43 * be incorporated into the next SCTP release.
44 */ 44 */
45 45
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
47
46#include <net/sctp/sctp.h> 48#include <net/sctp/sctp.h>
47#include <net/sctp/sm.h> 49#include <net/sctp/sm.h>
48#include <linux/interrupt.h> 50#include <linux/interrupt.h>
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 732689140fb8..95e0c8eda1a0 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/module.h> 52#include <linux/module.h>
51#include <linux/errno.h> 53#include <linux/errno.h>
52#include <linux/types.h> 54#include <linux/types.h>
@@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
336 memcpy(saddr, baddr, sizeof(union sctp_addr)); 338 memcpy(saddr, baddr, sizeof(union sctp_addr));
337 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); 339 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr);
338 } else { 340 } else {
339 printk(KERN_ERR "%s: asoc:%p Could not find a valid source " 341 pr_err("%s: asoc:%p Could not find a valid source "
340 "address for the dest:%pI6\n", 342 "address for the dest:%pI6\n",
341 __func__, asoc, &daddr->v6.sin6_addr); 343 __func__, asoc, &daddr->v6.sin6_addr);
342 } 344 }
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index f73ec0ea93ba..8ef8e7d9eb61 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -38,6 +38,8 @@
38 * be incorporated into the next SCTP release. 38 * be incorporated into the next SCTP release.
39 */ 39 */
40 40
41#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
42
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <net/sctp/sctp.h> 44#include <net/sctp/sctp.h>
43 45
@@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void)
134 ent = proc_create("sctp_dbg_objcnt", 0, 136 ent = proc_create("sctp_dbg_objcnt", 0,
135 proc_net_sctp, &sctp_objcnt_ops); 137 proc_net_sctp, &sctp_objcnt_ops);
136 if (!ent) 138 if (!ent)
137 printk(KERN_WARNING 139 pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
138 "sctp_dbg_objcnt: Unable to create /proc entry.\n");
139} 140}
140 141
141/* Cleanup the objcount entry in the proc filesystem. */ 142/* Cleanup the objcount entry in the proc filesystem. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index bcc4590ccaf2..60600d337a3a 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -41,6 +41,8 @@
41 * be incorporated into the next SCTP release. 41 * be incorporated into the next SCTP release.
42 */ 42 */
43 43
44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
45
44#include <linux/types.h> 46#include <linux/types.h>
45#include <linux/kernel.h> 47#include <linux/kernel.h>
46#include <linux/wait.h> 48#include <linux/wait.h>
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c04b2eb59186..8c6d379b4bb6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/types.h> 51#include <linux/types.h>
50#include <linux/list.h> /* For struct list_head */ 52#include <linux/list.h> /* For struct list_head */
51#include <linux/socket.h> 53#include <linux/socket.h>
@@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1463 /* Display the end of the 1465 /* Display the end of the
1464 * current range. 1466 * current range.
1465 */ 1467 */
1466 SCTP_DEBUG_PRINTK("-%08x", 1468 SCTP_DEBUG_PRINTK_CONT("-%08x",
1467 dbg_last_ack_tsn); 1469 dbg_last_ack_tsn);
1468 } 1470 }
1469 1471
1470 /* Start a new range. */ 1472 /* Start a new range. */
1471 SCTP_DEBUG_PRINTK(",%08x", tsn); 1473 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1472 dbg_ack_tsn = tsn; 1474 dbg_ack_tsn = tsn;
1473 break; 1475 break;
1474 1476
1475 case 1: /* The last TSN was NOT ACKed. */ 1477 case 1: /* The last TSN was NOT ACKed. */
1476 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1478 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1477 /* Display the end of current range. */ 1479 /* Display the end of current range. */
1478 SCTP_DEBUG_PRINTK("-%08x", 1480 SCTP_DEBUG_PRINTK_CONT("-%08x",
1479 dbg_last_kept_tsn); 1481 dbg_last_kept_tsn);
1480 } 1482 }
1481 1483
1482 SCTP_DEBUG_PRINTK("\n"); 1484 SCTP_DEBUG_PRINTK_CONT("\n");
1483 1485
1484 /* FALL THROUGH... */ 1486 /* FALL THROUGH... */
1485 default: 1487 default:
@@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1526 break; 1528 break;
1527 1529
1528 if (dbg_last_kept_tsn != dbg_kept_tsn) 1530 if (dbg_last_kept_tsn != dbg_kept_tsn)
1529 SCTP_DEBUG_PRINTK("-%08x", 1531 SCTP_DEBUG_PRINTK_CONT("-%08x",
1530 dbg_last_kept_tsn); 1532 dbg_last_kept_tsn);
1531 1533
1532 SCTP_DEBUG_PRINTK(",%08x", tsn); 1534 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1533 dbg_kept_tsn = tsn; 1535 dbg_kept_tsn = tsn;
1534 break; 1536 break;
1535 1537
1536 case 0: 1538 case 0:
1537 if (dbg_last_ack_tsn != dbg_ack_tsn) 1539 if (dbg_last_ack_tsn != dbg_ack_tsn)
1538 SCTP_DEBUG_PRINTK("-%08x", 1540 SCTP_DEBUG_PRINTK_CONT("-%08x",
1539 dbg_last_ack_tsn); 1541 dbg_last_ack_tsn);
1540 SCTP_DEBUG_PRINTK("\n"); 1542 SCTP_DEBUG_PRINTK_CONT("\n");
1541 1543
1542 /* FALL THROUGH... */ 1544 /* FALL THROUGH... */
1543 default: 1545 default:
@@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1556 switch (dbg_prt_state) { 1558 switch (dbg_prt_state) {
1557 case 0: 1559 case 0:
1558 if (dbg_last_ack_tsn != dbg_ack_tsn) { 1560 if (dbg_last_ack_tsn != dbg_ack_tsn) {
1559 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); 1561 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn);
1560 } else { 1562 } else {
1561 SCTP_DEBUG_PRINTK("\n"); 1563 SCTP_DEBUG_PRINTK_CONT("\n");
1562 } 1564 }
1563 break; 1565 break;
1564 1566
1565 case 1: 1567 case 1:
1566 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1568 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1567 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); 1569 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn);
1568 } else { 1570 } else {
1569 SCTP_DEBUG_PRINTK("\n"); 1571 SCTP_DEBUG_PRINTK_CONT("\n");
1570 } 1572 }
1571 } 1573 }
1572#endif /* SCTP_DEBUG */ 1574#endif /* SCTP_DEBUG */
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index db3a42b8b349..2e63e9dc010e 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -22,6 +22,8 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
25#include <linux/kernel.h> 27#include <linux/kernel.h>
26#include <linux/kprobes.h> 28#include <linux/kprobes.h>
27#include <linux/socket.h> 29#include <linux/socket.h>
@@ -192,7 +194,7 @@ static __init int sctpprobe_init(void)
192 if (ret) 194 if (ret)
193 goto remove_proc; 195 goto remove_proc;
194 196
195 pr_info("SCTP probe registered (port=%d)\n", port); 197 pr_info("probe registered (port=%d)\n", port);
196 198
197 return 0; 199 return 0;
198 200
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5027b83f1cc0..1ef29c74d85e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/module.h> 51#include <linux/module.h>
50#include <linux/init.h> 52#include <linux/init.h>
51#include <linux/netdevice.h> 53#include <linux/netdevice.h>
@@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void)
707 &init_net); 709 &init_net);
708 710
709 if (err < 0) { 711 if (err < 0) {
710 printk(KERN_ERR 712 pr_err("Failed to create the SCTP control socket\n");
711 "SCTP: Failed to create the SCTP control socket.\n");
712 return err; 713 return err;
713 } 714 }
714 return 0; 715 return 0;
@@ -798,7 +799,7 @@ static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
798static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp) 799static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp)
799{ 800{
800 /* PF_INET only supports AF_INET addresses. */ 801 /* PF_INET only supports AF_INET addresses. */
801 return (AF_INET == family); 802 return AF_INET == family;
802} 803}
803 804
804/* Address matching with wildcards allowed. */ 805/* Address matching with wildcards allowed. */
@@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void)
1206 __get_free_pages(GFP_ATOMIC, order); 1207 __get_free_pages(GFP_ATOMIC, order);
1207 } while (!sctp_assoc_hashtable && --order > 0); 1208 } while (!sctp_assoc_hashtable && --order > 0);
1208 if (!sctp_assoc_hashtable) { 1209 if (!sctp_assoc_hashtable) {
1209 printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); 1210 pr_err("Failed association hash alloc\n");
1210 status = -ENOMEM; 1211 status = -ENOMEM;
1211 goto err_ahash_alloc; 1212 goto err_ahash_alloc;
1212 } 1213 }
@@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void)
1220 sctp_ep_hashtable = (struct sctp_hashbucket *) 1221 sctp_ep_hashtable = (struct sctp_hashbucket *)
1221 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); 1222 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
1222 if (!sctp_ep_hashtable) { 1223 if (!sctp_ep_hashtable) {
1223 printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); 1224 pr_err("Failed endpoint_hash alloc\n");
1224 status = -ENOMEM; 1225 status = -ENOMEM;
1225 goto err_ehash_alloc; 1226 goto err_ehash_alloc;
1226 } 1227 }
@@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void)
1239 __get_free_pages(GFP_ATOMIC, order); 1240 __get_free_pages(GFP_ATOMIC, order);
1240 } while (!sctp_port_hashtable && --order > 0); 1241 } while (!sctp_port_hashtable && --order > 0);
1241 if (!sctp_port_hashtable) { 1242 if (!sctp_port_hashtable) {
1242 printk(KERN_ERR "SCTP: Failed bind hash alloc."); 1243 pr_err("Failed bind hash alloc\n");
1243 status = -ENOMEM; 1244 status = -ENOMEM;
1244 goto err_bhash_alloc; 1245 goto err_bhash_alloc;
1245 } 1246 }
@@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void)
1248 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); 1249 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
1249 } 1250 }
1250 1251
1251 printk(KERN_INFO "SCTP: Hash tables configured " 1252 pr_info("Hash tables configured (established %d bind %d)\n",
1252 "(established %d bind %d)\n",
1253 sctp_assoc_hashsize, sctp_port_hashsize); 1253 sctp_assoc_hashsize, sctp_port_hashsize);
1254 1254
1255 /* Disable ADDIP by default. */ 1255 /* Disable ADDIP by default. */
@@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void)
1290 1290
1291 /* Initialize the control inode/socket for handling OOTB packets. */ 1291 /* Initialize the control inode/socket for handling OOTB packets. */
1292 if ((status = sctp_ctl_sock_init())) { 1292 if ((status = sctp_ctl_sock_init())) {
1293 printk (KERN_ERR 1293 pr_err("Failed to initialize the SCTP control sock\n");
1294 "SCTP: Failed to initialize the SCTP control sock.\n");
1295 goto err_ctl_sock_init; 1294 goto err_ctl_sock_init;
1296 } 1295 }
1297 1296
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 246f92924658..2cc46f0962ca 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index f5e5e27cac5e..b21b218d564f 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/skbuff.h> 52#include <linux/skbuff.h>
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/socket.h> 54#include <linux/socket.h>
@@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
1146 1148
1147 case SCTP_DISPOSITION_VIOLATION: 1149 case SCTP_DISPOSITION_VIOLATION:
1148 if (net_ratelimit()) 1150 if (net_ratelimit())
1149 printk(KERN_ERR "sctp protocol violation state %d " 1151 pr_err("protocol violation state %d chunkid %d\n",
1150 "chunkid %d\n", state, subtype.chunk); 1152 state, subtype.chunk);
1151 break; 1153 break;
1152 1154
1153 case SCTP_DISPOSITION_NOT_IMPL: 1155 case SCTP_DISPOSITION_NOT_IMPL:
1154 printk(KERN_WARNING "sctp unimplemented feature in state %d, " 1156 pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n",
1155 "event_type %d, event_id %d\n", 1157 state, event_type, subtype.chunk);
1156 state, event_type, subtype.chunk);
1157 break; 1158 break;
1158 1159
1159 case SCTP_DISPOSITION_BUG: 1160 case SCTP_DISPOSITION_BUG:
1160 printk(KERN_ERR "sctp bug in state %d, " 1161 pr_err("bug in state %d, event_type %d, event_id %d\n",
1161 "event_type %d, event_id %d\n",
1162 state, event_type, subtype.chunk); 1162 state, event_type, subtype.chunk);
1163 BUG(); 1163 BUG();
1164 break; 1164 break;
1165 1165
1166 default: 1166 default:
1167 printk(KERN_ERR "sctp impossible disposition %d " 1167 pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
1168 "in state %d, event_type %d, event_id %d\n",
1169 status, state, event_type, subtype.chunk); 1168 status, state, event_type, subtype.chunk);
1170 BUG(); 1169 BUG();
1171 break; 1170 break;
@@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1679 sctp_cmd_send_asconf(asoc); 1678 sctp_cmd_send_asconf(asoc);
1680 break; 1679 break;
1681 default: 1680 default:
1682 printk(KERN_WARNING "Impossible command: %u, %p\n", 1681 pr_warn("Impossible command: %u, %p\n",
1683 cmd->verb, cmd->obj.ptr); 1682 cmd->verb, cmd->obj.ptr);
1684 break; 1683 break;
1685 } 1684 }
1686 1685
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d344dc481ccc..4b4eb7c96bbd 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
@@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1138 if (unlikely(!link)) { 1140 if (unlikely(!link)) {
1139 if (from_addr.sa.sa_family == AF_INET6) { 1141 if (from_addr.sa.sa_family == AF_INET6) {
1140 if (net_ratelimit()) 1142 if (net_ratelimit())
1141 printk(KERN_WARNING 1143 pr_warn("%s association %p could not find address %pI6\n",
1142 "%s association %p could not find address %pI6\n", 1144 __func__,
1143 __func__, 1145 asoc,
1144 asoc, 1146 &from_addr.v6.sin6_addr);
1145 &from_addr.v6.sin6_addr);
1146 } else { 1147 } else {
1147 if (net_ratelimit()) 1148 if (net_ratelimit())
1148 printk(KERN_WARNING 1149 pr_warn("%s association %p could not find address %pI4\n",
1149 "%s association %p could not find address %pI4\n", 1150 __func__,
1150 __func__, 1151 asoc,
1151 asoc, 1152 &from_addr.v4.sin_addr.s_addr);
1152 &from_addr.v4.sin_addr.s_addr);
1153 } 1153 }
1154 return SCTP_DISPOSITION_DISCARD; 1154 return SCTP_DISPOSITION_DISCARD;
1155 } 1155 }
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 6d9b3aafcc5d..546d4387fb3c 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/skbuff.h> 51#include <linux/skbuff.h>
50#include <net/sctp/sctp.h> 52#include <net/sctp/sctp.h>
51#include <net/sctp/sm.h> 53#include <net/sctp/sm.h>
@@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = {
66 .name = "sctp_sf_bug" 68 .name = "sctp_sf_bug"
67}; 69};
68 70
69#define DO_LOOKUP(_max, _type, _table) \ 71#define DO_LOOKUP(_max, _type, _table) \
70 if ((event_subtype._type > (_max))) { \ 72({ \
71 printk(KERN_WARNING \ 73 const sctp_sm_table_entry_t *rtn; \
72 "sctp table %p possible attack:" \ 74 \
73 " event %d exceeds max %d\n", \ 75 if ((event_subtype._type > (_max))) { \
74 _table, event_subtype._type, _max); \ 76 pr_warn("table %p possible attack: event %d exceeds max %d\n", \
75 return &bug; \ 77 _table, event_subtype._type, _max); \
76 } \ 78 rtn = &bug; \
77 return &_table[event_subtype._type][(int)state]; 79 } else \
80 rtn = &_table[event_subtype._type][(int)state]; \
81 \
82 rtn; \
83})
78 84
79const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, 85const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
80 sctp_state_t state, 86 sctp_state_t state,
@@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
83 switch (event_type) { 89 switch (event_type) {
84 case SCTP_EVENT_T_CHUNK: 90 case SCTP_EVENT_T_CHUNK:
85 return sctp_chunk_event_lookup(event_subtype.chunk, state); 91 return sctp_chunk_event_lookup(event_subtype.chunk, state);
86 break;
87 case SCTP_EVENT_T_TIMEOUT: 92 case SCTP_EVENT_T_TIMEOUT:
88 DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, 93 return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
89 timeout_event_table); 94 timeout_event_table);
90 break;
91
92 case SCTP_EVENT_T_OTHER: 95 case SCTP_EVENT_T_OTHER:
93 DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); 96 return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other,
94 break; 97 other_event_table);
95
96 case SCTP_EVENT_T_PRIMITIVE: 98 case SCTP_EVENT_T_PRIMITIVE:
97 DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, 99 return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive,
98 primitive_event_table); 100 primitive_event_table);
99 break;
100
101 default: 101 default:
102 /* Yikes! We got an illegal event type. */ 102 /* Yikes! We got an illegal event type. */
103 return &bug; 103 return &bug;
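
The old DO_LOOKUP() expanded to an open-coded return, which left unreachable break statements behind it; the new version is a GCC statement expression that yields a value, so each case can simply return DO_LOOKUP(...). A minimal standalone illustration of the construct (the table and strings below are made up for this sketch):

    #include <stdio.h>

    static const char *table[] = { "zero", "one", "two" };
    static const char *bug = "out of range";

    /* ({ ... }) is a GCC/clang statement expression: the block is executed
     * and its last expression becomes the value of the whole macro. */
    #define LOOKUP(_max, _idx)                                  \
    ({                                                          \
        const char *rtn;                                        \
        if ((_idx) > (_max)) {                                  \
            fprintf(stderr, "index %d exceeds max %d\n",        \
                    (_idx), (_max));                            \
            rtn = bug;                                          \
        } else                                                  \
            rtn = table[(_idx)];                                \
        rtn;                                                    \
    })

    int main(void)
    {
        printf("%s\n", LOOKUP(2, 1));   /* prints "one" */
        printf("%s\n", LOOKUP(2, 7));   /* warns, then prints "out of range" */
        return 0;
    }
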
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fbb70770ad05..e34ca9cc1167 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -57,6 +57,8 @@
57 * be incorporated into the next SCTP release. 57 * be incorporated into the next SCTP release.
58 */ 58 */
59 59
60#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
61
60#include <linux/types.h> 62#include <linux/types.h>
61#include <linux/kernel.h> 63#include <linux/kernel.h>
62#include <linux/wait.h> 64#include <linux/wait.h>
@@ -2469,9 +2471,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
2469 if (params.sack_delay == 0 && params.sack_freq == 0) 2471 if (params.sack_delay == 0 && params.sack_freq == 0)
2470 return 0; 2472 return 0;
2471 } else if (optlen == sizeof(struct sctp_assoc_value)) { 2473 } else if (optlen == sizeof(struct sctp_assoc_value)) {
2472 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 2474 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
2473 "in delayed_ack socket option deprecated\n"); 2475 pr_warn("Use struct sctp_sack_info instead\n");
2474 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
2475 if (copy_from_user(&params, optval, optlen)) 2476 if (copy_from_user(&params, optval, optlen))
2476 return -EFAULT; 2477 return -EFAULT;
2477 2478
@@ -2879,10 +2880,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
2879 int val; 2880 int val;
2880 2881
2881 if (optlen == sizeof(int)) { 2882 if (optlen == sizeof(int)) {
2882 printk(KERN_WARNING 2883 pr_warn("Use of int in maxseg socket option deprecated\n");
2883 "SCTP: Use of int in maxseg socket option deprecated\n"); 2884 pr_warn("Use struct sctp_assoc_value instead\n");
2884 printk(KERN_WARNING
2885 "SCTP: Use struct sctp_assoc_value instead\n");
2886 if (copy_from_user(&val, optval, optlen)) 2885 if (copy_from_user(&val, optval, optlen))
2887 return -EFAULT; 2886 return -EFAULT;
2888 params.assoc_id = 0; 2887 params.assoc_id = 0;
@@ -3132,10 +3131,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
3132 int assoc_id = 0; 3131 int assoc_id = 0;
3133 3132
3134 if (optlen == sizeof(int)) { 3133 if (optlen == sizeof(int)) {
3135 printk(KERN_WARNING 3134 pr_warn("Use of int in max_burst socket option deprecated\n");
3136 "SCTP: Use of int in max_burst socket option deprecated\n"); 3135 pr_warn("Use struct sctp_assoc_value instead\n");
3137 printk(KERN_WARNING
3138 "SCTP: Use struct sctp_assoc_value instead\n");
3139 if (copy_from_user(&val, optval, optlen)) 3136 if (copy_from_user(&val, optval, optlen))
3140 return -EFAULT; 3137 return -EFAULT;
3141 } else if (optlen == sizeof(struct sctp_assoc_value)) { 3138 } else if (optlen == sizeof(struct sctp_assoc_value)) {
@@ -3606,7 +3603,40 @@ out:
3606/* The SCTP ioctl handler. */ 3603/* The SCTP ioctl handler. */
3607SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) 3604SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
3608{ 3605{
3609 return -ENOIOCTLCMD; 3606 int rc = -ENOTCONN;
3607
3608 sctp_lock_sock(sk);
3609
3610 /*
3611 * SEQPACKET-style sockets in LISTENING state are valid, for
3612 * SCTP, so only discard TCP-style sockets in LISTENING state.
3613 */
3614 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
3615 goto out;
3616
3617 switch (cmd) {
3618 case SIOCINQ: {
3619 struct sk_buff *skb;
3620 unsigned int amount = 0;
3621
3622 skb = skb_peek(&sk->sk_receive_queue);
3623 if (skb != NULL) {
3624 /*
3625 * We will only return the amount of this packet since
3626 * that is all that will be read.
3627 */
3628 amount = skb->len;
3629 }
3630 rc = put_user(amount, (int __user *)arg);
3631 break;
3632 }
3633 default:
3634 rc = -ENOIOCTLCMD;
3635 break;
3636 }
3637out:
3638 sctp_release_sock(sk);
3639 return rc;
3610} 3640}
3611 3641
3612/* This is the function which gets called during socket creation to 3642/* This is the function which gets called during socket creation to
@@ -3865,7 +3895,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
3865 } 3895 }
3866 3896
3867out: 3897out:
3868 return (retval); 3898 return retval;
3869} 3899}
3870 3900
3871 3901
@@ -3921,7 +3951,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
3921 } 3951 }
3922 3952
3923out: 3953out:
3924 return (retval); 3954 return retval;
3925} 3955}
3926 3956
3927/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS) 3957/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS)
@@ -4292,9 +4322,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len,
4292 if (copy_from_user(&params, optval, len)) 4322 if (copy_from_user(&params, optval, len))
4293 return -EFAULT; 4323 return -EFAULT;
4294 } else if (len == sizeof(struct sctp_assoc_value)) { 4324 } else if (len == sizeof(struct sctp_assoc_value)) {
4295 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 4325 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
4296 "in delayed_ack socket option deprecated\n"); 4326 pr_warn("Use struct sctp_sack_info instead\n");
4297 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
4298 if (copy_from_user(&params, optval, len)) 4327 if (copy_from_user(&params, optval, len))
4299 return -EFAULT; 4328 return -EFAULT;
4300 } else 4329 } else
@@ -4940,10 +4969,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
4940 struct sctp_association *asoc; 4969 struct sctp_association *asoc;
4941 4970
4942 if (len == sizeof(int)) { 4971 if (len == sizeof(int)) {
4943 printk(KERN_WARNING 4972 pr_warn("Use of int in maxseg socket option deprecated\n");
4944 "SCTP: Use of int in maxseg socket option deprecated\n"); 4973 pr_warn("Use struct sctp_assoc_value instead\n");
4945 printk(KERN_WARNING
4946 "SCTP: Use struct sctp_assoc_value instead\n");
4947 params.assoc_id = 0; 4974 params.assoc_id = 0;
4948 } else if (len >= sizeof(struct sctp_assoc_value)) { 4975 } else if (len >= sizeof(struct sctp_assoc_value)) {
4949 len = sizeof(struct sctp_assoc_value); 4976 len = sizeof(struct sctp_assoc_value);
@@ -5034,10 +5061,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
5034 struct sctp_association *asoc; 5061 struct sctp_association *asoc;
5035 5062
5036 if (len == sizeof(int)) { 5063 if (len == sizeof(int)) {
5037 printk(KERN_WARNING 5064 pr_warn("Use of int in max_burst socket option deprecated\n");
5038 "SCTP: Use of int in max_burst socket option deprecated\n"); 5065 pr_warn("Use struct sctp_assoc_value instead\n");
5039 printk(KERN_WARNING
5040 "SCTP: Use struct sctp_assoc_value instead\n");
5041 params.assoc_id = 0; 5066 params.assoc_id = 0;
5042 } else if (len >= sizeof(struct sctp_assoc_value)) { 5067 } else if (len >= sizeof(struct sctp_assoc_value)) {
5043 len = sizeof(struct sctp_assoc_value); 5068 len = sizeof(struct sctp_assoc_value);
@@ -5580,7 +5605,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum)
5580 /* Note: sk->sk_num gets filled in if ephemeral port request. */ 5605 /* Note: sk->sk_num gets filled in if ephemeral port request. */
5581 ret = sctp_get_port_local(sk, &addr); 5606 ret = sctp_get_port_local(sk, &addr);
5582 5607
5583 return (ret ? 1 : 0); 5608 return ret ? 1 : 0;
5584} 5609}
5585 5610
5586/* 5611/*
@@ -5597,8 +5622,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
5597 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); 5622 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
5598 if (IS_ERR(tfm)) { 5623 if (IS_ERR(tfm)) {
5599 if (net_ratelimit()) { 5624 if (net_ratelimit()) {
5600 printk(KERN_INFO 5625 pr_info("failed to load transform for %s: %ld\n",
5601 "SCTP: failed to load transform for %s: %ld\n",
5602 sctp_hmac_alg, PTR_ERR(tfm)); 5626 sctp_hmac_alg, PTR_ERR(tfm));
5603 } 5627 }
5604 return -ENOSYS; 5628 return -ENOSYS;
@@ -5727,13 +5751,12 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
5727 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 5751 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
5728 mask |= POLLERR; 5752 mask |= POLLERR;
5729 if (sk->sk_shutdown & RCV_SHUTDOWN) 5753 if (sk->sk_shutdown & RCV_SHUTDOWN)
5730 mask |= POLLRDHUP; 5754 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
5731 if (sk->sk_shutdown == SHUTDOWN_MASK) 5755 if (sk->sk_shutdown == SHUTDOWN_MASK)
5732 mask |= POLLHUP; 5756 mask |= POLLHUP;
5733 5757
5734 /* Is it readable? Reconsider this code with TCP-style support. */ 5758 /* Is it readable? Reconsider this code with TCP-style support. */
5735 if (!skb_queue_empty(&sk->sk_receive_queue) || 5759 if (!skb_queue_empty(&sk->sk_receive_queue))
5736 (sk->sk_shutdown & RCV_SHUTDOWN))
5737 mask |= POLLIN | POLLRDNORM; 5760 mask |= POLLIN | POLLRDNORM;
5738 5761
5739 /* The association is either gone or not ready. */ 5762 /* The association is either gone or not ready. */
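
Two behavioural changes sit in this file besides the log conversions: sctp_ioctl() now answers SIOCINQ with the length of the first queued message instead of returning -ENOIOCTLCMD, and sctp_poll() reports a shut-down receive direction as readable (POLLIN | POLLRDNORM) in addition to POLLRDHUP. From userspace SIOCINQ is the same ioctl already used on TCP/UDP sockets; a hedged usage sketch, assuming a kernel with SCTP support and with error handling trimmed:

    #include <stdio.h>
    #include <sys/socket.h>
    #include <sys/ioctl.h>
    #include <netinet/in.h>
    #include <linux/sockios.h>     /* SIOCINQ */

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
        int avail = 0;

        if (fd < 0) {
            perror("socket");      /* kernel may lack SCTP support */
            return 1;
        }
        /* After bind()/connect(), once data is queued, SIOCINQ reports the
         * size of the next message that recvmsg() would return. */
        if (ioctl(fd, SIOCINQ, &avail) == 0)
            printf("next message: %d bytes\n", avail);
        else
            perror("ioctl(SIOCINQ)");
        return 0;
    }
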
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 132046cb82fc..d3ae493d234a 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/slab.h> 53#include <linux/slab.h>
52#include <linux/types.h> 54#include <linux/types.h>
53#include <linux/random.h> 55#include <linux/random.h>
@@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
244 struct dst_entry *dst; 246 struct dst_entry *dst;
245 247
246 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { 248 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
247 printk(KERN_WARNING "%s: Reported pmtu %d too low, " 249 pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
248 "using default minimum of %d\n", 250 __func__, pmtu,
249 __func__, pmtu, 251 SCTP_DEFAULT_MINSEGMENT);
250 SCTP_DEFAULT_MINSEGMENT);
251 /* Use default minimum segment size and disable 252 /* Use default minimum segment size and disable
252 * pmtu discovery on this transport. 253 * pmtu discovery on this transport.
253 */ 254 */
diff --git a/net/socket.c b/net/socket.c
index 2270b941bcc7..717a5f1c8792 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -535,14 +535,13 @@ void sock_release(struct socket *sock)
535} 535}
536EXPORT_SYMBOL(sock_release); 536EXPORT_SYMBOL(sock_release);
537 537
538int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 538int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
539 union skb_shared_tx *shtx)
540{ 539{
541 shtx->flags = 0; 540 *tx_flags = 0;
542 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 541 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
543 shtx->hardware = 1; 542 *tx_flags |= SKBTX_HW_TSTAMP;
544 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 543 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
545 shtx->software = 1; 544 *tx_flags |= SKBTX_SW_TSTAMP;
546 return 0; 545 return 0;
547} 546}
548EXPORT_SYMBOL(sock_tx_timestamp); 547EXPORT_SYMBOL(sock_tx_timestamp);
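
sock_tx_timestamp() no longer fills a union skb_shared_tx bitfield; it now builds a __u8 of SKBTX_* flag bits from the socket's SOCK_TIMESTAMPING_TX_* flags. Those socket flags are driven from userspace via SO_TIMESTAMPING; a minimal request sketch follows, assuming a kernel and NIC with timestamping support (the fallback define is only for older libc headers and uses the common x86 value).

    #include <stdio.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <linux/net_tstamp.h>  /* SOF_TIMESTAMPING_* */

    #ifndef SO_TIMESTAMPING
    #define SO_TIMESTAMPING 37     /* assumption: common asm-generic value */
    #endif

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        int flags = SOF_TIMESTAMPING_TX_HARDWARE |
                    SOF_TIMESTAMPING_TX_SOFTWARE |
                    SOF_TIMESTAMPING_SOFTWARE;   /* report software stamps */

        if (fd < 0) {
            perror("socket");
            return 1;
        }
        if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
                       &flags, sizeof(flags)) < 0)
            perror("SO_TIMESTAMPING");
        /* Subsequent sends have their tx_flags populated (SKBTX_HW_TSTAMP
         * and/or SKBTX_SW_TSTAMP) and completions arrive on the error queue. */
        return 0;
    }
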
@@ -1919,7 +1918,8 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1919 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1918 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1920 * checking falls down on this. 1919 * checking falls down on this.
1921 */ 1920 */
1922 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1921 if (copy_from_user(ctl_buf,
1922 (void __user __force *)msg_sys.msg_control,
1923 ctl_len)) 1923 ctl_len))
1924 goto out_freectl; 1924 goto out_freectl;
1925 msg_sys.msg_control = ctl_buf; 1925 msg_sys.msg_control = ctl_buf;
@@ -3054,14 +3054,19 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
3054 char *optval, int *optlen) 3054 char *optval, int *optlen)
3055{ 3055{
3056 mm_segment_t oldfs = get_fs(); 3056 mm_segment_t oldfs = get_fs();
3057 char __user *uoptval;
3058 int __user *uoptlen;
3057 int err; 3059 int err;
3058 3060
3061 uoptval = (char __user __force *) optval;
3062 uoptlen = (int __user __force *) optlen;
3063
3059 set_fs(KERNEL_DS); 3064 set_fs(KERNEL_DS);
3060 if (level == SOL_SOCKET) 3065 if (level == SOL_SOCKET)
3061 err = sock_getsockopt(sock, level, optname, optval, optlen); 3066 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3062 else 3067 else
3063 err = sock->ops->getsockopt(sock, level, optname, optval, 3068 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3064 optlen); 3069 uoptlen);
3065 set_fs(oldfs); 3070 set_fs(oldfs);
3066 return err; 3071 return err;
3067} 3072}
@@ -3071,13 +3076,16 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
3071 char *optval, unsigned int optlen) 3076 char *optval, unsigned int optlen)
3072{ 3077{
3073 mm_segment_t oldfs = get_fs(); 3078 mm_segment_t oldfs = get_fs();
3079 char __user *uoptval;
3074 int err; 3080 int err;
3075 3081
3082 uoptval = (char __user __force *) optval;
3083
3076 set_fs(KERNEL_DS); 3084 set_fs(KERNEL_DS);
3077 if (level == SOL_SOCKET) 3085 if (level == SOL_SOCKET)
3078 err = sock_setsockopt(sock, level, optname, optval, optlen); 3086 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3079 else 3087 else
3080 err = sock->ops->setsockopt(sock, level, optname, optval, 3088 err = sock->ops->setsockopt(sock, level, optname, uoptval,
3081 optlen); 3089 optlen);
3082 set_fs(oldfs); 3090 set_fs(oldfs);
3083 return err; 3091 return err;
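
kernel_getsockopt()/kernel_setsockopt() run under set_fs(KERNEL_DS) and hand kernel pointers to interfaces declared with __user arguments; the added (char __user __force *) casts keep sparse quiet about the address-space crossing without changing the generated code. A tiny illustration of how these annotations are no-ops to the compiler and only meaningful to sparse; the empty macro definitions below mirror what linux/compiler.h does when __CHECKER__ is unset and are local to this sketch.

    #include <stdio.h>
    #include <string.h>

    /* Outside a sparse run these annotations expand to nothing. */
    #define __user
    #define __force

    static int copy_out(char __user *dst, const char *src, size_t n)
    {
        memcpy(dst, src, n);       /* stand-in for copy_to_user() */
        return 0;
    }

    int main(void)
    {
        char kbuf[8];

        /* The __force cast says "this address-space crossing is intended";
         * sparse stays quiet and the object code is identical. */
        copy_out((char __user __force *)kbuf, "ok", 3);
        printf("%s\n", kbuf);
        return 0;
    }
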
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index dcfc66bab2bb..597c493392ad 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1049,7 +1049,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
1049out: 1049out:
1050 if (acred->machine_cred != gss_cred->gc_machine_cred) 1050 if (acred->machine_cred != gss_cred->gc_machine_cred)
1051 return 0; 1051 return 0;
1052 return (rc->cr_uid == acred->uid); 1052 return rc->cr_uid == acred->uid;
1053} 1053}
1054 1054
1055/* 1055/*
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index 310b78e99456..c586e92bcf76 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -76,19 +76,19 @@ static int
76der_length_size( int length) 76der_length_size( int length)
77{ 77{
78 if (length < (1<<7)) 78 if (length < (1<<7))
79 return(1); 79 return 1;
80 else if (length < (1<<8)) 80 else if (length < (1<<8))
81 return(2); 81 return 2;
82#if (SIZEOF_INT == 2) 82#if (SIZEOF_INT == 2)
83 else 83 else
84 return(3); 84 return 3;
85#else 85#else
86 else if (length < (1<<16)) 86 else if (length < (1<<16))
87 return(3); 87 return 3;
88 else if (length < (1<<24)) 88 else if (length < (1<<24))
89 return(4); 89 return 4;
90 else 90 else
91 return(5); 91 return 5;
92#endif 92#endif
93} 93}
94 94
@@ -121,14 +121,14 @@ der_read_length(unsigned char **buf, int *bufsize)
121 int ret; 121 int ret;
122 122
123 if (*bufsize < 1) 123 if (*bufsize < 1)
124 return(-1); 124 return -1;
125 sf = *(*buf)++; 125 sf = *(*buf)++;
126 (*bufsize)--; 126 (*bufsize)--;
127 if (sf & 0x80) { 127 if (sf & 0x80) {
128 if ((sf &= 0x7f) > ((*bufsize)-1)) 128 if ((sf &= 0x7f) > ((*bufsize)-1))
129 return(-1); 129 return -1;
130 if (sf > SIZEOF_INT) 130 if (sf > SIZEOF_INT)
131 return (-1); 131 return -1;
132 ret = 0; 132 ret = 0;
133 for (; sf; sf--) { 133 for (; sf; sf--) {
134 ret = (ret<<8) + (*(*buf)++); 134 ret = (ret<<8) + (*(*buf)++);
@@ -138,7 +138,7 @@ der_read_length(unsigned char **buf, int *bufsize)
138 ret = sf; 138 ret = sf;
139 } 139 }
140 140
141 return(ret); 141 return ret;
142} 142}
143 143
144/* returns the length of a token, given the mech oid and the body size */ 144/* returns the length of a token, given the mech oid and the body size */
@@ -148,7 +148,7 @@ g_token_size(struct xdr_netobj *mech, unsigned int body_size)
148{ 148{
149 /* set body_size to sequence contents size */ 149 /* set body_size to sequence contents size */
150 body_size += 2 + (int) mech->len; /* NEED overflow check */ 150 body_size += 2 + (int) mech->len; /* NEED overflow check */
151 return(1 + der_length_size(body_size) + body_size); 151 return 1 + der_length_size(body_size) + body_size;
152} 152}
153 153
154EXPORT_SYMBOL_GPL(g_token_size); 154EXPORT_SYMBOL_GPL(g_token_size);
@@ -186,27 +186,27 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
186 int ret = 0; 186 int ret = 0;
187 187
188 if ((toksize-=1) < 0) 188 if ((toksize-=1) < 0)
189 return(G_BAD_TOK_HEADER); 189 return G_BAD_TOK_HEADER;
190 if (*buf++ != 0x60) 190 if (*buf++ != 0x60)
191 return(G_BAD_TOK_HEADER); 191 return G_BAD_TOK_HEADER;
192 192
193 if ((seqsize = der_read_length(&buf, &toksize)) < 0) 193 if ((seqsize = der_read_length(&buf, &toksize)) < 0)
194 return(G_BAD_TOK_HEADER); 194 return G_BAD_TOK_HEADER;
195 195
196 if (seqsize != toksize) 196 if (seqsize != toksize)
197 return(G_BAD_TOK_HEADER); 197 return G_BAD_TOK_HEADER;
198 198
199 if ((toksize-=1) < 0) 199 if ((toksize-=1) < 0)
200 return(G_BAD_TOK_HEADER); 200 return G_BAD_TOK_HEADER;
201 if (*buf++ != 0x06) 201 if (*buf++ != 0x06)
202 return(G_BAD_TOK_HEADER); 202 return G_BAD_TOK_HEADER;
203 203
204 if ((toksize-=1) < 0) 204 if ((toksize-=1) < 0)
205 return(G_BAD_TOK_HEADER); 205 return G_BAD_TOK_HEADER;
206 toid.len = *buf++; 206 toid.len = *buf++;
207 207
208 if ((toksize-=toid.len) < 0) 208 if ((toksize-=toid.len) < 0)
209 return(G_BAD_TOK_HEADER); 209 return G_BAD_TOK_HEADER;
210 toid.data = buf; 210 toid.data = buf;
211 buf+=toid.len; 211 buf+=toid.len;
212 212
@@ -217,17 +217,17 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
217 to return G_BAD_TOK_HEADER if the token header is in fact bad */ 217 to return G_BAD_TOK_HEADER if the token header is in fact bad */
218 218
219 if ((toksize-=2) < 0) 219 if ((toksize-=2) < 0)
220 return(G_BAD_TOK_HEADER); 220 return G_BAD_TOK_HEADER;
221 221
222 if (ret) 222 if (ret)
223 return(ret); 223 return ret;
224 224
225 if (!ret) { 225 if (!ret) {
226 *buf_in = buf; 226 *buf_in = buf;
227 *body_size = toksize; 227 *body_size = toksize;
228 } 228 }
229 229
230 return(ret); 230 return ret;
231} 231}
232 232
233EXPORT_SYMBOL_GPL(g_verify_token_header); 233EXPORT_SYMBOL_GPL(g_verify_token_header);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 415c013ba382..62ac90c62cb1 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -162,5 +162,5 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
162 *seqnum = ((plain[0]) | 162 *seqnum = ((plain[0]) |
163 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); 163 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
164 164
165 return (0); 165 return 0;
166} 166}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 2689de39dc78..8b4061049d76 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -331,7 +331,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle)
331 *context_handle); 331 *context_handle);
332 332
333 if (!*context_handle) 333 if (!*context_handle)
334 return(GSS_S_NO_CONTEXT); 334 return GSS_S_NO_CONTEXT;
335 if ((*context_handle)->internal_ctx_id) 335 if ((*context_handle)->internal_ctx_id)
336 (*context_handle)->mech_type->gm_ops 336 (*context_handle)->mech_type->gm_ops
337 ->gss_delete_sec_context((*context_handle) 337 ->gss_delete_sec_context((*context_handle)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cace6049e4a5..aa5dbda6608c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -376,7 +376,7 @@ int rpc_queue_empty(struct rpc_wait_queue *queue)
376 spin_lock_bh(&queue->lock); 376 spin_lock_bh(&queue->lock);
377 res = queue->qlen; 377 res = queue->qlen;
378 spin_unlock_bh(&queue->lock); 378 spin_unlock_bh(&queue->lock);
379 return (res == 0); 379 return res == 0;
380} 380}
381EXPORT_SYMBOL_GPL(rpc_queue_empty); 381EXPORT_SYMBOL_GPL(rpc_queue_empty);
382 382
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index c048543ffbeb..2ddc351b3be9 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -89,7 +89,7 @@ int tipc_addr_domain_valid(u32 addr)
89 89
90int tipc_addr_node_valid(u32 addr) 90int tipc_addr_node_valid(u32 addr)
91{ 91{
92 return (tipc_addr_domain_valid(addr) && tipc_node(addr)); 92 return tipc_addr_domain_valid(addr) && tipc_node(addr);
93} 93}
94 94
95int tipc_in_scope(u32 domain, u32 addr) 95int tipc_in_scope(u32 domain, u32 addr)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a008c6689305..ecfaac10d0b4 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -143,6 +143,19 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
143} 143}
144 144
145 145
146static void bclink_set_last_sent(void)
147{
148 if (bcl->next_out)
149 bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1);
150 else
151 bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
152}
153
154u32 tipc_bclink_get_last_sent(void)
155{
156 return bcl->fsm_msg_cnt;
157}
158
146/** 159/**
147 * bclink_set_gap - set gap according to contents of current deferred pkt queue 160 * bclink_set_gap - set gap according to contents of current deferred pkt queue
148 * 161 *
@@ -171,7 +184,7 @@ static void bclink_set_gap(struct tipc_node *n_ptr)
171 184
172static int bclink_ack_allowed(u32 n) 185static int bclink_ack_allowed(u32 n)
173{ 186{
174 return((n % TIPC_MIN_LINK_WIN) == tipc_own_tag); 187 return (n % TIPC_MIN_LINK_WIN) == tipc_own_tag;
175} 188}
176 189
177 190
@@ -237,8 +250,10 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
237 250
238 /* Try resolving broadcast link congestion, if necessary */ 251 /* Try resolving broadcast link congestion, if necessary */
239 252
240 if (unlikely(bcl->next_out)) 253 if (unlikely(bcl->next_out)) {
241 tipc_link_push_queue(bcl); 254 tipc_link_push_queue(bcl);
255 bclink_set_last_sent();
256 }
242 if (unlikely(released && !list_empty(&bcl->waiting_ports))) 257 if (unlikely(released && !list_empty(&bcl->waiting_ports)))
243 tipc_link_wakeup_ports(bcl, 0); 258 tipc_link_wakeup_ports(bcl, 0);
244 spin_unlock_bh(&bc_lock); 259 spin_unlock_bh(&bc_lock);
@@ -395,7 +410,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
395 if (unlikely(res == -ELINKCONG)) 410 if (unlikely(res == -ELINKCONG))
396 buf_discard(buf); 411 buf_discard(buf);
397 else 412 else
398 bcl->stats.sent_info++; 413 bclink_set_last_sent();
399 414
400 if (bcl->out_queue_size > bcl->stats.max_queue_sz) 415 if (bcl->out_queue_size > bcl->stats.max_queue_sz)
401 bcl->stats.max_queue_sz = bcl->out_queue_size; 416 bcl->stats.max_queue_sz = bcl->out_queue_size;
@@ -529,15 +544,6 @@ receive:
529 tipc_node_unlock(node); 544 tipc_node_unlock(node);
530} 545}
531 546
532u32 tipc_bclink_get_last_sent(void)
533{
534 u32 last_sent = mod(bcl->next_out_no - 1);
535
536 if (bcl->next_out)
537 last_sent = mod(buf_seqno(bcl->next_out) - 1);
538 return last_sent;
539}
540
541u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) 547u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
542{ 548{
543 return (n_ptr->bclink.supported && 549 return (n_ptr->bclink.supported &&
@@ -570,6 +576,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
570 msg = buf_msg(buf); 576 msg = buf_msg(buf);
571 msg_set_non_seq(msg, 1); 577 msg_set_non_seq(msg, 1);
572 msg_set_mc_netid(msg, tipc_net_id); 578 msg_set_mc_netid(msg, tipc_net_id);
579 bcl->stats.sent_info++;
573 } 580 }
574 581
575 /* Send buffer over bearers until all targets reached */ 582 /* Send buffer over bearers until all targets reached */
@@ -609,11 +616,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
609 bcbearer->remains = bcbearer->remains_new; 616 bcbearer->remains = bcbearer->remains_new;
610 } 617 }
611 618
612 /* Unable to reach all targets */ 619 /*
620 * Unable to reach all targets (indicate success, since currently
621 * there isn't code in place to properly block & unblock the
622 * pseudo-bearer used by the broadcast link)
623 */
613 624
614 bcbearer->bearer.publ.blocked = 1; 625 return TIPC_OK;
615 bcl->stats.bearer_congs++;
616 return 1;
617} 626}
618 627
619/** 628/**
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 52ae17b2583e..9c10c6b7c12b 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -63,7 +63,7 @@ static int media_name_valid(const char *name)
63 len = strlen(name); 63 len = strlen(name);
64 if ((len + 1) > TIPC_MAX_MEDIA_NAME) 64 if ((len + 1) > TIPC_MAX_MEDIA_NAME)
65 return 0; 65 return 0;
66 return (strspn(name, tipc_alphabet) == len); 66 return strspn(name, tipc_alphabet) == len;
67} 67}
68 68
69/** 69/**
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 696468117985..466b861dab91 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -169,6 +169,7 @@ void tipc_core_stop(void)
169 tipc_nametbl_stop(); 169 tipc_nametbl_stop();
170 tipc_ref_table_stop(); 170 tipc_ref_table_stop();
171 tipc_socket_stop(); 171 tipc_socket_stop();
172 tipc_log_resize(0);
172} 173}
173 174
174/** 175/**
@@ -203,7 +204,9 @@ static int __init tipc_init(void)
203{ 204{
204 int res; 205 int res;
205 206
206 tipc_log_resize(CONFIG_TIPC_LOG); 207 if (tipc_log_resize(CONFIG_TIPC_LOG) != 0)
208 warn("Unable to create log buffer\n");
209
207 info("Activated (version " TIPC_MOD_VER 210 info("Activated (version " TIPC_MOD_VER
208 " compiled " __DATE__ " " __TIME__ ")\n"); 211 " compiled " __DATE__ " " __TIME__ ")\n");
209 212
@@ -230,7 +233,6 @@ static void __exit tipc_exit(void)
230 tipc_core_stop_net(); 233 tipc_core_stop_net();
231 tipc_core_stop(); 234 tipc_core_stop();
232 info("Deactivated\n"); 235 info("Deactivated\n");
233 tipc_log_resize(0);
234} 236}
235 237
236module_init(tipc_init); 238module_init(tipc_init);
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 1885a7edb0c8..6569d45bfb9a 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -134,7 +134,7 @@ void tipc_printbuf_reset(struct print_buf *pb)
134 134
135int tipc_printbuf_empty(struct print_buf *pb) 135int tipc_printbuf_empty(struct print_buf *pb)
136{ 136{
137 return (!pb->buf || (pb->crs == pb->buf)); 137 return !pb->buf || (pb->crs == pb->buf);
138} 138}
139 139
140/** 140/**
@@ -169,7 +169,7 @@ int tipc_printbuf_validate(struct print_buf *pb)
169 tipc_printf(pb, err); 169 tipc_printf(pb, err);
170 } 170 }
171 } 171 }
172 return (pb->crs - pb->buf + 1); 172 return pb->crs - pb->buf + 1;
173} 173}
174 174
175/** 175/**
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fc1fcf5e6b53..f28d1ae93125 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -203,6 +203,14 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
203 return; 203 return;
204 } 204 }
205 spin_lock_bh(&n_ptr->lock); 205 spin_lock_bh(&n_ptr->lock);
206
207 /* Don't talk to neighbor during cleanup after last session */
208
209 if (n_ptr->cleanup_required) {
210 spin_unlock_bh(&n_ptr->lock);
211 return;
212 }
213
206 link = n_ptr->links[b_ptr->identity]; 214 link = n_ptr->links[b_ptr->identity];
207 if (!link) { 215 if (!link) {
208 dbg("creating link\n"); 216 dbg("creating link\n");
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 6230d16020c4..6e988ba485fd 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -72,17 +72,26 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
72{ 72{
73 struct sk_buff *clone; 73 struct sk_buff *clone;
74 struct net_device *dev; 74 struct net_device *dev;
75 int delta;
75 76
76 clone = skb_clone(buf, GFP_ATOMIC); 77 clone = skb_clone(buf, GFP_ATOMIC);
77 if (clone) { 78 if (!clone)
78 skb_reset_network_header(clone); 79 return 0;
79 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; 80
80 clone->dev = dev; 81 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
81 dev_hard_header(clone, dev, ETH_P_TIPC, 82 delta = dev->hard_header_len - skb_headroom(buf);
82 &dest->dev_addr.eth_addr, 83
83 dev->dev_addr, clone->len); 84 if ((delta > 0) &&
84 dev_queue_xmit(clone); 85 pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
86 kfree_skb(clone);
87 return 0;
85 } 88 }
89
90 skb_reset_network_header(clone);
91 clone->dev = dev;
92 dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr,
93 dev->dev_addr, clone->len);
94 dev_queue_xmit(clone);
86 return 0; 95 return 0;
87} 96}
88 97
@@ -92,15 +101,12 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
92 * Accept only packets explicitly sent to this node, or broadcast packets; 101 * Accept only packets explicitly sent to this node, or broadcast packets;
93 * ignores packets sent using Ethernet multicast, and traffic sent to other 102 * ignores packets sent using Ethernet multicast, and traffic sent to other
94 * nodes (which can happen if interface is running in promiscuous mode). 103 * nodes (which can happen if interface is running in promiscuous mode).
95 * Routine truncates any Ethernet padding/CRC appended to the message,
96 * and ensures message size matches actual length
97 */ 104 */
98 105
99static int recv_msg(struct sk_buff *buf, struct net_device *dev, 106static int recv_msg(struct sk_buff *buf, struct net_device *dev,
100 struct packet_type *pt, struct net_device *orig_dev) 107 struct packet_type *pt, struct net_device *orig_dev)
101{ 108{
102 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; 109 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
103 u32 size;
104 110
105 if (!net_eq(dev_net(dev), &init_net)) { 111 if (!net_eq(dev_net(dev), &init_net)) {
106 kfree_skb(buf); 112 kfree_skb(buf);
@@ -109,13 +115,9 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
109 115
110 if (likely(eb_ptr->bearer)) { 116 if (likely(eb_ptr->bearer)) {
111 if (likely(buf->pkt_type <= PACKET_BROADCAST)) { 117 if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
112 size = msg_size((struct tipc_msg *)buf->data); 118 buf->next = NULL;
113 skb_trim(buf, size); 119 tipc_recv_msg(buf, eb_ptr->bearer);
114 if (likely(buf->len == size)) { 120 return 0;
115 buf->next = NULL;
116 tipc_recv_msg(buf, eb_ptr->bearer);
117 return 0;
118 }
119 } 121 }
120 } 122 }
121 kfree_skb(buf); 123 kfree_skb(buf);
@@ -133,6 +135,16 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
133 struct eth_bearer *eb_ptr = &eth_bearers[0]; 135 struct eth_bearer *eb_ptr = &eth_bearers[0];
134 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; 136 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
135 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; 137 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
138 int pending_dev = 0;
139
140 /* Find unused Ethernet bearer structure */
141
142 while (eb_ptr->dev) {
143 if (!eb_ptr->bearer)
144 pending_dev++;
145 if (++eb_ptr == stop)
146 return pending_dev ? -EAGAIN : -EDQUOT;
147 }
136 148
137 /* Find device with specified name */ 149 /* Find device with specified name */
138 150
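
send_msg() now checks whether the cloned buffer has enough headroom for the device's link-layer header and, if not, grows it with pskb_expand_head() before calling dev_hard_header(); recv_msg() meanwhile drops its size trimming, which reappears as the skb_trim() added to tipc_net_route_msg() in net/tipc/net.c below. The headroom arithmetic is easy to model in userspace; the buffer layout and names here are illustrative only.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Toy buffer: payload starts at data inside a larger allocation, and the
     * space in front of it plays the role of skb_headroom(). */
    struct buf {
        unsigned char *head, *data;
        size_t len;
    };

    static int ensure_headroom(struct buf *b, size_t need)
    {
        size_t headroom = (size_t)(b->data - b->head);

        if (headroom >= need)
            return 0;
        /* delta mirrors dev->hard_header_len - skb_headroom(buf) */
        size_t delta = need - headroom;
        unsigned char *nhead = malloc(delta + headroom + b->len);

        if (!nhead)
            return -1;
        memcpy(nhead + delta + headroom, b->data, b->len);
        free(b->head);
        b->head = nhead;
        b->data = nhead + delta + headroom;
        return 0;
    }

    int main(void)
    {
        struct buf b = { .head = malloc(32), .len = 8 };

        b.data = b.head + 2;               /* only 2 bytes of headroom */
        memcpy(b.data, "payload", 8);
        if (ensure_headroom(&b, 14) == 0)  /* room for an Ethernet header */
            printf("headroom now %zu\n", (size_t)(b.data - b.head));
        free(b.head);
        return 0;
    }
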
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a3616b99529b..b8cf1e9d0b86 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -239,13 +239,13 @@ int tipc_link_is_up(struct link *l_ptr)
239{ 239{
240 if (!l_ptr) 240 if (!l_ptr)
241 return 0; 241 return 0;
242 return (link_working_working(l_ptr) || link_working_unknown(l_ptr)); 242 return link_working_working(l_ptr) || link_working_unknown(l_ptr);
243} 243}
244 244
245int tipc_link_is_active(struct link *l_ptr) 245int tipc_link_is_active(struct link *l_ptr)
246{ 246{
247 return ((l_ptr->owner->active_links[0] == l_ptr) || 247 return (l_ptr->owner->active_links[0] == l_ptr) ||
248 (l_ptr->owner->active_links[1] == l_ptr)); 248 (l_ptr->owner->active_links[1] == l_ptr);
249} 249}
250 250
251/** 251/**
@@ -1802,6 +1802,15 @@ static int link_recv_buf_validate(struct sk_buff *buf)
1802 return pskb_may_pull(buf, hdr_size); 1802 return pskb_may_pull(buf, hdr_size);
1803} 1803}
1804 1804
1805/**
1806 * tipc_recv_msg - process TIPC messages arriving from off-node
1807 * @head: pointer to message buffer chain
1808 * @tb_ptr: pointer to bearer message arrived on
1809 *
1810 * Invoked with no locks held. Bearer pointer must point to a valid bearer
1811 * structure (i.e. cannot be NULL), but bearer can be inactive.
1812 */
1813
1805void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) 1814void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1806{ 1815{
1807 read_lock_bh(&tipc_net_lock); 1816 read_lock_bh(&tipc_net_lock);
@@ -1819,6 +1828,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1819 1828
1820 head = head->next; 1829 head = head->next;
1821 1830
1831 /* Ensure bearer is still enabled */
1832
1833 if (unlikely(!b_ptr->active))
1834 goto cont;
1835
1822 /* Ensure message is well-formed */ 1836 /* Ensure message is well-formed */
1823 1837
1824 if (unlikely(!link_recv_buf_validate(buf))) 1838 if (unlikely(!link_recv_buf_validate(buf)))
@@ -1855,13 +1869,22 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1855 goto cont; 1869 goto cont;
1856 } 1870 }
1857 1871
1858 /* Locate unicast link endpoint that should handle message */ 1872 /* Locate neighboring node that sent message */
1859 1873
1860 n_ptr = tipc_node_find(msg_prevnode(msg)); 1874 n_ptr = tipc_node_find(msg_prevnode(msg));
1861 if (unlikely(!n_ptr)) 1875 if (unlikely(!n_ptr))
1862 goto cont; 1876 goto cont;
1863 tipc_node_lock(n_ptr); 1877 tipc_node_lock(n_ptr);
1864 1878
1879 /* Don't talk to neighbor during cleanup after last session */
1880
1881 if (n_ptr->cleanup_required) {
1882 tipc_node_unlock(n_ptr);
1883 goto cont;
1884 }
1885
1886 /* Locate unicast link endpoint that should handle message */
1887
1865 l_ptr = n_ptr->links[b_ptr->identity]; 1888 l_ptr = n_ptr->links[b_ptr->identity];
1866 if (unlikely(!l_ptr)) { 1889 if (unlikely(!l_ptr)) {
1867 tipc_node_unlock(n_ptr); 1890 tipc_node_unlock(n_ptr);
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 2e5385c47d30..26151d30589d 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -279,12 +279,12 @@ static inline int between(u32 lower, u32 upper, u32 n)
279 279
280static inline int less_eq(u32 left, u32 right) 280static inline int less_eq(u32 left, u32 right)
281{ 281{
282 return (mod(right - left) < 32768u); 282 return mod(right - left) < 32768u;
283} 283}
284 284
285static inline int less(u32 left, u32 right) 285static inline int less(u32 left, u32 right)
286{ 286{
287 return (less_eq(left, right) && (mod(right) != mod(left))); 287 return less_eq(left, right) && (mod(right) != mod(left));
288} 288}
289 289
290static inline u32 lesser(u32 left, u32 right) 290static inline u32 lesser(u32 left, u32 right)
@@ -299,32 +299,32 @@ static inline u32 lesser(u32 left, u32 right)
299 299
300static inline int link_working_working(struct link *l_ptr) 300static inline int link_working_working(struct link *l_ptr)
301{ 301{
302 return (l_ptr->state == WORKING_WORKING); 302 return l_ptr->state == WORKING_WORKING;
303} 303}
304 304
305static inline int link_working_unknown(struct link *l_ptr) 305static inline int link_working_unknown(struct link *l_ptr)
306{ 306{
307 return (l_ptr->state == WORKING_UNKNOWN); 307 return l_ptr->state == WORKING_UNKNOWN;
308} 308}
309 309
310static inline int link_reset_unknown(struct link *l_ptr) 310static inline int link_reset_unknown(struct link *l_ptr)
311{ 311{
312 return (l_ptr->state == RESET_UNKNOWN); 312 return l_ptr->state == RESET_UNKNOWN;
313} 313}
314 314
315static inline int link_reset_reset(struct link *l_ptr) 315static inline int link_reset_reset(struct link *l_ptr)
316{ 316{
317 return (l_ptr->state == RESET_RESET); 317 return l_ptr->state == RESET_RESET;
318} 318}
319 319
320static inline int link_blocked(struct link *l_ptr) 320static inline int link_blocked(struct link *l_ptr)
321{ 321{
322 return (l_ptr->exp_msg_count || l_ptr->blocked); 322 return l_ptr->exp_msg_count || l_ptr->blocked;
323} 323}
324 324
325static inline int link_congested(struct link *l_ptr) 325static inline int link_congested(struct link *l_ptr)
326{ 326{
327 return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]); 327 return l_ptr->out_queue_size >= l_ptr->queue_limit[0];
328} 328}
329 329
330#endif 330#endif
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 995d2da35b01..031aad18efce 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -104,7 +104,7 @@ static inline u32 msg_user(struct tipc_msg *m)
104 104
105static inline u32 msg_isdata(struct tipc_msg *m) 105static inline u32 msg_isdata(struct tipc_msg *m)
106{ 106{
107 return (msg_user(m) <= TIPC_CRITICAL_IMPORTANCE); 107 return msg_user(m) <= TIPC_CRITICAL_IMPORTANCE;
108} 108}
109 109
110static inline void msg_set_user(struct tipc_msg *m, u32 n) 110static inline void msg_set_user(struct tipc_msg *m, u32 n)
@@ -289,7 +289,7 @@ static inline void msg_set_destnode(struct tipc_msg *m, u32 a)
289 289
290static inline int msg_is_dest(struct tipc_msg *m, u32 d) 290static inline int msg_is_dest(struct tipc_msg *m, u32 d)
291{ 291{
292 return(msg_short(m) || (msg_destnode(m) == d)); 292 return msg_short(m) || (msg_destnode(m) == d);
293} 293}
294 294
295static inline u32 msg_routed(struct tipc_msg *m) 295static inline u32 msg_routed(struct tipc_msg *m)
@@ -632,7 +632,7 @@ static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n)
632 632
633static inline u32 msg_max_pkt(struct tipc_msg *m) 633static inline u32 msg_max_pkt(struct tipc_msg *m)
634{ 634{
635 return (msg_bits(m, 9, 16, 0xffff) * 4); 635 return msg_bits(m, 9, 16, 0xffff) * 4;
636} 636}
637 637
638static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n) 638static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n)
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 8ba79620db3f..9ca4b0689237 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -116,7 +116,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock);
116 116
117static int hash(int x) 117static int hash(int x)
118{ 118{
119 return(x & (tipc_nametbl_size - 1)); 119 return x & (tipc_nametbl_size - 1);
120} 120}
121 121
122/** 122/**
@@ -613,8 +613,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
613} 613}
614 614
615/* 615/*
616 * tipc_nametbl_translate(): Translate tipc_name -> tipc_portid. 616 * tipc_nametbl_translate - translate name to port id
617 * Very time-critical.
618 * 617 *
619 * Note: on entry 'destnode' is the search domain used during translation; 618 * Note: on entry 'destnode' is the search domain used during translation;
620 * on exit it passes back the node address of the matching port (if any) 619 * on exit it passes back the node address of the matching port (if any)
@@ -685,7 +684,6 @@ found:
685 } 684 }
686 spin_unlock_bh(&seq->lock); 685 spin_unlock_bh(&seq->lock);
687not_found: 686not_found:
688 *destnode = 0;
689 read_unlock_bh(&tipc_nametbl_lock); 687 read_unlock_bh(&tipc_nametbl_lock);
690 return 0; 688 return 0;
691} 689}
@@ -877,7 +875,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
877 u32 index) 875 u32 index)
878{ 876{
879 char portIdStr[27]; 877 char portIdStr[27];
880 char *scopeStr; 878 const char *scope_str[] = {"", " zone", " cluster", " node"};
881 struct publication *publ = sseq->zone_list; 879 struct publication *publ = sseq->zone_list;
882 880
883 tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper); 881 tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper);
@@ -893,15 +891,8 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
893 tipc_node(publ->node), publ->ref); 891 tipc_node(publ->node), publ->ref);
894 tipc_printf(buf, "%-26s ", portIdStr); 892 tipc_printf(buf, "%-26s ", portIdStr);
895 if (depth > 3) { 893 if (depth > 3) {
896 if (publ->node != tipc_own_addr) 894 tipc_printf(buf, "%-10u %s", publ->key,
897 scopeStr = ""; 895 scope_str[publ->scope]);
898 else if (publ->scope == TIPC_NODE_SCOPE)
899 scopeStr = "node";
900 else if (publ->scope == TIPC_CLUSTER_SCOPE)
901 scopeStr = "cluster";
902 else
903 scopeStr = "zone";
904 tipc_printf(buf, "%-10u %s", publ->key, scopeStr);
905 } 896 }
906 897
907 publ = publ->zone_list_next; 898 publ = publ->zone_list_next;
@@ -951,24 +942,19 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
951 942
952static void nametbl_header(struct print_buf *buf, u32 depth) 943static void nametbl_header(struct print_buf *buf, u32 depth)
953{ 944{
954 tipc_printf(buf, "Type "); 945 const char *header[] = {
955 946 "Type ",
956 if (depth > 1) 947 "Lower Upper ",
957 tipc_printf(buf, "Lower Upper "); 948 "Port Identity ",
958 if (depth > 2) 949 "Publication Scope"
959 tipc_printf(buf, "Port Identity "); 950 };
960 if (depth > 3) 951
961 tipc_printf(buf, "Publication"); 952 int i;
962 953
963 tipc_printf(buf, "\n-----------"); 954 if (depth > 4)
964 955 depth = 4;
965 if (depth > 1) 956 for (i = 0; i < depth; i++)
966 tipc_printf(buf, "--------------------- "); 957 tipc_printf(buf, header[i]);
967 if (depth > 2)
968 tipc_printf(buf, "-------------------------- ");
969 if (depth > 3)
970 tipc_printf(buf, "------------------");
971
972 tipc_printf(buf, "\n"); 958 tipc_printf(buf, "\n");
973} 959}
974 960
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f61b7694138b..7e05af47a196 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -248,6 +248,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
248 248
249 /* Handle message for another node */ 249 /* Handle message for another node */
250 msg_dbg(msg, "NET>SEND>: "); 250 msg_dbg(msg, "NET>SEND>: ");
251 skb_trim(buf, msg_size(msg));
251 tipc_link_send(buf, dnode, msg_link_selector(msg)); 252 tipc_link_send(buf, dnode, msg_link_selector(msg));
252} 253}
253 254
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b634942caba5..7c49cd056df7 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -237,23 +237,22 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
237 237
238int tipc_node_has_active_links(struct tipc_node *n_ptr) 238int tipc_node_has_active_links(struct tipc_node *n_ptr)
239{ 239{
240 return (n_ptr && 240 return n_ptr->active_links[0] != NULL;
241 ((n_ptr->active_links[0]) || (n_ptr->active_links[1])));
242} 241}
243 242
244int tipc_node_has_redundant_links(struct tipc_node *n_ptr) 243int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
245{ 244{
246 return (n_ptr->working_links > 1); 245 return n_ptr->working_links > 1;
247} 246}
248 247
249static int tipc_node_has_active_routes(struct tipc_node *n_ptr) 248static int tipc_node_has_active_routes(struct tipc_node *n_ptr)
250{ 249{
251 return (n_ptr && (n_ptr->last_router >= 0)); 250 return n_ptr && (n_ptr->last_router >= 0);
252} 251}
253 252
254int tipc_node_is_up(struct tipc_node *n_ptr) 253int tipc_node_is_up(struct tipc_node *n_ptr)
255{ 254{
256 return (tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr)); 255 return tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr);
257} 256}
258 257
259struct tipc_node *tipc_node_attach_link(struct link *l_ptr) 258struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
@@ -384,6 +383,20 @@ static void node_established_contact(struct tipc_node *n_ptr)
384 tipc_highest_allowed_slave); 383 tipc_highest_allowed_slave);
385} 384}
386 385
386static void node_cleanup_finished(unsigned long node_addr)
387{
388 struct tipc_node *n_ptr;
389
390 read_lock_bh(&tipc_net_lock);
391 n_ptr = tipc_node_find(node_addr);
392 if (n_ptr) {
393 tipc_node_lock(n_ptr);
394 n_ptr->cleanup_required = 0;
395 tipc_node_unlock(n_ptr);
396 }
397 read_unlock_bh(&tipc_net_lock);
398}
399
387static void node_lost_contact(struct tipc_node *n_ptr) 400static void node_lost_contact(struct tipc_node *n_ptr)
388{ 401{
389 struct cluster *c_ptr; 402 struct cluster *c_ptr;
@@ -458,6 +471,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
458 tipc_k_signal((Handler)ns->handle_node_down, 471 tipc_k_signal((Handler)ns->handle_node_down,
459 (unsigned long)ns->usr_handle); 472 (unsigned long)ns->usr_handle);
460 } 473 }
474
475 /* Prevent re-contact with node until all cleanup is done */
476
477 n_ptr->cleanup_required = 1;
478 tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
461} 479}
462 480
463/** 481/**
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 6f990da5d143..45f3db3a595d 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -52,6 +52,7 @@
52 * @active_links: pointers to active links to node 52 * @active_links: pointers to active links to node
53 * @links: pointers to all links to node 53 * @links: pointers to all links to node
54 * @working_links: number of working links to node (both active and standby) 54 * @working_links: number of working links to node (both active and standby)
55 * @cleanup_required: non-zero if cleaning up after a prior loss of contact
55 * @link_cnt: number of links to node 56 * @link_cnt: number of links to node
56 * @permit_changeover: non-zero if node has redundant links to this system 57 * @permit_changeover: non-zero if node has redundant links to this system
57 * @routers: bitmap (used for multicluster communication) 58 * @routers: bitmap (used for multicluster communication)
@@ -78,6 +79,7 @@ struct tipc_node {
78 struct link *links[MAX_BEARERS]; 79 struct link *links[MAX_BEARERS];
79 int link_cnt; 80 int link_cnt;
80 int working_links; 81 int working_links;
82 int cleanup_required;
81 int permit_changeover; 83 int permit_changeover;
82 u32 routers[512/32]; 84 u32 routers[512/32];
83 int last_router; 85 int last_router;
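node_lost_contact() now marks the node with cleanup_required and queues node_cleanup_finished() via tipc_k_signal(), so contact is not re-established until the deferred handler has cleared the flag. A rough user-space model of that handshake, with the signal queue collapsed into a direct call (node_may_accept_contact() is a hypothetical helper, not a kernel function):

#include <stdio.h>

struct node {
        unsigned int addr;
        int cleanup_required;   /* non-zero while post-loss cleanup is pending */
};

/* stands in for the deferred handler run later via tipc_k_signal() */
static void node_cleanup_finished(struct node *n)
{
        n->cleanup_required = 0;
}

static void node_lost_contact(struct node *n)
{
        /* ... tear down links, flush queued messages ... */
        n->cleanup_required = 1;        /* block re-contact until cleanup completes */
}

static int node_may_accept_contact(const struct node *n)
{
        return !n->cleanup_required;
}

int main(void)
{
        struct node n = { .addr = 0x1001, .cleanup_required = 0 };

        node_lost_contact(&n);
        printf("accept while cleaning up: %d\n", node_may_accept_contact(&n)); /* 0 */
        node_cleanup_finished(&n);
        printf("accept after cleanup:     %d\n", node_may_accept_contact(&n)); /* 1 */
        return 0;
}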
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 0737680e9266..d760336f2ca8 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -588,19 +588,10 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
588 if (!p_ptr) { 588 if (!p_ptr) {
589 err = TIPC_ERR_NO_PORT; 589 err = TIPC_ERR_NO_PORT;
590 } else if (p_ptr->publ.connected) { 590 } else if (p_ptr->publ.connected) {
591 if (port_peernode(p_ptr) != msg_orignode(msg)) 591 if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
592 (port_peerport(p_ptr) != msg_origport(msg))) {
592 err = TIPC_ERR_NO_PORT; 593 err = TIPC_ERR_NO_PORT;
593 if (port_peerport(p_ptr) != msg_origport(msg)) 594 } else if (msg_type(msg) == CONN_ACK) {
594 err = TIPC_ERR_NO_PORT;
595 if (!err && msg_routed(msg)) {
596 u32 seqno = msg_transp_seqno(msg);
597 u32 myno = ++p_ptr->last_in_seqno;
598 if (seqno != myno) {
599 err = TIPC_ERR_NO_PORT;
600 abort_buf = port_build_self_abort_msg(p_ptr, err);
601 }
602 }
603 if (msg_type(msg) == CONN_ACK) {
604 int wakeup = tipc_port_congested(p_ptr) && 595 int wakeup = tipc_port_congested(p_ptr) &&
605 p_ptr->publ.congested && 596 p_ptr->publ.congested &&
606 p_ptr->wakeup; 597 p_ptr->wakeup;
@@ -1473,7 +1464,7 @@ int tipc_forward2name(u32 ref,
1473 msg_set_destnode(msg, destnode); 1464 msg_set_destnode(msg, destnode);
1474 msg_set_destport(msg, destport); 1465 msg_set_destport(msg, destport);
1475 1466
1476 if (likely(destport || destnode)) { 1467 if (likely(destport)) {
1477 p_ptr->sent++; 1468 p_ptr->sent++;
1478 if (likely(destnode == tipc_own_addr)) 1469 if (likely(destnode == tipc_own_addr))
1479 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1470 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
@@ -1551,7 +1542,7 @@ int tipc_forward_buf2name(u32 ref,
1551 skb_push(buf, LONG_H_SIZE); 1542 skb_push(buf, LONG_H_SIZE);
1552 skb_copy_to_linear_data(buf, msg, LONG_H_SIZE); 1543 skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
1553 msg_dbg(buf_msg(buf),"PREP:"); 1544 msg_dbg(buf_msg(buf),"PREP:");
1554 if (likely(destport || destnode)) { 1545 if (likely(destport)) {
1555 p_ptr->sent++; 1546 p_ptr->sent++;
1556 if (destnode == tipc_own_addr) 1547 if (destnode == tipc_own_addr)
1557 return tipc_port_recv_msg(buf); 1548 return tipc_port_recv_msg(buf);
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 8d1652aab298..e74bd9563739 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -157,7 +157,7 @@ static inline u32 tipc_peer_node(struct port *p_ptr)
157 157
158static inline int tipc_port_congested(struct port *p_ptr) 158static inline int tipc_port_congested(struct port *p_ptr)
159{ 159{
160 return((p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2)); 160 return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
161} 161}
162 162
163/** 163/**
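tipc_port_congested() keeps its meaning after the return-statement cleanup: a port counts as congested once the unacknowledged backlog reaches twice the flow-control window. A small sketch of the check with made-up numbers (the window constant below is a placeholder, not the real TIPC_FLOW_CONTROL_WIN value):

#include <stdio.h>

#define FLOW_CONTROL_WIN 512    /* placeholder for TIPC_FLOW_CONTROL_WIN */

struct port {
        unsigned int sent;
        unsigned int acked;
};

static int port_congested(const struct port *p)
{
        return (p->sent - p->acked) >= (FLOW_CONTROL_WIN * 2);
}

int main(void)
{
        struct port p = { .sent = 1500, .acked = 200 };

        printf("congested: %d\n", port_congested(&p));  /* 1: 1300 >= 1024 */
        p.acked = 1400;
        printf("congested: %d\n", port_congested(&p));  /* 0: 100 < 1024 */
        return 0;
}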
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 66e889ba48fd..33217fc3d697 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -64,6 +64,7 @@ struct tipc_sock {
64 struct sock sk; 64 struct sock sk;
65 struct tipc_port *p; 65 struct tipc_port *p;
66 struct tipc_portid peer_name; 66 struct tipc_portid peer_name;
67 long conn_timeout;
67}; 68};
68 69
69#define tipc_sk(sk) ((struct tipc_sock *)(sk)) 70#define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -240,9 +241,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
240 sock->state = state; 241 sock->state = state;
241 242
242 sock_init_data(sock, sk); 243 sock_init_data(sock, sk);
243 sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
244 sk->sk_backlog_rcv = backlog_rcv; 244 sk->sk_backlog_rcv = backlog_rcv;
245 tipc_sk(sk)->p = tp_ptr; 245 tipc_sk(sk)->p = tp_ptr;
246 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
246 247
247 spin_unlock_bh(tp_ptr->lock); 248 spin_unlock_bh(tp_ptr->lock);
248 249
@@ -429,36 +430,55 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
429 * to handle any preventable race conditions, so TIPC will do the same ... 430 * to handle any preventable race conditions, so TIPC will do the same ...
430 * 431 *
431 * TIPC sets the returned events as follows: 432 * TIPC sets the returned events as follows:
432 * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty 433 *
433 * or if a connection-oriented socket is does not have an active connection 434 * socket state flags set
434 * (i.e. a read operation will not block). 435 * ------------ ---------
435 * b) POLLOUT is set except when a socket's connection has been terminated 436 * unconnected no read flags
436 * (i.e. a write operation will not block). 437 * no write flags
437 * c) POLLHUP is set when a socket's connection has been terminated. 438 *
438 * 439 * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
439 * IMPORTANT: The fact that a read or write operation will not block does NOT 440 * no write flags
440 * imply that the operation will succeed! 441 *
442 * connected POLLIN/POLLRDNORM if data in rx queue
443 * POLLOUT if port is not congested
444 *
445 * disconnecting POLLIN/POLLRDNORM/POLLHUP
446 * no write flags
447 *
448 * listening POLLIN if SYN in rx queue
449 * no write flags
450 *
451 * ready POLLIN/POLLRDNORM if data in rx queue
452 * [connectionless] POLLOUT (since port cannot be congested)
453 *
454 * IMPORTANT: The fact that a read or write operation is indicated does NOT
455 * imply that the operation will succeed, merely that it should be performed
456 * and will not block.
441 */ 457 */
442 458
443static unsigned int poll(struct file *file, struct socket *sock, 459static unsigned int poll(struct file *file, struct socket *sock,
444 poll_table *wait) 460 poll_table *wait)
445{ 461{
446 struct sock *sk = sock->sk; 462 struct sock *sk = sock->sk;
447 u32 mask; 463 u32 mask = 0;
448 464
449 poll_wait(file, sk_sleep(sk), wait); 465 poll_wait(file, sk_sleep(sk), wait);
450 466
451 if (!skb_queue_empty(&sk->sk_receive_queue) || 467 switch ((int)sock->state) {
452 (sock->state == SS_UNCONNECTED) || 468 case SS_READY:
453 (sock->state == SS_DISCONNECTING)) 469 case SS_CONNECTED:
454 mask = (POLLRDNORM | POLLIN); 470 if (!tipc_sk_port(sk)->congested)
455 else 471 mask |= POLLOUT;
456 mask = 0; 472 /* fall thru' */
457 473 case SS_CONNECTING:
458 if (sock->state == SS_DISCONNECTING) 474 case SS_LISTENING:
459 mask |= POLLHUP; 475 if (!skb_queue_empty(&sk->sk_receive_queue))
460 else 476 mask |= (POLLIN | POLLRDNORM);
461 mask |= POLLOUT; 477 break;
478 case SS_DISCONNECTING:
479 mask = (POLLIN | POLLRDNORM | POLLHUP);
480 break;
481 }
462 482
463 return mask; 483 return mask;
464} 484}
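The comment block above spells out which poll() flags each TIPC socket state produces, and the rewritten function derives them with a switch that falls through from the connected/ready states into the receive-queue test. A compact user-space model of that mapping (the state names, queue flag and congestion flag are simplified stand-ins for the kernel structures):

#define _GNU_SOURCE
#include <stdio.h>
#include <poll.h>

enum sock_state { SS_UNCONNECTED, SS_CONNECTING, SS_CONNECTED,
                  SS_DISCONNECTING, SS_LISTENING, SS_READY };

static unsigned int tipc_poll_mask(enum sock_state state,
                                   int rx_queue_nonempty, int port_congested)
{
        unsigned int mask = 0;

        switch (state) {
        case SS_READY:
        case SS_CONNECTED:
                if (!port_congested)
                        mask |= POLLOUT;
                /* fall through */
        case SS_CONNECTING:
        case SS_LISTENING:
                if (rx_queue_nonempty)
                        mask |= POLLIN | POLLRDNORM;
                break;
        case SS_DISCONNECTING:
                mask = POLLIN | POLLRDNORM | POLLHUP;
                break;
        default:
                break;          /* SS_UNCONNECTED: no flags */
        }
        return mask;
}

int main(void)
{
        printf("connected, empty rx, not congested: 0x%x\n",
               tipc_poll_mask(SS_CONNECTED, 0, 0));     /* POLLOUT only */
        printf("disconnecting:                      0x%x\n",
               tipc_poll_mask(SS_DISCONNECTING, 1, 1)); /* POLLIN|POLLRDNORM|POLLHUP */
        return 0;
}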
@@ -1026,9 +1046,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1026 struct sk_buff *buf; 1046 struct sk_buff *buf;
1027 struct tipc_msg *msg; 1047 struct tipc_msg *msg;
1028 unsigned int sz; 1048 unsigned int sz;
1029 int sz_to_copy; 1049 int sz_to_copy, target, needed;
1030 int sz_copied = 0; 1050 int sz_copied = 0;
1031 int needed;
1032 char __user *crs = m->msg_iov->iov_base; 1051 char __user *crs = m->msg_iov->iov_base;
1033 unsigned char *buf_crs; 1052 unsigned char *buf_crs;
1034 u32 err; 1053 u32 err;
@@ -1050,6 +1069,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1050 goto exit; 1069 goto exit;
1051 } 1070 }
1052 1071
1072 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1073
1053restart: 1074restart:
1054 1075
1055 /* Look for a message in receive queue; wait if necessary */ 1076 /* Look for a message in receive queue; wait if necessary */
@@ -1138,7 +1159,7 @@ restart:
1138 1159
1139 if ((sz_copied < buf_len) && /* didn't get all requested data */ 1160 if ((sz_copied < buf_len) && /* didn't get all requested data */
1140 (!skb_queue_empty(&sk->sk_receive_queue) || 1161 (!skb_queue_empty(&sk->sk_receive_queue) ||
1141 (flags & MSG_WAITALL)) && /* and more is ready or required */ 1162 (sz_copied < target)) && /* and more is ready or required */
1142 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ 1163 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
1143 (!err)) /* and haven't reached a FIN */ 1164 (!err)) /* and haven't reached a FIN */
1144 goto restart; 1165 goto restart;
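recv_stream() now keeps looping until sz_copied reaches a target computed by sock_rcvlowat(), so SO_RCVLOWAT is honoured in addition to MSG_WAITALL. A sketch of the loop condition in isolation (rcv_target() and keep_reading() are hypothetical helpers; the real code also tracks MSG_PEEK and the per-message FIN/error state):

#include <stdio.h>

/* mirrors sock_rcvlowat(sk, waitall, len): with MSG_WAITALL the target is
 * the full request, otherwise the receive low-water mark capped to len */
static int rcv_target(int waitall, int rcvlowat, int buf_len)
{
        int target = waitall ? buf_len : rcvlowat;

        return target < buf_len ? target : buf_len;
}

static int keep_reading(int sz_copied, int buf_len, int target,
                        int more_queued, int peeking, int got_fin)
{
        return sz_copied < buf_len &&                   /* didn't get all requested data */
               (more_queued || sz_copied < target) &&   /* more ready or still required  */
               !peeking &&                              /* not just peeking at the data  */
               !got_fin;                                /* haven't reached a FIN         */
}

int main(void)
{
        int target = rcv_target(0, 100, 4096);  /* SO_RCVLOWAT = 100 bytes */

        printf("copied 40, queue empty:  %d\n",
               keep_reading(40, 4096, target, 0, 0, 0));        /* 1: below low-water mark */
        printf("copied 200, queue empty: %d\n",
               keep_reading(200, 4096, target, 0, 0, 0));       /* 0: target reached */
        return 0;
}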
@@ -1174,7 +1195,7 @@ static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
1174 if (msg_connected(msg)) 1195 if (msg_connected(msg))
1175 threshold *= 4; 1196 threshold *= 4;
1176 1197
1177 return (queue_size >= threshold); 1198 return queue_size >= threshold;
1178} 1199}
1179 1200
1180/** 1201/**
@@ -1365,6 +1386,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1365 struct msghdr m = {NULL,}; 1386 struct msghdr m = {NULL,};
1366 struct sk_buff *buf; 1387 struct sk_buff *buf;
1367 struct tipc_msg *msg; 1388 struct tipc_msg *msg;
1389 long timeout;
1368 int res; 1390 int res;
1369 1391
1370 lock_sock(sk); 1392 lock_sock(sk);
@@ -1379,7 +1401,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1379 /* For now, TIPC does not support the non-blocking form of connect() */ 1401 /* For now, TIPC does not support the non-blocking form of connect() */
1380 1402
1381 if (flags & O_NONBLOCK) { 1403 if (flags & O_NONBLOCK) {
1382 res = -EWOULDBLOCK; 1404 res = -EOPNOTSUPP;
1383 goto exit; 1405 goto exit;
1384 } 1406 }
1385 1407
@@ -1425,11 +1447,12 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1425 1447
1426 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ 1448 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1427 1449
1450 timeout = tipc_sk(sk)->conn_timeout;
1428 release_sock(sk); 1451 release_sock(sk);
1429 res = wait_event_interruptible_timeout(*sk_sleep(sk), 1452 res = wait_event_interruptible_timeout(*sk_sleep(sk),
1430 (!skb_queue_empty(&sk->sk_receive_queue) || 1453 (!skb_queue_empty(&sk->sk_receive_queue) ||
1431 (sock->state != SS_CONNECTING)), 1454 (sock->state != SS_CONNECTING)),
1432 sk->sk_rcvtimeo); 1455 timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
1433 lock_sock(sk); 1456 lock_sock(sk);
1434 1457
1435 if (res > 0) { 1458 if (res > 0) {
@@ -1692,7 +1715,7 @@ static int setsockopt(struct socket *sock,
1692 res = tipc_set_portunreturnable(tport->ref, value); 1715 res = tipc_set_portunreturnable(tport->ref, value);
1693 break; 1716 break;
1694 case TIPC_CONN_TIMEOUT: 1717 case TIPC_CONN_TIMEOUT:
1695 sk->sk_rcvtimeo = msecs_to_jiffies(value); 1718 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value);
1696 /* no need to set "res", since already 0 at this point */ 1719 /* no need to set "res", since already 0 at this point */
1697 break; 1720 break;
1698 default: 1721 default:
@@ -1747,7 +1770,7 @@ static int getsockopt(struct socket *sock,
1747 res = tipc_portunreturnable(tport->ref, &value); 1770 res = tipc_portunreturnable(tport->ref, &value);
1748 break; 1771 break;
1749 case TIPC_CONN_TIMEOUT: 1772 case TIPC_CONN_TIMEOUT:
1750 value = jiffies_to_msecs(sk->sk_rcvtimeo); 1773 value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
1751 /* no need to set "res", since already 0 at this point */ 1774 /* no need to set "res", since already 0 at this point */
1752 break; 1775 break;
1753 case TIPC_NODE_RECVQ_DEPTH: 1776 case TIPC_NODE_RECVQ_DEPTH:
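With the connect timeout stored in tipc_sock->conn_timeout, TIPC_CONN_TIMEOUT no longer overwrites sk->sk_rcvtimeo, so an application can set the connect timeout and SO_RCVTIMEO independently. A hedged usage sketch, assuming <linux/tipc.h> and an AF_TIPC-capable libc (error handling trimmed):

#include <stdio.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <linux/tipc.h>

#ifndef SOL_TIPC
#define SOL_TIPC 271            /* from include/linux/socket.h, if libc lacks it */
#endif

int main(void)
{
        int sd = socket(AF_TIPC, SOCK_STREAM, 0);
        unsigned int tmo = 5000;                 /* connect timeout, in ms */
        struct timeval rcvtmo = { .tv_sec = 2 }; /* independent receive timeout */

        if (sd < 0) {
                perror("socket");
                return 1;
        }
        /* per-socket connect() timeout, kept in tipc_sock->conn_timeout */
        setsockopt(sd, SOL_TIPC, TIPC_CONN_TIMEOUT, &tmo, sizeof(tmo));
        /* sk_rcvtimeo is no longer clobbered by the line above */
        setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &rcvtmo, sizeof(rcvtmo));
        return 0;
}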
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ab6eab4c45e2..1a5b9a6bd128 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -604,6 +604,6 @@ int tipc_ispublished(struct tipc_name const *name)
604{ 604{
605 u32 domain = 0; 605 u32 domain = 0;
606 606
607 return(tipc_nametbl_translate(name->type, name->instance,&domain) != 0); 607 return tipc_nametbl_translate(name->type, name->instance, &domain) != 0;
608} 608}
609 609
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0b39b2451ea5..c586da3f4f18 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2033,11 +2033,10 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
2033 if (sk->sk_shutdown == SHUTDOWN_MASK) 2033 if (sk->sk_shutdown == SHUTDOWN_MASK)
2034 mask |= POLLHUP; 2034 mask |= POLLHUP;
2035 if (sk->sk_shutdown & RCV_SHUTDOWN) 2035 if (sk->sk_shutdown & RCV_SHUTDOWN)
2036 mask |= POLLRDHUP; 2036 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2037 2037
2038 /* readable? */ 2038 /* readable? */
2039 if (!skb_queue_empty(&sk->sk_receive_queue) || 2039 if (!skb_queue_empty(&sk->sk_receive_queue))
2040 (sk->sk_shutdown & RCV_SHUTDOWN))
2041 mask |= POLLIN | POLLRDNORM; 2040 mask |= POLLIN | POLLRDNORM;
2042 2041
2043 /* Connection-based need to check for termination and startup */ 2042 /* Connection-based need to check for termination and startup */
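The unix_poll() rewrite above folds the receive-shutdown case into the POLLRDHUP line; the visible behaviour is unchanged: a half-closed socket reports POLLIN/POLLRDNORM together with POLLRDHUP, and a subsequent read() returning 0 is the end-of-stream indication. A small self-contained demonstration from the user-space side:

#define _GNU_SOURCE
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int sv[2];
        struct pollfd pfd;
        char buf[16];

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
                return 1;
        close(sv[1]);                           /* peer goes away */

        pfd.fd = sv[0];
        pfd.events = POLLIN | POLLRDHUP;
        poll(&pfd, 1, -1);                      /* readable flags are set for the shutdown socket */

        if ((pfd.revents & POLLIN) && read(sv[0], buf, sizeof(buf)) == 0)
                puts("peer closed: POLLIN + read() == 0 signals EOF");
        return 0;
}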
diff --git a/net/wireless/core.c b/net/wireless/core.c
index d6d046b9f6f2..9c21ebf9780e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -253,11 +253,16 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
253 WARN_ON(err); 253 WARN_ON(err);
254 wdev->netdev->features |= NETIF_F_NETNS_LOCAL; 254 wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
255 } 255 }
256
257 return err;
256 } 258 }
257 259
258 wiphy_net_set(&rdev->wiphy, net); 260 wiphy_net_set(&rdev->wiphy, net);
259 261
260 return err; 262 err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev));
263 WARN_ON(err);
264
265 return 0;
261} 266}
262 267
263static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) 268static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
@@ -428,7 +433,7 @@ int wiphy_register(struct wiphy *wiphy)
428 433
429 /* sanity check ifmodes */ 434 /* sanity check ifmodes */
430 WARN_ON(!ifmodes); 435 WARN_ON(!ifmodes);
431 ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; 436 ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1;
432 if (WARN_ON(ifmodes != wiphy->interface_modes)) 437 if (WARN_ON(ifmodes != wiphy->interface_modes))
433 wiphy->interface_modes = ifmodes; 438 wiphy->interface_modes = ifmodes;
434 439
@@ -683,8 +688,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
683 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); 688 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work);
684 INIT_LIST_HEAD(&wdev->event_list); 689 INIT_LIST_HEAD(&wdev->event_list);
685 spin_lock_init(&wdev->event_lock); 690 spin_lock_init(&wdev->event_lock);
686 INIT_LIST_HEAD(&wdev->action_registrations); 691 INIT_LIST_HEAD(&wdev->mgmt_registrations);
687 spin_lock_init(&wdev->action_registrations_lock); 692 spin_lock_init(&wdev->mgmt_registrations_lock);
688 693
689 mutex_lock(&rdev->devlist_mtx); 694 mutex_lock(&rdev->devlist_mtx);
690 list_add_rcu(&wdev->list, &rdev->netdev_list); 695 list_add_rcu(&wdev->list, &rdev->netdev_list);
@@ -724,6 +729,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
724 dev->ethtool_ops = &cfg80211_ethtool_ops; 729 dev->ethtool_ops = &cfg80211_ethtool_ops;
725 730
726 if ((wdev->iftype == NL80211_IFTYPE_STATION || 731 if ((wdev->iftype == NL80211_IFTYPE_STATION ||
732 wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
727 wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) 733 wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
728 dev->priv_flags |= IFF_DONT_BRIDGE; 734 dev->priv_flags |= IFF_DONT_BRIDGE;
729 break; 735 break;
@@ -732,6 +738,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
732 case NL80211_IFTYPE_ADHOC: 738 case NL80211_IFTYPE_ADHOC:
733 cfg80211_leave_ibss(rdev, dev, true); 739 cfg80211_leave_ibss(rdev, dev, true);
734 break; 740 break;
741 case NL80211_IFTYPE_P2P_CLIENT:
735 case NL80211_IFTYPE_STATION: 742 case NL80211_IFTYPE_STATION:
736 wdev_lock(wdev); 743 wdev_lock(wdev);
737#ifdef CONFIG_CFG80211_WEXT 744#ifdef CONFIG_CFG80211_WEXT
@@ -804,7 +811,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
804 sysfs_remove_link(&dev->dev.kobj, "phy80211"); 811 sysfs_remove_link(&dev->dev.kobj, "phy80211");
805 list_del_rcu(&wdev->list); 812 list_del_rcu(&wdev->list);
806 rdev->devlist_generation++; 813 rdev->devlist_generation++;
807 cfg80211_mlme_purge_actions(wdev); 814 cfg80211_mlme_purge_registrations(wdev);
808#ifdef CONFIG_CFG80211_WEXT 815#ifdef CONFIG_CFG80211_WEXT
809 kfree(wdev->wext.keys); 816 kfree(wdev->wext.keys);
810#endif 817#endif
@@ -910,52 +917,3 @@ static void __exit cfg80211_exit(void)
910 destroy_workqueue(cfg80211_wq); 917 destroy_workqueue(cfg80211_wq);
911} 918}
912module_exit(cfg80211_exit); 919module_exit(cfg80211_exit);
913
914static int ___wiphy_printk(const char *level, const struct wiphy *wiphy,
915 struct va_format *vaf)
916{
917 if (!wiphy)
918 return printk("%s(NULL wiphy *): %pV", level, vaf);
919
920 return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf);
921}
922
923int __wiphy_printk(const char *level, const struct wiphy *wiphy,
924 const char *fmt, ...)
925{
926 struct va_format vaf;
927 va_list args;
928 int r;
929
930 va_start(args, fmt);
931
932 vaf.fmt = fmt;
933 vaf.va = &args;
934
935 r = ___wiphy_printk(level, wiphy, &vaf);
936 va_end(args);
937
938 return r;
939}
940EXPORT_SYMBOL(__wiphy_printk);
941
942#define define_wiphy_printk_level(func, kern_level) \
943int func(const struct wiphy *wiphy, const char *fmt, ...) \
944{ \
945 struct va_format vaf; \
946 va_list args; \
947 int r; \
948 \
949 va_start(args, fmt); \
950 \
951 vaf.fmt = fmt; \
952 vaf.va = &args; \
953 \
954 r = ___wiphy_printk(kern_level, wiphy, &vaf); \
955 va_end(args); \
956 \
957 return r; \
958} \
959EXPORT_SYMBOL(func);
960
961define_wiphy_printk_level(wiphy_debug, KERN_DEBUG);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 63d57ae399c3..5d89310b3587 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -86,7 +86,7 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
86static inline 86static inline
87bool wiphy_idx_valid(int wiphy_idx) 87bool wiphy_idx_valid(int wiphy_idx)
88{ 88{
89 return (wiphy_idx >= 0); 89 return wiphy_idx >= 0;
90} 90}
91 91
92 92
@@ -95,7 +95,10 @@ extern struct mutex cfg80211_mutex;
95extern struct list_head cfg80211_rdev_list; 95extern struct list_head cfg80211_rdev_list;
96extern int cfg80211_rdev_list_generation; 96extern int cfg80211_rdev_list_generation;
97 97
98#define assert_cfg80211_lock() WARN_ON(!mutex_is_locked(&cfg80211_mutex)) 98static inline void assert_cfg80211_lock(void)
99{
100 lockdep_assert_held(&cfg80211_mutex);
101}
99 102
100/* 103/*
101 * You can use this to mark a wiphy_idx as not having an associated wiphy. 104 * You can use this to mark a wiphy_idx as not having an associated wiphy.
@@ -202,8 +205,8 @@ static inline void wdev_unlock(struct wireless_dev *wdev)
202 mutex_unlock(&wdev->mtx); 205 mutex_unlock(&wdev->mtx);
203} 206}
204 207
205#define ASSERT_RDEV_LOCK(rdev) WARN_ON(!mutex_is_locked(&(rdev)->mtx)); 208#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx)
206#define ASSERT_WDEV_LOCK(wdev) WARN_ON(!mutex_is_locked(&(wdev)->mtx)); 209#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx)
207 210
208enum cfg80211_event_type { 211enum cfg80211_event_type {
209 EVENT_CONNECT_RESULT, 212 EVENT_CONNECT_RESULT,
@@ -331,16 +334,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
331 const u8 *resp_ie, size_t resp_ie_len, 334 const u8 *resp_ie, size_t resp_ie_len,
332 u16 status, bool wextev, 335 u16 status, bool wextev,
333 struct cfg80211_bss *bss); 336 struct cfg80211_bss *bss);
334int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 337int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
335 const u8 *match_data, int match_len); 338 u16 frame_type, const u8 *match_data,
336void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); 339 int match_len);
337void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); 340void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
338int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 341void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
339 struct net_device *dev, 342int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
340 struct ieee80211_channel *chan, 343 struct net_device *dev,
341 enum nl80211_channel_type channel_type, 344 struct ieee80211_channel *chan,
342 bool channel_type_valid, 345 enum nl80211_channel_type channel_type,
343 const u8 *buf, size_t len, u64 *cookie); 346 bool channel_type_valid,
347 const u8 *buf, size_t len, u64 *cookie);
344 348
345/* SME */ 349/* SME */
346int __cfg80211_connect(struct cfg80211_registered_device *rdev, 350int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 27a8ce9343c3..8cb6e08373b9 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -88,6 +88,25 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
88 if (wdev->ssid_len) 88 if (wdev->ssid_len)
89 return -EALREADY; 89 return -EALREADY;
90 90
91 if (!params->basic_rates) {
92 /*
93 * If no rates were explicitly configured,
94 * use the mandatory rate set for 11b or
95 * 11a for maximum compatibility.
96 */
97 struct ieee80211_supported_band *sband =
98 rdev->wiphy.bands[params->channel->band];
99 int j;
100 u32 flag = params->channel->band == IEEE80211_BAND_5GHZ ?
101 IEEE80211_RATE_MANDATORY_A :
102 IEEE80211_RATE_MANDATORY_B;
103
104 for (j = 0; j < sband->n_bitrates; j++) {
105 if (sband->bitrates[j].flags & flag)
106 params->basic_rates |= BIT(j);
107 }
108 }
109
91 if (WARN_ON(wdev->connect_keys)) 110 if (WARN_ON(wdev->connect_keys))
92 kfree(wdev->connect_keys); 111 kfree(wdev->connect_keys);
93 wdev->connect_keys = connkeys; 112 wdev->connect_keys = connkeys;
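The block moved into __cfg80211_join_ibss() fills basic_rates with the mandatory 11b or 11a rates when user space supplied none, by scanning the band's bitrate table for the mandatory flag. A self-contained sketch of that scan (the rate table and flag values are invented for illustration):

#include <stdio.h>
#include <stdint.h>

#define RATE_MANDATORY_A (1 << 0)       /* illustrative flag bits */
#define RATE_MANDATORY_B (1 << 1)

struct bitrate {
        int kbps;
        uint32_t flags;
};

static uint32_t mandatory_basic_rates(const struct bitrate *rates, int n,
                                      int band_is_5ghz)
{
        uint32_t flag = band_is_5ghz ? RATE_MANDATORY_A : RATE_MANDATORY_B;
        uint32_t basic = 0;
        int j;

        for (j = 0; j < n; j++)
                if (rates[j].flags & flag)
                        basic |= 1u << j;       /* bit j selects rates[j] */
        return basic;
}

int main(void)
{
        const struct bitrate rates_2ghz[] = {
                { 1000,  RATE_MANDATORY_B },
                { 2000,  RATE_MANDATORY_B },
                { 5500,  RATE_MANDATORY_B },
                { 6000,  0 },
                { 11000, RATE_MANDATORY_B },
        };

        printf("basic_rates bitmap: 0x%x\n",
               mandatory_basic_rates(rates_2ghz, 5, 0));        /* 0x17 */
        return 0;
}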
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d1a3fb99fdf2..46f371160896 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -149,7 +149,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; 149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
150 const u8 *bssid = mgmt->bssid; 150 const u8 *bssid = mgmt->bssid;
151 int i; 151 int i;
152 bool found = false; 152 bool found = false, was_current = false;
153 153
154 ASSERT_WDEV_LOCK(wdev); 154 ASSERT_WDEV_LOCK(wdev);
155 155
@@ -159,6 +159,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
159 cfg80211_put_bss(&wdev->current_bss->pub); 159 cfg80211_put_bss(&wdev->current_bss->pub);
160 wdev->current_bss = NULL; 160 wdev->current_bss = NULL;
161 found = true; 161 found = true;
162 was_current = true;
162 } else for (i = 0; i < MAX_AUTH_BSSES; i++) { 163 } else for (i = 0; i < MAX_AUTH_BSSES; i++) {
163 if (wdev->auth_bsses[i] && 164 if (wdev->auth_bsses[i] &&
164 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) { 165 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) {
@@ -183,7 +184,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
183 184
184 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); 185 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL);
185 186
186 if (wdev->sme_state == CFG80211_SME_CONNECTED) { 187 if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) {
187 u16 reason_code; 188 u16 reason_code;
188 bool from_ap; 189 bool from_ap;
189 190
@@ -747,31 +748,51 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
747} 748}
748EXPORT_SYMBOL(cfg80211_new_sta); 749EXPORT_SYMBOL(cfg80211_new_sta);
749 750
750struct cfg80211_action_registration { 751struct cfg80211_mgmt_registration {
751 struct list_head list; 752 struct list_head list;
752 753
753 u32 nlpid; 754 u32 nlpid;
754 755
755 int match_len; 756 int match_len;
756 757
758 __le16 frame_type;
759
757 u8 match[]; 760 u8 match[];
758}; 761};
759 762
760int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 763int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
761 const u8 *match_data, int match_len) 764 u16 frame_type, const u8 *match_data,
765 int match_len)
762{ 766{
763 struct cfg80211_action_registration *reg, *nreg; 767 struct cfg80211_mgmt_registration *reg, *nreg;
764 int err = 0; 768 int err = 0;
769 u16 mgmt_type;
770
771 if (!wdev->wiphy->mgmt_stypes)
772 return -EOPNOTSUPP;
773
774 if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT)
775 return -EINVAL;
776
777 if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE))
778 return -EINVAL;
779
780 mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4;
781 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type)))
782 return -EINVAL;
765 783
766 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); 784 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL);
767 if (!nreg) 785 if (!nreg)
768 return -ENOMEM; 786 return -ENOMEM;
769 787
770 spin_lock_bh(&wdev->action_registrations_lock); 788 spin_lock_bh(&wdev->mgmt_registrations_lock);
771 789
772 list_for_each_entry(reg, &wdev->action_registrations, list) { 790 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
773 int mlen = min(match_len, reg->match_len); 791 int mlen = min(match_len, reg->match_len);
774 792
793 if (frame_type != le16_to_cpu(reg->frame_type))
794 continue;
795
775 if (memcmp(reg->match, match_data, mlen) == 0) { 796 if (memcmp(reg->match, match_data, mlen) == 0) {
776 err = -EALREADY; 797 err = -EALREADY;
777 break; 798 break;
@@ -786,69 +807,83 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid,
786 memcpy(nreg->match, match_data, match_len); 807 memcpy(nreg->match, match_data, match_len);
787 nreg->match_len = match_len; 808 nreg->match_len = match_len;
788 nreg->nlpid = snd_pid; 809 nreg->nlpid = snd_pid;
789 list_add(&nreg->list, &wdev->action_registrations); 810 nreg->frame_type = cpu_to_le16(frame_type);
811 list_add(&nreg->list, &wdev->mgmt_registrations);
790 812
791 out: 813 out:
792 spin_unlock_bh(&wdev->action_registrations_lock); 814 spin_unlock_bh(&wdev->mgmt_registrations_lock);
793 return err; 815 return err;
794} 816}
795 817
796void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) 818void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
797{ 819{
798 struct cfg80211_action_registration *reg, *tmp; 820 struct cfg80211_mgmt_registration *reg, *tmp;
799 821
800 spin_lock_bh(&wdev->action_registrations_lock); 822 spin_lock_bh(&wdev->mgmt_registrations_lock);
801 823
802 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 824 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
803 if (reg->nlpid == nlpid) { 825 if (reg->nlpid == nlpid) {
804 list_del(&reg->list); 826 list_del(&reg->list);
805 kfree(reg); 827 kfree(reg);
806 } 828 }
807 } 829 }
808 830
809 spin_unlock_bh(&wdev->action_registrations_lock); 831 spin_unlock_bh(&wdev->mgmt_registrations_lock);
810} 832}
811 833
812void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) 834void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
813{ 835{
814 struct cfg80211_action_registration *reg, *tmp; 836 struct cfg80211_mgmt_registration *reg, *tmp;
815 837
816 spin_lock_bh(&wdev->action_registrations_lock); 838 spin_lock_bh(&wdev->mgmt_registrations_lock);
817 839
818 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 840 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
819 list_del(&reg->list); 841 list_del(&reg->list);
820 kfree(reg); 842 kfree(reg);
821 } 843 }
822 844
823 spin_unlock_bh(&wdev->action_registrations_lock); 845 spin_unlock_bh(&wdev->mgmt_registrations_lock);
824} 846}
825 847
826int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 848int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
827 struct net_device *dev, 849 struct net_device *dev,
828 struct ieee80211_channel *chan, 850 struct ieee80211_channel *chan,
829 enum nl80211_channel_type channel_type, 851 enum nl80211_channel_type channel_type,
830 bool channel_type_valid, 852 bool channel_type_valid,
831 const u8 *buf, size_t len, u64 *cookie) 853 const u8 *buf, size_t len, u64 *cookie)
832{ 854{
833 struct wireless_dev *wdev = dev->ieee80211_ptr; 855 struct wireless_dev *wdev = dev->ieee80211_ptr;
834 const struct ieee80211_mgmt *mgmt; 856 const struct ieee80211_mgmt *mgmt;
857 u16 stype;
858
859 if (!wdev->wiphy->mgmt_stypes)
860 return -EOPNOTSUPP;
835 861
836 if (rdev->ops->action == NULL) 862 if (!rdev->ops->mgmt_tx)
837 return -EOPNOTSUPP; 863 return -EOPNOTSUPP;
864
838 if (len < 24 + 1) 865 if (len < 24 + 1)
839 return -EINVAL; 866 return -EINVAL;
840 867
841 mgmt = (const struct ieee80211_mgmt *) buf; 868 mgmt = (const struct ieee80211_mgmt *) buf;
842 if (!ieee80211_is_action(mgmt->frame_control)) 869
870 if (!ieee80211_is_mgmt(mgmt->frame_control))
843 return -EINVAL; 871 return -EINVAL;
844 if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { 872
873 stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
874 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4)))
875 return -EINVAL;
876
877 if (ieee80211_is_action(mgmt->frame_control) &&
878 mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) {
845 /* Verify that we are associated with the destination AP */ 879 /* Verify that we are associated with the destination AP */
846 wdev_lock(wdev); 880 wdev_lock(wdev);
847 881
848 if (!wdev->current_bss || 882 if (!wdev->current_bss ||
849 memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, 883 memcmp(wdev->current_bss->pub.bssid, mgmt->bssid,
850 ETH_ALEN) != 0 || 884 ETH_ALEN) != 0 ||
851 (wdev->iftype == NL80211_IFTYPE_STATION && 885 ((wdev->iftype == NL80211_IFTYPE_STATION ||
886 wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
852 memcmp(wdev->current_bss->pub.bssid, mgmt->da, 887 memcmp(wdev->current_bss->pub.bssid, mgmt->da,
853 ETH_ALEN) != 0)) { 888 ETH_ALEN) != 0)) {
854 wdev_unlock(wdev); 889 wdev_unlock(wdev);
@@ -862,64 +897,75 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
862 return -EINVAL; 897 return -EINVAL;
863 898
864 /* Transmit the Action frame as requested by user space */ 899 /* Transmit the Action frame as requested by user space */
865 return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, 900 return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type,
866 channel_type_valid, buf, len, cookie); 901 channel_type_valid, buf, len, cookie);
867} 902}
868 903
869bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, 904bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
870 size_t len, gfp_t gfp) 905 size_t len, gfp_t gfp)
871{ 906{
872 struct wireless_dev *wdev = dev->ieee80211_ptr; 907 struct wireless_dev *wdev = dev->ieee80211_ptr;
873 struct wiphy *wiphy = wdev->wiphy; 908 struct wiphy *wiphy = wdev->wiphy;
874 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 909 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
875 struct cfg80211_action_registration *reg; 910 struct cfg80211_mgmt_registration *reg;
876 const u8 *action_data; 911 const struct ieee80211_txrx_stypes *stypes =
877 int action_data_len; 912 &wiphy->mgmt_stypes[wdev->iftype];
913 struct ieee80211_mgmt *mgmt = (void *)buf;
914 const u8 *data;
915 int data_len;
878 bool result = false; 916 bool result = false;
917 __le16 ftype = mgmt->frame_control &
918 cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
919 u16 stype;
879 920
880 /* frame length - min size excluding category */ 921 stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
881 action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1);
882 922
883 /* action data starts with category */ 923 if (!(stypes->rx & BIT(stype)))
884 action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; 924 return false;
885 925
886 spin_lock_bh(&wdev->action_registrations_lock); 926 data = buf + ieee80211_hdrlen(mgmt->frame_control);
927 data_len = len - ieee80211_hdrlen(mgmt->frame_control);
928
929 spin_lock_bh(&wdev->mgmt_registrations_lock);
930
931 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
932 if (reg->frame_type != ftype)
933 continue;
887 934
888 list_for_each_entry(reg, &wdev->action_registrations, list) { 935 if (reg->match_len > data_len)
889 if (reg->match_len > action_data_len)
890 continue; 936 continue;
891 937
892 if (memcmp(reg->match, action_data, reg->match_len)) 938 if (memcmp(reg->match, data, reg->match_len))
893 continue; 939 continue;
894 940
895 /* found match! */ 941 /* found match! */
896 942
897 /* Indicate the received Action frame to user space */ 943 /* Indicate the received Action frame to user space */
898 if (nl80211_send_action(rdev, dev, reg->nlpid, freq, 944 if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq,
899 buf, len, gfp)) 945 buf, len, gfp))
900 continue; 946 continue;
901 947
902 result = true; 948 result = true;
903 break; 949 break;
904 } 950 }
905 951
906 spin_unlock_bh(&wdev->action_registrations_lock); 952 spin_unlock_bh(&wdev->mgmt_registrations_lock);
907 953
908 return result; 954 return result;
909} 955}
910EXPORT_SYMBOL(cfg80211_rx_action); 956EXPORT_SYMBOL(cfg80211_rx_mgmt);
911 957
912void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, 958void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie,
913 const u8 *buf, size_t len, bool ack, gfp_t gfp) 959 const u8 *buf, size_t len, bool ack, gfp_t gfp)
914{ 960{
915 struct wireless_dev *wdev = dev->ieee80211_ptr; 961 struct wireless_dev *wdev = dev->ieee80211_ptr;
916 struct wiphy *wiphy = wdev->wiphy; 962 struct wiphy *wiphy = wdev->wiphy;
917 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 963 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
918 964
919 /* Indicate TX status of the Action frame to user space */ 965 /* Indicate TX status of the Action frame to user space */
920 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); 966 nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
921} 967}
922EXPORT_SYMBOL(cfg80211_action_tx_status); 968EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
923 969
924void cfg80211_cqm_rssi_notify(struct net_device *dev, 970void cfg80211_cqm_rssi_notify(struct net_device *dev,
925 enum nl80211_cqm_rssi_threshold_event rssi_event, 971 enum nl80211_cqm_rssi_threshold_event rssi_event,
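cfg80211_mlme_register_mgmt() generalizes the old Action-only registration: callers name a management frame type, and it is validated against the per-interface-type RX bitmap, one bit per management subtype indexed by stype >> 4. A sketch of that validation (the frame-control constants follow the 802.11 layout; the stypes table is illustrative):

#include <stdio.h>
#include <stdint.h>

#define FCTL_FTYPE      0x000c
#define FCTL_STYPE      0x00f0
#define FTYPE_MGMT      0x0000
#define STYPE_PROBE_REQ 0x0040
#define STYPE_ACTION    0x00d0

/* per-iftype bitmap of management subtypes we may receive (illustrative) */
struct txrx_stypes { uint16_t tx, rx; };

static int may_register_rx(const struct txrx_stypes *stypes, uint16_t frame_type)
{
        uint16_t mgmt_type;

        if ((frame_type & FCTL_FTYPE) != FTYPE_MGMT)
                return 0;                       /* not a management frame */
        if (frame_type & ~(FCTL_FTYPE | FCTL_STYPE))
                return 0;                       /* stray bits set */
        mgmt_type = (frame_type & FCTL_STYPE) >> 4;
        return !!(stypes->rx & (1u << mgmt_type));
}

int main(void)
{
        struct txrx_stypes sta = { .tx = 1u << (STYPE_ACTION >> 4),
                                   .rx = 1u << (STYPE_ACTION >> 4) };

        printf("Action:   %d\n", may_register_rx(&sta, FTYPE_MGMT | STYPE_ACTION));    /* 1 */
        printf("ProbeReq: %d\n", may_register_rx(&sta, FTYPE_MGMT | STYPE_PROBE_REQ)); /* 0 */
        return 0;
}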
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 37902a54e9c1..9c84825803ce 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -136,6 +136,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
136 .len = sizeof(struct nl80211_sta_flag_update), 136 .len = sizeof(struct nl80211_sta_flag_update),
137 }, 137 },
138 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, 138 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
139 [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
140 [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
139 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, 141 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
140 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, 142 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
141 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, 143 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
@@ -156,6 +158,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
156 158
157 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, 159 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
158 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, 160 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
161 [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
159}; 162};
160 163
161/* policy for the attributes */ 164/* policy for the attributes */
@@ -407,12 +410,14 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
407 switch (wdev->iftype) { 410 switch (wdev->iftype) {
408 case NL80211_IFTYPE_AP: 411 case NL80211_IFTYPE_AP:
409 case NL80211_IFTYPE_AP_VLAN: 412 case NL80211_IFTYPE_AP_VLAN:
413 case NL80211_IFTYPE_P2P_GO:
410 break; 414 break;
411 case NL80211_IFTYPE_ADHOC: 415 case NL80211_IFTYPE_ADHOC:
412 if (!wdev->current_bss) 416 if (!wdev->current_bss)
413 return -ENOLINK; 417 return -ENOLINK;
414 break; 418 break;
415 case NL80211_IFTYPE_STATION: 419 case NL80211_IFTYPE_STATION:
420 case NL80211_IFTYPE_P2P_CLIENT:
416 if (wdev->sme_state != CFG80211_SME_CONNECTED) 421 if (wdev->sme_state != CFG80211_SME_CONNECTED)
417 return -ENOLINK; 422 return -ENOLINK;
418 break; 423 break;
@@ -437,6 +442,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
437 struct ieee80211_rate *rate; 442 struct ieee80211_rate *rate;
438 int i; 443 int i;
439 u16 ifmodes = dev->wiphy.interface_modes; 444 u16 ifmodes = dev->wiphy.interface_modes;
445 const struct ieee80211_txrx_stypes *mgmt_stypes =
446 dev->wiphy.mgmt_stypes;
440 447
441 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); 448 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY);
442 if (!hdr) 449 if (!hdr)
@@ -471,6 +478,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
471 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, 478 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
472 dev->wiphy.max_num_pmkids); 479 dev->wiphy.max_num_pmkids);
473 480
481 if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
482 NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
483
474 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 484 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
475 if (!nl_modes) 485 if (!nl_modes)
476 goto nla_put_failure; 486 goto nla_put_failure;
@@ -587,7 +597,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
587 CMD(flush_pmksa, FLUSH_PMKSA); 597 CMD(flush_pmksa, FLUSH_PMKSA);
588 CMD(remain_on_channel, REMAIN_ON_CHANNEL); 598 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
589 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); 599 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
590 CMD(action, ACTION); 600 CMD(mgmt_tx, FRAME);
591 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { 601 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
592 i++; 602 i++;
593 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); 603 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -608,6 +618,55 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
608 618
609 nla_nest_end(msg, nl_cmds); 619 nla_nest_end(msg, nl_cmds);
610 620
621 if (mgmt_stypes) {
622 u16 stypes;
623 struct nlattr *nl_ftypes, *nl_ifs;
624 enum nl80211_iftype ift;
625
626 nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
627 if (!nl_ifs)
628 goto nla_put_failure;
629
630 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
631 nl_ftypes = nla_nest_start(msg, ift);
632 if (!nl_ftypes)
633 goto nla_put_failure;
634 i = 0;
635 stypes = mgmt_stypes[ift].tx;
636 while (stypes) {
637 if (stypes & 1)
638 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
639 (i << 4) | IEEE80211_FTYPE_MGMT);
640 stypes >>= 1;
641 i++;
642 }
643 nla_nest_end(msg, nl_ftypes);
644 }
645
646 nla_nest_end(msg, nl_ifs);
647
648 nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
649 if (!nl_ifs)
650 goto nla_put_failure;
651
652 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
653 nl_ftypes = nla_nest_start(msg, ift);
654 if (!nl_ftypes)
655 goto nla_put_failure;
656 i = 0;
657 stypes = mgmt_stypes[ift].rx;
658 while (stypes) {
659 if (stypes & 1)
660 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
661 (i << 4) | IEEE80211_FTYPE_MGMT);
662 stypes >>= 1;
663 i++;
664 }
665 nla_nest_end(msg, nl_ftypes);
666 }
667 nla_nest_end(msg, nl_ifs);
668 }
669
611 return genlmsg_end(msg, hdr); 670 return genlmsg_end(msg, hdr);
612 671
613 nla_put_failure: 672 nla_put_failure:
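The nested attributes added above advertise, per interface type, every management subtype the driver can transmit or receive, encoding each set bit as (subtype << 4) | IEEE80211_FTYPE_MGMT. A sketch of the bit walk that prints the values instead of emitting netlink attributes:

#include <stdio.h>
#include <stdint.h>

#define FTYPE_MGMT 0x0000

static void emit_frame_types(uint16_t stypes)
{
        int i = 0;

        while (stypes) {
                if (stypes & 1)
                        printf("  frame type 0x%04x\n", (i << 4) | FTYPE_MGMT);
                stypes >>= 1;
                i++;
        }
}

int main(void)
{
        /* e.g. Probe Request (subtype 4) and Action (subtype 13) receivable */
        uint16_t rx = (1u << 4) | (1u << 13);

        puts("RX frame types advertised:");
        emit_frame_types(rx);   /* 0x0040 and 0x00d0 */
        return 0;
}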
@@ -709,7 +768,8 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
709 wdev->iftype == NL80211_IFTYPE_AP || 768 wdev->iftype == NL80211_IFTYPE_AP ||
710 wdev->iftype == NL80211_IFTYPE_WDS || 769 wdev->iftype == NL80211_IFTYPE_WDS ||
711 wdev->iftype == NL80211_IFTYPE_MESH_POINT || 770 wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
712 wdev->iftype == NL80211_IFTYPE_MONITOR; 771 wdev->iftype == NL80211_IFTYPE_MONITOR ||
772 wdev->iftype == NL80211_IFTYPE_P2P_GO;
713} 773}
714 774
715static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, 775static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
@@ -776,7 +836,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
776 struct cfg80211_registered_device *rdev; 836 struct cfg80211_registered_device *rdev;
777 struct net_device *netdev = NULL; 837 struct net_device *netdev = NULL;
778 struct wireless_dev *wdev; 838 struct wireless_dev *wdev;
779 int result, rem_txq_params = 0; 839 int result = 0, rem_txq_params = 0;
780 struct nlattr *nl_txq_params; 840 struct nlattr *nl_txq_params;
781 u32 changed; 841 u32 changed;
782 u8 retry_short = 0, retry_long = 0; 842 u8 retry_short = 0, retry_long = 0;
@@ -1636,7 +1696,8 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1636 if (err) 1696 if (err)
1637 goto unlock_rtnl; 1697 goto unlock_rtnl;
1638 1698
1639 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { 1699 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
1700 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
1640 err = -EOPNOTSUPP; 1701 err = -EOPNOTSUPP;
1641 goto out; 1702 goto out;
1642 } 1703 }
@@ -1728,7 +1789,8 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
1728 goto out; 1789 goto out;
1729 } 1790 }
1730 1791
1731 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { 1792 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
1793 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
1732 err = -EOPNOTSUPP; 1794 err = -EOPNOTSUPP;
1733 goto out; 1795 goto out;
1734 } 1796 }
@@ -2071,10 +2133,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2071 switch (dev->ieee80211_ptr->iftype) { 2133 switch (dev->ieee80211_ptr->iftype) {
2072 case NL80211_IFTYPE_AP: 2134 case NL80211_IFTYPE_AP:
2073 case NL80211_IFTYPE_AP_VLAN: 2135 case NL80211_IFTYPE_AP_VLAN:
2136 case NL80211_IFTYPE_P2P_GO:
2074 /* disallow mesh-specific things */ 2137 /* disallow mesh-specific things */
2075 if (params.plink_action) 2138 if (params.plink_action)
2076 err = -EINVAL; 2139 err = -EINVAL;
2077 break; 2140 break;
2141 case NL80211_IFTYPE_P2P_CLIENT:
2078 case NL80211_IFTYPE_STATION: 2142 case NL80211_IFTYPE_STATION:
2079 /* disallow everything but AUTHORIZED flag */ 2143 /* disallow everything but AUTHORIZED flag */
2080 if (params.plink_action) 2144 if (params.plink_action)
@@ -2176,7 +2240,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2176 goto out_rtnl; 2240 goto out_rtnl;
2177 2241
2178 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2242 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2179 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { 2243 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2244 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
2180 err = -EINVAL; 2245 err = -EINVAL;
2181 goto out; 2246 goto out;
2182 } 2247 }
@@ -2229,7 +2294,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
2229 2294
2230 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2295 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2231 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && 2296 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2232 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { 2297 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
2298 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
2233 err = -EINVAL; 2299 err = -EINVAL;
2234 goto out; 2300 goto out;
2235 } 2301 }
@@ -2603,7 +2669,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
2603 goto out; 2669 goto out;
2604 } 2670 }
2605 2671
2606 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { 2672 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2673 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
2607 err = -EOPNOTSUPP; 2674 err = -EOPNOTSUPP;
2608 goto out; 2675 goto out;
2609 } 2676 }
@@ -3306,6 +3373,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
3306 } 3373 }
3307 3374
3308 switch (wdev->iftype) { 3375 switch (wdev->iftype) {
3376 case NL80211_IFTYPE_P2P_CLIENT:
3309 case NL80211_IFTYPE_STATION: 3377 case NL80211_IFTYPE_STATION:
3310 if (intbss == wdev->current_bss) 3378 if (intbss == wdev->current_bss)
3311 NLA_PUT_U32(msg, NL80211_BSS_STATUS, 3379 NLA_PUT_U32(msg, NL80211_BSS_STATUS,
@@ -3572,12 +3640,28 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3572 if (err) 3640 if (err)
3573 goto unlock_rtnl; 3641 goto unlock_rtnl;
3574 3642
3643 if (key.idx >= 0) {
3644 int i;
3645 bool ok = false;
3646 for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) {
3647 if (key.p.cipher == rdev->wiphy.cipher_suites[i]) {
3648 ok = true;
3649 break;
3650 }
3651 }
3652 if (!ok) {
3653 err = -EINVAL;
3654 goto out;
3655 }
3656 }
3657
3575 if (!rdev->ops->auth) { 3658 if (!rdev->ops->auth) {
3576 err = -EOPNOTSUPP; 3659 err = -EOPNOTSUPP;
3577 goto out; 3660 goto out;
3578 } 3661 }
3579 3662
3580 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3663 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3664 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
3581 err = -EOPNOTSUPP; 3665 err = -EOPNOTSUPP;
3582 goto out; 3666 goto out;
3583 } 3667 }
@@ -3624,7 +3708,8 @@ unlock_rtnl:
3624 return err; 3708 return err;
3625} 3709}
3626 3710
3627static int nl80211_crypto_settings(struct genl_info *info, 3711static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
3712 struct genl_info *info,
3628 struct cfg80211_crypto_settings *settings, 3713 struct cfg80211_crypto_settings *settings,
3629 int cipher_limit) 3714 int cipher_limit)
3630{ 3715{
@@ -3632,6 +3717,19 @@ static int nl80211_crypto_settings(struct genl_info *info,
3632 3717
3633 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; 3718 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT];
3634 3719
3720 if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
3721 u16 proto;
3722 proto = nla_get_u16(
3723 info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
3724 settings->control_port_ethertype = cpu_to_be16(proto);
3725 if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
3726 proto != ETH_P_PAE)
3727 return -EINVAL;
3728 if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT])
3729 settings->control_port_no_encrypt = true;
3730 } else
3731 settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);
3732
3635 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { 3733 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
3636 void *data; 3734 void *data;
3637 int len, i; 3735 int len, i;
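nl80211_crypto_settings() now accepts an optional control-port ethertype (rejected unless the wiphy sets WIPHY_FLAG_CONTROL_PORT_PROTOCOL or the value is plain EAPOL) plus a no-encrypt flag, with ETH_P_PAE staying the default. A sketch of the same decision (the settings struct and return codes are simplified stand-ins; byte-order handling is omitted):

#include <stdio.h>
#include <stdint.h>

#define ETH_P_PAE 0x888E        /* IEEE 802.1X EAPOL */

struct crypto_settings {
        uint16_t control_port_ethertype;
        int control_port_no_encrypt;
};

static int set_control_port(struct crypto_settings *s, const uint16_t *proto,
                            int no_encrypt, int wiphy_allows_any_proto)
{
        if (!proto) {                           /* attribute absent: default to EAPOL */
                s->control_port_ethertype = ETH_P_PAE;
                return 0;
        }
        if (!wiphy_allows_any_proto && *proto != ETH_P_PAE)
                return -1;                      /* -EINVAL in the kernel */
        s->control_port_ethertype = *proto;
        s->control_port_no_encrypt = no_encrypt;
        return 0;
}

int main(void)
{
        struct crypto_settings s = { 0 };
        uint16_t wai = 0x88B4;                  /* WAI, as used by WAPI */

        printf("default:      %d (0x%04x)\n",
               set_control_port(&s, NULL, 0, 0), s.control_port_ethertype);
        printf("WAI, no flag: %d\n", set_control_port(&s, &wai, 1, 0));  /* rejected */
        printf("WAI, flagged: %d\n", set_control_port(&s, &wai, 1, 1));  /* accepted */
        return 0;
}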
@@ -3718,7 +3816,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3718 goto out; 3816 goto out;
3719 } 3817 }
3720 3818
3721 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3819 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3820 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
3722 err = -EOPNOTSUPP; 3821 err = -EOPNOTSUPP;
3723 goto out; 3822 goto out;
3724 } 3823 }
@@ -3759,7 +3858,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3759 if (info->attrs[NL80211_ATTR_PREV_BSSID]) 3858 if (info->attrs[NL80211_ATTR_PREV_BSSID])
3760 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); 3859 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
3761 3860
3762 err = nl80211_crypto_settings(info, &crypto, 1); 3861 err = nl80211_crypto_settings(rdev, info, &crypto, 1);
3763 if (!err) 3862 if (!err)
3764 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, 3863 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
3765 ssid, ssid_len, ie, ie_len, use_mfp, 3864 ssid, ssid_len, ie, ie_len, use_mfp,
@@ -3802,7 +3901,8 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3802 goto out; 3901 goto out;
3803 } 3902 }
3804 3903
3805 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3904 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3905 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
3806 err = -EOPNOTSUPP; 3906 err = -EOPNOTSUPP;
3807 goto out; 3907 goto out;
3808 } 3908 }
@@ -3868,7 +3968,8 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3868 goto out; 3968 goto out;
3869 } 3969 }
3870 3970
3871 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3971 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3972 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
3872 err = -EOPNOTSUPP; 3973 err = -EOPNOTSUPP;
3873 goto out; 3974 goto out;
3874 } 3975 }
@@ -4018,23 +4119,6 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
4018 goto out; 4119 goto out;
4019 } 4120 }
4020 } 4121 }
4021 } else {
4022 /*
4023 * If no rates were explicitly configured,
4024 * use the mandatory rate set for 11b or
4025 * 11a for maximum compatibility.
4026 */
4027 struct ieee80211_supported_band *sband =
4028 wiphy->bands[ibss.channel->band];
4029 int j;
4030 u32 flag = ibss.channel->band == IEEE80211_BAND_5GHZ ?
4031 IEEE80211_RATE_MANDATORY_A :
4032 IEEE80211_RATE_MANDATORY_B;
4033
4034 for (j = 0; j < sband->n_bitrates; j++) {
4035 if (sband->bitrates[j].flags & flag)
4036 ibss.basic_rates |= BIT(j);
4037 }
4038 } 4122 }
4039 4123
4040 err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); 4124 err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
@@ -4236,7 +4320,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
4236 4320
4237 connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; 4321 connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];
4238 4322
4239 err = nl80211_crypto_settings(info, &connect.crypto, 4323 err = nl80211_crypto_settings(rdev, info, &connect.crypto,
4240 NL80211_MAX_NR_CIPHER_SUITES); 4324 NL80211_MAX_NR_CIPHER_SUITES);
4241 if (err) 4325 if (err)
4242 return err; 4326 return err;
@@ -4246,7 +4330,8 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
4246 if (err) 4330 if (err)
4247 goto unlock_rtnl; 4331 goto unlock_rtnl;
4248 4332
4249 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4333 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4334 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4250 err = -EOPNOTSUPP; 4335 err = -EOPNOTSUPP;
4251 goto out; 4336 goto out;
4252 } 4337 }
@@ -4322,7 +4407,8 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
4322 if (err) 4407 if (err)
4323 goto unlock_rtnl; 4408 goto unlock_rtnl;
4324 4409
4325 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4410 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4411 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4326 err = -EOPNOTSUPP; 4412 err = -EOPNOTSUPP;
4327 goto out; 4413 goto out;
4328 } 4414 }
@@ -4410,7 +4496,8 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
4410 pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); 4496 pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
4411 pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 4497 pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
4412 4498
4413 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4499 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4500 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4414 err = -EOPNOTSUPP; 4501 err = -EOPNOTSUPP;
4415 goto out; 4502 goto out;
4416 } 4503 }
@@ -4455,7 +4542,8 @@ static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info)
4455 if (err) 4542 if (err)
4456 goto out_rtnl; 4543 goto out_rtnl;
4457 4544
4458 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4545 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4546 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4459 err = -EOPNOTSUPP; 4547 err = -EOPNOTSUPP;
4460 goto out; 4548 goto out;
4461 } 4549 }
@@ -4717,17 +4805,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
4717 return err; 4805 return err;
4718} 4806}
4719 4807
4720static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) 4808static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
4721{ 4809{
4722 struct cfg80211_registered_device *rdev; 4810 struct cfg80211_registered_device *rdev;
4723 struct net_device *dev; 4811 struct net_device *dev;
4812 u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION;
4724 int err; 4813 int err;
4725 4814
4726 if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) 4815 if (!info->attrs[NL80211_ATTR_FRAME_MATCH])
4727 return -EINVAL; 4816 return -EINVAL;
4728 4817
4729 if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) 4818 if (info->attrs[NL80211_ATTR_FRAME_TYPE])
4730 return -EINVAL; 4819 frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]);
4731 4820
4732 rtnl_lock(); 4821 rtnl_lock();
4733 4822
@@ -4736,18 +4825,20 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
4736 goto unlock_rtnl; 4825 goto unlock_rtnl;
4737 4826
4738 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && 4827 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4739 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { 4828 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
4829 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4740 err = -EOPNOTSUPP; 4830 err = -EOPNOTSUPP;
4741 goto out; 4831 goto out;
4742 } 4832 }
4743 4833
4744 /* not much point in registering if we can't reply */ 4834 /* not much point in registering if we can't reply */
4745 if (!rdev->ops->action) { 4835 if (!rdev->ops->mgmt_tx) {
4746 err = -EOPNOTSUPP; 4836 err = -EOPNOTSUPP;
4747 goto out; 4837 goto out;
4748 } 4838 }
4749 4839
4750 err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, 4840 err = cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid,
4841 frame_type,
4751 nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), 4842 nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
4752 nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); 4843 nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]));
4753 out: 4844 out:
@@ -4758,7 +4849,7 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
4758 return err; 4849 return err;
4759} 4850}
4760 4851
4761static int nl80211_action(struct sk_buff *skb, struct genl_info *info) 4852static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
4762{ 4853{
4763 struct cfg80211_registered_device *rdev; 4854 struct cfg80211_registered_device *rdev;
4764 struct net_device *dev; 4855 struct net_device *dev;
@@ -4781,13 +4872,14 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4781 if (err) 4872 if (err)
4782 goto unlock_rtnl; 4873 goto unlock_rtnl;
4783 4874
4784 if (!rdev->ops->action) { 4875 if (!rdev->ops->mgmt_tx) {
4785 err = -EOPNOTSUPP; 4876 err = -EOPNOTSUPP;
4786 goto out; 4877 goto out;
4787 } 4878 }
4788 4879
4789 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && 4880 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4790 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { 4881 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
4882 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) {
4791 err = -EOPNOTSUPP; 4883 err = -EOPNOTSUPP;
4792 goto out; 4884 goto out;
4793 } 4885 }
@@ -4824,17 +4916,17 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4824 } 4916 }
4825 4917
4826 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 4918 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
4827 NL80211_CMD_ACTION); 4919 NL80211_CMD_FRAME);
4828 4920
4829 if (IS_ERR(hdr)) { 4921 if (IS_ERR(hdr)) {
4830 err = PTR_ERR(hdr); 4922 err = PTR_ERR(hdr);
4831 goto free_msg; 4923 goto free_msg;
4832 } 4924 }
4833 err = cfg80211_mlme_action(rdev, dev, chan, channel_type, 4925 err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type,
4834 channel_type_valid, 4926 channel_type_valid,
4835 nla_data(info->attrs[NL80211_ATTR_FRAME]), 4927 nla_data(info->attrs[NL80211_ATTR_FRAME]),
4836 nla_len(info->attrs[NL80211_ATTR_FRAME]), 4928 nla_len(info->attrs[NL80211_ATTR_FRAME]),
4837 &cookie); 4929 &cookie);
4838 if (err) 4930 if (err)
4839 goto free_msg; 4931 goto free_msg;
4840 4932
@@ -4881,7 +4973,7 @@ static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
4881 4973
4882 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 4974 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4883 if (err) 4975 if (err)
4884 goto unlock_rdev; 4976 goto unlock_rtnl;
4885 4977
4886 wdev = dev->ieee80211_ptr; 4978 wdev = dev->ieee80211_ptr;
4887 4979
@@ -4905,6 +4997,7 @@ static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
4905unlock_rdev: 4997unlock_rdev:
4906 cfg80211_unlock_rdev(rdev); 4998 cfg80211_unlock_rdev(rdev);
4907 dev_put(dev); 4999 dev_put(dev);
5000unlock_rtnl:
4908 rtnl_unlock(); 5001 rtnl_unlock();
4909 5002
4910out: 5003out:
@@ -5005,7 +5098,8 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
5005 goto unlock_rdev; 5098 goto unlock_rdev;
5006 } 5099 }
5007 5100
5008 if (wdev->iftype != NL80211_IFTYPE_STATION) { 5101 if (wdev->iftype != NL80211_IFTYPE_STATION &&
5102 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) {
5009 err = -EOPNOTSUPP; 5103 err = -EOPNOTSUPP;
5010 goto unlock_rdev; 5104 goto unlock_rdev;
5011 } 5105 }
@@ -5333,14 +5427,14 @@ static struct genl_ops nl80211_ops[] = {
5333 .flags = GENL_ADMIN_PERM, 5427 .flags = GENL_ADMIN_PERM,
5334 }, 5428 },
5335 { 5429 {
5336 .cmd = NL80211_CMD_REGISTER_ACTION, 5430 .cmd = NL80211_CMD_REGISTER_FRAME,
5337 .doit = nl80211_register_action, 5431 .doit = nl80211_register_mgmt,
5338 .policy = nl80211_policy, 5432 .policy = nl80211_policy,
5339 .flags = GENL_ADMIN_PERM, 5433 .flags = GENL_ADMIN_PERM,
5340 }, 5434 },
5341 { 5435 {
5342 .cmd = NL80211_CMD_ACTION, 5436 .cmd = NL80211_CMD_FRAME,
5343 .doit = nl80211_action, 5437 .doit = nl80211_tx_mgmt,
5344 .policy = nl80211_policy, 5438 .policy = nl80211_policy,
5345 .flags = GENL_ADMIN_PERM, 5439 .flags = GENL_ADMIN_PERM,
5346 }, 5440 },
@@ -6040,9 +6134,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
6040 nl80211_mlme_mcgrp.id, gfp); 6134 nl80211_mlme_mcgrp.id, gfp);
6041} 6135}
6042 6136
6043int nl80211_send_action(struct cfg80211_registered_device *rdev, 6137int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
6044 struct net_device *netdev, u32 nlpid, 6138 struct net_device *netdev, u32 nlpid,
6045 int freq, const u8 *buf, size_t len, gfp_t gfp) 6139 int freq, const u8 *buf, size_t len, gfp_t gfp)
6046{ 6140{
6047 struct sk_buff *msg; 6141 struct sk_buff *msg;
6048 void *hdr; 6142 void *hdr;
@@ -6052,7 +6146,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
6052 if (!msg) 6146 if (!msg)
6053 return -ENOMEM; 6147 return -ENOMEM;
6054 6148
6055 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); 6149 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
6056 if (!hdr) { 6150 if (!hdr) {
6057 nlmsg_free(msg); 6151 nlmsg_free(msg);
6058 return -ENOMEM; 6152 return -ENOMEM;
@@ -6080,10 +6174,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
6080 return -ENOBUFS; 6174 return -ENOBUFS;
6081} 6175}
6082 6176
6083void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, 6177void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
6084 struct net_device *netdev, u64 cookie, 6178 struct net_device *netdev, u64 cookie,
6085 const u8 *buf, size_t len, bool ack, 6179 const u8 *buf, size_t len, bool ack,
6086 gfp_t gfp) 6180 gfp_t gfp)
6087{ 6181{
6088 struct sk_buff *msg; 6182 struct sk_buff *msg;
6089 void *hdr; 6183 void *hdr;
@@ -6092,7 +6186,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
6092 if (!msg) 6186 if (!msg)
6093 return; 6187 return;
6094 6188
6095 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); 6189 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS);
6096 if (!hdr) { 6190 if (!hdr) {
6097 nlmsg_free(msg); 6191 nlmsg_free(msg);
6098 return; 6192 return;
@@ -6179,7 +6273,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
6179 6273
6180 list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) 6274 list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list)
6181 list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) 6275 list_for_each_entry_rcu(wdev, &rdev->netdev_list, list)
6182 cfg80211_mlme_unregister_actions(wdev, notify->pid); 6276 cfg80211_mlme_unregister_socket(wdev, notify->pid);
6183 6277
6184 rcu_read_unlock(); 6278 rcu_read_unlock();
6185 6279
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 2ad7fbc7d9f1..30d2f939150d 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
74 struct net_device *dev, const u8 *mac_addr, 74 struct net_device *dev, const u8 *mac_addr,
75 struct station_info *sinfo, gfp_t gfp); 75 struct station_info *sinfo, gfp_t gfp);
76 76
77int nl80211_send_action(struct cfg80211_registered_device *rdev, 77int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
78 struct net_device *netdev, u32 nlpid, int freq, 78 struct net_device *netdev, u32 nlpid, int freq,
79 const u8 *buf, size_t len, gfp_t gfp); 79 const u8 *buf, size_t len, gfp_t gfp);
80void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, 80void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
81 struct net_device *netdev, u64 cookie, 81 struct net_device *netdev, u64 cookie,
82 const u8 *buf, size_t len, bool ack, 82 const u8 *buf, size_t len, bool ack,
83 gfp_t gfp); 83 gfp_t gfp);
84 84
85void 85void
86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, 86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 1332c445d1c7..c774bc0f155e 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -14,6 +14,7 @@
14 * See COPYING for more details. 14 * See COPYING for more details.
15 */ 15 */
16 16
17#include <linux/kernel.h>
17#include <net/cfg80211.h> 18#include <net/cfg80211.h>
18#include <net/ieee80211_radiotap.h> 19#include <net/ieee80211_radiotap.h>
19#include <asm/unaligned.h> 20#include <asm/unaligned.h>
@@ -45,7 +46,7 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = {
45}; 46};
46 47
47static const struct ieee80211_radiotap_namespace radiotap_ns = { 48static const struct ieee80211_radiotap_namespace radiotap_ns = {
48 .n_bits = sizeof(rtap_namespace_sizes) / sizeof(rtap_namespace_sizes[0]), 49 .n_bits = ARRAY_SIZE(rtap_namespace_sizes),
49 .align_size = rtap_namespace_sizes, 50 .align_size = rtap_namespace_sizes,
50}; 51};
51 52
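
Aside (illustrative): the radiotap.c hunk above only swaps an open-coded element count for the kernel's ARRAY_SIZE() helper. A simplified userspace equivalent, without the kernel's additional must-be-an-array type check; the table entries below are placeholders, not the real rtap_namespace_sizes values:

    #include <stdio.h>

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    struct radiotap_align_size { int align, size; };

    static const struct radiotap_align_size sizes[] = {
        { 8, 8 }, { 1, 1 }, { 1, 1 }, { 2, 4 },
    };

    int main(void)
    {
        /* Same value the open-coded sizeof()/sizeof() expression produced. */
        printf("n_bits = %zu\n", ARRAY_SIZE(sizes));
        return 0;
    }
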
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f180db0de66c..d14bbf960c18 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -36,6 +36,7 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/random.h> 38#include <linux/random.h>
39#include <linux/ctype.h>
39#include <linux/nl80211.h> 40#include <linux/nl80211.h>
40#include <linux/platform_device.h> 41#include <linux/platform_device.h>
41#include <net/cfg80211.h> 42#include <net/cfg80211.h>
@@ -73,7 +74,11 @@ const struct ieee80211_regdomain *cfg80211_regdomain;
73 * - last_request 74 * - last_request
74 */ 75 */
75static DEFINE_MUTEX(reg_mutex); 76static DEFINE_MUTEX(reg_mutex);
76#define assert_reg_lock() WARN_ON(!mutex_is_locked(&reg_mutex)) 77
78static inline void assert_reg_lock(void)
79{
80 lockdep_assert_held(&reg_mutex);
81}
77 82
78/* Used to queue up regulatory hints */ 83/* Used to queue up regulatory hints */
79static LIST_HEAD(reg_requests_list); 84static LIST_HEAD(reg_requests_list);
@@ -181,14 +186,6 @@ static bool is_alpha2_set(const char *alpha2)
181 return false; 186 return false;
182} 187}
183 188
184static bool is_alpha_upper(char letter)
185{
186 /* ASCII A - Z */
187 if (letter >= 65 && letter <= 90)
188 return true;
189 return false;
190}
191
192static bool is_unknown_alpha2(const char *alpha2) 189static bool is_unknown_alpha2(const char *alpha2)
193{ 190{
194 if (!alpha2) 191 if (!alpha2)
@@ -220,7 +217,7 @@ static bool is_an_alpha2(const char *alpha2)
220{ 217{
221 if (!alpha2) 218 if (!alpha2)
222 return false; 219 return false;
223 if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1])) 220 if (isalpha(alpha2[0]) && isalpha(alpha2[1]))
224 return true; 221 return true;
225 return false; 222 return false;
226} 223}
@@ -1399,6 +1396,11 @@ static DECLARE_WORK(reg_work, reg_todo);
1399 1396
1400static void queue_regulatory_request(struct regulatory_request *request) 1397static void queue_regulatory_request(struct regulatory_request *request)
1401{ 1398{
1399 if (isalpha(request->alpha2[0]))
1400 request->alpha2[0] = toupper(request->alpha2[0]);
1401 if (isalpha(request->alpha2[1]))
1402 request->alpha2[1] = toupper(request->alpha2[1]);
1403
1402 spin_lock(&reg_requests_lock); 1404 spin_lock(&reg_requests_lock);
1403 list_add_tail(&request->list, &reg_requests_list); 1405 list_add_tail(&request->list, &reg_requests_list);
1404 spin_unlock(&reg_requests_lock); 1406 spin_unlock(&reg_requests_lock);
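
Aside (illustrative): the reg.c hunks drop the hand-rolled is_alpha_upper() in favour of the <linux/ctype.h> helpers and then upper-case the alpha2 country code before the request is queued, so lower-case codes from drivers or userspace are accepted and normalized. A standalone userspace mirror of that behaviour; the two helper names are taken from the patch, while the main() harness is hypothetical:

    #include <ctype.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool is_an_alpha2(const char *alpha2)
    {
        if (!alpha2)
            return false;
        return isalpha((unsigned char)alpha2[0]) && isalpha((unsigned char)alpha2[1]);
    }

    /* Mirrors the normalization added to queue_regulatory_request(). */
    static void normalize_alpha2(char *alpha2)
    {
        if (isalpha((unsigned char)alpha2[0]))
            alpha2[0] = toupper((unsigned char)alpha2[0]);
        if (isalpha((unsigned char)alpha2[1]))
            alpha2[1] = toupper((unsigned char)alpha2[1]);
    }

    int main(void)
    {
        char cc[3] = "de";

        if (is_an_alpha2(cc)) {
            normalize_alpha2(cc);
            printf("%s\n", cc);   /* prints "DE" */
        }
        return 0;
    }
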
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index a8c2d6b877ae..f161b9844542 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -411,7 +411,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
411 411
412 ASSERT_WDEV_LOCK(wdev); 412 ASSERT_WDEV_LOCK(wdev);
413 413
414 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 414 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
415 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
415 return; 416 return;
416 417
417 if (wdev->sme_state != CFG80211_SME_CONNECTING) 418 if (wdev->sme_state != CFG80211_SME_CONNECTING)
@@ -548,7 +549,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid,
548 549
549 ASSERT_WDEV_LOCK(wdev); 550 ASSERT_WDEV_LOCK(wdev);
550 551
551 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 552 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
553 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
552 return; 554 return;
553 555
554 if (wdev->sme_state != CFG80211_SME_CONNECTED) 556 if (wdev->sme_state != CFG80211_SME_CONNECTED)
@@ -644,7 +646,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
644 646
645 ASSERT_WDEV_LOCK(wdev); 647 ASSERT_WDEV_LOCK(wdev);
646 648
647 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 649 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
650 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
648 return; 651 return;
649 652
650 if (wdev->sme_state != CFG80211_SME_CONNECTED) 653 if (wdev->sme_state != CFG80211_SME_CONNECTED)
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9f2cef3e0ca0..74a9e3cce452 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -110,6 +110,13 @@ static int wiphy_resume(struct device *dev)
110 return ret; 110 return ret;
111} 111}
112 112
113static const void *wiphy_namespace(struct device *d)
114{
115 struct wiphy *wiphy = container_of(d, struct wiphy, dev);
116
117 return wiphy_net(wiphy);
118}
119
113struct class ieee80211_class = { 120struct class ieee80211_class = {
114 .name = "ieee80211", 121 .name = "ieee80211",
115 .owner = THIS_MODULE, 122 .owner = THIS_MODULE,
@@ -120,6 +127,8 @@ struct class ieee80211_class = {
120#endif 127#endif
121 .suspend = wiphy_suspend, 128 .suspend = wiphy_suspend,
122 .resume = wiphy_resume, 129 .resume = wiphy_resume,
130 .ns_type = &net_ns_type_operations,
131 .namespace = wiphy_namespace,
123}; 132};
124 133
125int wiphy_sysfs_init(void) 134int wiphy_sysfs_init(void)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 0c8a1e8b7690..fb5448f7d55a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -183,7 +183,14 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
183 return -EINVAL; 183 return -EINVAL;
184 break; 184 break;
185 default: 185 default:
186 return -EINVAL; 186 /*
187 * We don't know anything about this algorithm,
188 * allow using it -- but the driver must check
189 * all parameters! We still check below whether
190 * or not the driver supports this algorithm,
191 * of course.
192 */
193 break;
187 } 194 }
188 195
189 if (params->seq) { 196 if (params->seq) {
@@ -221,7 +228,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) =
221 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 228 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
222EXPORT_SYMBOL(bridge_tunnel_header); 229EXPORT_SYMBOL(bridge_tunnel_header);
223 230
224unsigned int ieee80211_hdrlen(__le16 fc) 231unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc)
225{ 232{
226 unsigned int hdrlen = 24; 233 unsigned int hdrlen = 24;
227 234
@@ -319,7 +326,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
319 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 326 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
320 case cpu_to_le16(IEEE80211_FCTL_TODS): 327 case cpu_to_le16(IEEE80211_FCTL_TODS):
321 if (unlikely(iftype != NL80211_IFTYPE_AP && 328 if (unlikely(iftype != NL80211_IFTYPE_AP &&
322 iftype != NL80211_IFTYPE_AP_VLAN)) 329 iftype != NL80211_IFTYPE_AP_VLAN &&
330 iftype != NL80211_IFTYPE_P2P_GO))
323 return -1; 331 return -1;
324 break; 332 break;
325 case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): 333 case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
@@ -347,7 +355,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
347 break; 355 break;
348 case cpu_to_le16(IEEE80211_FCTL_FROMDS): 356 case cpu_to_le16(IEEE80211_FCTL_FROMDS):
349 if ((iftype != NL80211_IFTYPE_STATION && 357 if ((iftype != NL80211_IFTYPE_STATION &&
350 iftype != NL80211_IFTYPE_MESH_POINT) || 358 iftype != NL80211_IFTYPE_P2P_CLIENT &&
359 iftype != NL80211_IFTYPE_MESH_POINT) ||
351 (is_multicast_ether_addr(dst) && 360 (is_multicast_ether_addr(dst) &&
352 !compare_ether_addr(src, addr))) 361 !compare_ether_addr(src, addr)))
353 return -1; 362 return -1;
@@ -424,6 +433,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
424 switch (iftype) { 433 switch (iftype) {
425 case NL80211_IFTYPE_AP: 434 case NL80211_IFTYPE_AP:
426 case NL80211_IFTYPE_AP_VLAN: 435 case NL80211_IFTYPE_AP_VLAN:
436 case NL80211_IFTYPE_P2P_GO:
427 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); 437 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
428 /* DA BSSID SA */ 438 /* DA BSSID SA */
429 memcpy(hdr.addr1, skb->data, ETH_ALEN); 439 memcpy(hdr.addr1, skb->data, ETH_ALEN);
@@ -432,6 +442,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
432 hdrlen = 24; 442 hdrlen = 24;
433 break; 443 break;
434 case NL80211_IFTYPE_STATION: 444 case NL80211_IFTYPE_STATION:
445 case NL80211_IFTYPE_P2P_CLIENT:
435 fc |= cpu_to_le16(IEEE80211_FCTL_TODS); 446 fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
436 /* BSSID SA DA */ 447 /* BSSID SA DA */
437 memcpy(hdr.addr1, bssid, ETH_ALEN); 448 memcpy(hdr.addr1, bssid, ETH_ALEN);
@@ -771,7 +782,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
771 782
772 /* if it's part of a bridge, reject changing type to station/ibss */ 783 /* if it's part of a bridge, reject changing type to station/ibss */
773 if ((dev->priv_flags & IFF_BRIDGE_PORT) && 784 if ((dev->priv_flags & IFF_BRIDGE_PORT) &&
774 (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION)) 785 (ntype == NL80211_IFTYPE_ADHOC ||
786 ntype == NL80211_IFTYPE_STATION ||
787 ntype == NL80211_IFTYPE_P2P_CLIENT))
775 return -EBUSY; 788 return -EBUSY;
776 789
777 if (ntype != otype) { 790 if (ntype != otype) {
@@ -782,6 +795,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
782 cfg80211_leave_ibss(rdev, dev, false); 795 cfg80211_leave_ibss(rdev, dev, false);
783 break; 796 break;
784 case NL80211_IFTYPE_STATION: 797 case NL80211_IFTYPE_STATION:
798 case NL80211_IFTYPE_P2P_CLIENT:
785 cfg80211_disconnect(rdev, dev, 799 cfg80211_disconnect(rdev, dev,
786 WLAN_REASON_DEAUTH_LEAVING, true); 800 WLAN_REASON_DEAUTH_LEAVING, true);
787 break; 801 break;
@@ -810,9 +824,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
810 if (dev->ieee80211_ptr->use_4addr) 824 if (dev->ieee80211_ptr->use_4addr)
811 break; 825 break;
812 /* fall through */ 826 /* fall through */
827 case NL80211_IFTYPE_P2P_CLIENT:
813 case NL80211_IFTYPE_ADHOC: 828 case NL80211_IFTYPE_ADHOC:
814 dev->priv_flags |= IFF_DONT_BRIDGE; 829 dev->priv_flags |= IFF_DONT_BRIDGE;
815 break; 830 break;
831 case NL80211_IFTYPE_P2P_GO:
816 case NL80211_IFTYPE_AP: 832 case NL80211_IFTYPE_AP:
817 case NL80211_IFTYPE_AP_VLAN: 833 case NL80211_IFTYPE_AP_VLAN:
818 case NL80211_IFTYPE_WDS: 834 case NL80211_IFTYPE_WDS:
@@ -823,7 +839,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
823 /* monitor can't bridge anyway */ 839 /* monitor can't bridge anyway */
824 break; 840 break;
825 case NL80211_IFTYPE_UNSPECIFIED: 841 case NL80211_IFTYPE_UNSPECIFIED:
826 case __NL80211_IFTYPE_AFTER_LAST: 842 case NUM_NL80211_IFTYPES:
827 /* not happening */ 843 /* not happening */
828 break; 844 break;
829 } 845 }
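
Aside (illustrative): the util.c changes above make P2P clients behave like stations and P2P GOs like APs when converting between 802.3 and 802.11 data frames. A compact sketch of the ToDS/FromDS selection that ieee80211_data_from_8023() performs, using simplified enum values and frame-control constants instead of the nl80211/ieee80211 headers:

    #include <stdio.h>

    enum iftype { STATION, P2P_CLIENT, ADHOC, AP, AP_VLAN, P2P_GO };

    #define FCTL_TODS   0x0100
    #define FCTL_FROMDS 0x0200

    /* Which DS bits an outgoing data frame carries for a given interface type. */
    static unsigned int tx_ds_bits(enum iftype type)
    {
        switch (type) {
        case AP:
        case AP_VLAN:
        case P2P_GO:          /* GO behaves like an AP: DA BSSID SA */
            return FCTL_FROMDS;
        case STATION:
        case P2P_CLIENT:      /* client behaves like a station: BSSID SA DA */
            return FCTL_TODS;
        default:              /* IBSS: neither bit set, DA SA BSSID */
            return 0;
        }
    }

    int main(void)
    {
        printf("P2P client: 0x%04x\n", tx_ds_bits(P2P_CLIENT));
        printf("P2P GO:     0x%04x\n", tx_ds_bits(P2P_GO));
        return 0;
    }
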
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 8f5116f5af19..dc675a3daa3d 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -611,7 +611,7 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
611#endif 611#endif
612 612
613#ifdef CONFIG_CFG80211_WEXT 613#ifdef CONFIG_CFG80211_WEXT
614 if (dev->ieee80211_ptr && dev->ieee80211_ptr && 614 if (dev->ieee80211_ptr &&
615 dev->ieee80211_ptr->wiphy && 615 dev->ieee80211_ptr->wiphy &&
616 dev->ieee80211_ptr->wiphy->wext && 616 dev->ieee80211_ptr->wiphy->wext &&
617 dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) 617 dev->ieee80211_ptr->wiphy->wext->get_wireless_stats)
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 9818198add8a..6fffe62d7c25 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -197,6 +197,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
197 wdev->wext.connect.ssid_len = len; 197 wdev->wext.connect.ssid_len = len;
198 198
199 wdev->wext.connect.crypto.control_port = false; 199 wdev->wext.connect.crypto.control_port = false;
200 wdev->wext.connect.crypto.control_port_ethertype =
201 cpu_to_be16(ETH_P_PAE);
200 202
201 err = cfg80211_mgd_wext_connect(rdev, wdev); 203 err = cfg80211_mgd_wext_connect(rdev, wdev);
202 out: 204 out:
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5e86d4e97dce..f7af98dff409 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -507,14 +507,14 @@ static int x25_listen(struct socket *sock, int backlog)
507 struct sock *sk = sock->sk; 507 struct sock *sk = sock->sk;
508 int rc = -EOPNOTSUPP; 508 int rc = -EOPNOTSUPP;
509 509
510 lock_kernel(); 510 lock_sock(sk);
511 if (sk->sk_state != TCP_LISTEN) { 511 if (sk->sk_state != TCP_LISTEN) {
512 memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN); 512 memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
513 sk->sk_max_ack_backlog = backlog; 513 sk->sk_max_ack_backlog = backlog;
514 sk->sk_state = TCP_LISTEN; 514 sk->sk_state = TCP_LISTEN;
515 rc = 0; 515 rc = 0;
516 } 516 }
517 unlock_kernel(); 517 release_sock(sk);
518 518
519 return rc; 519 return rc;
520} 520}
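
Aside (illustrative): the af_x25.c hunks replace the big kernel lock (lock_kernel()/unlock_kernel()) with the per-socket lock, so only the socket actually being operated on is serialized. A userspace analogue of the new x25_listen() shape, with a pthread mutex standing in for lock_sock()/release_sock(); names, states, and error values are stand-ins, not kernel API:

    #include <pthread.h>
    #include <stdio.h>

    #define TCP_LISTEN 10      /* stand-in state value */

    struct sock {
        pthread_mutex_t lock;  /* plays the role of the per-socket lock */
        int state;
        int max_ack_backlog;
    };

    static int x25_listen_sketch(struct sock *sk, int backlog)
    {
        int rc = -1;           /* stands in for -EOPNOTSUPP */

        pthread_mutex_lock(&sk->lock);      /* was: lock_kernel() */
        if (sk->state != TCP_LISTEN) {
            sk->max_ack_backlog = backlog;
            sk->state = TCP_LISTEN;
            rc = 0;
        }
        pthread_mutex_unlock(&sk->lock);    /* was: unlock_kernel() */
        return rc;
    }

    int main(void)
    {
        struct sock sk = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

        printf("rc=%d state=%d\n", x25_listen_sketch(&sk, 5), sk.state);
        return 0;
    }
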
@@ -688,7 +688,6 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
688 struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; 688 struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
689 int len, i, rc = 0; 689 int len, i, rc = 0;
690 690
691 lock_kernel();
692 if (!sock_flag(sk, SOCK_ZAPPED) || 691 if (!sock_flag(sk, SOCK_ZAPPED) ||
693 addr_len != sizeof(struct sockaddr_x25) || 692 addr_len != sizeof(struct sockaddr_x25) ||
694 addr->sx25_family != AF_X25) { 693 addr->sx25_family != AF_X25) {
@@ -704,12 +703,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
704 } 703 }
705 } 704 }
706 705
706 lock_sock(sk);
707 x25_sk(sk)->source_addr = addr->sx25_addr; 707 x25_sk(sk)->source_addr = addr->sx25_addr;
708 x25_insert_socket(sk); 708 x25_insert_socket(sk);
709 sock_reset_flag(sk, SOCK_ZAPPED); 709 sock_reset_flag(sk, SOCK_ZAPPED);
710 release_sock(sk);
710 SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); 711 SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
711out: 712out:
712 unlock_kernel();
713 return rc; 713 return rc;
714} 714}
715 715
@@ -751,7 +751,6 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
751 struct x25_route *rt; 751 struct x25_route *rt;
752 int rc = 0; 752 int rc = 0;
753 753
754 lock_kernel();
755 lock_sock(sk); 754 lock_sock(sk);
756 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { 755 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
757 sock->state = SS_CONNECTED; 756 sock->state = SS_CONNECTED;
@@ -829,7 +828,6 @@ out_put_route:
829 x25_route_put(rt); 828 x25_route_put(rt);
830out: 829out:
831 release_sock(sk); 830 release_sock(sk);
832 unlock_kernel();
833 return rc; 831 return rc;
834} 832}
835 833
@@ -869,8 +867,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
869 struct sk_buff *skb; 867 struct sk_buff *skb;
870 int rc = -EINVAL; 868 int rc = -EINVAL;
871 869
872 lock_kernel(); 870 if (!sk)
873 if (!sk || sk->sk_state != TCP_LISTEN)
874 goto out; 871 goto out;
875 872
876 rc = -EOPNOTSUPP; 873 rc = -EOPNOTSUPP;
@@ -878,6 +875,10 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
878 goto out; 875 goto out;
879 876
880 lock_sock(sk); 877 lock_sock(sk);
878 rc = -EINVAL;
879 if (sk->sk_state != TCP_LISTEN)
880 goto out2;
881
881 rc = x25_wait_for_data(sk, sk->sk_rcvtimeo); 882 rc = x25_wait_for_data(sk, sk->sk_rcvtimeo);
882 if (rc) 883 if (rc)
883 goto out2; 884 goto out2;
@@ -897,7 +898,6 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
897out2: 898out2:
898 release_sock(sk); 899 release_sock(sk);
899out: 900out:
900 unlock_kernel();
901 return rc; 901 return rc;
902} 902}
903 903
@@ -909,7 +909,6 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
909 struct x25_sock *x25 = x25_sk(sk); 909 struct x25_sock *x25 = x25_sk(sk);
910 int rc = 0; 910 int rc = 0;
911 911
912 lock_kernel();
913 if (peer) { 912 if (peer) {
914 if (sk->sk_state != TCP_ESTABLISHED) { 913 if (sk->sk_state != TCP_ESTABLISHED) {
915 rc = -ENOTCONN; 914 rc = -ENOTCONN;
@@ -923,19 +922,6 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
923 *uaddr_len = sizeof(*sx25); 922 *uaddr_len = sizeof(*sx25);
924 923
925out: 924out:
926 unlock_kernel();
927 return rc;
928}
929
930static unsigned int x25_datagram_poll(struct file *file, struct socket *sock,
931 poll_table *wait)
932{
933 int rc;
934
935 lock_kernel();
936 rc = datagram_poll(file, sock, wait);
937 unlock_kernel();
938
939 return rc; 925 return rc;
940} 926}
941 927
@@ -1746,7 +1732,7 @@ static const struct proto_ops x25_proto_ops = {
1746 .socketpair = sock_no_socketpair, 1732 .socketpair = sock_no_socketpair,
1747 .accept = x25_accept, 1733 .accept = x25_accept,
1748 .getname = x25_getname, 1734 .getname = x25_getname,
1749 .poll = x25_datagram_poll, 1735 .poll = datagram_poll,
1750 .ioctl = x25_ioctl, 1736 .ioctl = x25_ioctl,
1751#ifdef CONFIG_COMPAT 1737#ifdef CONFIG_COMPAT
1752 .compat_ioctl = compat_x25_ioctl, 1738 .compat_ioctl = compat_x25_ioctl,